diff --git a/cmake/external/openblas.cmake b/cmake/external/openblas.cmake index 0eeccbf7d8a1df17351c8914df6dabf005802787..0002a470d90f722e3f9106ca56d70e6bf2cea339 100644 --- a/cmake/external/openblas.cmake +++ b/cmake/external/openblas.cmake @@ -25,7 +25,12 @@ IF(NOT ${CBLAS_FOUND}) "${CBLAS_INSTALL_DIR}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}openblas${CMAKE_STATIC_LIBRARY_SUFFIX}" CACHE FILEPATH "openblas library." FORCE) - SET(COMMON_ARGS CC=${CMAKE_C_COMPILER} NO_SHARED=1 NO_LAPACK=1 libs) + IF(APPLE) + SET(OPENBLAS_CC "${CMAKE_C_COMPILER} -isysroot ${CMAKE_OSX_SYSROOT}") + SET(COMMON_ARGS CC=${OPENBLAS_CC} NO_SHARED=1 NO_LAPACK=1 libs) + ELSE() + SET(COMMON_ARGS CC=${CMAKE_C_COMPILER} NO_SHARED=1 NO_LAPACK=1 libs) + ENDIF() IF(CMAKE_CROSSCOMPILING) IF(ANDROID) @@ -40,11 +45,11 @@ IF(NOT ${CBLAS_FOUND}) SET(OPTIONAL_ARGS HOSTCC=${HOST_C_COMPILER} TARGET=${TARGET} ARM_SOFTFP_ABI=1 USE_THREAD=0) ELSEIF(RPI) # use hardfp - SET(OPENBLAS_COMMIT "v0.2.19") + SET(OPENBLAS_COMMIT "v0.2.20") SET(OPTIONAL_ARGS HOSTCC=${HOST_C_COMPILER} TARGET=ARMV7 USE_THREAD=0) ENDIF() ELSE() - SET(OPENBLAS_COMMIT "v0.2.19") + SET(OPENBLAS_COMMIT "v0.2.20") SET(OPTIONAL_ARGS "") IF(CMAKE_SYSTEM_PROCESSOR MATCHES "^x86(_64)?$") SET(OPTIONAL_ARGS DYNAMIC_ARCH=1 NUM_THREADS=64) diff --git a/paddle/framework/tensor_impl.h b/paddle/framework/tensor_impl.h index 7d7263b899afb7a2128548f264065a8013b6f0c9..7893e233b776425a61d9e3edd43d944a27743188 100644 --- a/paddle/framework/tensor_impl.h +++ b/paddle/framework/tensor_impl.h @@ -117,6 +117,8 @@ inline void Tensor::CopyFrom(const Tensor& src, memory::Copy(boost::get(dst_place), dst_ptr, boost::get(src_place), src_ptr, size, 0); } + PADDLE_ENFORCE(cudaStreamSynchronize(0), + "cudaStreamSynchronize failed in Tensor CopyFrom"); #endif } diff --git a/paddle/gserver/layers/CrossEntropyOverBeam.cpp b/paddle/gserver/layers/CrossEntropyOverBeam.cpp index 4acc077035b17fdf5ec06e0d4d916fa0a62f6cba..578bdbbe72120abccc63ed13d11e1dec65d41e44 100644 --- a/paddle/gserver/layers/CrossEntropyOverBeam.cpp +++ b/paddle/gserver/layers/CrossEntropyOverBeam.cpp @@ -223,7 +223,7 @@ void CrossEntropyOverBeam::checkInputs() { << inputLayers_[i * 3]->getName() << " should be a nested sequence"; CHECK_EQ(getInputValue(i * 3 + 1)->getWidth(), beamSize_); - CHECK_EQ(scores.getNumSequences(), batchSize_); + CHECK_EQ(batchSize_, static_cast(scores.getNumSequences())); CHECK_EQ(scores.getNumSubSequences(), selCandidates.getBatchSize()); } else { CHECK(scores.hasSeq()) << "input " << i << " " @@ -231,10 +231,10 @@ void CrossEntropyOverBeam::checkInputs() { << " should be a sequence"; batchSize_ = scores.getNumSequences(); beamSize_ = getInputValue(i * 3 + 1)->getWidth(); - CHECK_EQ(batchSize_, selCandidates.getBatchSize()); + CHECK_EQ(batchSize_, static_cast(selCandidates.getBatchSize())); } CHECK_EQ(1U, scores.value->getWidth()); - CHECK_EQ(batchSize_, goldSeq.getBatchSize()); + CHECK_EQ(batchSize_, static_cast(goldSeq.getBatchSize())); } } @@ -377,8 +377,8 @@ void CrossEntropyOverBeam::forward(PassType passType) { MatrixPtr outputValue = getOutputValue(); for (size_t i = 0; i < batchSize_; ++i) { - beamCosts_[i].setData( - std::move(std::make_shared(beamPerSeq_[i])), beamSize_); + BeamExpansionPtr ptr = std::make_shared(beamPerSeq_[i]); + beamCosts_[i].setData(std::move(ptr), beamSize_); outputValue->getData()[i] = beamCosts_[i].forward(); } } diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index f0fd12f1b5276d033ea086c60c80616fb1be7585..e5efcccb0e219a1c9df888cfec7f8902806676d4 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -1,7 +1,10 @@ +file(GLOB GENERAL_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*_op.cc") +string(REPLACE ".cc" "" GENERAL_OPS "${GENERAL_OPS}") function(op_library TARGET) # op_library is a function to create op library. The interface is same as # cc_library. But it handle split GPU/CPU code and link some common library # for ops. + set(OP_LIBRARY ${TARGET} ${OP_LIBRARY} PARENT_SCOPE) set(cc_srcs) set(cu_srcs) set(op_common_deps operator op_registry) @@ -43,33 +46,26 @@ endfunction() add_subdirectory(math) -cc_test(gather_test SRCS gather_test.cc DEPS tensor) -op_library(gather_op SRCS gather_op.cc gather_op.cu) - -cc_test(scatter_test SRCS scatter_test.cc DEPS tensor) -op_library(scatter_op SRCS scatter_op.cc scatter_op.cu) - -cc_library(net_op SRCS net_op.cc DEPS op_registry) -cc_test(net_op_test SRCS net_op_test.cc DEPS net_op) - -op_library(add_op SRCS add_op.cc add_op.cu) - -op_library(mean_op SRCS mean_op.cc mean_op.cu) +list(REMOVE_ITEM GENERAL_OPS + net_op + minus_op + mul_op + recurrent_op + scale_op) +op_library(net_op SRCS net_op.cc) +op_library(minus_op SRCS minus_op.cc minus_op.cu DEPS scale_op) op_library(mul_op SRCS mul_op.cc mul_op.cu DEPS math_function) -op_library(rowwise_add_op SRCS rowwise_add_op.cu rowwise_add_op.cc) +op_library(recurrent_op SRCS recurrent_op.cc rnn/recurrent_op_utils.cc + DEPS framework_proto tensor operator net_op) +op_library(scale_op SRCS scale_op.cc scale_op.cu DEPS net_op) -op_library(sigmoid_op SRCS sigmoid_op.cc sigmoid_op.cu) -op_library(softmax_op SRCS softmax_op.cc softmax_op.cu) -op_library(gaussian_random_op SRCS gaussian_random_op.cc gaussian_random_op.cu) -op_library(cross_entropy_op SRCS cross_entropy_op.cc cross_entropy_op.cu) -op_library(fill_zeros_like_op SRCS fill_zeros_like_op.cc fill_zeros_like_op.cu) +foreach(src ${GENERAL_OPS}) + op_library(${src} SRCS ${src}.cc ${src}.cu) +endforeach() -op_library(sgd_op SRCS sgd_op.cc sgd_op.cu) +set(GLOB_OP_LIB ${OP_LIBRARY} CACHE INTERNAL "Global OP library") -op_library(recurrent_op SRCS recurrent_op.cc rnn/recurrent_op_utils.cc - DEPS framework_proto tensor op_registry operator net_op) -op_library(uniform_random_op SRCS uniform_random_op.cc uniform_random_op.cu) -op_library(lookup_table_op SRCS lookup_table_op.cc lookup_table_op.cu) -op_library(scale_op SRCS scale_op.cc scale_op.cu DEPS net_op) -op_library(minus_op SRCS minus_op.cc minus_op.cu DEPS scale_op) +cc_test(gather_test SRCS gather_test.cc DEPS tensor) +cc_test(net_op_test SRCS net_op_test.cc DEPS net_op) +cc_test(scatter_test SRCS scatter_test.cc DEPS tensor) diff --git a/paddle/pybind/CMakeLists.txt b/paddle/pybind/CMakeLists.txt index 37e186a408ff5f560b5878e3e51ea81ca5810bc7..00030050700bfb2cee224124d090b0027d456ba0 100644 --- a/paddle/pybind/CMakeLists.txt +++ b/paddle/pybind/CMakeLists.txt @@ -2,21 +2,5 @@ if(WITH_PYTHON) cc_library(paddle_pybind SHARED SRCS pybind.cc DEPS pybind python backward - sgd_op - gather_op - scatter_op - add_op - mul_op - rowwise_add_op - sigmoid_op - softmax_op - mean_op - cross_entropy_op - recurrent_op - uniform_random_op - gaussian_random_op - fill_zeros_like_op - lookup_table_op - scale_op - minus_op) + ${GLOB_OP_LIB}) endif(WITH_PYTHON) diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 21e42f5e89bb7e0f5cfc8f3263dd0aa298b049c0..b167a3f78af0a9a34fbf1ddf7bf5486c25b2462c 100644 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -137,7 +137,7 @@ __all__ = [ 'clip_layer', 'slice_projection', 'seq_slice_layer', - 'kmax_sequence_score_layer', + 'kmax_seq_score_layer', 'img_pool3d_layer', 'scale_shift_layer', 'img_conv3d_layer', @@ -5996,7 +5996,7 @@ def cross_entropy_over_beam(input, name=None): Note that, if gold falls off the beam at search step t, then the cost is calculated over the beam at step t. - This cost layer always works together with kmax_sequence_score_layer, + This cost layer always works together with kmax_seq_score_layer, sub_nested_seq_layer, and sequence_slice_layer to trim the input to form a sub-search space. @@ -6599,14 +6599,14 @@ def seq_slice_layer(input, starts, ends, name=None): @wrap_name_default() @layer_support() -def kmax_sequence_score_layer(input, name=None, beam_size=1): +def kmax_seq_score_layer(input, name=None, beam_size=1): """ This layer accepts one input which are scores over a sequence or a nested sequence, and returns indices of beam_size sequences with highest scores. .. code-block:: python - kmax_indices = kmax_sequence_score_layer(input=input_layer, beam_size) + kmax_indices = kmax_seq_score_layer(input=input_layer, beam_size) :param name: The Layer Name. @@ -6619,10 +6619,10 @@ def kmax_sequence_score_layer(input, name=None, beam_size=1): :return: LayerOutput object. :rtype: LayerOutput """ - assert isinstance(input, LayerOutput), ("kmax_sequence_score_layer " + assert isinstance(input, LayerOutput), ("kmax_seq_score_layer " "accepts only one input.") assert input.size == 1, ( - "input of kmax_sequence_score_layer is a score" + "input of kmax_seq_score_layer is a score " "over a sequence or a nested sequence, so its width must be 1.") Layer( diff --git a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh index 4aa519104f61445e0e6cdca0e0eef1a0ec6bbbf6..8a204a96f3ef57673cef65306d0bf8e8c3409751 100755 --- a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh +++ b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh @@ -8,7 +8,7 @@ test_spp_layer test_bilinear_interp test_maxout test_bi_grumemory math_ops test_seq_concat_reshape test_pad test_smooth_l1 test_multiplex_layer test_prelu_layer test_row_conv test_detection_output_layer test_multibox_loss_layer test_recursive_topology test_gated_unit_layer test_clip_layer test_row_l2_norm_layer -test_kmax_seq_socre_layer test_seq_select_layers test_scale_shift_layer +test_kmax_seq_socre_layer test_sub_nested_seq_select_layer test_scale_shift_layer test_seq_slice_layer test_cross_entropy_over_beam test_pooling3D_layer test_conv3d_layer test_deconv3d_layer test_BatchNorm3D) diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cross_entropy_over_beam.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cross_entropy_over_beam.protostr index c43fc48e222044b65d83b6162e7dc3954e119887..a602569697e91b11b8d421ac359c2e523a00fa98 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cross_entropy_over_beam.protostr +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cross_entropy_over_beam.protostr @@ -12,7 +12,7 @@ layers { active_type: "" } layers { - name: "__kmax_sequence_score_layer_0__" + name: "__kmax_seq_score_layer_0__" type: "kmax_seq_score" active_type: "" inputs { @@ -29,7 +29,7 @@ layers { input_layer_name: "sentence_states" } inputs { - input_layer_name: "__kmax_sequence_score_layer_0__" + input_layer_name: "__kmax_seq_score_layer_0__" } } layers { @@ -44,7 +44,7 @@ layers { bias_parameter_name: "___fc_layer_0__.wbias" } layers { - name: "__kmax_sequence_score_layer_1__" + name: "__kmax_seq_score_layer_1__" type: "kmax_seq_score" active_type: "" inputs { @@ -61,7 +61,7 @@ layers { input_layer_name: "__sub_nested_seq_layer_0__" } inputs { - input_layer_name: "__kmax_sequence_score_layer_1__" + input_layer_name: "__kmax_seq_score_layer_1__" } select_first: true } @@ -77,7 +77,7 @@ layers { bias_parameter_name: "___fc_layer_1__.wbias" } layers { - name: "__kmax_sequence_score_layer_2__" + name: "__kmax_seq_score_layer_2__" type: "kmax_seq_score" active_type: "" inputs { @@ -111,7 +111,7 @@ layers { input_layer_name: "sentence_scores" } inputs { - input_layer_name: "__kmax_sequence_score_layer_0__" + input_layer_name: "__kmax_seq_score_layer_0__" } inputs { input_layer_name: "sentences_ids" @@ -120,7 +120,7 @@ layers { input_layer_name: "__fc_layer_0__" } inputs { - input_layer_name: "__kmax_sequence_score_layer_1__" + input_layer_name: "__kmax_seq_score_layer_1__" } inputs { input_layer_name: "start_ids" @@ -129,7 +129,7 @@ layers { input_layer_name: "__fc_layer_1__" } inputs { - input_layer_name: "__kmax_sequence_score_layer_2__" + input_layer_name: "__kmax_seq_score_layer_2__" } inputs { input_layer_name: "end_ids" @@ -185,13 +185,13 @@ sub_models { name: "root" layer_names: "sentence_states" layer_names: "sentence_scores" - layer_names: "__kmax_sequence_score_layer_0__" + layer_names: "__kmax_seq_score_layer_0__" layer_names: "__sub_nested_seq_layer_0__" layer_names: "__fc_layer_0__" - layer_names: "__kmax_sequence_score_layer_1__" + layer_names: "__kmax_seq_score_layer_1__" layer_names: "__seq_slice_layer_0__" layer_names: "__fc_layer_1__" - layer_names: "__kmax_sequence_score_layer_2__" + layer_names: "__kmax_seq_score_layer_2__" layer_names: "sentences_ids" layer_names: "start_ids" layer_names: "end_ids" diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_kmax_seq_socre_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_kmax_seq_socre_layer.protostr index 3d32220bfbf5f4c67f88303cb9773ecfa484da4b..f93d368c8687573db80106b9cc4defa56a881e46 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_kmax_seq_socre_layer.protostr +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_kmax_seq_socre_layer.protostr @@ -17,7 +17,7 @@ layers { bias_parameter_name: "___fc_layer_0__.wbias" } layers { - name: "__kmax_sequence_score_layer_0__" + name: "__kmax_seq_score_layer_0__" type: "kmax_seq_score" active_type: "" inputs { @@ -46,14 +46,14 @@ parameters { initial_smart: false } input_layer_names: "input_seq" -output_layer_names: "__kmax_sequence_score_layer_0__" +output_layer_names: "__kmax_seq_score_layer_0__" sub_models { name: "root" layer_names: "input_seq" layer_names: "__fc_layer_0__" - layer_names: "__kmax_sequence_score_layer_0__" + layer_names: "__kmax_seq_score_layer_0__" input_layer_names: "input_seq" - output_layer_names: "__kmax_sequence_score_layer_0__" + output_layer_names: "__kmax_seq_score_layer_0__" is_recurrent_layer_group: false } diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_cross_entropy_over_beam.py b/python/paddle/trainer_config_helpers/tests/configs/test_cross_entropy_over_beam.py index 240e703dc904e718c2c1ddaf2b6d7dccb4dabf41..4a5bdf1181dc4538418a8b89b41a1ff713e423c8 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/test_cross_entropy_over_beam.py +++ b/python/paddle/trainer_config_helpers/tests/configs/test_cross_entropy_over_beam.py @@ -7,14 +7,14 @@ beam_size = 5 # the first beam expansion. sentence_states = data_layer(name="sentence_states", size=32) sentence_scores = data_layer(name="sentence_scores", size=1) -topk_sentence_ids = kmax_sequence_score_layer( +topk_sentence_ids = kmax_seq_score_layer( input=sentence_scores, beam_size=beam_size) # the second beam expansion. topk_sen = sub_nested_seq_layer( input=sentence_states, selected_indices=topk_sentence_ids) start_pos_scores = fc_layer(input=topk_sen, size=1, act=LinearActivation()) -topk_start_pos_ids = kmax_sequence_score_layer( +topk_start_pos_ids = kmax_seq_score_layer( input=sentence_scores, beam_size=beam_size) # the final beam expansion. @@ -22,7 +22,7 @@ topk_start_spans = seq_slice_layer( input=topk_sen, starts=topk_start_pos_ids, ends=None) end_pos_scores = fc_layer( input=topk_start_spans, size=1, act=LinearActivation()) -topk_end_pos_ids = kmax_sequence_score_layer( +topk_end_pos_ids = kmax_seq_score_layer( input=end_pos_scores, beam_size=beam_size) # define the cost diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_kmax_seq_socre_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_kmax_seq_socre_layer.py index 48d0cd55da2481743de66ea95190c0856e7ddc39..171da10f75dae03eed7e110d0efd07d6a18e1ecf 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/test_kmax_seq_socre_layer.py +++ b/python/paddle/trainer_config_helpers/tests/configs/test_kmax_seq_socre_layer.py @@ -4,6 +4,6 @@ from paddle.trainer_config_helpers import * data = data_layer(name="input_seq", size=128) scores = fc_layer(input=data, size=1, act=ExpActivation()) -kmax_seq_id = kmax_sequence_score_layer(input=scores, beam_size=5) +kmax_seq_id = kmax_seq_score_layer(input=scores, beam_size=5) outputs(kmax_seq_id) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_seq_select_layers.py b/python/paddle/trainer_config_helpers/tests/configs/test_sub_nested_seq_select_layer.py similarity index 100% rename from python/paddle/trainer_config_helpers/tests/configs/test_seq_select_layers.py rename to python/paddle/trainer_config_helpers/tests/configs/test_sub_nested_seq_select_layer.py diff --git a/python/paddle/v2/__init__.py b/python/paddle/v2/__init__.py index 5bea980611904b37a4a5d4e2cbbee13503a61ff0..1c8d8f4b2f626bea5d9a44d01de7c2c9c45dc2fb 100644 --- a/python/paddle/v2/__init__.py +++ b/python/paddle/v2/__init__.py @@ -78,6 +78,8 @@ def init(**kwargs): if 'use_gpu' in kwargs: cp.g_command_config_args['use_gpu'] = kwargs['use_gpu'] + if 'use_mkldnn' in kwargs: + cp.g_command_config_args['use_mkldnn'] = kwargs['use_mkldnn'] assert 'parallel_nn' not in kwargs, ("currently 'parallel_nn' is not " "supported in v2 APIs.")