提交 8f4ca2d1 编写于 作者: C caoying03

add implementations.

上级 a037b099
develop 1.8.5 2.0.1-rocm-post 2.4.1 Ligoml-patch-1 OliverLPH-patch-1 OliverLPH-patch-2 PaddlePM-patch-1 PaddlePM-patch-2 ZHUI-patch-1 add_default_att add_kylinv10 add_model_benchmark_ci add_some_yaml_config addfile all_new_design_exec ascendrc ascendrelease bugfix-eval-frame-leakgae cherry-pick-fix-customOP-random-fail cherry_undefined_var compile_windows cp_2.4_fix_numpy delete_2.0.1-rocm-post delete_add_default_att delete_all_new_design_exec delete_ascendrc delete_compile_windows delete_delete_addfile delete_disable_iterable_dataset_unittest delete_fix_dataloader_memory_leak delete_fix_imperative_dygraph_error delete_fix_retry_ci delete_fix_undefined_var delete_improve_sccache delete_incubate/lite delete_paddle_tiny_install delete_paralleltest delete_prv-disable-more-cache delete_revert-31068-fix_conv3d_windows delete_revert-31562-mean delete_revert-33630-bug-fix delete_revert-34159-add_npu_bce_logical_dev delete_revert-34910-spinlocks_for_allocator delete_revert-35069-revert-34910-spinlocks_for_allocator delete_revert-36057-dev/read_flags_in_ut dingjiaweiww-patch-1 disable_iterable_dataset_unittest dy2static enable_eager_model_test final_state_gen_python_c final_state_intermediate fix-numpy-issue fix-run-program-grad-node-mem fix_check fix_concat_slice fix_custom_device_copy_sync fix_dataloader_memory_leak fix_dlpack_for fix_imperative_dygraph_error fix_newexe_gc fix_npu_ci fix_op_flops fix_retry_ci fix_rnn_docs fix_tensor_type fix_undefined_var fix_var_stop_gradient_error fixiscan fixiscan1 fixiscan2 fixiscan3 github/fork/123malin/netifaces github/fork/123malin/tdm_abacus github/fork/AshburnLee/dev_unique github/fork/ForFishes/fix_memory_matmul github/fork/ForFishes/rm_fluid github/fork/LielinJiang/move-2.0-api github/fork/LielinJiang/visual-dl-cb github/fork/LiuChiachi/add-transformer-generate-square-subsequent-mask-api github/fork/LiuChiachi/fix-example-code-for-hapi-Model github/fork/LiuChiachi/remove-input-requirment-in-dygraph-Model github/fork/MrChengmo/fix_ps_profiler github/fork/MrChengmo/update_ps_heter github/fork/PWhiddy/patch-1 github/fork/Shixiaowei02/dev/save_load_upgrade github/fork/TCChenlong/fix_hapi github/fork/TCChenlong/fix_inden github/fork/Thunderbrook/xpu_slice github/fork/XieYunshen/disable_ut_test_parallel_executor_fetch_isolated_var github/fork/XieYunshen/disable_ut_test_parallel_executor_fetch_isolated_var_2 github/fork/XieYunshen/disable_ut_test_parallel_executor_fetch_isolated_var_3 github/fork/XieYunshen/timeout_20S_ut github/fork/ZeyuChen/remove-nltk github/fork/arlesniak/arlesniak/selective__mkldnn_flags github/fork/baiyfbupt/code_doc_mig github/fork/chalsliu/set_timeout github/fork/chen-zhiyu/develop github/fork/chenwhql/ci/try_to_find_test_buffer_shared_memory_reuse_pass_error github/fork/chenwhql/dygraph/remove_scale_loss_and_apply_collective_grads github/fork/chenwhql/saveload/add_get_inference_program github/fork/chenwhql/saveload/remove_save_load_config github/fork/cryoco/pass-compatibility-trt github/fork/danleifeng/isempty_api2.0 github/fork/frankwhzhang/api_transfer github/fork/hbwx24/error_msg/cuda_kernel_error_msg github/fork/heavengate/cherry_yolo_box github/fork/heavengate/update_yolo_box github/fork/iclementine/rnn_fix github/fork/iducn/testestse github/fork/jczaja/prv-25537-fix github/fork/jeff41404/release/1.8 github/fork/jiweibo/api_2.0 github/fork/jiweibo/fix_lite_resnet50_test github/fork/juncaipeng/fix_doc_1 github/fork/lfchener/sample_code github/fork/littletomatodonkey/fix_reg_doc github/fork/liym27/dy2stat_update_assign_to_rc20 github/fork/luotao1/profiler_ut github/fork/mapingshuo/add_wait github/fork/mapingshuo/doc_2.0 github/fork/mapingshuo/zero-0.5 github/fork/miraiwk/dev github/fork/pangyoki/add-Categorical-class-branch github/fork/pangyoki/add-multinomial-op-branch github/fork/pangyoki/fix-test_distritbution-CI github/fork/qjing666/doublegrad github/fork/qjing666/fix_hdfs_download github/fork/sandyhouse/add_gather_etc github/fork/sandyhouse/add_send_recv_alltoall_etc github/fork/sandyhouse/pipeline_exe_run github/fork/seiriosPlus/feature/large_scale_kv_save_delta github/fork/seiriosPlus/fix/paddle_errors_fix github/fork/seiriosPlus/fix/paddle_op_errors github/fork/shangzhizhou/fix_test_activation_op_random_bug github/fork/smallv0221/yxp0924 github/fork/smallv0221/yxp0925 github/fork/swtkiwi/del-matplotlib github/fork/tianshuo78520a/kunlun_test github/fork/tianshuo78520a/update_dockerfile github/fork/wanghaoshuang/bert_fuse github/fork/wanghaoshuang/label_smooth github/fork/wanghuancoder/develop_CUDASynchronize github/fork/wanghuancoder/develop_Layer_doc github/fork/wanghuancoder/develop_ParameterList_doc github/fork/wanghuancoder/develop_Sequential_doc github/fork/wanghuancoder/develop_bilinear_tensor_product github/fork/wanghuancoder/develop_coverage_build_sh github/fork/wanghuancoder/develop_in_dynamic_mode_doc github/fork/wanghuancoder/develop_unique_name_doc github/fork/wangxicoding/fleet_meta_combine github/fork/wawltor/error_message_fix_5 github/fork/willthefrog/remove_l2_norm github/fork/windstamp/momentum_op github/fork/windstamp/mv_op_5 github/fork/windstamp/normal_api github/fork/wojtuss/wojtuss/fusion_gru_quantization github/fork/wojtuss/wojtuss/quantization-with-shift github/fork/wzzju/fix_err_info github/fork/wzzju/pure_fp16 github/fork/xiemoyuan/op_error_message github/fork/xiemoyuan/optimize_error_message github/fork/yaoxuefeng6/fix_doc github/fork/yaoxuefeng6/mod_dataset_v2 github/fork/yongqiangma/lod github/fork/ysh329/fix-clip-by-norm-error github/fork/ysh329/fix-error-clip-by-value github/fork/yukavio/error_info github/fork/zhangting2020/conv_filter_grad github/fork/zhangting2020/is_compile_with_cuda github/fork/zhangting2020/place_doc github/fork/zhangting2020/program github/fork/zhhsplendid/fix_any github/fork/zhhsplendid/refine_api2 github/fork/zhhsplendid/refine_api2_test github/fork/zhhsplendid/refine_api_test_ptb_lm github/fork/zhhsplendid/refine_api_test_resnet github/fork/zhhsplendid/refine_api_test_simnet github/fork/zhiqiu/dev/refine_initializer github/fork/zhiqiu/dev/remove_inplace_argument github/fork/zlsh80826/nvinfer_plugin_var_len_cuda11 hack_event improve_sccache incuabte/new_frl incubate/frl_train_eval incubate/infrt incubate/lite incubate/new_frl incubate/new_frl_rc incubate/stride inplace_addto layer_norm make_flag_adding_easier master matmul_double_grad move_embedding_to_phi move_histogram_to_pten move_sgd_to_phi move_slice_to_pten move_temporal_shift_to_phi move_yolo_box_to_phi npu_fix_alloc numel operator_opt paddle_tiny_install paralleltest pass-compile-eval-frame preln_ernie prv-disable-more-cache prv-md-even-more prv-onednn-2.5 prv-reshape-mkldnn-ut2 pten_tensor_refactor release-deleted/2.5 release-rc/2.5 release/0.11.0 release/0.12.0 release/0.13.0 release/0.14.0 release/0.15.0 release/1.0.0 release/1.1 release/1.2 release/1.3 release/1.4 release/1.5 release/1.6 release/1.7 release/1.8 release/2.0 release/2.0-alpha release/2.0-beta release/2.0-rc release/2.0-rc1 release/2.1 release/2.2 release/2.3 release/2.3-fc-ernie-fix release/2.4 release/2.5 release/lite-0.1 release/llm_2.5 revert-24981-add_device_attr_for_regulization revert-26856-strategy_example2 revert-27520-disable_pr revert-31068-fix_conv3d_windows revert-31562-mean revert-32290-develop-hardlabel revert-33037-forci revert-33475-fix_cifar_label_dimension revert-33630-bug-fix revert-34159-add_npu_bce_logical_dev revert-34406-add_copy_from_tensor revert-34910-spinlocks_for_allocator revert-35069-revert-34910-spinlocks_for_allocator revert-36057-dev/read_flags_in_ut revert-36201-refine_fast_threaded_ssa_graph_executor revert-36985-add_license revert-37318-refactor_dygraph_to_eager revert-37926-eager_coreops_500 revert-37956-revert-37727-pylayer_support_tuple revert-38100-mingdong revert-38301-allocation_rearrange_pr revert-38703-numpy_bf16_package_reupload revert-38732-remove_useless_header_in_elementwise_mul_grad revert-38959-Reduce_Grad revert-39143-adjust_empty revert-39227-move_trace_op_to_pten revert-39268-dev/remove_concat_fluid_kernel revert-40170-support_partial_grad revert-41056-revert-40727-move_some_activaion_to_phi revert-41065-revert-40993-mv_ele_floordiv_pow revert-41068-revert-40790-phi_new revert-41944-smaller_inference_api_test revert-42149-do-not-reset-default-stream-for-stream-safe-cuda-allocator revert-43155-fix_ut_tempfile revert-43882-revert-41944-smaller_inference_api_test revert-45808-phi/simplify_size_op revert-46827-deform_comment revert-47325-remove_cudnn_hardcode revert-47645-add_npu_storage_dims revert-48815-set_free_when_no_cache_hit_default_value_true revert-49499-test_ninja_on_ci revert-49654-prim_api_gen revert-49673-modify_get_single_cov revert-49763-fix_static_composite_gen revert-50158-fix_found_inf_bug_for_custom_optimizer revert-50188-refine_optimizer_create_accumulators revert-50335-fix_optminizer_set_auxiliary_var_bug revert-51676-flag_delete revert-51850-fix_softmaxce_dev revert-52175-dev_peak_memory revert-52186-deve revert-52523-test_py38 revert-52912-develop revert-53248-set_cmake_policy revert-54029-fix_windows_compile_bug revert-54068-support_translating_op_attribute revert-54214-modify_cmake_dependencies revert-54370-offline_pslib revert-54391-fix_cmake_md5error revert-54411-fix_cpp17_compile revert-54466-offline_pslib revert-54480-cmake-rocksdb revert-55568-fix_BF16_bug1 revert-56328-new_ir_support_vector_type_place_transfer revert-56366-fix_openssl_bug revert-56545-revert-56366-fix_openssl_bug revert-56620-fix_new_ir_ocr_bug revert-56925-check_inputs_grad_semantic revert-57005-refine_stride_flag rocm_dev_0217 sd_conv_linear_autocast semi-auto/rule-base support-0D-sort support_weight_transpose test_benchmark_ci test_feature_precision_test_c test_for_Filtetfiles test_model_benchmark test_model_benchmark_ci zhiqiu-patch-1 v2.5.1 v2.5.0 v2.5.0-rc1 v2.5.0-rc0 v2.4.2 v2.4.1 v2.4.0 v2.4.0-rc0 v2.3.2 v2.3.1 v2.3.0 v2.3.0-rc0 v2.2.2 v2.2.1 v2.2.0 v2.2.0-rc0 v2.2.0-bak0 v2.1.3 v2.1.2 v2.1.1 v2.1.0 v2.1.0-rc0 v2.0.2 v2.0.1 v2.0.0 v2.0.0-rc1 v2.0.0-rc0 v2.0.0-beta0 v2.0.0-alpha0 v1.8.5 v1.8.4 v1.8.3 v1.8.2 v1.8.1 v1.8.0 v1.7.2 v1.7.1 v1.7.0 v1.6.3 v1.6.2 v1.6.1 v1.6.0 v1.6.0-rc0 v1.5.2 v1.5.1 v1.5.0 v1.4.1 v1.4.0 v1.3.2 v1.3.1 v1.3.0 v1.2.1 v1.2.0 v1.1.0 v1.0.2 v1.0.1 v1.0.0 v1.0.0-rc0 v0.15.0 v0.15.0-rc0 v0.14.0 v0.13.0 v0.12.0 v0.11.1a2 v0.11.1a1 v0.11.0 lite-v0.1
7 合并请求!11636[IMPORTANT] MKLDNN layout: Support for sum operator,!8482Release/0.11.0,!8190Release/0.11.0,!8189Release/0.11.0,!6633给线性回归的get-started代码加上了预测的示例~~,!4615Feature/tensor array add python binding,!3639add a cross_entropy_over_beam layer.
......@@ -16,6 +16,168 @@ limitations under the License. */
namespace paddle {
void CostForOneSequence::calValidExpandStep() {
validExpansionCount_ = 0;
goldAsExtraPath_ = true;
for (size_t i = 0; i < beams_->expansionCount; ++i) {
real gold = static_cast<real>(beams_->gold[i]);
if (i) {
real* start = beams_->candidateIds[i - 1]->getData();
goldRowIds_[i] = std::count_if(
start,
start + goldRowIds_[i - 1] * beamSize_ + goldColIds_[i - 1],
[](const real& val) { return val != -1.; });
} else
goldRowIds_[i] = 0;
real* start =
beams_->candidateIds[i]->getData() + goldRowIds_[i] * beamSize_;
real* findEnd = std::find(start, start + beamSize_, gold);
validExpansionCount_++;
if (start + beamSize_ == findEnd) return;
goldColIds_[i] = findEnd - start;
}
if (goldColIds_[beams_->expansionCount - 1] != -1) goldAsExtraPath_ = false;
}
size_t CostForOneSequence::initLastExpansion() {
int beamId = validExpansionCount_ - 1;
const MatrixPtr candidates = beams_->candidateIds[beamId];
size_t height = candidates->getHeight();
/* initialization the last expansion. */
size_t pathCount = std::count_if(candidates->getData(),
candidates->getData() + height * beamSize_,
[](const real& val) { return val != -1; });
/*
* if the gold sequence falls off the beam during search,
* add the gold sequence as the last path into all expanded paths.
*/
if (goldAsExtraPath_) goldIdsInFinalExpansion_ = pathCount++;
pathRowIdsInEachBeam_.clear();
pathRowIdsInEachBeam_.resize(validExpansionCount_,
std::vector<int>(pathCount, 0));
parentIdsInBeam_.clear();
parentIdsInBeam_.resize(pathCount, 0);
if (goldAsExtraPath_) {
/* add gold sequence into the total expansion. */
pathRowIdsInEachBeam_[beamId].back() =
beams_->gold[beamId] +
getSeqStartPos(beamId, goldRowIds_[validExpansionCount_ - 1]);
parentIdsInBeam_.back() = goldRowIds_[validExpansionCount_ - 1];
} else {
size_t goldOffset = goldRowIds_[beamId] * beamSize_ + goldColIds_[beamId];
goldIdsInFinalExpansion_ =
std::count_if(candidates->getData(),
candidates->getData() + goldOffset,
[](const real& val) { return val != -1.; });
}
/*
* TODO(caoying): fix this, store the indices of selected candidate
* paths into Argument.ids
*/
real* ids = candidates->getData();
size_t curIdx = 0;
for (size_t i = 0; i < height; ++i) {
int basePos = getSeqStartPos(beamId, i);
for (size_t j = 0; j < beamSize_; ++j) {
int id = ids[i * beamSize_ + j];
if (id == -1) continue;
pathRowIdsInEachBeam_[beamId][curIdx] = id + basePos;
parentIdsInBeam_[curIdx++] = i;
}
}
return pathCount;
}
void CostForOneSequence::constructTotalExpansion() {
/*
* construct the entire expanded beam by begining with the last search
* in which gold falls off the beam.
*/
size_t totalPathCount = initLastExpansion();
for (int beamId = validExpansionCount_ - 2; beamId >= 0; --beamId) {
const MatrixPtr candidates = beams_->candidateIds[beamId];
real* ids = candidates->getData();
int lastParentIdInBeam = -1;
int basePos = -1;
for (size_t i = 0;
i < (goldAsExtraPath_ ? totalPathCount - 1 : totalPathCount);
++i) {
int id = ids[parentIdsInBeam_[i]];
int parentRowId = std::div(parentIdsInBeam_[i], beamSize_).quot;
if (parentIdsInBeam_[i] != lastParentIdInBeam)
basePos = getSeqStartPos(beamId, parentRowId);
pathRowIdsInEachBeam_[beamId][i] = id + basePos;
lastParentIdInBeam = parentIdsInBeam_[i];
parentIdsInBeam_[i] = parentRowId;
if (goldAsExtraPath_)
pathRowIdsInEachBeam_[beamId][totalPathCount - 1] =
beams_->gold[beamId] + getSeqStartPos(beamId, goldRowIds_[beamId]);
}
}
}
real CostForOneSequence::globallyNormalizedScore() {
expandedPathScores_.resize(validExpansionCount_);
Matrix::resizeOrCreate(
softmaxOut_, 1, pathRowIdsInEachBeam_[0].size(), false, false);
softmaxOut_->zero();
MatrixPtr tmp = Matrix::create(
softmaxOut_->getData(), softmaxOut_->getWidth(), 1, false, false);
for (size_t i = 0; i < validExpansionCount_; ++i) {
Matrix::resizeOrCreate(expandedPathScores_[i],
pathRowIdsInEachBeam_[i].size(),
1,
false,
false);
IVectorPtr rowIds = IVector::create(pathRowIdsInEachBeam_[i].data(),
pathRowIdsInEachBeam_[i].size(),
false);
expandedPathScores_[i]->selectRows(*(beams_->scores[i]), *rowIds);
tmp->add(*expandedPathScores_[i]);
}
softmaxOut_->softmax(*softmaxOut_);
return -std::log(softmaxOut_->getData()[goldIdsInFinalExpansion_]);
}
real CostForOneSequence::forward() {
calValidExpandStep();
constructTotalExpansion();
return globallyNormalizedScore();
}
void CostForOneSequence::backward() {
softmaxOut_->getData()[goldIdsInFinalExpansion_] -= 1.;
MatrixPtr tmp = Matrix::create(
softmaxOut_->getData(), softmaxOut_->getWidth(), 1, false, false);
for (size_t i = 0; i < validExpansionCount_; ++i) {
IVectorPtr rowIds = IVector::create(pathRowIdsInEachBeam_[i].data(),
pathRowIdsInEachBeam_[i].size(),
false);
/*
beams_->scoreGrad[i] has been intialized outside this class, this
class only keeps a pointer pointing to the original input gradients,
so here does not need to allocate or initalize the memory.
*/
tmp->addToRows(*beams_->scoreGrad[i], *rowIds);
}
}
REGISTER_LAYER(cross_entropy_over_beam, CrossEntropyOverBeam);
bool CrossEntropyOverBeam::init(const LayerMap& layerMap,
......@@ -24,13 +186,189 @@ bool CrossEntropyOverBeam::init(const LayerMap& layerMap,
Layer::init(layerMap, parameterMap);
CHECK_EQ(0U, inputLayers_.size() % 3) << "Error input number.";
setNeedSequenceInfo(false);
beamExpanCount_ = inputLayers_.size() / 3;
candidateScores_.resize(beamExpanCount_);
candidateScoreGrad_.resize(beamExpanCount_);
candidateInBeam_.resize(beamExpanCount_);
goldSequence_.resize(beamExpanCount_);
gradToInputs_.resize(beamExpanCount_);
setNeedSequenceInfo(false);
return true;
}
void CrossEntropyOverBeam::forward(PassType passType) {}
void CrossEntropyOverBeam::checkInputs() {
batchSize_ = 0;
for (size_t i = 0; i < beamExpanCount_; ++i) {
const Argument& scores = getInput(i * 3);
const Argument& selCandidates = getInput(i * 3 + 1);
const Argument& goldSeq = getInput(i * 3 + 2);
if (i) {
CHECK(scores.hasSubseq()) << "Beam expansion expect the first one, "
"should be a nested sequence";
CHECK_EQ(getInputValue(i * 3 + 1)->getWidth(), beamSize_);
CHECK_EQ(scores.getNumSequences(), batchSize_);
CHECK_EQ(scores.getNumSubSequences(), selCandidates.getBatchSize());
} else {
CHECK(scores.hasSeq()) << "The first beam expansion should be a sequence";
batchSize_ = scores.getNumSequences();
beamSize_ = getInputValue(i * 3 + 1)->getWidth();
CHECK_EQ(batchSize_, selCandidates.getBatchSize());
}
CHECK_EQ(1U, scores.value->getWidth());
CHECK_EQ(batchSize_, goldSeq.getBatchSize());
}
}
void CrossEntropyOverBeam::copyInputsToCpu() {
auto copyValue = [](const MatrixPtr& src, MatrixPtr& trg) {
if (dynamic_cast<GpuMatrix*>(src.get())) {
Matrix::resizeOrCreate(
trg, src->getHeight(), src->getWidth(), false, false);
trg->copyFrom(*src);
} else {
trg = std::move(src);
}
};
auto copyIds = [](const IVectorPtr& src, IVectorPtr& trg) {
if (dynamic_cast<GpuIVector*>(src.get())) {
IVector::resizeOrCreate(trg, src->getSize(), false);
trg->copyFrom(*src);
} else {
trg = std::move(src);
}
};
beamSplitPos_.clear();
beamSplitPos_.resize(batchSize_, std::vector<int>(beamExpanCount_, 0));
for (size_t i = 0; i < beamExpanCount_; ++i) {
copyValue(getInputValue(i * 3), candidateScores_[i]);
copyValue(getInputValue(i * 3 + 1), candidateInBeam_[i]);
copyIds(getInput(i * 3 + 2).ids, goldSequence_[i]);
if (i) {
ICpuGpuVectorPtr seqInfo = getInput(i * 3).sequenceStartPositions;
const int* seqStarts = seqInfo->getMutableData(false);
ICpuGpuVectorPtr subSeqInfo = getInput(i * 3).subSequenceStartPositions;
const int* subSeqStarts = subSeqInfo->getMutableData(false);
size_t seqId = 1;
for (size_t subSeqId = 0; subSeqId < subSeqInfo->getSize() - 1;
++subSeqId) {
CHECK_LT(seqId, seqInfo->getSize());
if (subSeqStarts[subSeqId] == seqStarts[seqId]) {
beamSplitPos_[seqId][i] = beamSplitPos_[seqId - 1][i];
seqId++;
}
beamSplitPos_[seqId - 1][i]++;
}
} else {
for (size_t j = 0; j < batchSize_; ++j) beamSplitPos_[j][i] = j + 1;
}
}
}
void CrossEntropyOverBeam::splitBatchBeams() {
beamCosts_.resize(batchSize_);
beamPerSeq_.resize(batchSize_, beamExpanCount_);
for (size_t i = 0; i < beamExpanCount_; ++i) {
int* seqStarts =
getInput(i * 3).sequenceStartPositions->getMutableData(false);
int* subSeqStarts = nullptr;
int maxLen = 0;
if (i) {
subSeqStarts =
getInput(i * 3).subSequenceStartPositions->getMutableData(false);
maxLen = getInput(i * 3).subSequenceStartPositions->getSize() - 1;
} else
maxLen = getInput(i).sequenceStartPositions->getSize() - 1;
for (size_t j = 0; j < batchSize_; ++j) {
beamPerSeq_[j].scores[i] =
Matrix::create(candidateScores_[i]->getData() + seqStarts[j],
seqStarts[j + 1] - seqStarts[j],
1,
false,
false);
beamPerSeq_[j].scoreGrad[i] =
Matrix::create(candidateScoreGrad_[i]->getData() + seqStarts[j],
seqStarts[j + 1] - seqStarts[j],
1,
false,
false);
int offset = j ? beamSplitPos_[j - 1][i] : 0;
int height = beamSplitPos_[j][i] - (j ? beamSplitPos_[j - 1][i] : 0);
CHECK_GE(maxLen, offset + height);
beamPerSeq_[j].seqInfo[i] = IVector::create(
(i ? subSeqStarts : seqStarts) + offset, height + 1, false);
void CrossEntropyOverBeam::backward(const UpdateCallback& callback) {}
beamPerSeq_[j].candidateIds[i] =
Matrix::create(candidateInBeam_[i]->getData() + offset * beamSize_,
height,
beamSize_,
false,
false);
beamPerSeq_[j].gold[i] = goldSequence_[i]->getData()[j];
}
}
}
void CrossEntropyOverBeam::resizeOutput() {
Matrix::resizeOrCreate(output_.value, batchSize_, 1, false, false);
output_.value->zero();
for (size_t i = 0; i < beamExpanCount_; ++i) {
MatrixPtr inGrad = getInputGrad(i * 3);
if (dynamic_cast<GpuMatrix*>(inGrad.get())) {
Matrix::resizeOrCreate(candidateScoreGrad_[i],
inGrad->getHeight(),
inGrad->getWidth(),
false,
false);
} else
candidateScoreGrad_[i] = std::move(inGrad);
candidateScoreGrad_[i]->zero();
}
}
void CrossEntropyOverBeam::copyGradToGpu(size_t copyCount) {
for (size_t i = 0; i < beamExpanCount_; ++i) {
if (dynamic_cast<GpuMatrix*>(getInputGrad(i * 3).get()))
getInputGrad(i * 3)->copyFrom(*candidateScoreGrad_[i]);
if (i == copyCount - 1) break;
}
}
void CrossEntropyOverBeam::forward(PassType passType) {
Layer::forward(passType);
checkInputs();
copyInputsToCpu();
resizeOutput();
splitBatchBeams();
MatrixPtr outputValue = getOutputValue();
for (size_t i = 0; i < batchSize_; ++i) {
beamCosts_[i].setData(
std::move(std::make_shared<BeamExpansion>(beamPerSeq_[i])), beamSize_);
outputValue->getData()[i] = beamCosts_[i].forward();
}
}
void CrossEntropyOverBeam::backward(const UpdateCallback& callback) {
for (size_t i = 0; i < batchSize_; ++i) {
beamCosts_[i].backward();
copyGradToGpu(beamCosts_[i].getValidExpansionCount());
}
}
} // namespace paddle
......@@ -19,6 +19,79 @@ limitations under the License. */
namespace paddle {
struct BeamExpansion {
// store the entire beam expansion for a single sequence
std::vector<MatrixPtr> scores;
std::vector<IVectorPtr> seqInfo;
std::vector<MatrixPtr> candidateIds;
std::vector<int> gold;
std::vector<MatrixPtr> scoreGrad;
size_t expansionCount;
BeamExpansion(int n) {
expansionCount = n;
scores.resize(expansionCount);
seqInfo.resize(expansionCount);
candidateIds.resize(expansionCount);
scoreGrad.resize(expansionCount);
gold.resize(expansionCount);
};
};
typedef std::shared_ptr<BeamExpansion> BeamExpansionPtr;
class CostForOneSequence {
public:
CostForOneSequence()
: beamSize_(0), validExpansionCount_(0), goldAsExtraPath_(false) {}
void setData(const BeamExpansionPtr bPtr, size_t beamSize) {
beams_ = bPtr;
beamSize_ = beamSize;
expandedPathScores_.clear();
expandedPathScores_.resize(beams_->expansionCount);
goldRowIds_.clear();
goldRowIds_.resize(beams_->expansionCount, 0);
goldColIds_.clear();
goldColIds_.resize(beams_->expansionCount, -1);
}
size_t getValidExpansionCount() { return validExpansionCount_; }
real forward();
void backward();
private:
void calValidExpandStep();
void constructTotalExpansion();
size_t initLastExpansion();
real globallyNormalizedScore();
int getSeqStartPos(size_t beamId, size_t rowId) {
CHECK_GT(beams_->seqInfo[beamId]->getSize() - 1, rowId);
int* starts = beams_->seqInfo[beamId]->getData();
return starts[rowId] - starts[0];
};
size_t beamSize_;
size_t validExpansionCount_;
bool goldAsExtraPath_;
std::vector<int> goldRowIds_;
std::vector<int> goldColIds_;
BeamExpansionPtr beams_;
std::vector<std::vector<int>> pathRowIdsInEachBeam_;
std::vector<int> parentIdsInBeam_;
size_t goldIdsInFinalExpansion_;
std::vector<MatrixPtr> expandedPathScores_;
MatrixPtr softmaxOut_;
};
class CrossEntropyOverBeam : public Layer {
public:
explicit CrossEntropyOverBeam(const LayerConfig& config) : Layer(config) {}
......@@ -26,6 +99,31 @@ public:
const ParameterMap& parameterMap) override;
void forward(PassType passType) override;
void backward(const UpdateCallback& callback) override;
private:
void checkInputs();
void copyInputsToCpu();
void resizeOutput();
void copyGradToGpu(size_t copyCount);
void splitBatchBeams();
size_t beamExpanCount_;
size_t batchSize_;
size_t beamSize_;
// Currently, this layer only works on CPU, if its inputs is on GPU,
// copy them to CPU memory.
std::vector<MatrixPtr> candidateScores_;
std::vector<MatrixPtr> candidateScoreGrad_;
std::vector<MatrixPtr> candidateInBeam_;
std::vector<MatrixPtr> gradToInputs_;
std::vector<IVectorPtr> goldSequence_;
std::vector<std::vector<int>> beamSplitPos_;
// split entire bath of beams into beam per sequnence.
std::vector<BeamExpansion> beamPerSeq_;
// beamCosts_ is used to propagate error in one sequence.
std::vector<CostForOneSequence> beamCosts_;
};
} // namespace paddle
......@@ -28,9 +28,17 @@ using namespace paddle; // NOLINT
DECLARE_int32(gpu_id);
DECLARE_bool(thread_local_rand_use_global_seed);
const size_t MAX_SEQ_NUM = 10;
const size_t MAX_SEQ_LEN = 27;
const size_t MAX_BEAM_SIZE = 10;
// const size_t MAX_SEQ_NUM = 5;
// const size_t MAX_SEQ_LEN = 10;
// const size_t MAX_BEAM_SIZE = 3;
const size_t MAX_SEQ_NUM = 23;
const size_t MAX_SEQ_LEN = 50;
const size_t MAX_BEAM_SIZE = 27;
// const size_t SEED = 1503391792;
// const size_t SEED = 1;
const size_t SEED = (size_t)(time(NULL));
struct SingleBeamExpansion {
vector<int> seqStartPos;
......@@ -43,11 +51,30 @@ struct SingleBeamExpansion {
vector<int> groundTruth;
vector<size_t> inBeam;
vector<int> rowIdxInBeam;
vector<int> colIdxInBeam;
void resetGroundTruth(size_t n) {
groundTruth.clear();
groundTruth.resize(n, -1);
inBeam.clear();
inBeam.resize(n, 0);
rowIdxInBeam.clear();
rowIdxInBeam.resize(n, -1);
colIdxInBeam.clear();
colIdxInBeam.resize(n, -1);
}
};
inline float randFloat() {
return static_cast<float>(rand()) / static_cast<float>(RAND_MAX);
}
void genRand(real* numbers, size_t n) {
default_random_engine generator;
uniform_real_distribution<double> distribution(0.0, 1.0);
uniform_real_distribution<real> distribution(0.0, 1.0);
for (size_t i = 0; i < n; ++i) numbers[i] = distribution(generator);
}
......@@ -72,8 +99,7 @@ void genCandidateScores(bool hasSubseq,
vector<int>& subSeqStartPos = curBeam.subSeqStartPos;
subSeqStartPos.resize(1, 0);
srand((size_t)(time(NULL)));
// srand(1);
srand(SEED);
if (prevBeam.selectedIndices.size()) {
if (prevBeam.subSeqStartPos.size() > 1) {
int seqIdx = 1;
......@@ -81,9 +107,8 @@ void genCandidateScores(bool hasSubseq,
for (size_t i = 1; i < prevBeam.subSeqStartPos.size(); ++i) {
for (size_t j = 0; j < beamSize; ++j) {
if (prevBeam.selectedIndices[(i - 1) * beamSize + j] == -1.) break;
for (size_t k = 0; k < beamSize; ++k)
subSeqStartPos.push_back(1 + (rand() % MAX_SEQ_LEN) +
subSeqStartPos.back());
subSeqStartPos.push_back(1 + (rand() % MAX_SEQ_LEN) +
subSeqStartPos.back());
}
if (prevBeam.seqStartPos[seqIdx] == prevBeam.subSeqStartPos[i]) {
seqStartPos.push_back(subSeqStartPos.back());
......@@ -91,7 +116,6 @@ void genCandidateScores(bool hasSubseq,
}
}
} else {
// samples in previous beam are sequences.
for (size_t i = 0; i <= prevBeam.selectedIndices.size(); ++i) {
if (i && i % beamSize == 0) {
seqStartPos.push_back(subSeqStartPos.back());
......@@ -141,27 +165,41 @@ void genSelectedIndices(size_t beamSize,
void genGroundTruth(vector<SingleBeamExpansion>& beamExpansions,
size_t beamSize) {
size_t seqNum = beamExpansions[1].seqStartPos.size() - 1;
SingleBeamExpansion& beam = beamExpansions[1];
size_t seqNum = beam.seqStartPos.size() - 1;
for (size_t i = 2; i < beamExpansions.size(); ++i)
CHECK_EQ(seqNum, beamExpansions[i - 1].seqStartPos.size() - 1);
CHECK_EQ(seqNum, beamExpansions[i].seqStartPos.size() - 1);
// srand(1);
srand((size_t)(time(NULL)));
srand(SEED);
// initialize the first beam.
SingleBeamExpansion& beam = beamExpansions[1];
beam.groundTruth.resize(seqNum, 0);
beam.inBeam.resize(seqNum, 0);
beam.rowIdxInBeam.resize(seqNum, -1);
auto begPos = beam.selectedIndices.begin();
beam.resetGroundTruth(seqNum);
for (size_t i = 0; i < seqNum; ++i) {
int seqLen = beam.seqStartPos[i + 1] - beam.seqStartPos[i];
int label = rand() % seqLen;
auto endPos = begPos + beamSize;
beam.groundTruth[i] = label;
if (find(begPos, endPos, real(label)) != endPos) beam.inBeam[i] = 1;
begPos = endPos;
if (randFloat() > 0.5) {
// force the randomly generated label falls in the beam by chance 0.5.
// otherwise, when sequence length is relatively long and beam size is
// relatively small, the gold sequences falls off the beam at in
// the first search.
real* begPos = beam.selectedIndices.data() + i * beamSize;
beam.colIdxInBeam[i] =
rand() % count_if(begPos, begPos + beamSize, [](const real& val) {
return val != -1.;
});
beam.groundTruth[i] =
beam.selectedIndices[i * beamSize + beam.colIdxInBeam[i]];
beam.inBeam[i] = 1;
} else {
int label = rand() % (beam.seqStartPos[i + 1] - beam.seqStartPos[i]);
beam.groundTruth[i] = label;
real* begPos = beam.selectedIndices.data() + i * beamSize;
real* endPos = begPos + beamSize;
real* lblPos = find(begPos, endPos, real(label));
if (lblPos != endPos) {
beam.inBeam[i] = 1;
beam.colIdxInBeam[i] = lblPos - begPos;
}
}
beam.rowIdxInBeam[i] = i;
}
......@@ -169,22 +207,33 @@ void genGroundTruth(vector<SingleBeamExpansion>& beamExpansions,
for (size_t i = 2; i < beamExpansions.size(); ++i) {
SingleBeamExpansion& curBeam = beamExpansions[i];
SingleBeamExpansion& prevBeam = beamExpansions[i - 1];
curBeam.groundTruth.resize(seqNum, 0);
curBeam.inBeam.resize(seqNum, 0);
curBeam.rowIdxInBeam.resize(seqNum, -1);
curBeam.resetGroundTruth(seqNum);
// iterate over each sequence
for (size_t j = 0; j < seqNum; ++j) {
if (prevBeam.inBeam[j]) {
// gold sequence falls in the beam in previous search.
auto begPos = prevBeam.selectedIndices.begin();
auto endPos = begPos + prevBeam.rowIdxInBeam[j] * beamSize;
size_t totalExpansion =
prevBeam.rowIdxInBeam[j] * beamSize - count(begPos, endPos, -1.);
curBeam.rowIdxInBeam[j] = totalExpansion + prevBeam.groundTruth[j];
if (!prevBeam.inBeam[j]) continue;
// gold sequence falls in the beam in previous search.
real* begPos = prevBeam.selectedIndices.data();
int offset =
prevBeam.rowIdxInBeam[j] * beamSize + prevBeam.colIdxInBeam[j];
curBeam.rowIdxInBeam[j] = count_if(
begPos, begPos + offset, [](const real& val) { return val != -1.; });
if (randFloat() > 0.5) {
// force the randomly generated label falls in the beam by chance 0.5.
// otherwise, when sequence length is relatively long and beam size is
// relatively small, the gold sequences falls off the beam at in
// the first search.
real* start =
curBeam.selectedIndices.data() + curBeam.rowIdxInBeam[j] * beamSize;
int n = rand() % count_if(start, start + beamSize, [](const real& val) {
return val != -1.;
});
curBeam.colIdxInBeam[j] = n;
curBeam.groundTruth[j] = *(start + n);
curBeam.inBeam[j] = 1;
} else {
CHECK_LE(curBeam.rowIdxInBeam[j] + 1,
curBeam.subSeqStartPos.size() - 1);
int start = curBeam.subSeqStartPos[curBeam.rowIdxInBeam[j]];
......@@ -193,16 +242,14 @@ void genGroundTruth(vector<SingleBeamExpansion>& beamExpansions,
int label = rand() % (end - start);
curBeam.groundTruth[j] = label;
auto findBeg = curBeam.selectedIndices.begin() +
curBeam.rowIdxInBeam[j] * beamSize;
auto findEnd = findBeg + beamSize;
if (find(findBeg, findEnd, real(label)) != findEnd)
real* findBeg =
curBeam.selectedIndices.data() + curBeam.rowIdxInBeam[j] * beamSize;
real* lblPos =
find(findBeg, findBeg + beamSize, static_cast<real>(label));
if (lblPos != (findBeg + beamSize)) {
curBeam.inBeam[j] = 1;
} else {
// in previous search, gold sequence has fallen off the beam,
// the beam search stops, here use -1 as a dummy label.
// It will not used in calculation the cost.
beamExpansions[i].groundTruth[j] = -1;
curBeam.colIdxInBeam[j] = lblPos - findBeg;
}
}
}
}
......@@ -230,15 +277,12 @@ void genRandomBeamExpansion(size_t expansionCount,
genGroundTruth(beamExpansions, beamSize);
}
void testCrossEntropyOverBeam(bool useGpu) {
void testCrossEntropyOverBeam(bool useGpu,
size_t beamSize,
vector<SingleBeamExpansion>& beams) {
TestConfig config;
config.layerConfig.set_type("cross_entropy_over_beam");
const size_t expansionCount = 3;
const size_t beamSize = MAX_BEAM_SIZE;
vector<SingleBeamExpansion> beams;
genRandomBeamExpansion(expansionCount, beamSize, beams);
size_t seqNum = 0;
for (size_t i = 1; i < beams.size(); ++i) {
const SingleBeamExpansion& beam = beams[i];
......@@ -291,7 +335,17 @@ void testCrossEntropyOverBeam(bool useGpu) {
}
TEST(Layer, CrossEntropyOverBeam) {
for (bool useGpu : {false, true}) testCrossEntropyOverBeam(useGpu);
LOG(INFO) << "SEED = " << SEED;
const size_t beamSize = 1 + rand() % MAX_BEAM_SIZE;
LOG(INFO) << "beamSize = " << beamSize;
// TODO(caoying): test with more beam expansions.
const size_t expansionCount = 3;
vector<SingleBeamExpansion> beams;
genRandomBeamExpansion(expansionCount, beamSize, beams);
for (bool useGpu : {false, true})
testCrossEntropyOverBeam(useGpu, beamSize, beams);
}
int main(int argc, char** argv) {
......@@ -299,7 +353,7 @@ int main(int argc, char** argv) {
hl_start();
hl_init(FLAGS_gpu_id);
FLAGS_thread_local_rand_use_global_seed = true;
srand(1);
srand(SEED);
testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册
反馈
建议
客服 返回
顶部