diff --git a/paddlepalm/task_paradigm/mlm.py b/paddlepalm/task_paradigm/mlm.py index 2e17dfe914c87c67508eaa5676dcc2b38a4122ef..c82641ac591ac4bf46d0d19abb031a2fb5cdf8fa 100644 --- a/paddlepalm/task_paradigm/mlm.py +++ b/paddlepalm/task_paradigm/mlm.py @@ -57,6 +57,7 @@ class TaskParadigm(task_paradigm): # 多任务学习时才需要引入这个,防止其他run其他任务时导致seqlen过小,gather超范围 max_position = inputs["reader"]["batchsize_x_seqlen"] - 1 mask_pos = fluid.layers.elementwise_min(mask_pos, max_position) + mask_pos.stop_gradient = True word_emb = inputs["backbone"]["embedding_table"] enc_out = inputs["backbone"]["encoder_outputs"] diff --git a/paddlepalm/task_paradigm/mrc.py b/paddlepalm/task_paradigm/mrc.py index ad2e5f9f2fabdb1ba4111e57fede0bd1c92d2db3..795dd5b57ad4f9b02cf06b4ebb6a2c6cc9036a81 100644 --- a/paddlepalm/task_paradigm/mrc.py +++ b/paddlepalm/task_paradigm/mrc.py @@ -80,6 +80,8 @@ class TaskParadigm(task_paradigm): max_position = inputs["reader"]["seqlen"] - 1 start_positions = fluid.layers.elementwise_min(start_positions, max_position) end_positions = fluid.layers.elementwise_min(end_positions, max_position) + start_positions.stop_gradient = True + end_positions.stop_gradient = True else: unique_id = inputs['reader']['unique_ids'] diff --git a/run_demo2.sh b/run_demo2.sh index bb0f2f119bd5a29e74552764b7ec2d36d3bc88c6..128910ebe9b365c4dd7f8a3712093cea4b04444a 100755 --- a/run_demo2.sh +++ b/run_demo2.sh @@ -1,7 +1,4 @@ export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 -while true -do - python -u demo2.py -done +python -u demo2.py # GLOG_vmodule=lookup_table_op=4 python -u demo2.py > debug2.log 2>&1