reduce_sum多卡并行计算问题
Created by: dyning
@qingqing01 @panyx0718
reduce_sum 在 CPU 和单卡(单 GPU)上计算时结果正常,但使用 ParallelExecutor 进行多卡并行计算时,结果与预期不一致。复现代码如下,入口函数为 test_reduce_sum_functions:
import numpy as np
def test_data_gen(place):
    """Generate one feed batch of sparse 0/1 rows plus its expected sum.

    Stacks 18 float32 blocks of shape (1000000, 1), each with 100 randomly
    chosen rows set to 1.0, into a single array and loads it into a
    ``fluid.LoDTensor`` on ``place``. A single-level LoD records the
    cumulative row offset at the end of every block.

    Args:
        place: Device place handed to ``LoDTensor.set``.

    Returns:
        A ``(feed_dict, total_num)`` tuple. ``feed_dict`` maps
        ``"select_random_idx"`` to the populated tensor; ``total_num`` is
        ``np.sum`` over the stacked array.
    """
    import paddle
    import paddle.fluid as fluid
    import random

    offsets = [0]
    blocks = []
    for _ in range(18):
        block = np.zeros((1000000, 1), dtype=np.float32)
        # Flag 100 distinct, randomly chosen rows of this block.
        block[random.sample(range(1000000), 100)] = 1.0
        blocks.append(block)
        # Each LoD entry is the running row count after this block.
        offsets.append(offsets[-1] + block.shape[0])

    stacked = np.vstack(blocks)
    expected_total = np.sum(stacked)

    tensor = fluid.LoDTensor()
    tensor.set(stacked, place)
    tensor.set_lod([offsets])
    return {"select_random_idx": tensor}, expected_total
def test_reduce_sum_functions():
    """Reproduce the multi-GPU ``reduce_sum`` mismatch.

    Declares a float32 LoD input named ``select_random_idx``, sums it along
    dimension 0 with ``fluid.layers.reduce_sum``, and runs the program
    through a ``fluid.ParallelExecutor`` created with ``use_cuda=True``.
    Each iteration feeds a fresh batch from ``test_data_gen`` and compares
    the NumPy sum of the fetched result with the total computed on the host.

    The loop runs until a mismatch is found: it prints "ok" after every
    matching iteration, and on the first mismatch prints the fetched values
    and the expected total, then exits the process with status -1.
    """
    # Imported locally, like the other imports in this script; required by
    # the sys.exit() call on the failure path.
    import sys

    import paddle.fluid as fluid

    select_random_idx = fluid.layers.data(
        name='select_random_idx', shape=[1], dtype='float32', lod_level=1)
    sample_num = fluid.layers.reduce_sum(select_random_idx, dim=0)

    place = fluid.CUDAPlace(0)
    # Alternative kept from the report: place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())
    # Alternative kept from the report:
    # fluid.memory_optimize(fluid.default_main_program())
    train_exe = fluid.ParallelExecutor(use_cuda=True)

    while True:
        feed_dict, total_num = test_data_gen(place)
        results = train_exe.run([sample_num.name], feed=feed_dict)
        tmp = np.array(results[0])
        tmp_sum = np.sum(tmp)
        if tmp_sum != total_num:
            # Formatted explicitly so the output is the same on Python 2
            # and Python 3.
            print("%s %s" % (tmp, total_num))
            sys.exit(-1)
        print("ok")
# Script entry point: run the reproduction as soon as the file is executed.
test_reduce_sum_functions()