When using layers.embedding, is_sparse=True is much slower than is_sparse=False
Created by: LihangLiu
使用 layers.embedding 的时候，发现 is_sparse=True 比 is_sparse=False 还慢很多。代码如下，环境为 fluid 1.1，net 函数中用到了 embedding。
import paddle
import paddle.fluid as fluid
from paddle.fluid import layers
from paddle.fluid.param_attr import ParamAttr
import numpy as np
import time
def fluid_create_lod_tensor(array, lod, place):
    """Wrap ``array`` in a ``fluid.LoDTensor`` located on ``place``.

    Args:
        array: Data converted with ``np.array`` before being stored.
        lod: LoD information handed to ``LoDTensor.set_lod``.
        place: Device placement handed to ``LoDTensor.set``.

    Returns:
        The populated ``fluid.LoDTensor``.
    """
    data = np.array(array)
    lod_tensor = fluid.LoDTensor()
    lod_tensor.set(data, place)
    lod_tensor.set_lod(lod)
    return lod_tensor
def net(is_sparse=False, vocab_size=1000000, embed_dim=16):
    """Build the benchmark graph: embedding -> fc -> mean of squared output.

    An Adam optimizer is attached to the loss, so calling this inside a
    ``fluid.program_guard`` adds both the forward and the update ops to the
    guarded program.

    Args:
        is_sparse: Forwarded to ``layers.embedding``. Defaults to ``False``,
            matching the previously hard-coded value; pass ``True`` to
            benchmark the sparse path without editing this function.
        vocab_size: Number of rows in the embedding table.
        embed_dim: Width of each embedding vector.

    Returns:
        The loss variable (mean of the squared fc output).
    """
    x = layers.data(name='x', shape=[-1, 1], dtype='int64')

    # The embedding table is Xavier-initialised and its gradient is clipped
    # by value with a bound of 1.0.
    embed_attr = ParamAttr(
        name='embed',
        initializer=fluid.initializer.Xavier(),
        gradient_clip=fluid.clip.GradientClipByValue(1.0),
    )
    embed = layers.embedding(
        x,
        size=[vocab_size, embed_dim],
        param_attr=embed_attr,
        is_sparse=is_sparse,
    )

    out = layers.fc(embed, size=1)
    loss = layers.reduce_mean(layers.square(out))

    opt = fluid.optimizer.AdamOptimizer(
        learning_rate=0.01, beta1=0.9, beta2=0.999)
    opt.minimize(loss)
    return loss
def train():
    """Build the network, initialise it on CPU and time ten training steps.

    Prints the mean wall-clock duration of the ten ``exe.run`` calls.
    """
    main_program = fluid.Program()
    with fluid.program_guard(main_program):
        loss = net()
    # Kept from the original script; the clone is not used afterwards.
    eval_program = main_program.clone(for_test=True)

    cpu = fluid.CPUPlace()
    executor = fluid.Executor(cpu)
    # Run the default startup program before the first training step.
    executor.run(fluid.default_startup_program())

    # Input batch: ten rows, each holding the single id 1, with an empty LoD.
    ids = np.ones([10, 1]).astype('int64')
    feed_tensor = fluid_create_lod_tensor(ids, [], cpu)

    durations = []
    for _ in range(10):
        started = time.time()
        executor.run(main_program, feed={'x': feed_tensor}, fetch_list=[loss])
        durations.append(time.time() - started)

    mean_duration = np.mean(durations)
    print('time', mean_duration)
# Script entry point: run the timing benchmark only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    train()