未验证 提交 865a714e 编写于 作者: Y Yu Yang 提交者: GitHub

Merge pull request #7970 from reyoung/feature/test_w2v_parallel.do

Make word2vec use parallel.do when running in CI
...@@ -68,7 +68,32 @@ class SumKernel : public framework::OpKernel<T> { ...@@ -68,7 +68,32 @@ class SumKernel : public framework::OpKernel<T> {
} }
} }
} else if (out_var->IsType<framework::SelectedRows>()) { } else if (out_var->IsType<framework::SelectedRows>()) {
PADDLE_ENFORCE(!in_place, "SelectedRows not support inplace sum now"); std::unique_ptr<framework::SelectedRows> in0;
if (in_place) {
// When in_place, store input[0] in in0
auto &in_sel0 = in_vars[0]->Get<SelectedRows>();
auto &rows = in_sel0.rows();
#ifdef PADDLE_WITH_CUDA
std::vector<int64_t> rows_in_cpu;
rows_in_cpu.reserve(rows.size());
for (auto item : rows) {
rows_in_cpu.push_back(item);
}
in0.reset(new framework::SelectedRows(rows_in_cpu, in_sel0.height()));
#else
in0.reset(new framework::SelectedRows(rows, in_sel0.height()));
#endif
in0->mutable_value()->ShareDataWith(in_sel0.value());
}
auto get_selected_row = [&](size_t i) -> const SelectedRows & {
if (i == 0 && in0) {
return *in0.get();
} else {
return in_vars[i]->Get<SelectedRows>();
}
};
auto *out = context.Output<SelectedRows>("Out"); auto *out = context.Output<SelectedRows>("Out");
out->mutable_rows()->clear(); out->mutable_rows()->clear();
auto *out_value = out->mutable_value(); auto *out_value = out->mutable_value();
...@@ -76,24 +101,26 @@ class SumKernel : public framework::OpKernel<T> { ...@@ -76,24 +101,26 @@ class SumKernel : public framework::OpKernel<T> {
// Runtime InferShape // Runtime InferShape
size_t first_dim = 0; size_t first_dim = 0;
for (int i = 0; i < N; i++) { for (int i = 0; i < N; i++) {
first_dim += in_vars[i]->Get<SelectedRows>().rows().size(); auto &sel_row = get_selected_row(i);
first_dim += sel_row.rows().size();
} }
auto in_dim = in_vars[0]->Get<SelectedRows>().value().dims(); auto in_dim =
auto in_dim_vec = framework::vectorize(in_dim); framework::vectorize(get_selected_row(N - 1).value().dims());
in_dim_vec[0] = static_cast<int64_t>(first_dim); in_dim[0] = static_cast<int64_t>(first_dim);
out_value->Resize(framework::make_ddim(in_dim_vec)); out_value->Resize(framework::make_ddim(in_dim));
out_value->mutable_data<T>(context.GetPlace()); out_value->mutable_data<T>(context.GetPlace());
math::SelectedRowsAddTo<DeviceContext, T> functor; math::SelectedRowsAddTo<DeviceContext, T> functor;
int64_t offset = 0; int64_t offset = 0;
for (int i = 0; i < N; i++) { for (int i = 0; i < N; i++) {
PADDLE_ENFORCE_EQ(out->height(), auto &sel_row = get_selected_row(i);
in_vars[i]->Get<SelectedRows>().height());
functor(context.template device_context<DeviceContext>(), PADDLE_ENFORCE_EQ(out->height(), sel_row.height());
in_vars[i]->Get<SelectedRows>(), offset, out); functor(context.template device_context<DeviceContext>(), sel_row,
offset += in_vars[i]->Get<SelectedRows>().value().numel(); offset, out);
offset += sel_row.value().numel();
} }
} else if (out_var->IsType<framework::LoDTensorArray>()) { } else if (out_var->IsType<framework::LoDTensorArray>()) {
auto &out_array = *out_var->GetMutable<framework::LoDTensorArray>(); auto &out_array = *out_var->GetMutable<framework::LoDTensorArray>();
......
...@@ -12,76 +12,145 @@ ...@@ -12,76 +12,145 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import numpy as np
import paddle.v2 as paddle import paddle.v2 as paddle
import paddle.v2.fluid as fluid import paddle.v2.fluid as fluid
import unittest
import os
PASS_NUM = 100
EMBED_SIZE = 32 def main(use_cuda, is_sparse, parallel):
HIDDEN_SIZE = 256 if use_cuda and not fluid.core.is_compiled_with_cuda():
N = 5 return
BATCH_SIZE = 32
IS_SPARSE = True PASS_NUM = 100
EMBED_SIZE = 32
word_dict = paddle.dataset.imikolov.build_dict() HIDDEN_SIZE = 256
dict_size = len(word_dict) N = 5
BATCH_SIZE = 32
first_word = fluid.layers.data(name='firstw', shape=[1], dtype='int64') IS_SPARSE = is_sparse
second_word = fluid.layers.data(name='secondw', shape=[1], dtype='int64')
third_word = fluid.layers.data(name='thirdw', shape=[1], dtype='int64') def __network__(words):
forth_word = fluid.layers.data(name='forthw', shape=[1], dtype='int64') embed_first = fluid.layers.embedding(
next_word = fluid.layers.data(name='nextw', shape=[1], dtype='int64') input=words[0],
size=[dict_size, EMBED_SIZE],
embed_first = fluid.layers.embedding( dtype='float32',
input=first_word, is_sparse=IS_SPARSE,
size=[dict_size, EMBED_SIZE], param_attr='shared_w')
dtype='float32', embed_second = fluid.layers.embedding(
is_sparse=IS_SPARSE, input=words[1],
param_attr='shared_w') size=[dict_size, EMBED_SIZE],
embed_second = fluid.layers.embedding( dtype='float32',
input=second_word, is_sparse=IS_SPARSE,
size=[dict_size, EMBED_SIZE], param_attr='shared_w')
dtype='float32', embed_third = fluid.layers.embedding(
is_sparse=IS_SPARSE, input=words[2],
param_attr='shared_w') size=[dict_size, EMBED_SIZE],
embed_third = fluid.layers.embedding( dtype='float32',
input=third_word, is_sparse=IS_SPARSE,
size=[dict_size, EMBED_SIZE], param_attr='shared_w')
dtype='float32', embed_forth = fluid.layers.embedding(
is_sparse=IS_SPARSE, input=words[3],
param_attr='shared_w') size=[dict_size, EMBED_SIZE],
embed_forth = fluid.layers.embedding( dtype='float32',
input=forth_word, is_sparse=IS_SPARSE,
size=[dict_size, EMBED_SIZE], param_attr='shared_w')
dtype='float32',
is_sparse=IS_SPARSE, concat_embed = fluid.layers.concat(
param_attr='shared_w') input=[embed_first, embed_second, embed_third, embed_forth], axis=1)
hidden1 = fluid.layers.fc(input=concat_embed,
concat_embed = fluid.layers.concat( size=HIDDEN_SIZE,
input=[embed_first, embed_second, embed_third, embed_forth], axis=1) act='sigmoid')
hidden1 = fluid.layers.fc(input=concat_embed, size=HIDDEN_SIZE, act='sigmoid') predict_word = fluid.layers.fc(input=hidden1,
predict_word = fluid.layers.fc(input=hidden1, size=dict_size, act='softmax') size=dict_size,
cost = fluid.layers.cross_entropy(input=predict_word, label=next_word) act='softmax')
avg_cost = fluid.layers.mean(x=cost) cost = fluid.layers.cross_entropy(input=predict_word, label=words[4])
sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001) avg_cost = fluid.layers.mean(x=cost)
sgd_optimizer.minimize(avg_cost) return avg_cost
train_reader = paddle.batch( word_dict = paddle.dataset.imikolov.build_dict()
paddle.dataset.imikolov.train(word_dict, N), BATCH_SIZE) dict_size = len(word_dict)
place = fluid.CPUPlace() first_word = fluid.layers.data(name='firstw', shape=[1], dtype='int64')
exe = fluid.Executor(place) second_word = fluid.layers.data(name='secondw', shape=[1], dtype='int64')
feeder = fluid.DataFeeder( third_word = fluid.layers.data(name='thirdw', shape=[1], dtype='int64')
feed_list=[first_word, second_word, third_word, forth_word, next_word], forth_word = fluid.layers.data(name='forthw', shape=[1], dtype='int64')
place=place) next_word = fluid.layers.data(name='nextw', shape=[1], dtype='int64')
exe.run(fluid.default_startup_program()) if not parallel:
avg_cost = __network__(
for pass_id in range(PASS_NUM): [first_word, second_word, third_word, forth_word, next_word])
for data in train_reader(): else:
avg_cost_np = exe.run(fluid.default_main_program(), places = fluid.layers.get_places()
feed=feeder.feed(data), pd = fluid.layers.ParallelDo(places)
fetch_list=[avg_cost]) with pd.do():
if avg_cost_np[0] < 5.0: avg_cost = __network__(
exit(0) # if avg cost is less than 5.0, we think our code is good. map(pd.read_input, [
exit(1) first_word, second_word, third_word, forth_word, next_word
]))
pd.write_output(avg_cost)
avg_cost = fluid.layers.mean(x=pd())
sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001)
sgd_optimizer.minimize(avg_cost)
train_reader = paddle.batch(
paddle.dataset.imikolov.train(word_dict, N), BATCH_SIZE)
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
exe = fluid.Executor(place)
feeder = fluid.DataFeeder(
feed_list=[first_word, second_word, third_word, forth_word, next_word],
place=place)
exe.run(fluid.default_startup_program())
for pass_id in range(PASS_NUM):
for data in train_reader():
avg_cost_np = exe.run(fluid.default_main_program(),
feed=feeder.feed(data),
fetch_list=[avg_cost])
if avg_cost_np[0] < 5.0:
return
raise AssertionError("Cost is too large {0:2.2}".format(avg_cost_np[0]))
FULL_TEST = os.getenv('FULL_TEST',
'0').lower() in ['true', '1', 't', 'y', 'yes', 'on']
SKIP_REASON = "Only run minimum number of tests in CI server, to make CI faster"
class W2VTest(unittest.TestCase):
pass
def inject_test_method(use_cuda, is_sparse, parallel):
fn_name = "test_{0}_{1}_{2}".format("cuda" if use_cuda else "cpu", "sparse"
if is_sparse else "dense", "parallel"
if parallel else "normal")
def __impl__(*args, **kwargs):
prog = fluid.Program()
startup_prog = fluid.Program()
scope = fluid.core.Scope()
with fluid.scope_guard(scope):
with fluid.program_guard(prog, startup_prog):
main(use_cuda=use_cuda, is_sparse=is_sparse, parallel=parallel)
if use_cuda and is_sparse and parallel:
fn = __impl__
else:
# skip the other test when on CI server
fn = unittest.skipUnless(
condition=FULL_TEST, reason=SKIP_REASON)(__impl__)
setattr(W2VTest, fn_name, fn)
for use_cuda in (False, True):
for is_sparse in (False, True):
for parallel in (False, True):
inject_test_method(use_cuda, is_sparse, parallel)
if __name__ == '__main__':
unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册