From 3f6fc10b9fc6da75961bab0f7a473dc388d07f51 Mon Sep 17 00:00:00 2001
From: Yancey1989
Date: Tue, 10 Apr 2018 14:23:09 +0800
Subject: [PATCH] new op that init table value randomly

---
 .../operators/uniform_random_table_op.cc      | 144 ++++++++++++++++++
 .../unittests/test_uniform_random_table_op.py |  66 ++++++++
 2 files changed, 210 insertions(+)
 create mode 100644 paddle/fluid/operators/uniform_random_table_op.cc
 create mode 100644 python/paddle/fluid/tests/unittests/test_uniform_random_table_op.py

diff --git a/paddle/fluid/operators/uniform_random_table_op.cc b/paddle/fluid/operators/uniform_random_table_op.cc
new file mode 100644
index 00000000000..4664cc5d93f
--- /dev/null
+++ b/paddle/fluid/operators/uniform_random_table_op.cc
@@ -0,0 +1,144 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <random>
+
+#include "paddle/fluid/framework/data_type.h"
+#include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/operators/math/math_function.h"
+#include "paddle/fluid/platform/device_context.h"
+
+namespace paddle {
+namespace operators {
+
+class UniformRandomTableInferShape : public framework::InferShapeBase {
+ public:
+  void operator()(framework::InferShapeContext *ctx) const override {
+    VLOG(3) << "Infershape...";
+    PADDLE_ENFORCE(ctx->HasOutput("Out"),
+                   "Output(Out) of UniformRandomTableOp should not be null.");
+
+    PADDLE_ENFORCE(
+        ctx->Attrs().Get<float>("min") < ctx->Attrs().Get<float>("max"),
+        "uniform_random's min must be less than max");
+    auto &shape = ctx->Attrs().Get<std::vector<int>>("shape");
+    std::vector<int64_t> temp;
+    temp.reserve(shape.size());
+    for (auto dim : shape) {
+      temp.push_back(static_cast<int64_t>(dim));
+    }
+    ctx->SetOutputDim("Out", framework::make_ddim(temp));
+  }
+};
+
+class UniformRandomTableOp : public framework::OperatorBase {
+ public:
+  using framework::OperatorBase::OperatorBase;
+
+ private:
+  void RunImpl(const framework::Scope &scope,
+               const platform::Place &dev_place) const override {
+    VLOG(3) << "RunImpl...";
+    auto out =
+        scope.FindVar(Output("Out"))->GetMutable<framework::SelectedRows>();
+    auto shard_cnt = Attr<int>("shard_cnt");
+    auto shard_id = Attr<int>("shard_id");
+    auto max_id = Attr<int>("max_id");
+    auto shape = Attr<std::vector<int>>("shape");
+
+    auto tensor = out->mutable_value();
+    tensor->Resize(framework::make_ddim(shape));
+    // Only allocate the memory of the large table on CPU.
+    auto cpu = platform::CPUPlace();
+    float *data = tensor->mutable_data<float>(cpu);
+    VLOG(3) << "generate seed";
+    unsigned int seed = static_cast<unsigned int>(Attr<int>("seed"));
+    std::minstd_rand engine;
+    if (seed == 0) {
+      seed = std::random_device()();
+    }
+    engine.seed(seed);
+    std::uniform_real_distribution<float> dist(Attr<float>("min"),
+                                               Attr<float>("max"));
+    int64_t size = tensor->numel();
+    for (int64_t i = 0; i < size; ++i) {
+      data[i] = dist(engine);
+    }
+    // Initialize rows by round-robin.
+    // TODO(Yancey1989): need to support other ways to distribute Ids
+    VLOG(3) << "calculate rows_size...";
+    int64_t rows_size = 0;
+    if (max_id % shard_cnt == 0) {
+      rows_size = max_id / shard_cnt;
+    } else {
+      rows_size = max_id / shard_cnt + 1;
+    }
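+    // e.g. with max_id = 10, shard_cnt = 3 and shard_id = 1 (the values used
+    // in the unit test below), rows_size = 10 / 3 + 1 = 4 and the loop below
+    // fills rows with {1, 4, 7, 10}: every shard_cnt-th id starting from
+    // shard_id.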
+    auto *rows = out->mutable_rows();
+    rows->resize(rows_size);
+    (*rows)[0] = shard_id;
+    for (int64_t idx = 1; idx < rows_size; ++idx) {
+      (*rows)[idx] = (*rows)[idx - 1] + shard_cnt;
+    }
+    out->set_height(max_id);
+  }
+};
+
+class UniformRandomTableOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  UniformRandomTableOpMaker(OpProto *proto, OpAttrChecker *op_checker)
+      : framework::OpProtoAndCheckerMaker(proto, op_checker) {
+    AddOutput("Out",
+              "(SelectedRows) "
+              "The output table of uniform random table op.");
+    AddComment(R"DOC(
+Uniform random operator for initializing a table.
+
+This operator initializes a SelectedRows with random values sampled from a
+uniform distribution.
+
+)DOC");
+    AddAttr<int>("max_id",
+                 "(int, required) "
+                 "The maximal Id for the table.");
+    AddAttr<int>("shard_cnt",
+                 "(int, required) "
+                 "The count of shards for distributing the table.");
+    AddAttr<int>("shard_id", "(int, required) The current shard ID.");
+    AddAttr<std::vector<int>>("shape",
+                              "(vector<int>) The shape of the output tensor.");
+    AddAttr<float>("min",
+                   "(float, default -1.0) "
+                   "Minimum value of uniform random.")
+        .SetDefault(-1.0f);
+    AddAttr<float>("max",
+                   "(float, default 1.0) "
+                   "Maximum value of uniform random.")
+        .SetDefault(1.0f);
+    AddAttr<int>("seed",
+                 "(int, default 0) "
+                 "Random seed used for generating samples. "
+                 "0 means use a seed generated by the system. "
+                 "Note that if seed is not 0, this operator will always "
+                 "generate the same random numbers every time.")
+        .SetDefault(0);
+    AddAttr<int>("dtype", "(int, default 5 (FP32)) Output tensor data type.")
+        .SetDefault(framework::proto::VarType::FP32);
+  }
+};
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+REGISTER_OPERATOR(uniform_random_table, ops::UniformRandomTableOp,
+                  ops::UniformRandomTableInferShape,
+                  ops::UniformRandomTableOpMaker,
+                  paddle::framework::EmptyGradOpMaker);
diff --git a/python/paddle/fluid/tests/unittests/test_uniform_random_table_op.py b/python/paddle/fluid/tests/unittests/test_uniform_random_table_op.py
new file mode 100644
index 00000000000..0474c51e49d
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/test_uniform_random_table_op.py
@@ -0,0 +1,66 @@
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+import numpy as np
+from op_test import OpTest
+import paddle.fluid.core as core
+from paddle.fluid.op import Operator
+
+
+def output_hist(out):
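+    # np.histogram defaults to 10 bins, so over range (-5, 10) each bin is
+    # 1.5 wide; a uniform sample should land in every bin with probability
+    # ~0.1, which is what `prob` encodes for the allclose check below.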
+    hist, _ = np.histogram(out, range=(-5, 10))
+    hist = hist.astype("float32")
+    hist /= float(out.size)
+    prob = 0.1 * np.ones((10))
+    return hist, prob
+
+
+class TestUniformRandomTableOp(unittest.TestCase):
+    def get_places(self):
+        places = [core.CPUPlace()]
+        if core.is_compiled_with_cuda():
+            places.append(core.CUDAPlace(0))
+        return places
+
+    def test_check_output(self):
+        for place in self.get_places():
+            self.check_with_place(place)
+
+    def check_with_place(self, place):
+        scope = core.Scope()
+        out = scope.var("X").get_selected_rows()
+
+        op = Operator(
+            "uniform_random_table",
+            Out="X",
+            shape=[4, 784],
+            min=-5.0,
+            max=10.0,
+            seed=10,
+            shard_cnt=3,
+            shard_id=1,
+            max_id=10)
+        op.run(scope, place)
+        self.assertEqual(out.rows(), [1, 4, 7, 10])
+        self.assertEqual(out.height(), 10)
+        self.assertEqual(out.get_tensor().shape(), [4, 784])
+        hist, prob = output_hist(np.array(out.get_tensor()))
+        self.assertTrue(
+            np.allclose(
+                hist, prob, rtol=0, atol=0.01), "hist: " + str(hist))
+
+
+if __name__ == "__main__":
+    unittest.main()
--
GitLab