提交 d5900853 编写于 作者: J jhjiangcs

add mpc-fm demo and mpc_elementwise_mul op.

上级 1efedf94
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/op_registry.h"
#include "mpc_elementwise_mul_op.h"
namespace paddle {
namespace operators {
// Operator definition for the MPC element-wise multiplication.
// It validates the presence of its inputs/outputs and infers the output shape.
class MpcElementwiseMulOp : public framework::OperatorWithKernel {
public:
    using framework::OperatorWithKernel::OperatorWithKernel;

    // Verifies that X, Y and Out exist and that the rank of X is not smaller
    // than the rank of Y, then lets Out share the dims and LoD of X.
    void InferShape(framework::InferShapeContext* ctx) const override {
        PADDLE_ENFORCE_EQ(
            ctx->HasInput("X"), true,
            platform::errors::NotFound("Input(X) of MpcElementwiseMulOp should not be null."));
        PADDLE_ENFORCE_EQ(
            ctx->HasInput("Y"), true,
            platform::errors::NotFound("Input(Y) of MpcElementwiseMulOp should not be null."));
        PADDLE_ENFORCE_EQ(
            ctx->HasOutput("Out"), true,
            platform::errors::NotFound("Output(Out) of MpcElementwiseMulOp should not be null."));

        const auto x_dims = ctx->GetInputDim("X");
        const auto y_dims = ctx->GetInputDim("Y");
        // The check is ">=" (Y may have fewer dimensions than X), so the message
        // must say so instead of claiming that the two ranks have to be equal.
        PADDLE_ENFORCE_GE(
            x_dims.size(), y_dims.size(),
            platform::errors::InvalidArgument(
                "The rank of X should be greater than or equal to the rank of Y. "
                "But received the dimensions of X is [%s], the dimensions of Y is [%s]",
                x_dims, y_dims));

        ctx->ShareDim("X", /*->*/ "Out");
        ctx->ShareLoD("X", /*->*/ "Out");
    }
};
// Declares the proto of the mpc_elementwise_mul op: two inputs (X, Y),
// one output (Out) and the integer attribute `axis`.
class MpcElementwiseMulOpMaker : public framework::OpProtoAndCheckerMaker {
public:
    void Make() override {
        // Tensors taking part in the multiplication.
        AddInput("X", "(Tensor), The first input tensor of mpc elementwise mul op.");
        AddInput("Y", "(Tensor), The second input tensor of mpc elementwise mul op.");
        AddOutput("Out", "(Tensor), The output tensor of mpc elementwise mul op.");

        // `axis` defaults to -1 and is constrained to values >= -1.
        AddAttr<int>(
            "axis",
            "(int, default -1). If X.dimension != Y.dimension,"
            "Y.dimension must be a subsequence of x.dimension. "
            "And axis is the start dimension index for broadcasting Y onto X. ")
            .SetDefault(-1)
            .EqualGreaterThan(-1);

        AddComment(R"DOC(
MPC elementwise mul Operator.
)DOC");
    }
};
// Operator definition for the gradient of the MPC element-wise multiplication.
class MpcElementwiseMulGradOp : public framework::OperatorWithKernel {
public:
    using framework::OperatorWithKernel::OperatorWithKernel;

    // Requires X, Y and Out@GRAD as inputs. Each gradient output that is present
    // (X@GRAD, Y@GRAD) takes the dims and LoD of its forward counterpart.
    void InferShape(framework::InferShapeContext *ctx) const override {
        const auto out_grad_name = framework::GradVarName("Out");
        // Typed errors are used here so the checks are reported the same way as
        // the ones in the forward operator defined in this file.
        PADDLE_ENFORCE_EQ(
            ctx->HasInput("X"), true,
            platform::errors::NotFound("Input(X) of MpcElementwiseMulGradOp should not be null."));
        PADDLE_ENFORCE_EQ(
            ctx->HasInput("Y"), true,
            platform::errors::NotFound("Input(Y) of MpcElementwiseMulGradOp should not be null."));
        PADDLE_ENFORCE_EQ(
            ctx->HasInput(out_grad_name), true,
            platform::errors::NotFound(
                "Input(Out@GRAD) of MpcElementwiseMulGradOp should not be null."));

        const auto x_grad_name = framework::GradVarName("X");
        const auto y_grad_name = framework::GradVarName("Y");
        if (ctx->HasOutput(x_grad_name)) {
            ctx->ShareDim("X", /*->*/ x_grad_name);
            ctx->ShareLoD("X", /*->*/ x_grad_name);
        }
        if (ctx->HasOutput(y_grad_name)) {
            ctx->ShareDim("Y", /*->*/ y_grad_name);
            ctx->ShareLoD("Y", /*->*/ y_grad_name);
        }
    }
};
// Builds the description of the mpc_elementwise_mul_grad op from the forward op.
template <typename T>
class MpcElementwiseMulGradMaker : public framework::SingleGradOpMaker<T> {
public:
    using framework::SingleGradOpMaker<T>::SingleGradOpMaker;

protected:
    // Wires the forward inputs X/Y and the gradient of Out into the grad op,
    // exposes X@GRAD / Y@GRAD as its outputs and copies the forward attributes.
    void Apply(GradOpPtr<T> grad_op) const override {
        const auto out_grad_var = framework::GradVarName("Out");
        const auto x_grad_var = framework::GradVarName("X");
        const auto y_grad_var = framework::GradVarName("Y");

        grad_op->SetType("mpc_elementwise_mul_grad");

        // Inputs of the gradient op.
        grad_op->SetInput("X", this->Input("X"));
        grad_op->SetInput("Y", this->Input("Y"));
        grad_op->SetInput(out_grad_var, this->OutputGrad("Out"));

        // Outputs of the gradient op.
        grad_op->SetOutput(x_grad_var, this->InputGrad("X"));
        grad_op->SetOutput(y_grad_var, this->InputGrad("Y"));

        grad_op->SetAttrMap(this->Attrs());
    }
};
} // namespace operators
} // namespace paddle
// Short alias used by the registration macros below.
namespace ops = paddle::operators;
// Forward op: registered with its shape-inference class, its proto maker and
// the maker that generates the matching gradient op description.
REGISTER_OPERATOR(mpc_elementwise_mul, ops::MpcElementwiseMulOp,
ops::MpcElementwiseMulOpMaker,
ops::MpcElementwiseMulGradMaker<paddle::framework::OpDesc>);
// Gradient op: only the operator class is registered.
REGISTER_OPERATOR(mpc_elementwise_mul_grad, ops::MpcElementwiseMulGradOp);
// CPU kernels for both ops; only the int64_t element type is instantiated.
REGISTER_OP_CPU_KERNEL(
mpc_elementwise_mul,
ops::MpcElementwiseMulKernel<paddle::platform::CPUDeviceContext, int64_t>);
REGISTER_OP_CPU_KERNEL(
mpc_elementwise_mul_grad,
ops::MpcElementwiseMulGradKernel<paddle::platform::CPUDeviceContext, int64_t>);
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
// This op differs from the elementwise_mul op of PaddlePaddle.
// When the dimensions of X and Y are equal they are multiplied directly;
// otherwise Y is first expanded to the dimensions of X (see Expand below).
#pragma once
#include "mpc_op.h"
#include "paddle/fluid/platform/transform.h"
#include "mpc_elementwise_add_op.h"
namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
// Expands `in_y_t` into `y_expand_t`, which is allocated with `expand_dims`.
// The destination is zero-filled and then, for each of the SHARE_NUM slices
// along the first dimension, the source slice is added onto the destination
// slice through a row-wise or mid-wise iterator.
// NOTE(review): GetMidDims and the transform iterators are declared outside
// this file; their exact broadcasting contract should be confirmed there.
template <typename DeviceContext, typename T>
void Expand(const framework::LoDTensor* in_y_t, int axis, Tensor* y_expand_t, const framework::DDim &expand_dims, const framework::ExecutionContext &ctx) {
    T* expand_buf = y_expand_t->mutable_data<T>(expand_dims, ctx.GetPlace());
    std::fill(expand_buf, expand_buf + y_expand_t->numel(), static_cast<T>(0));

    for (size_t share = 0; share < SHARE_NUM; ++share) {
        Tensor dst_slice = y_expand_t->Slice(share, share + 1);
        Tensor src_slice = in_y_t->Slice(share, share + 1);

        auto dst_dims = dst_slice.dims();
        auto src_dims = src_slice.dims();

        // -1 means "align the source with the trailing dimensions".
        if (axis == -1) {
            axis = dst_dims.size() - src_dims.size();
        }
        PADDLE_ENFORCE(axis >= 0 && axis < dst_dims.size(),
                       "Axis should be in range [0, x_dims)");

        int pre, n, post;
        GetMidDims get_mid_dims;
        get_mid_dims(dst_dims, src_dims, axis, &pre, &n, &post);

        auto dst_data = dst_slice.data<T>();
        auto src_data = src_slice.data<T>();
        auto dst_numel = dst_slice.numel();

        paddle::platform::Transform<DeviceContext> trans;
        const auto &dev_ctx = ctx.template device_context<DeviceContext>();
        if (post != 1) {
            trans(dev_ctx, dst_data, dst_data + dst_numel,
                  MidWiseTransformIterator<T, DeviceContext>(src_data, n, post),
                  dst_data, AddFunctor<T>());
        } else {
            trans(dev_ctx, dst_data, dst_data + dst_numel,
                  RowwiseTransformIterator<T, DeviceContext>(src_data, n),
                  dst_data, AddFunctor<T>());
        }
    }
}
// Forward kernel: Out = X * Y computed through the MPC operators.
template <typename DeviceContext, typename T>
class MpcElementwiseMulKernel : public MpcOpKernel<T> {
public:
    // Multiplies X and Y directly when their dims are identical; otherwise Y is
    // first expanded to the dims of X and the expanded tensor is multiplied.
    void ComputeImpl(const framework::ExecutionContext &ctx) const override {
        VLOG(3) << "********************";
        VLOG(3) << "MpcElementwiseMulKernel";

        auto *x_tensor = ctx.Input<framework::LoDTensor>("X");
        auto *y_tensor = ctx.Input<framework::LoDTensor>("Y");
        auto *result = ctx.Output<framework::LoDTensor>("Out");
        int axis = ctx.Attr<int>("axis");

        // Allocate the output buffer before handing it to the MPC operators.
        result->mutable_data<T>(ctx.GetPlace());

        if (x_tensor->dims() == y_tensor->dims()) {
            mpc::MpcInstance::mpc_instance()->mpc_protocol()->mpc_operators()->mul(x_tensor, y_tensor, result);
            return;
        }

        // Shapes differ: expand Y to the dims of X, then multiply.
        Tensor expanded_y;
        Expand<DeviceContext, T>(y_tensor, axis, &expanded_y, x_tensor->dims(), ctx);
        mpc::MpcInstance::mpc_instance()->mpc_protocol()->mpc_operators()->mul(x_tensor, &expanded_y, result);
    }
};
// Backward kernel of the MPC element-wise multiplication.
//   dX = dOut * Y            (Y expanded to the dims of X when shapes differ)
//   dY = reduce(dOut * X)    (reduced back to the dims of Y when shapes differ)
template <typename DeviceContext, typename T>
class MpcElementwiseMulGradKernel : public MpcOpKernel<T> {
public:
    void ComputeImpl(const framework::ExecutionContext &ctx) const override {
        VLOG(3) << "********************";
        VLOG(3) << "MpcElementwiseMulGradKernel";

        auto *in_x_t = ctx.Input<framework::LoDTensor>("X");
        auto *in_y_t = ctx.Input<framework::LoDTensor>("Y");
        auto *dout = ctx.Input<framework::LoDTensor>(framework::GradVarName("Out"));
        auto *dx = ctx.Output<framework::LoDTensor>(framework::GradVarName("X"));
        auto *dy = ctx.Output<framework::LoDTensor>(framework::GradVarName("Y"));
        int axis = ctx.Attr<int>("axis");

        // Fast path: identical shapes need neither expansion nor reduction.
        // The test is on the full dims (as in the forward kernel), not only on
        // the rank, and the function returns afterwards so that the general
        // path below does not redo both MPC multiplications.
        if (dx && dy && (in_x_t->dims() == in_y_t->dims())) {
            dx->mutable_data<T>(ctx.GetPlace());
            dy->mutable_data<T>(ctx.GetPlace());
            // dx = dout * y
            // dy = dout * x
            mpc::MpcInstance::mpc_instance()->mpc_protocol()->mpc_operators()->mul(dout, in_y_t, dx);
            mpc::MpcInstance::mpc_instance()->mpc_protocol()->mpc_operators()->mul(dout, in_x_t, dy);
            return;
        }

        if (dx) {
            // dx = dout * y_expand
            dx->mutable_data<T>(ctx.GetPlace());
            Tensor y_expand_t;
            // expand in_y_t into y_expand_t (dims: in_x_t->dims())
            Expand<DeviceContext, T>(in_y_t, axis, &y_expand_t, in_x_t->dims(), ctx);
            mpc::MpcInstance::mpc_instance()->mpc_protocol()->mpc_operators()->mul(dout, &y_expand_t, dx);
        }

        if (dy) {
            // dy_expand = dout * x
            // dy = reduce(dy_expand)
            T* dy_data = dy->mutable_data<T>(ctx.GetPlace());
            Tensor dy_expand_t;
            T* dy_expand_t_data = dy_expand_t.mutable_data<T>(in_x_t->dims(), ctx.GetPlace());
            mpc::MpcInstance::mpc_instance()->mpc_protocol()->mpc_operators()->mul(dout, in_x_t, &dy_expand_t);

            // reduce: dy_expand_t (dims: in_x_t->dims()) -> dy (dims: in_y_t->dims())
            auto x_dims = in_x_t->dims();
            auto y_dims = in_y_t->dims();
            axis = (axis == -1 ? x_dims.size() - y_dims.size() : axis);
            PADDLE_ENFORCE(axis >= 0 && axis < x_dims.size(),
                           "Axis should be in range [0, x_dims)");

            // NOTE(review): GetMidDims is declared outside this file; the loop
            // below treats each share as a [pre, n, post] block - confirm.
            int pre, n, post;
            GetMidDims get_mid_dims;
            get_mid_dims(x_dims, y_dims, axis, &pre, &n, &post);

            std::fill(dy_data, dy_data + dy->numel(), static_cast<T>(0));

            // Offsets are kept in size_t so they are not truncated to int for
            // large tensors.
            const size_t pre_sz = static_cast<size_t>(pre);
            const size_t n_sz = static_cast<size_t>(n);
            const size_t post_sz = static_cast<size_t>(post);
            for (size_t i = 0; i < SHARE_NUM; ++i) {
                const size_t y_offset = i * n_sz;
                const size_t share_offset = i * pre_sz * n_sz * post_sz;
                for (size_t j = 0; j < pre_sz; ++j) {
                    for (size_t k = 0; k < n_sz; ++k) {
                        for (size_t m = 0; m < post_sz; ++m) {
                            const size_t out_offset =
                                share_offset + j * n_sz * post_sz + k * post_sz + m;
                            dy_data[k + y_offset] += dy_expand_t_data[out_offset];
                        }
                    }
                }
            }
        }
    }
};
} // namespace operators
} // namespace paddle
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import distutils.util
import sys
import numpy as np
def parse_args():
    """Parse the command-line options of the demo.

    Returns:
        The namespace produced by ``argparse`` with one attribute per option.
    """
    # (option name, value type, default value, help text), in registration order.
    option_table = [
        ('epoch_num', int, 10, 'epoch_num'),
        ('batch_size', int, 5, 'batch_size'),
        ('share_num', int, 2, 'share_num'),
        ('base_lr', float, 0.01, 'base_lr'),
        ('dense_feature_dim', int, 13, 'dense_feature_dim'),
        ('sparse_feature_number', int, 100, 'sparse_feature_number'),
        ('sparse_feature_dim', int, 26, 'sparse_feature_dim'),
        ('embedding_size', int, 9, 'embedding_size'),
        ('num_field', int, 39, 'num_field'),
        ('reg', float, 0.001, 'reg'),
        ('paddle_sample_data_dir', str, './data/sample_data/train', 'paddle_sample_data_dir'),
        ('paddle_train_data_dir', str, './data/train', 'paddle_train_data_dir'),
        ('paddle_test_data_dir', str, './data/test', 'paddle_test_data_dir'),
        ('role', int, 0, 'role'),
        ('server', str, 'localhost', 'server ip'),
        ('port', int, 12345, 'server port'),
        ('mpc_data_dir', str, './mpc_data/', 'mpc_data_dir'),
        ('model_dir', str, './model_dir/', 'model_dir'),
        ('watch_vec_size', int, 64, 'watch_vec_size'),
        ('search_vec_size', int, 64, 'search_vec_size'),
        ('other_feat_size', int, 32, 'other_feat_size'),
        ('output_size', int, 3952, 'output_size'),
        ('topk', int, 10, 'topk'),
    ]

    parser = argparse.ArgumentParser(description=__doc__)
    for opt_name, opt_type, opt_default, opt_help in option_table:
        parser.add_argument(
            '--' + opt_name, type=opt_type, default=opt_default, help=opt_help)
    return parser.parse_args()
# Download the CTR data archive and lay out the full data plus small
# "rapid verification" subsets in the current directory.
# NOTE(review): --no-check-certificate disables TLS certificate verification
# for this download; confirm that this is intended.
wget --no-check-certificate https://fleet.bj.bcebos.com/ctr_data.tar.gz
tar -zxvf ctr_data.tar.gz

# The subsets are populated with explicit paths instead of "cd dir ... && cd ..":
# with the cd-based form, a failing cp skips the "cd .." and every later step
# runs inside the subset directory. mkdir -p also tolerates an existing directory.
mv ./raw_data ./train_data_full
mkdir -p ./train_data
cp ./train_data_full/part-0 ./train_data_full/part-1 ./train_data/

mv ./test_data ./test_data_full
mkdir -p ./test_data
cp ./test_data_full/part-220 ./test_data/

echo "Complete data download."
echo "Full Train data stored in ./train_data_full "
echo "Full Test data stored in ./test_data_full "
echo "Rapid Verification train data stored in ./train_data "
echo "Rapid Verification test data stored in ./test_data "
0 1 1 5 0 1382 4 15 2 181 1 2 2 68fd1e64 80e26c9b fb936136 7b4723c4 25c83c98 7e0ccccf de7995b8 1f89b562 a73ee510 a8cd5504 b2cb9c98 37c9c164 2824a5f6 1adce6ef 8ba8b39a 891b62e7 e5ba7672 f54016b9 21ddcdc9 b1252a9d 07b5194c 3a171ecb c5c50484 e8b83407 9727dd16
0 2 0 44 1 102 8 2 2 4 1 1 4 68fd1e64 f0cf0024 6f67f7e5 41274cd7 25c83c98 fe6b92e5 922afcc0 0b153874 a73ee510 2b53e5fb 4f1b46f3 623049e6 d7020589 b28479f6 e6c5b5cd c92f3b61 07c540c4 b04e4670 21ddcdc9 5840adea 60f6221e 3a171ecb 43f13e8b e8b83407 731c3655
0 2 0 1 14 767 89 4 2 245 1 3 3 45 287e684f 0a519c5c 02cf9876 c18be181 25c83c98 7e0ccccf c78204a1 0b153874 a73ee510 3b08e48b 5f5e6091 8fe001f4 aa655a2f 07d13a8f 6dc710ed 36103458 8efede7f 3412118d e587c466 ad3062eb 3a171ecb 3b183c5c
0 893 4392 0 0 0 0 68fd1e64 2c16a946 a9a87e68 2e17d6f6 25c83c98 fe6b92e5 2e8a689b 0b153874 a73ee510 efea433b e51ddf94 a30567ca 3516f6e6 07d13a8f 18231224 52b8680f 1e88c74f 74ef3502 6b3a5ca6 3a171ecb 9117a34a
0 3 -1 0 2 0 3 0 0 1 1 0 8cf07265 ae46a29d c81688bb f922efad 25c83c98 13718bbd ad9fa255 0b153874 a73ee510 5282c137 e5d8af57 66a76a26 f06c53ac 1adce6ef 8ff4b403 01adbab4 1e88c74f 26b3c7a7 21c9516a 32c7478e b34f3128
0 -1 12824 0 0 6 0 05db9164 6c9c9cf3 2730ec9c 5400db8b 43b19349 6f6d9be8 53b5f978 0b153874 a73ee510 3b08e48b 91e8fc27 be45b877 9ff13f22 07d13a8f 06969a20 9bc7fff5 776ce399 92555263 242bb710 8ec974f4 be7c41b4 72c78f11
0 1 2 3168 0 1 2 0 439a44a4 ad4527a2 c02372d0 d34ebbaa 43b19349 fe6b92e5 4bc6ffea 0b153874 a73ee510 3b08e48b a4609aab 14d63538 772a00d7 07d13a8f f9d1382e b00d3dc9 776ce399 cdfa8259 20062612 93bad2c0 1b256e61
1 1 4 2 0 0 0 1 0 0 1 1 0 68fd1e64 2c16a946 503b9dbc e4dbea90 f3474129 13718bbd 38eb9cf4 1f89b562 a73ee510 547c0ffe bc8c9f21 60ab2f07 46f42a63 07d13a8f 18231224 e6b6bdc7 e5ba7672 74ef3502 5316a17f 32c7478e 9117a34a
0 44 4 8 19010 249 28 31 141 1 8 05db9164 d833535f d032c263 c18be181 25c83c98 7e0ccccf d5b6acf2 0b153874 a73ee510 2acdcf4e 086ac2d2 dfbb09fb 41a6ae00 b28479f6 e2502ec9 84898b2a e5ba7672 42a2edb9 0014c32a 32c7478e 3b183c5c
0 35 1 33737 21 1 2 3 1 1 05db9164 510b40a5 d03e7c24 eb1fd928 25c83c98 52283d1c 0b153874 a73ee510 015ac893 e51ddf94 951fe4a9 3516f6e6 07d13a8f 2ae4121c 8ec71479 d4bb7bd8 70d0f5f9 0e63fca0 32c7478e 0e8fe315
0 2 632 0 56770 0 5 65 0 2 05db9164 0468d672 7ae80d0f 80d8555a 25c83c98 7e0ccccf 04277bf9 0b153874 7cc72ec2 3b08e48b 7e2c5c15 cfc86806 91a1b611 b28479f6 58251aab 146a70fd 776ce399 0b331314 21ddcdc9 5840adea cbec39db 3a171ecb cedad179 ea9a246c 9a556cfc
0 0 6 6 6 421 109 1 7 107 0 1 6 05db9164 9b5fd12f 4cf72387 111121f4 0b153874 a73ee510 3b08e48b ac9c2e8f 6e2d6a15 07d13a8f 796a1a2e d4bb7bd8 8aaa5b67 32c7478e
1 0 -1 1465 0 17 0 4 0 4 241546e0 38a947a1 fa673455 6a14f9b9 25c83c98 fe6b92e5 1c86e0eb 1f89b562 a73ee510 e7ba2569 755e4a50 208d9687 5978055e 07d13a8f 5182f694 f8b34416 e5ba7672 e5f8f18f f3ddd519 32c7478e b34f3128
1 2 11 5 10262 34 2 4 5 1 5 be589b51 287130e0 cd7a7a22 fb7334df 25c83c98 6cdb3998 361384ce a73ee510 3ff10fb2 5874c9c9 976cbd4c 740c210d 1adce6ef 310d155b 07eb8110 07c540c4 891589e7 18259a83 a458ea53 a0ab60ca 32c7478e a052b1ed 9b3e8820 8967c0d2
0 0 51 84 4 3633 26 1 4 8 0 1 4 5a9ed9b0 80e26c9b 97144401 5dbf0cc5 0942e0a7 13718bbd 9ce6136d 0b153874 a73ee510 2106e595 b5bb9d63 04f55317 ab04d8fe 1adce6ef 0ad47a49 2bd32e5c 3486227d 12195b22 21ddcdc9 b1252a9d fa131867 dbb486d7 8ecc176a e8b83407 c43c3f58
0 2 1 18 20255 0 1 1306 0 20 05db9164 bc6e3dc1 67799c69 d00d0f35 4cf72387 7e0ccccf ca4fd8f8 64523cfa a73ee510 3b08e48b a0060bca b9f28c33 22d23aac 5aebfb83 d702713a 0f655650 776ce399 3a2028fd b426bc93 3a171ecb 2e0a0035
1 1 987 2 105 2 1 2 2 1 1 2 68fd1e64 38d50e09 da603082 431a5096 43b19349 7e0ccccf 3f35b640 0b153874 a73ee510 3b08e48b 3d5fb018 6aaab577 94172618 07d13a8f ee569ce2 2f03ef40 d4bb7bd8 582152eb 21ddcdc9 b1252a9d 3b203ca1 32c7478e b21dc903 001f3601 aa5f0a15
0 0 1 0 16597 557 3 5 123 0 1 1 8cf07265 7cd19acc 77f2f2e5 d16679b9 4cf72387 fbad5c96 8fb24933 0b153874 a73ee510 0095a535 3617b5f5 9f32b866 428332cf b28479f6 83ebd498 31ca40b6 e5ba7672 d0e5eb07 dfcfc3fa ad3062eb 32c7478e aee52b6f
0 0 24 4 2 2056 12 6 10 83 0 1 2 05db9164 f0cf0024 08b45d8b cbb5af1b 384874ce fbad5c96 81bb0302 37e4aa92 a73ee510 175d6c71 b7094596 1c547463 1f9d2c38 1adce6ef 55dc357b 0ca69655 e5ba7672 b04e4670 21ddcdc9 b1252a9d f3caefdd 32c7478e 4c8e5aef ea9a246c 9593bba9
0 7 102 3 780 15 7 15 15 1 1 3 3c9d8785 b0660259 3a960356 15c92ddb 4cf72387 13718bbd 00c46cd1 0b153874 a73ee510 62cfc6bd 8cffe207 656e5413 ff5626de ad1cc976 27b1230c fa8d05aa e5ba7672 5edd90de e12ce348 c3dc6cef 49045073
1 47 0 6399 38 19 10 143 10 6 1464facd 38a947a1 223b0e16 ca55061c 25c83c98 7e0ccccf 6933dec1 5b392875 a73ee510 3b08e48b 860c302b 156f99ef 30735474 1adce6ef 0e78291e 5fbf4a84 e5ba7672 1999bae9 deb9605d 32c7478e e448275f
0 0 1 80 0 1848 287 1 4 46 0 1 4 05db9164 09e68b86 13b87f72 13a91973 25c83c98 7e0ccccf cc5ed2f1 0b153874 a73ee510 3b08e48b 081c279a d25f00b6 9f16a973 07d13a8f 36721ddc 1746d357 d4bb7bd8 5aed7436 a153cea2 a458ea53 dd37e0d1 32c7478e c70a58f2 e8b83407 af7ece63
0 0 14 6 7132 171 2 2 6 1 6 05db9164 38a947a1 e88a1d4c 8eb9aec7 25c83c98 fbad5c96 3fd38f3b 5b392875 a73ee510 5162b19c 7c430b79 4ac05ba7 7f0d7407 b28479f6 d1128331 ce881087 07c540c4 5d93f8ab 57d0811b 3a171ecb 1793a828
0 9 9 17 11774 0 23 128 0 17 05db9164 08d6d899 cf59444f 60d5f5a7 25c83c98 7e0ccccf 38850d41 0b153874 a73ee510 6e7947ce 49aeb6a9 1d00cbc4 8f7e5dc7 07d13a8f 41f10449 b93ac0ad 1e88c74f 698d1c68 bf8efd4c c7dc6720 f96a556f
0 0 1 2 6190 84 1 27 71 0 1 5a9ed9b0 3df44d94 d032c263 c18be181 25c83c98 7e0ccccf a0845add 0b153874 a73ee510 967857d1 e469acef dfbb09fb 849a0a56 07d13a8f 72d05a1c 84898b2a d4bb7bd8 e7648a8f 0014c32a c9d4222a 3a171ecb 3b183c5c
0 4 16 5925 2 2 0 0 1 5a9ed9b0 09e68b86 64094ddd b0a4d1e3 25c83c98 b87f4a4a 0b153874 a73ee510 2124a520 319687c9 b51dc799 62036f49 64c94865 91126f30 1a00d73c 07c540c4 5aed7436 aa0cf899 a458ea53 c30dce78 32c7478e 3fdb382b e8b83407 49d68486
1 0 1 20 16 1548 93 42 32 912 0 15 1 16 8cf07265 942f9a8d a8e40bcf 0365276a 25c83c98 7e0ccccf 3f4ec687 1f89b562 a73ee510 726f00fd c4adf918 27c604a6 85dbe138 07d13a8f a8e962af c449f783 27c07bd6 1f868fdd 21ddcdc9 a458ea53 7eee76d1 32c7478e 9af06ad9 9d93af03 cdfe5ab7
1 0 20 2 2 7188 170 2 3 24 0 2 0 2 68fd1e64 38a947a1 ee6e4611 30d9fc77 4cf72387 7e0ccccf bf9d4f90 0b153874 a73ee510 b7c4dad5 81cae03e 5332e3fb d413ef3e 07d13a8f a6d97bf2 ec676ace 3486227d 02e8d897 b055c31b 3a171ecb ae2cd100
1 0 78 2 15 4311 85 4 18 230 0 3 15 68fd1e64 1287a654 5ed035c9 5b5365b2 4cf72387 6f6d9be8 1b1aa9ea 0b153874 a73ee510 c3e69838 7a3651f5 df8b1dea 95bc260c b28479f6 ced5be3a 4cc0abe4 e5ba7672 df00d249 f520f961 32c7478e 27b60b01
1 3 0 4 13 224 28 3 35 27 1 1 13 05db9164 90081f33 993f507e 14a74146 25c83c98 13718bbd dc7659bd 0b153874 a73ee510 03e48276 e51ddf94 18fe7085 3516f6e6 64c94865 98995c3b 8c48eb08 e5ba7672 7181ccc8 2ed6b316 3a171ecb abf08f1b
1 277 3 7318 24 6 3 98 1 3 8cf07265 9adf4cf9 2e76fb61 0b1ad9da 4cf72387 fe6b92e5 75dcaaca 0b153874 a73ee510 3b08e48b 8aabdae8 9886a0a7 edcf17ce 07d13a8f 2aaebd23 338c0d09 e5ba7672 c7dbecd5 60d2d691 3a171ecb 90b6276f
0 -1 4956 0 37 97 0 be589b51 4c2bc594 d032c263 c18be181 25c83c98 fe6b92e5 aa0d873c 0b153874 a73ee510 3b08e48b 868744ab dfbb09fb 9dfda2b9 8ceecbc8 7ac43a46 84898b2a 776ce399 bc48b783 0014c32a 55dd3565 3b183c5c
0 1 0 1 1427 3 16 11 50 0 2 1 05db9164 26a88120 615e3e4e 2788fed8 4cf72387 7e0ccccf 3f4ec687 0b153874 a73ee510 0e9ead52 c4adf918 f5d19c1c 85dbe138 07d13a8f 24ff9452 1034ac0d 3486227d b486119d 63580fba 32c7478e 2a90c749
0 4 0 55 8 859 13 4 12 13 1 1 8 05db9164 e5fb1af3 4b644986 5dbf0cc5 25c83c98 cc5ed2f1 0b153874 a73ee510 3b08e48b facf05cc 6d89b6a5 9f16a973 cfef1c29 1e744fde 2bd32e5c 776ce399 13145934 21ddcdc9 a458ea53 1419c3fc 32c7478e 8ecc176a e8b83407 a70a038a
1 1 259 1 1 5 1 6 1 1 1 3 1 05db9164 f3b07830 ad981000 f96c819d 25c83c98 df5c2d18 0b153874 a73ee510 8aef4905 a7b606c4 b912be9f eae197fd b28479f6 d27eed0e b8b09fe6 e5ba7672 048d01f4 08ae854d 32c7478e c657e6e5
1 0 127 1 3 1683 19 26 17 475 0 9 0 3 05db9164 8947f767 11c9d79e 52a787c8 4cf72387 fbad5c96 18671b18 0b153874 a73ee510 ceb10289 77212bd7 79507c6b 7203f04e 07d13a8f 2c14c412 49013ffe 8efede7f bd17c3da f6a3e43b a458ea53 35cd95c9 ad3062eb c7dc6720 3fdb382b 010f6491 49d68486
0 1 23255 0 1 73 0 7e5c2ff4 d833535f b00d1501 d16679b9 25c83c98 7e0ccccf 65c53f25 1f89b562 a73ee510 3b08e48b ad2bc6f4 e0d76380 39ccb769 b28479f6 a733d362 1203a270 776ce399 281769c2 73d06dde 32c7478e aee52b6f
0 6 -1 915 40 26 33 72 1 3 9a89b36c 4f25e98b 9042c4ea 343f8ed3 25c83c98 fbad5c96 27cc0b50 0b153874 a73ee510 f364a867 7671c62f 00750e7a 1fa0660e b28479f6 df2f73e9 4f71659c e5ba7672 bc5a0ff7 21ddcdc9 a458ea53 706ee322 c9d4222a bcdee96c 990a118a 001f3601 47b6f269
0 0 0 3 12 7308 97 2 21 90 0 1 12 68fd1e64 9adf4cf9 723c059c 4c942c6d 4cf72387 7e0ccccf ce4f7f55 0b153874 a73ee510 d7026747 38f692a7 ab60a748 6e5da64f 1adce6ef 808ff1bc c23fc7ec e5ba7672 2a93f7c8 5dc9a057 32c7478e 90b6276f
0 8 0 15 20 115 24 8 23 24 2 2 20 5a9ed9b0 c66fca21 78171040 373c404a 25c83c98 8ff6f5af 0b153874 a73ee510 5ba575e7 b5a9f90e 6766a7f0 949ea585 1adce6ef 8736735c 59974c9c 8efede7f 1304f63b 21ddcdc9 b1252a9d 07b2853e 32c7478e 94bde4f2 010f6491 09b76f8d
0 38 2 4 3119 149 64 48 139 6 6 4 05db9164 26a88120 d032c263 c18be181 4cf72387 fbad5c96 3f4ec687 1f89b562 a73ee510 726f00fd c4adf918 dfbb09fb 85dbe138 07d13a8f 040ec437 84898b2a 8efede7f 57598e25 0014c32a 32c7478e 3b183c5c
1 88 319 4 5 4 89 40 88 3 4 12 4 05db9164 08d6d899 333440d5 fc86bde0 25c83c98 fbad5c96 f00bddf8 0b153874 a73ee510 83ff688a 55795b33 1b0c8aa3 39795005 b28479f6 bffbd637 4a838997 8efede7f bbf70d82 16e2e3b3 32c7478e d859b4dd
0 1 18 5 1683 80 38 5 95 5 0 5 05db9164 09e68b86 aa8c1539 85dd697c 25c83c98 2903ead3 0b153874 a73ee510 bcc8b4c6 a0a5e9d7 d8c29807 ee79db7b 1adce6ef dcd06253 c64d548f 3486227d 63cdbb21 cf99e5de a458ea53 5f957280 32c7478e 1793a828 e8b83407 b7d9c3bc
0 27 112878 2106 0 2 95 0 5a9ed9b0 38a947a1 2d8004c4 40ed41e5 25c83c98 7e0ccccf 4d9d55ae 5b392875 7cc72ec2 3b08e48b 55065437 ad972965 80dcea18 07d13a8f c68ba31d 1206a8a1 d4bb7bd8 e96a7df2 54d8bb06 3a171ecb a415643d
0 0 -1 4894 20 1 7 20 0 1 05db9164 4c2bc594 d032c263 c18be181 43b19349 7e0ccccf 7f52e00f 0b153874 a73ee510 48a4f593 bca79aeb dfbb09fb 5218d824 8ceecbc8 7ac43a46 84898b2a 07c540c4 bc48b783 0014c32a c9d4222a 3a171ecb 3b183c5c
0 0 32 1 9375 0 37 18 0 0 0 1 05db9164 d833535f ad4b77ff d16679b9 25c83c98 7e0ccccf 9d547ce0 5b392875 a73ee510 3b08e48b 868a9e47 a2f4e8b5 fc5dea81 b28479f6 a733d362 89052618 3486227d 281769c2 d4703ebd 32c7478e aee52b6f
0 6 6 15 20213 507 7 42 360 2 0 40 05db9164 0ca4b7d7 d032c263 c18be181 4cf72387 7e0ccccf e9396c09 c8ddd494 a73ee510 3b08e48b a0060bca dfbb09fb 22d23aac 1adce6ef 9014f0f9 84898b2a e5ba7672 c786d1ea 0014c32a 32c7478e 3b183c5c
0 31 17 2 11 290 23 31 23 65 2 2 11 05db9164 4f25e98b 03280284 5214fda3 25c83c98 fbad5c96 0c41b6a1 0b153874 a73ee510 fa642b71 4ba74619 60bab41d 879fa878 07d13a8f 5be89da3 b6acbd10 e5ba7672 bc5a0ff7 fae651c5 a458ea53 3792328c c0061c6d 423fab69 7a8e7ed6 001f3601 f159b6cb
0 1 2382 13 4 40 4 69 3 609 1 11 0 4 05db9164 38a947a1 933cc823 b1c1e580 25c83c98 fe6b92e5 002fdf0c 1f89b562 a73ee510 61f70369 a4ea009a 2562cf3c 1e9339bc b28479f6 f5bfabbd 03dee53f e5ba7672 b3e92443 be661a75 c7dc6720 67d37917
0 0 190 1624 6 29 6 74 0 9 68fd1e64 c41a84c8 0a266224 759c4a2e 25c83c98 804d2f11 0b153874 a73ee510 2860ede1 1aa6cf31 99dfd83a 3b03d76e b28479f6 e3eb97c7 62b5674b e5ba7672 5911fc7e 28ee216d 32c7478e 590b856f
0 0 19 8 9 1506 31 3 7 34 0 1 9 05db9164 8947f767 100a3803 ad1b5124 30903e74 7e0ccccf bb3b7ab9 c8ddd494 a73ee510 3b08e48b 90b202b5 d377c333 3a9dafb8 b28479f6 a473257f 68d2c2b9 8efede7f bd17c3da e51f040f a458ea53 79c3f011 bcdee96c fe35ffe2 010f6491 987ea0be
1 1 30 3 116 31 1 3 3 1 1 3 5a9ed9b0 38a947a1 0f90b6d6 16b922ed 25c83c98 fbad5c96 17b47bf9 0b153874 a73ee510 3b08e48b 76120d9d ee2d3fdb d4384424 07d13a8f 3046a70a 81371cbc d4bb7bd8 1bae7658 37bad455 ad3062eb 3a171ecb 9d96bacb
0 6 2 3 2779 0 3 13 0 3 fb174e6b 47e8ab98 b009d929 c7043c4b 384874ce 646e7593 0b153874 a73ee510 3b08e48b d05acfa9 3563ab62 969e14fd 1adce6ef bfa6d08a b688c8cc 8efede7f eb4d3f8a 21ddcdc9 5840adea 2754aaf1 55dd3565 3b183c5c f55c04b6 491eeeef
0 0 2 22 3 4687 242 6 6 183 0 1 4 3 05db9164 287130e0 c09cf4ef bc8d1aa6 25c83c98 13718bbd 1919941b 37e4aa92 a73ee510 6c47047a 86c05043 c4bba41d 2ecea536 b28479f6 9efd8b77 ac2e5095 8efede7f 891589e7 2efde463 b1252a9d dc4e98e3 3a171ecb ee42de86 e8b83407 a00829e6
0 55 16 7 1696 72 2 7 95 2 7 5bfa8ab5 89ddfee8 00e2b23c 10d65c35 25c83c98 7e0ccccf ad3508b1 5b392875 a73ee510 fc3680e8 ad757a5a f400e021 93b18cb5 1adce6ef 34cce7d2 9e87470c e5ba7672 5bb2ec8e 7a45f7f2 a458ea53 a13d5eab 423fab69 faf5d8b3 f0f449dd a8cf207e
0 6 0 28 0 31 0 6 0 0 1 1 0 8cf07265 287130e0 c1ba4c5a 16fe249c 25c83c98 7e0ccccf c1225605 985e3fcb a73ee510 ede207dc f29b9ed2 469027a9 7eaf6f1a 07d13a8f 10040656 8f13519e e5ba7672 891589e7 6f3756eb 5840adea f4095a39 c7dc6720 1793a828 e8b83407 a475662f
0 0 19 9 3 14414 1353 3 1 362 0 1 0 3 be589b51 09e68b86 4bee8a47 5031d726 25c83c98 7e0ccccf 197b4575 322e63df a73ee510 6c47047a 48876b80 6f95f18b e40e52ae 07d13a8f 36721ddc 3d66d729 8efede7f 5aed7436 21ddcdc9 a458ea53 3c1a8dd8 3a171ecb 3fdb382b b9266ff0 49d68486
0 18 12 8 8965 44 2 12 57 2 0 8 05db9164 d7988e72 5eee7056 c1a3acf5 afcf7897 96825c8f 0b153874 a73ee510 bc283a64 2df02cf1 f71904ea 03232503 b28479f6 c8389df7 0db58836 07c540c4 0f2f9850 5fd56cf9 b1252a9d 96725293 32c7478e 2702453c 8b8de563 303cea07
0 1 2 0 177674 0 3 2 0 0 1 87552397 207b2d81 6e136288 4f938621 25c83c98 7e0ccccf 8025502e 6c41e35e 7cc72ec2 4072f40f 29e4ad33 64ddde07 80467802 07d13a8f 0bf0feff 0c41b634 e5ba7672 fa0643ee 21ddcdc9 b1252a9d b4031b95 3a171ecb a81956df 001f3601 b1262ddd
0 0 20 0 4412 855 0 4 522 0 5 05db9164 58e67aaf 54e3c628 9725d851 25c83c98 7e0ccccf 5b18f3d9 0b153874 a73ee510 ad0b97fb 720446f5 92409ea2 034e5f3b 07d13a8f 10935a85 05c5cfbe d4bb7bd8 c21c3e4c 6f62a118 b1252a9d 54d0b766 c7dc6720 2913df0f 9b3e8820 bc7f21c2
0 56 2 0 0 1 0 5a9ed9b0 8084ee93 02cf9876 c18be181 25c83c98 fbad5c96 af0809a5 5b392875 7cc72ec2 3b08e48b 9e12e146 8fe001f4 025225f2 b28479f6 16d2748c 36103458 2005abd1 003d4f4f e587c466 be7c41b4 3b183c5c
0 76 5 46200 7 0 68fd1e64 287130e0 7555338e e161fae2 25c83c98 7e0ccccf ce17d537 0b153874 7cc72ec2 ed111662 5b225578 f34e8f6a d1be539d 07d13a8f 10040656 8ec308fc 3486227d 891589e7 21ddcdc9 5840adea 182fdd1a c7dc6720 6c1cdd05 ea9a246c 1219b447
0 39 8 42 32 27 33 39 24 32 1 1 32 05db9164 73a46ff0 844ce0a4 9bb11257 4cf72387 7e0ccccf ff3f3dda 0fb392dd a73ee510 3b08e48b e2217f93 1d0b8187 da9ee8bd 1adce6ef d57668e2 4372eb4b e5ba7672 da507f45 21ddcdc9 5840adea ecb5cd6f 32c7478e 6dbd889f ea9a246c 33ced911
1 0 55 5 14477 0 5 1 0 5 05db9164 09e68b86 2beedeb2 c59396e7 25c83c98 7e0ccccf a972360e 0b153874 7cc72ec2 acf0058d 9e511730 8f70e33a 04e4a7e0 64c94865 91126f30 6df0eed9 e5ba7672 5aed7436 55dd3565 5840adea c412f773 3a171ecb 3fdb382b e8b83407 ccc71a58
0 1 1 5 8 7 8 1 8 8 1 1 0 8 05db9164 8e4f887c 25c83c98 13718bbd 47802627 5b9f3341 a73ee510 fbbf2c95 42e01668 79db54f6 07d13a8f b708086d d4bb7bd8 4b340164 3a171ecb
1 1 7 3 10087 67 6 3 57 3 3 05db9164 d7988e72 fb535e16 6fe3d332 25c83c98 13718bbd 2829f187 66f29b89 a73ee510 e034d733 3a9c7259 8e86918c 0d8d4492 07d13a8f 194c42a4 57236df8 e5ba7672 0f2f9850 c27239bd 5840adea 841f2712 3a171ecb 1793a828 e8b83407 b820b6c5
0 7 1 40 1418 23 147 0 7 0 4 0 68fd1e64 80e26c9b ba1947d0 85dd697c 25c83c98 7e0ccccf 16401b7d a61cc0ef a73ee510 3b08e48b 20ec800a 34a238e0 18a5e4b8 b28479f6 a785131a da441c7e e5ba7672 005c6740 21ddcdc9 5840adea 8717ea07 32c7478e 1793a828 e8b83407 b9809574
0 0 10 2 3545 0 2 3 0 2 be589b51 e5fb1af3 50808b4e 39ec3719 25c83c98 13718bbd 316949b7 5b392875 a73ee510 3b08e48b d51f40d7 75ef3efe 4eb5dabc 07d13a8f b5de5956 d5cb04e4 776ce399 13145934 21ddcdc9 b1252a9d 1d4696ef 32c7478e 39fe175c e8b83407 1c7f8927
0 5 51 5 457 5 5 7 11 1 1 1 5 ae82ea21 9e5ce894 e1120103 13508380 25c83c98 7e0ccccf 6855ef53 0b153874 a73ee510 175d6c71 b7094596 d19a1cc6 1f9d2c38 07d13a8f 8cf98699 e58b9a62 3486227d a5bb7b8a 1d1eb838 a458ea53 2eb5be02 ad3062eb c7dc6720 45ab94c8 ea9a246c c84c4aec
0 0 0 15 2 20112 305 1 43 228 0 1 30 05db9164 0a519c5c 02cf9876 c18be181 25c83c98 7e0ccccf fe4e75fa 0b153874 a73ee510 6aea41c7 8f4f8f83 8fe001f4 8828a59c 07d13a8f 4ac81a35 36103458 d4bb7bd8 416e8695 e587c466 93bad2c0 3b183c5c
0 0 1 4 3 1689 184 12 46 53 0 1 3 8cf07265 207b2d81 d6be853a 4842a03d 384874ce fe6b92e5 209d1929 5b392875 a73ee510 9eff685e 87fe3e10 dadde5ca 3bd6c21d b28479f6 3c767806 6077db2c e5ba7672 395856b0 21ddcdc9 a458ea53 ae7b2d98 32c7478e b8942a02 001f3601 4a6648b5
0 4 0 89 4 486 5 4 7 4 1 1 4 05db9164 8947f767 22e8ec23 f8faa363 43b19349 7e0ccccf 45607029 51d76abe a73ee510 0ada1061 2b9f131d 18e1f914 aca10c14 1adce6ef ba8b8b16 226c87e7 d4bb7bd8 bd17c3da 656485cf a458ea53 0443b252 bcdee96c c73755d6 e8b83407 c23979db
1 39 2 1 3343 1 17 2 1 1 1 05db9164 5a88f1d5 16424a73 08694bce 30903e74 fe6b92e5 12c61956 5b392875 a73ee510 52486df2 94d2aad8 c76cdf17 f23a3825 b28479f6 e842876b ffe60785 e5ba7672 1adff463 5f0fcebd ad3062eb 3a171ecb 392bf8f1
0 2 1 2 8036 164 18 13 367 2 0 2 68fd1e64 287130e0 54597e12 12c911a7 384874ce 13718bbd 48b70cb6 985e3fcb a73ee510 6123dced 1736789a 8c51bef7 1a347339 b28479f6 9efd8b77 f8f7edbf 3486227d 891589e7 99f90f6d 5840adea 72b1423f 32c7478e af062947 ea9a246c b4a4615f
0 -1 71140 142 0 7 63 0 8cf07265 4c2bc594 d032c263 c18be181 25c83c98 fbad5c96 5d859d57 0b153874 7cc72ec2 cd481139 00adbfbb dfbb09fb d4b85d8d 8ceecbc8 7ac43a46 84898b2a 07c540c4 bc48b783 0014c32a 3a171ecb 3b183c5c
1 0 11 3 16184 125 2 3 103 2 0 3 05db9164 f0cf0024 6f67f7e5 41274cd7 25c83c98 94a113a4 0b153874 a73ee510 4ddb41b1 f47e21eb 623049e6 4f3f2bb1 b28479f6 e6c5b5cd c92f3b61 07c540c4 b04e4670 21ddcdc9 5840adea 60f6221e 32c7478e 43f13e8b ea9a246c 731c3655
0 140 2 2 0 2 2 0 2 5bfa8ab5 38a947a1 25c83c98 7e0ccccf 88002ee1 64523cfa 7cc72ec2 3b08e48b f1b78ab4 6e5da64f 07d13a8f c2b7aaa6 2005abd1 659bdb63 ad3062eb 32c7478e
0 2 0 7 443 37 7 34 282 1 4 7 7 3c9d8785 38a947a1 4470baf4 8c8a4c47 43b19349 fbad5c96 282b88fc 0b153874 a73ee510 0f1a2599 ea26a3ee bb669e25 0e5bc979 b28479f6 547b8c62 2b2ce127 8efede7f b133fcd4 2b796e4a 32c7478e 8d365d3b
1 2 1 60 75 61 121 52 39 248 1 8 1 77 05db9164 942f9a8d ab4a038c fea9881c 4cf72387 7e0ccccf 3f4ec687 0b153874 a73ee510 726f00fd c4adf918 2b7b1137 85dbe138 1adce6ef ae97ecc3 2dbf1d23 8efede7f 1f868fdd f44bef3c a458ea53 8adfc28d bcdee96c 3fdb382b 9d93af03 49d68486
0 1 34 2 42 328 44 15 49 58 1 9 0 42 05db9164 2c16a946 a65db9fb 9f43a1b5 25c83c98 1d794a16 5b392875 a73ee510 ed086ca2 4c9e8313 28156fd4 67b031b4 b28479f6 3628a186 87140baa e5ba7672 e4ca448c 67bb5322 32c7478e 9117a34a
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import numpy as np
import paddle.fluid.incubate.data_generator as dg
# Reference lower bound of each of the 13 continuous features; subtracted from
# the raw value before scaling. Values outside the bounds are not clipped.
cont_min_ = [0, -3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
# Reference upper bound of each continuous feature (same ordering as above).
cont_max_ = [20, 600, 100, 50, 64000, 500, 100, 50, 500, 10, 10, 10, 50]
#cont_diff_ = [20, 603, 100, 50, 64000, 500, 100, 50, 500, 10, 10, 10, 50]
# Width (upper - lower) of each continuous feature; the scaling divisor.
cont_diff_ = [cont_max_[i] - cont_min_[i] for i in range(len(cont_min_))]
# Column indices of the continuous features in a tab-separated input line
# (column 0 is read as the label).
continuous_range_ = range(1, 14)
# Column indices of the categorical features in a tab-separated input line.
categorical_range_ = range(14, 40)
# NOTE(review): not referenced by any function visible in this module.
line_num = 0
def reader(hash_dim_, paddle_train_data_dir):
    """
    Yield ``(feat_idx, feat_value, label)`` for every line of every file in
    ``paddle_train_data_dir``.

    This is the generator form of ``generate_sample``: the parsing logic was
    duplicated verbatim, so it now delegates to keep a single implementation.

    Args:
        hash_dim_ (int): Modulus applied to every hashed feature id.
        paddle_train_data_dir (str): Directory holding tab-separated sample files.

    Yields:
        tuple: ``(feat_idx, feat_value, label)`` lists, one per input line.
    """
    # Plain loop instead of ``yield from`` to stay usable on Python 2.
    for sample in generate_sample(hash_dim_, paddle_train_data_dir)():
        yield sample
def generate_sample(hash_dim_, paddle_train_data_dir):
    """
    Build a reader over every file found in ``paddle_train_data_dir``.

    Each input line is tab-separated: column 0 is the integer label, the
    columns in ``continuous_range_`` are continuous features and the columns
    in ``categorical_range_`` are categorical features.

    Args:
        hash_dim_ (int): Modulus applied to every hashed feature id.
        paddle_train_data_dir (str): Directory holding the sample files.

    Returns:
        callable: A zero-argument function returning a generator of
        ``(feat_idx, feat_value, label)`` lists, one triple per line.
    """
    # os.listdir() returns entries in arbitrary order; sort so that every pass
    # over the returned reader visits the samples in the same order.
    files = [
        os.path.join(str(paddle_train_data_dir), name)
        for name in sorted(os.listdir(paddle_train_data_dir))
    ]

    def reader():
        # NOTE(review): builtin hash() of str is randomized per process on
        # Python 3 unless PYTHONHASHSEED is fixed, so indices are only
        # comparable within one run - confirm this is acceptable.
        for path in files:
            with open(path, 'r') as f:
                for line in f:
                    features = line.rstrip('\n').split('\t')
                    feat_idx = []
                    feat_value = []
                    for idx in continuous_range_:
                        feat_idx.append(hash('dense_feat_id' + str(idx)) % hash_dim_)
                        if features[idx] == '':
                            # Missing continuous value is encoded as 0.0.
                            feat_value.append(0.0)
                        else:
                            # Scale by the reference bounds of this column.
                            feat_value.append(
                                (float(features[idx]) - cont_min_[idx - 1]) /
                                cont_diff_[idx - 1])
                    for idx in categorical_range_:
                        if features[idx] == '':
                            # Missing category: per-column placeholder id, weight 0.
                            feat_idx.append(hash('sparse_feat_id' + str(idx)) % hash_dim_)
                            feat_value.append(0.0)
                        else:
                            # Hash column index together with the raw category.
                            feat_idx.append(
                                hash(str(idx) + features[idx]) % hash_dim_)
                            feat_value.append(1.0)
                    label = [int(features[0])]
                    yield feat_idx, feat_value, label

    return reader
def train(hash_dim_, paddle_train_data_dir):
    """
    Return the sample reader for the files under ``paddle_train_data_dir``.

    Thin alias of ``generate_sample`` taking the same two arguments.
    """
    sample_reader = generate_sample(hash_dim_, paddle_train_data_dir)
    return sample_reader
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Evaluate accuracy.
"""
import numpy as np
import logging
logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger("fluid")
logger.setLevel(logging.INFO)
def evaluate_accuracy(file1, file2):
    """
    Compare ground-truth labels with predicted scores line by line.

    Args:
        file1 (str): Path to the label file, one numeric label per line.
        file2 (str): Path to the prediction file, one score per line; a score
            strictly greater than 0.5 counts as class 1, otherwise class 0.

    Returns:
        float: Fraction of compared lines whose label equals the thresholded
        prediction, or 0.0 when there is nothing to compare.

    Comparison stops at the first empty line or at the end of the shorter file.
    """
    count = 0
    same_count = 0
    # Context managers guarantee both handles are closed, even on a parse error.
    with open(file1, 'r') as f1, open(file2, 'r') as f2:
        while True:
            line1 = f1.readline().strip('\n')
            line2 = f2.readline().strip('\n')
            if (not line1) or (not line2):
                break
            count += 1
            predicted = 1 if float(line2) > 0.5 else 0
            if int(float(line1)) == predicted:
                same_count += 1
    if count == 0:
        # Avoid ZeroDivisionError on empty input.
        logger.warning("evaluate accuracy: no samples to compare")
        return 0.0
    accuracy = float(same_count) / count
    logger.info("evaluate accuracy: ")
    logger.info(accuracy)
    return accuracy
if __name__ == '__main__':
    #evaluate_accuracy("./mpc_data/label_mnist", "./mpc_infer_data/label_paddle")
    # The plaintext Criteo labels are written under ./mpc_infer_data/ by the
    # data-preparation script of this demo, so read them from there.
    evaluate_accuracy("./mpc_infer_data/label_criteo", "./mpc_infer_data/label_mpc")
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import numpy as np
import time
import os
import logging
import errno
import paddle
import paddle.fluid as fluid
import paddle_fl.mpc as pfl_mpc
import paddle_fl.mpc.data_utils.aby3 as aby3
import args
import mpc_network
import process_data
import evaluate_accuracy
logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger("fluid")
logger.setLevel(logging.INFO)
def load_model_and_infer(args):
    """
    Run inference with the model saved for every training epoch.

    Initializes the ABY3 MPC context for this party, declares the input
    variables, builds a data loader over the encrypted test shares under
    ``./mpc_data/test/`` and calls ``infer`` once per epoch directory
    ``./mpc_model/epoch{i}/party{role}``.

    Args:
        args: Parsed options; reads ``role``, ``server``, ``port``,
            ``batch_size``, ``num_field``, ``sparse_feature_number`` and
            ``epoch_num``.

    Raises:
        ValueError: If the encrypted test data directory does not exist.
    """
    # Init MPC
    role = int(args.role)
    pfl_mpc.init("aby3", role, "localhost", args.server, int(args.port))
    place = fluid.CPUPlace()
    exe = fluid.Executor(place)

    # Input
    BATCH_SIZE = args.batch_size
    FIELD_NUM = args.num_field
    FEATURE_NUM = args.sparse_feature_number + 1
    feat_idx = pfl_mpc.data(name='feat_idx', shape=[BATCH_SIZE, FIELD_NUM, FEATURE_NUM], lod_level=1, dtype="int64")
    feat_value = pfl_mpc.data(name='feat_value', shape=[BATCH_SIZE, FIELD_NUM], lod_level=0, dtype="int64")
    label = pfl_mpc.data(name='label', shape=[BATCH_SIZE, 1], lod_level=1, dtype="int64")

    # Prepare test data
    mpc_data_dir = "./mpc_data/"
    mpc_test_data_dir = mpc_data_dir + 'test/'
    if not os.path.exists(mpc_test_data_dir):
        # Message spacing fixed to match the equivalent check in training.
        raise ValueError("{} is not found. Please prepare encrypted data.".format(mpc_test_data_dir))

    test_feature_idx_reader = aby3.load_aby3_shares(mpc_test_data_dir + "criteo_feature_idx", id=role, shape=(FIELD_NUM, FEATURE_NUM))
    test_feature_value_reader = aby3.load_aby3_shares(mpc_test_data_dir + "criteo_feature_value", id=role, shape=(FIELD_NUM,))
    test_label_reader = aby3.load_aby3_shares(mpc_test_data_dir + "criteo_label", id=role, shape=(1,))

    test_batch_feature_idx = aby3.batch(test_feature_idx_reader, BATCH_SIZE, drop_last=True)
    test_batch_feature_value = aby3.batch(test_feature_value_reader, BATCH_SIZE, drop_last=True)
    test_batch_label = aby3.batch(test_label_reader, BATCH_SIZE, drop_last=True)

    test_loader = fluid.io.DataLoader.from_generator(feed_list=[feat_idx, feat_value, label], capacity=BATCH_SIZE, drop_last=True)
    test_batch_sample = paddle.reader.compose(test_batch_feature_idx, test_batch_feature_value, test_batch_label)
    test_loader.set_batch_generator(test_batch_sample, places=place)

    # One inference pass per saved epoch.
    for i in range(args.epoch_num):
        mpc_model_dir = './mpc_model/epoch{}/party{}'.format(i, role)
        mpc_model_filename = '__model__'
        infer(test_loader, role, exe, BATCH_SIZE, mpc_model_dir, mpc_model_filename)
def infer(test_loader, role, exe, BATCH_SIZE, mpc_model_dir, mpc_model_filename):
    """
    Load one saved MPC model, run it over ``test_loader`` and evaluate it.

    The raw bytes of every fetched prediction are appended to
    ``./mpc_infer_data/prediction.part{role}``. Party 0 then reconstructs the
    plaintext predictions from the ``prediction`` share files, compares them
    with ``./mpc_infer_data/label_criteo`` and removes the decrypted file.

    Args:
        test_loader: Callable returning an iterable of feed batches.
        role (int): Party id of this process.
        exe: Executor used to load and run the model.
        BATCH_SIZE (int): Batch size; used as the share shape when decrypting.
        mpc_model_dir (str): Directory of the saved model for this party.
        mpc_model_filename (str): File name of the saved program.
    """
    # Load mpc model
    logger.info('Load model from {}'.format(mpc_model_dir))
    infer_program, feed_targets, fetch_targets = aby3.load_mpc_model(exe=exe,
                                                                     mpc_model_dir=mpc_model_dir,
                                                                     mpc_model_filename=mpc_model_filename,
                                                                     inference=True)

    # Infer
    logger.info('******************************************')
    logger.info('Start Inferring...')
    mpc_infer_data_dir = "./mpc_infer_data/"
    if not os.path.exists(mpc_infer_data_dir):
        try:
            os.mkdir(mpc_infer_data_dir)
        except OSError as e:
            # Another process may have created the directory in the meantime.
            if e.errno != errno.EEXIST:
                raise

    prediction_file = mpc_infer_data_dir + "prediction.part{}".format(role)
    if os.path.exists(prediction_file):
        os.remove(prediction_file)

    start_time = time.time()
    # Open the output once instead of once per batch.
    with open(prediction_file, 'ab') as f:
        for sample in test_loader():
            prediction = exe.run(program=infer_program, feed=sample, fetch_list=fetch_targets)
            # tobytes() replaces the deprecated tostring() alias (same bytes).
            f.write(np.array(prediction).tobytes())
    end_time = time.time()
    logger.info('End Inferring...cost time: {}'.format(end_time - start_time))

    logger.info('Start Evaluate Accuracy...')
    cypher_file = mpc_infer_data_dir + "prediction"
    decrypt_file = mpc_infer_data_dir + 'label_mpc'
    time.sleep(0.1)
    if role == 0:
        if os.path.exists(decrypt_file):
            os.remove(decrypt_file)
        process_data.decrypt_data_to_file(cypher_file, (BATCH_SIZE, ), decrypt_file)
        evaluate_accuracy.evaluate_accuracy('./mpc_infer_data/label_criteo', decrypt_file)
        os.remove(decrypt_file)

    end_time = time.time()
    logger.info('End Evaluate Accuracy...cost time: {}'.format(end_time - start_time))
    logger.info('******************************************')
if __name__ == '__main__':
    # Keep the parsed options under their own name so the imported ``args``
    # module is not rebound.
    parsed_args = args.parse_args()
    load_model_and_infer(parsed_args)
"""
Factorization Machine (FM) network built from paddle_fl MPC layers.
"""
import os
import math
import logging
import paddle
from paddle import fluid
import paddle_fl.mpc as pfl_mpc
logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger('fluid')
logger.setLevel(logging.INFO)
def FM(args, inputs, seed=0):
    """
    Build a Factorization Machine graph from MPC layers.

    Args:
        args: Options; reads ``share_num``, ``batch_size``, ``num_field``,
            ``sparse_feature_number`` and ``sparse_feature_dim``.
        inputs (list): ``[feat_idx, feat_value, label]`` MPC variables.
        seed (int): Currently unused by this function.

    Returns:
        tuple: ``(avg_cost, cost)`` where ``cost`` is the output of
        ``sigmoid_cross_entropy_with_logits`` on the FM logits and ``label``,
        and ``avg_cost`` is ``cost`` reduce-summed along dimension 1.
    """
    feat_idx = inputs[0]
    raw_feat_value = inputs[1]
    label = inputs[2]

    feat_value = pfl_mpc.layers.reshape(
        raw_feat_value,
        [args.share_num, args.batch_size, args.num_field])

    # ------------------------- first order term --------------------------
    # Flatten (batch, field) so the embedding lookup sees one row per feature.
    feat_idx_re = pfl_mpc.layers.reshape(
        feat_idx,
        [args.share_num, args.batch_size * args.num_field, args.sparse_feature_number + 1])
    first_weights_re = pfl_mpc.input.embedding(
        input=feat_idx_re,
        is_sparse=False,
        is_distributed=False,
        dtype='int64',
        size=[args.sparse_feature_number + 1, 1],
        padding_idx=0,
    )
    first_weights = pfl_mpc.layers.reshape(
        first_weights_re,
        shape=[args.share_num, args.batch_size, args.num_field])
    # Sum over fields of weight * value.
    y_first_order = pfl_mpc.layers.reduce_sum((first_weights * feat_value), 2, keep_dim=True)

    b_linear = pfl_mpc.layers.create_mpc_parameter(
        shape=[1],
        dtype='int64',
        default_initializer=fluid.initializer.ConstantInitializer(
            value=0))

    # ------------------------- second order term --------------------------
    feat_embeddings_re = pfl_mpc.input.embedding(
        input=feat_idx_re,
        is_sparse=False,
        is_distributed=False,
        dtype='int64',
        size=[args.sparse_feature_number + 1, args.sparse_feature_dim],
        padding_idx=0
    )
    feat_embeddings = pfl_mpc.layers.reshape(
        feat_embeddings_re,
        shape=[args.share_num, args.batch_size, args.num_field, args.sparse_feature_dim])
    # Scale every field embedding by its feature value.
    feat_embeddings = pfl_mpc.layers.elementwise_mul(feat_embeddings, feat_value, axis=0)

    # sum_square part
    summed_features_emb = pfl_mpc.layers.reduce_sum(
        feat_embeddings, 2)
    summed_features_emb_square = pfl_mpc.layers.square(
        summed_features_emb)

    # square_sum part
    squared_features_emb = pfl_mpc.layers.square(
        feat_embeddings)
    squared_sum_features_emb = pfl_mpc.layers.reduce_sum(
        squared_features_emb, 2)

    # 0.5 * sum((sum of embeddings)^2 - sum of (embeddings^2))
    y_FM_ = pfl_mpc.layers.reduce_sum(
        summed_features_emb_square - squared_sum_features_emb,
        dim=2,
        keep_dim=True)
    y_FM = pfl_mpc.layers.scale(y_FM_, 0.5)

    # ------------------------- Predict --------------------------
    cost = pfl_mpc.layers.sigmoid_cross_entropy_with_logits(y_first_order + y_FM + b_linear, label)
    avg_cost = pfl_mpc.layers.reduce_sum(cost, 1)
    return avg_cost, cost
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Process data for Criteo.
"""
import os
import time
import logging
import numpy as np
import six
import paddle
from paddle_fl.mpc.data_utils import aby3
import dataset_generator
import args
logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger("fluid")
logger.setLevel(logging.INFO)
def generate_encrypted_data(args, mpc_data_dir, reader, label_filepath=None):
    """
    Encrypt every sample from ``reader`` and save the shares.

    Three files prefixed ``criteo_label``, ``criteo_feature_value`` and
    ``criteo_feature_idx`` are written under ``mpc_data_dir``; ``reader`` is
    iterated once for each of them.

    Args:
        args: Options; reads ``sparse_feature_number``.
        mpc_data_dir (str): Output directory prefix (string-concatenated, so
            it must end with a path separator).
        reader (callable): Returns an iterable of
            ``(feat_idx, feat_value, label)`` samples.
        label_filepath (str, optional): If given, the plaintext label of every
            sample is appended to this file, one per line.
    """

    def encrypt_feature_idx():
        # Build the identity matrix once; each row is the one-hot encoding of
        # the matching feature index.
        one_hot_rows = np.eye(args.sparse_feature_number + 1)
        for instance in reader():
            feature_idx_ = np.array(instance[0])
            feature_idx = one_hot_rows[feature_idx_.reshape(-1)]
            yield aby3.make_shares(feature_idx)

    def encrypt_feature_value():
        for instance in reader():
            yield aby3.make_shares(np.array(instance[1]))

    def encrypt_label():
        # Open the plaintext label file once instead of once per sample.
        label_file = None
        if label_filepath is not None:
            label_file = open(label_filepath, 'a+')
        try:
            for instance in reader():
                if label_file is not None:
                    label_file.write(str(instance[2][0]) + '\n')
                yield aby3.make_shares(np.array(instance[2]))
        finally:
            if label_file is not None:
                label_file.close()

    aby3.save_aby3_shares(encrypt_label, mpc_data_dir + "criteo_label")
    aby3.save_aby3_shares(encrypt_feature_value, mpc_data_dir + "criteo_feature_value")
    aby3.save_aby3_shares(encrypt_feature_idx, mpc_data_dir + "criteo_feature_idx")
def load_decrypt_data(filepath, shape):
    """
    Read the three share files behind ``filepath``, reconstruct every
    instance and log the reconstructed value.
    """
    share_readers = [
        aby3.load_aby3_shares(filepath, id=party, shape=shape)
        for party in six.moves.range(3)
    ]
    combined_reader = paddle.reader.compose(*share_readers)
    for shares in combined_reader():
        revealed = aby3.reconstruct(np.array(shares))
        logger.info(revealed)
def decrypt_data_to_file(filepath, shape, decrypted_filepath):
    """
    Reconstruct the arithmetic shares behind ``filepath`` and write them out.

    Args:
        filepath (str): Common prefix of the three share files.
        shape (tuple): Shape of one stored share instance.
        decrypted_filepath (str): Output text file; any existing file is
            replaced and every reconstructed element gets its own line.
    """
    # NOTE: a wait loop for '<filepath>.part0/1/2' to appear used to be
    # sketched here; callers must make sure all three parts exist.
    #while(not (os.path.exists(filepath + '.part0')
    #           and os.path.exists(filepath + '.part1')
    #           and os.path.exists(filepath + '.part2'))):
    #    time.sleep(0.1)
    if os.path.exists(decrypted_filepath):
        os.remove(decrypted_filepath)

    part_readers = []
    for party_id in six.moves.range(3):
        part_readers.append(aby3.load_aby3_shares(filepath, id=party_id, shape=shape))
    aby3_share_reader = paddle.reader.compose(part_readers[0], part_readers[1], part_readers[2])

    # Open the output once instead of once per reconstructed instance.
    with open(decrypted_filepath, 'a+') as f:
        for instance in aby3_share_reader():
            p = aby3.reconstruct(np.array(instance))
            for i in p:
                f.write(str(i) + '\n')
if __name__ == '__main__':
    # Keep the parsed options under their own name so the imported ``args``
    # module is not rebound.
    parsed_args = args.parse_args()

    mpc_data_dir = './mpc_data/'
    mpc_infer_data_dir = './mpc_infer_data/'
    if not os.path.exists(mpc_data_dir):
        os.mkdir(mpc_data_dir)
    if not os.path.exists(mpc_infer_data_dir):
        os.mkdir(mpc_infer_data_dir)

    # Encrypted training shares.
    mpc_train_data_dir = mpc_data_dir + 'train/'
    if not os.path.exists(mpc_train_data_dir):
        os.mkdir(mpc_train_data_dir)
    train_reader = dataset_generator.train(parsed_args.sparse_feature_number + 1, parsed_args.paddle_sample_data_dir)
    generate_encrypted_data(parsed_args, mpc_train_data_dir, train_reader)

    # Encrypted test shares, plus plaintext labels for accuracy evaluation.
    # Labels are appended, so a stale file from an earlier run is removed first.
    mpc_test_data_dir = mpc_data_dir + 'test/'
    if not os.path.exists(mpc_test_data_dir):
        os.mkdir(mpc_test_data_dir)
    label_test_filepath = mpc_infer_data_dir + "label_criteo"
    if os.path.exists(label_test_filepath):
        os.remove(label_test_filepath)
    test_reader = dataset_generator.train(parsed_args.sparse_feature_number + 1, parsed_args.paddle_sample_data_dir)
    generate_encrypted_data(parsed_args, mpc_test_data_dir, test_reader, label_test_filepath)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import numpy as np
import time
import os
import logging
import errno
import paddle
import paddle.fluid as fluid
import paddle_fl.mpc as pfl_mpc
import paddle_fl.mpc.data_utils.aby3 as aby3
import args
import mpc_network
import process_data
import evaluate_accuracy
logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger("fluid")
logger.setLevel(logging.INFO)
def train(args):
    """
    Train the MPC FM model and save an inference model after every epoch.

    Initializes the ABY3 MPC context for this party, builds the FM network,
    feeds the encrypted shares under ``./mpc_data/train/`` and saves the
    inference program to ``./mpc_model/epoch{n}/party{role}``.

    Args:
        args: Parsed options; reads ``role``, ``server``, ``port``,
            ``batch_size``, ``num_field``, ``sparse_feature_number``,
            ``base_lr`` and ``epoch_num`` (plus what ``mpc_network.FM`` reads).

    Raises:
        ValueError: If the encrypted training data directory does not exist.
    """
    # Init MPC
    role = int(args.role)
    pfl_mpc.init("aby3", role, "localhost", args.server, int(args.port))

    # Input and Network
    BATCH_SIZE = args.batch_size
    FIELD_NUM = args.num_field
    FEATURE_NUM = args.sparse_feature_number + 1

    feat_idx = pfl_mpc.data(name='feat_idx', shape=[BATCH_SIZE, FIELD_NUM, FEATURE_NUM], lod_level=1, dtype="int64")
    feat_value = pfl_mpc.data(name='feat_value', shape=[BATCH_SIZE, FIELD_NUM], lod_level=0, dtype="int64")
    label = pfl_mpc.data(name='label', shape=[BATCH_SIZE, 1], lod_level=1, dtype="int64")
    inputs = [feat_idx] + [feat_value] + [label]

    avg_cost, predict = mpc_network.FM(args, inputs, seed=2)
    # Clone before minimize() so the inference program holds no optimizer ops.
    infer_program = fluid.default_main_program().clone(for_test=True)
    optimizer = pfl_mpc.optimizer.SGD(args.base_lr)
    optimizer.minimize(avg_cost)

    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    # Prepare train data
    mpc_data_dir = "./mpc_data/"
    mpc_train_data_dir = mpc_data_dir + 'train/'
    if not os.path.exists(mpc_train_data_dir):
        raise ValueError("{} is not found. Please prepare encrypted data.".format(mpc_train_data_dir))

    feature_idx_reader = aby3.load_aby3_shares(mpc_train_data_dir + "criteo_feature_idx", id=role, shape=(FIELD_NUM, FEATURE_NUM))
    feature_value_reader = aby3.load_aby3_shares(mpc_train_data_dir + "criteo_feature_value", id=role, shape=(FIELD_NUM,))
    label_reader = aby3.load_aby3_shares(mpc_train_data_dir + "criteo_label", id=role, shape=(1,))

    batch_feature_idx = aby3.batch(feature_idx_reader, BATCH_SIZE, drop_last=True)
    batch_feature_value = aby3.batch(feature_value_reader, BATCH_SIZE, drop_last=True)
    batch_label = aby3.batch(label_reader, BATCH_SIZE, drop_last=True)

    loader = fluid.io.DataLoader.from_generator(feed_list=[feat_idx, feat_value, label], capacity=BATCH_SIZE)
    batch_sample = paddle.reader.compose(batch_feature_idx, batch_feature_value, batch_label)
    loader.set_batch_generator(batch_sample, places=place)

    # Training
    logger.info('******************************************')
    logger.info('Start Training...')
    logger.info('batch_size = {}, learning_rate = {}'.format(args.batch_size, args.base_lr))

    mpc_model_basedir = "./mpc_model/"
    start_time = time.time()
    # Pre-bind so the per-epoch report below cannot raise NameError when the
    # loader yields no batch.
    batch_end = start_time
    step = 0

    for epoch_id in range(args.epoch_num):
        for sample in loader():
            step += 1
            exe.run(feed=sample, fetch_list=[predict.name])
            batch_end = time.time()
            if step % 100 == 0:
                print('Epoch={}, Step={}, current cost time: {}'.format(epoch_id, step, batch_end - start_time))

        print('Epoch={}, current cost time: {}'.format(epoch_id, batch_end - start_time))

        # For each epoch: save infer program
        mpc_model_dir = mpc_model_basedir + "epoch{}/party{}".format(epoch_id, role)
        fluid.io.save_inference_model(dirname=mpc_model_dir,
                                      feeded_var_names=["feat_idx", "feat_value", "label"],
                                      target_vars=[predict],
                                      executor=exe,
                                      main_program=infer_program,
                                      model_filename="__model__")
        logger.info('Model is saved in {}'.format(mpc_model_dir))

    end_time = time.time()
    print('Mpc Training of Epoch={} Batch_size={}, epoch_cost={:.4f} s'
          .format(args.epoch_num, BATCH_SIZE, (end_time - start_time)))
    logger.info('******************************************')
    logger.info('End Training...')
if __name__ == '__main__':
    # Keep the parsed options under their own name so the imported ``args``
    # module is not rebound.
    parsed_args = args.parse_args()
    train(parsed_args)
......@@ -37,6 +37,8 @@ from . import rnn
from .rnn import *
from . import metric_op
from .metric_op import *
from . import tensor
from .tensor import *
__all__ = []
__all__ += basic.__all__
......@@ -46,3 +48,4 @@ __all__ += ml.__all__
__all__ += compare.__all__
__all__ += conv.__all__
__all__ += metric_op.__all__
__all__ += tensor.__all__
......@@ -23,6 +23,7 @@ from ..mpc_layer_helper import MpcLayerHelper
__all__ = [
'elementwise_add',
'elementwise_sub',
'elementwise_mul',
]
......@@ -91,15 +92,40 @@ def elementwise_sub(x, y, axis=-1, act=None, name=None):
But the output only shares the LoD information with input $x$.
Args:
x (MpcVariable): The first input Tensor/LoDTensor of elementwise_sub_op.
y (MpcVariable): The second input Tensor/LoDTensor of elementwise_add_op.
y (MpcVariable): The second input Tensor/LoDTensor of elementwise_sub_op.
The dimensions of must be less than or equal to the dimensions of x.
axis: If X.dimension != Y.dimension, Y.dimension must be a subsequence of x.dimension.
axis: If X.dimension != Y.dimension, Y.dimension must be a subsequence of x.dimension. (TODO)
And axis is the start dimension index for broadcasting Y onto X.
act (string, optional): Activation applied to the output. Default is None.
name (string, optional): Name of the output. Default is None. It is used to print debug info for developers.
Returns:
MpcVariable(Tensor/LoDTensor): The output Tensor/LoDTensor of elementwise add op.
MpcVariable(Tensor/LoDTensor): The output Tensor/LoDTensor of elementwise sub op.
Examples: todo
"""
return _elementwise_op(MpcLayerHelper('elementwise_sub', **locals()))
def elementwise_mul(x, y, axis=-1, act=None, name=None):
    """
    elementwise_mul Operator.
    This operator is used to perform element-wise multiplication for input $x$ and $y$.
    The equation is:
    .. math::
        Out = x * y
    Both the input $x$ and $y$ can carry the LoD (Level of Details) information, or not.
    But the output only shares the LoD information with input $x$.
    Args:
        x (MpcVariable): The first input Tensor/LoDTensor of elementwise_mul_op.
        y (MpcVariable): The second input Tensor/LoDTensor of elementwise_mul_op.
            The dimensions of y must be equal to the dimensions of x.
        axis: If X.dimension != Y.dimension, Y.dimension must be equal or (less than) (TODO) a subsequence of x.dimension.
            And axis is the start dimension index for broadcasting Y onto X.
        act (string, optional): Activation applied to the output. Default is None.
        name (string, optional): Name of the output. Default is None. It is used to print debug info for developers.
    Returns:
        MpcVariable(Tensor/LoDTensor): The output Tensor/LoDTensor of elementwise mul op.
    Examples: todo
    """
    # locals() forwards x, y, axis, act and name to the helper as keyword arguments.
    return _elementwise_op(MpcLayerHelper('elementwise_mul', **locals()))
......@@ -25,7 +25,8 @@ __all__ = [
'square',
'sum',
'square_error_cost',
'reduce_sum'
'reduce_sum',
'scale'
]
......@@ -200,3 +201,47 @@ def reduce_sum(input, dim=None, keep_dim=False, name=None):
return out
def scale(x, scale=1.0, bias=0.0, bias_after_scale=True, act=None, name=None):
    """
    Scale operator.
    Putting scale and bias to the input Tensor as following:
    ``bias_after_scale`` is True:
    .. math::
        Out=scale*X+bias
    ``bias_after_scale`` is False:
    .. math::
        Out=scale*(X+bias)
    Args:
        x(MpcVariable): Input N-D Tensor of scale operator. Data type should be int64.
        scale(float|Variable): The scale factor of the input, it should be a float number or a Variable with shape [1] and data type as float32.
            An ``MpcVariable`` is passed to the op as the ``ScaleTensor`` input; any other value is converted with ``float()`` and passed as the ``scale`` attribute.
        bias(float): The bias to be put on the input.
        bias_after_scale(bool): Apply bias addition after or before scaling. It is useful for numeric stability in some circumstances.
        act(str, optional): Activation applied to the output such as tanh, softmax, sigmoid, relu.
        name(str, optional): The default value is None. Normally there is no need for user to set this property. For more information, please refer to :ref:`api_guide_Name`
    Returns:
        Variable(Tensor|LoDTensor): Output tensor of scale operator, with shape and data type same as input.
    Examples:
        .. code-block:: python
            import paddle_fl.mpc as pfl_mpc
            pfl_mpc.init("aby3", int(args.role), "localhost", args.server, int(args.port))
            data_1 = pfl_mpc.data(name='x', shape=[3, 3], dtype='int64')
            pfl_mpc.layers.scale(data_1, 0.5)
    """
    # Only int64 MPC variables are accepted as input.
    check_mpc_variable_and_dtype(x, "x", ['int64'], "scale")
    inputs = {'X': [x]}
    attrs = {
        'bias': float(bias),
        'bias_after_scale': bias_after_scale,
    }
    # A tensor scale is fed as an op input, a plain number as an attribute.
    if isinstance(scale, MpcVariable):
        inputs['ScaleTensor'] = [scale]
    else:
        attrs['scale'] = float(scale)
    # locals() forwards every local defined so far (the arguments plus
    # ``inputs`` and ``attrs``) to the helper; do not rename them.
    helper = MpcLayerHelper('scale', **locals())
    out = helper.create_mpc_variable_for_type_inference(dtype=x.dtype)
    helper.append_op(
        type='mpc_scale', inputs=inputs, outputs={'Out': out}, attrs=attrs)
    return helper.append_activation(out)
......@@ -22,7 +22,7 @@ from paddle.fluid.layers.layer_function_generator import OpProtoHolder
from ..framework import MpcVariable, create_mpc_var
supported_mpc_ops = ['__add__', '__radd__', '__sub__', '__rsub__']
supported_mpc_ops = ['__add__', '__radd__', '__sub__', '__rsub__', '__mul__', '__rmul__']
compare_ops = ['__gt__', '__ge__', '__lt__', '__le__', '__eq__', '__ne__']
supported_mpc_ops.extend(compare_ops)
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
basic tensor layers.
"""
import six
import numpy
from paddle.fluid.data_feeder import check_type, check_dtype
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.initializer import Initializer
from ..framework import MpcVariable
from ..framework import check_mpc_variable_and_dtype
from ..mpc_layer_helper import MpcLayerHelper
__all__ = [
'create_mpc_parameter',
]
def create_mpc_parameter(shape,
                         dtype,
                         name=None,
                         attr=None,
                         is_bias=False,
                         default_initializer=None):
    """
    :api_attr: Static Graph
    This function creates an mpc parameter. The parameter is a learnable variable, which can have
    gradient, and can be optimized.
    NOTE: this is a very low-level API. This API is useful when you create
    an operator by yourself, instead of using layers.
    Parameters:
        shape (list of int): Shape of the parameter
        dtype (str): Data type of the parameter; only 'int64' passes the check below
        name (str, optional): For detailed information, please refer to
           :ref:`api_guide_Name` . Usually name is no need to set and None by default.
        attr (ParamAttr, optional): Attributes of the parameter
        is_bias (bool, optional): This can affect which default initializer is chosen
                       when default_initializer is None. If is_bias,
                       initializer.Constant(0.0) will be used. Otherwise,
                       Xavier() will be used.
        default_initializer (Initializer, optional): Initializer for the parameter
    Returns:
        The created parameter.
    Examples:
        .. code-block:: python
            import paddle_fl.mpc as pfl_mpc
            pfl_mpc.init("aby3", role, "localhost", redis_server, redis_port)
            W = pfl_mpc.layers.create_mpc_parameter(shape=[784, 200], dtype='int64')
    """
    check_type(shape, 'shape', (list, tuple, numpy.ndarray), 'create_mpc_parameter')
    # Every dimension must be an integer type; Python 2 additionally has ``long``.
    for item in shape:
        if six.PY2:
            check_type(item, 'item of shape',
                       (int, long, numpy.uint8, numpy.int8, numpy.int16,
                        numpy.int32, numpy.int64), 'create_mpc_parameter')
        else:
            check_type(item, 'item of shape',
                       (int, numpy.uint8, numpy.int8, numpy.int16, numpy.int32,
                        numpy.int64), 'create_mpc_parameter')
    check_dtype(dtype, 'dtype', ['int64'], 'create_mpc_parameter')
    check_type(attr, 'attr', (type(None), ParamAttr), 'create_mpc_parameter')
    check_type(default_initializer, 'default_initializer',
               (type(None), Initializer), 'create_mpc_parameter')
    # locals() forwards every local (the arguments and the loop variable
    # ``item`` when shape is non-empty) to the helper; do not rename them.
    helper = MpcLayerHelper("create_mpc_parameter", **locals())
    # Without an explicit ParamAttr, build one that only carries the name.
    if attr is None:
        attr = ParamAttr(name=name)
    return helper.create_mpc_parameter(attr, shape,
                                       dtype, is_bias,
                                       default_initializer)
......@@ -29,6 +29,7 @@ TEST_MODULES=("test_datautils_aby3"
"test_data_preprocessing"
"test_op_reshape"
"test_op_reduce_sum"
"test_op_elementwise_mul"
)
# run unittest
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This module tests the elementwise mul op.
"""
import unittest
from multiprocessing import Manager
import numpy as np
import paddle.fluid as fluid
import paddle_fl.mpc as pfl_mpc
import paddle_fl.mpc.data_utils.aby3 as aby3
import test_op_base
class TestOpMul(test_op_base.TestOpBase):
    """
    Tests for the MPC element-wise multiplication op.

    The non-``test_`` methods are per-party targets handed to
    ``multi_party_run``; each builds one multiplication, runs it on this
    party's shares and appends the fetched result to ``return_results``.
    """

    def elementwise_mul(self, **kwargs):
        """
        Multiply two variables with one dimension.
        :param kwargs:
        :return:
        """
        role = kwargs['role']
        d_1 = kwargs['data_1'][role]
        d_2 = kwargs['data_2'][role]
        return_results = kwargs['return_results']

        pfl_mpc.init("aby3", role, "localhost", self.server, int(self.port))
        x = pfl_mpc.data(name='x', shape=[4], dtype='int64')
        y = pfl_mpc.data(name='y', shape=[4], dtype='int64')
        op_mul = pfl_mpc.layers.elementwise_mul(x=x, y=y)
        exe = fluid.Executor(place=fluid.CPUPlace())
        results = exe.run(feed={'x': d_1, 'y': d_2}, fetch_list=[op_mul])

        self.assertEqual(results[0].shape, (2, 4))
        return_results.append(results[0])

    def multi_dim_mul(self, **kwargs):
        """
        Multiply two variables with multi dimensions, using the ``*`` operator.
        :return:
        """
        role = kwargs['role']
        d_1 = kwargs['data_1'][role]
        d_2 = kwargs['data_2'][role]
        return_results = kwargs['return_results']

        pfl_mpc.init("aby3", role, "localhost", self.server, int(self.port))
        x = pfl_mpc.data(name='x', shape=[2, 2], dtype='int64')
        y = pfl_mpc.data(name='y', shape=[2, 2], dtype='int64')
        math_mul = x * y
        exe = fluid.Executor(place=fluid.CPUPlace())
        results = exe.run(feed={'x': d_1, 'y': d_2}, fetch_list=[math_mul])

        self.assertEqual(results[0].shape, (2, 2, 2))
        return_results.append(results[0])

    def diff_dim_mul(self, **kwargs):
        """
        Multiply with different dimensions: x is [3, 4] and y is [4].
        :param kwargs:
        :return:
        """
        role = kwargs['role']
        d_1 = kwargs['data_1'][role]
        d_2 = kwargs['data_2'][role]
        return_results = kwargs['return_results']

        pfl_mpc.init("aby3", role, "localhost", self.server, int(self.port))
        x = pfl_mpc.data(name='x', shape=[3, 4], dtype='int64')
        y = pfl_mpc.data(name='y', shape=[4], dtype='int64')
        math_mul = x * y
        exe = fluid.Executor(place=fluid.CPUPlace())
        results = exe.run(feed={'x': d_1, 'y': d_2}, fetch_list=[math_mul])

        self.assertEqual(results[0].shape, (2, 3, 4))
        return_results.append(results[0])

    def diff_dim_mul_mid(self, **kwargs):
        """
        Multiply with different dimensions: x is [3, 4, 2], y is [3, 4], axis=0.
        :param kwargs:
        :return:
        """
        role = kwargs['role']
        d_1 = kwargs['data_1'][role]
        d_2 = kwargs['data_2'][role]
        return_results = kwargs['return_results']

        pfl_mpc.init("aby3", role, "localhost", self.server, int(self.port))
        x = pfl_mpc.data(name='x', shape=[3, 4, 2], dtype='int64')
        y = pfl_mpc.data(name='y', shape=[3, 4], dtype='int64')
        math_mul = pfl_mpc.layers.elementwise_mul(x, y, axis=0)
        exe = fluid.Executor(place=fluid.CPUPlace())
        results = exe.run(feed={'x': d_1, 'y': d_2}, fetch_list=[math_mul])

        self.assertEqual(results[0].shape, (2, 3, 4, 2))
        return_results.append(results[0])

    @staticmethod
    def _all_party_shares(data):
        """Split ``data`` into shares and stack the share pair of each of the 3 parties."""
        shares = aby3.make_shares(data)
        return np.array([aby3.get_aby3_shares(shares, i) for i in range(3)])

    def _run_and_check(self, target, data_1, data_2, expected_out):
        """Run ``target`` on all parties and compare the revealed product with ``expected_out``."""
        data_1_all3shares = self._all_party_shares(data_1)
        data_2_all3shares = self._all_party_shares(data_2)

        return_results = Manager().list()
        ret = self.multi_party_run(target=target,
                                   data_1=data_1_all3shares,
                                   data_2=data_2_all3shares,
                                   return_results=return_results)
        self.assertEqual(ret[0], True)
        revealed = aby3.reconstruct(np.array(return_results))
        self.assertTrue(np.allclose(revealed, expected_out, atol=1e-4))

    def test_elementwise_mul(self):
        """2 * 3 on two length-4 vectors."""
        data_1 = np.full(shape=(4), fill_value=2)
        data_2 = np.full(shape=(4), fill_value=3)
        expected_out = np.array([[6, 6, 6, 6]])
        self._run_and_check(self.elementwise_mul, data_1, data_2, expected_out)

    def test_multi_dim_mul(self):
        """2 * 3 on two 2x2 matrices."""
        data_1 = np.full(shape=(2, 2), fill_value=2)
        data_2 = np.full(shape=(2, 2), fill_value=3)
        expected_out = np.array([[6, 6], [6, 6]])
        self._run_and_check(self.multi_dim_mul, data_1, data_2, expected_out)

    def test_diff_dim_mul(self):
        """2 * 2 with a [4] operand broadcast onto a [3, 4] operand."""
        data_1 = np.full((3, 4), fill_value=2)
        data_2 = np.full((4), fill_value=2)
        expected_out = np.array([[4, 4, 4, 4], [4, 4, 4, 4], [4, 4, 4, 4]])
        self._run_and_check(self.diff_dim_mul, data_1, data_2, expected_out)

    def test_diff_dim_mul_mid(self):
        """2 * 1.5 with a [3, 4] operand broadcast onto a [3, 4, 2] operand."""
        data_1 = np.full((3, 4, 2), fill_value=2)
        data_2 = np.full((3, 4,), fill_value=1.5)
        expected_out = np.array([[[3, 3], [3, 3], [3, 3], [3, 3]],
                                 [[3, 3], [3, 3], [3, 3], [3, 3]],
                                 [[3, 3], [3, 3], [3, 3], [3, 3]]])
        self._run_and_check(self.diff_dim_mul_mid, data_1, data_2, expected_out)
# Run every test case in this module when executed as a script.
if __name__ == '__main__':
    unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册