"...operators/distributed/grpc/grpc_bytebuffer_stream.cc" does not exist at "45af8c1e99333d807c052277220b0fd01b2bd18a"
Commit b8d8ee2b authored by: Y yangqingyou

Merge branch 'master' of https://github.com/PaddlePaddle/PaddleFL into refactor_context

Conflicts:
	core/paddlefl_mpc/mpc_protocol/abstract_context.h
	core/paddlefl_mpc/mpc_protocol/aby3_operators.h
	core/privc3/boolean_tensor.h
	core/privc3/boolean_tensor_impl.h
	core/privc3/fixedpoint_tensor.h
......@@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.15)
project(PaddleEncrypted)
add_compile_options(-msse4.2 -maes -fPIC -DPADDLE_WITH_MKLDNN)
add_compile_options(-msse4.2 -fPIC -DPADDLE_WITH_MKLDNN -O2)
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
set(CMAKE_CXX_STANDARD 11)
......@@ -34,8 +34,8 @@ execute_process(COMMAND ${PYTHON} -c "import paddle;print(paddle.version.full_ve
RESULT_VARIABLE ret OUTPUT_VARIABLE paddle_version OUTPUT_STRIP_TRAILING_WHITESPACE)
if (NOT ret)
if (NOT ${paddle_version} STREQUAL "1.8.0")
message(FATAL_ERROR "Paddle installation of 1.8.0 is required but ${paddle_version} is found")
if (NOT ${paddle_version} STRGREATER_EQUAL "1.8.0")
message(FATAL_ERROR "Paddle installation of >= 1.8.0 is required but ${paddle_version} is found")
endif()
else()
message(FATAL_ERROR "Could not get paddle version.")
......@@ -57,6 +57,10 @@ option(WITH_TESTING "Compile with unit testing" ON)
option(WITH_PSI "Compile with psi lib" ON)
option(USE_AES_NI "Compile with AES NI" ON)
option(USE_OPENMP "Compile with OpenMP" ON)
########################### the project build part ###############################
message(STATUS "Using paddlepaddle installation of ${paddle_version}")
message(STATUS "paddlepaddle include directory: ${PADDLE_INCLUDE}")
......@@ -70,6 +74,15 @@ include_directories(.)
include_directories(${PADDLE_INCLUDE})
include_directories(${PADDLE_INCLUDE}/third_party)
if (USE_AES_NI)
add_compile_definitions(USE_AES_NI)
add_compile_options(-maes)
endif (USE_AES_NI)
if (USE_OPENMP)
add_compile_options(-fopenmp)
find_package(OpenMP REQUIRED)
endif(USE_OPENMP)
add_subdirectory(core/privc3)
add_subdirectory(core/paddlefl_mpc/mpc_protocol)
......
<img src='https://github.com/PaddlePaddle/PaddleFL/blob/master/docs/source/_static/FL-logo.png' width = "400" height = "160">
[DOC](https://paddlefl.readthedocs.io/en/latest/) | [Quick Start](https://paddlefl.readthedocs.io/en/latest/instruction.html) | [中文](./README_cn.md)
[DOC](https://paddlefl.readthedocs.io/en/latest/) | [Quick Start](https://paddlefl.readthedocs.io/en/latest/compile_and_intall.html) | [中文](./README_cn.md)
PaddleFL is an open source federated learning framework based on PaddlePaddle. Researchers can easily replicate and compare different federated learning algorithms with PaddleFL, and developers can also benefit from PaddleFL because it is easy to deploy a federated learning system in large-scale distributed clusters. In PaddleFL, several federated learning strategies will be provided, with applications in computer vision, natural language processing, recommendation, and so on. Applications of traditional machine learning training strategies, such as multi-task learning and transfer learning in federated learning settings, will also be provided. Based on PaddlePaddle's large-scale distributed training and elastic scheduling of training jobs on Kubernetes, PaddleFL can be easily deployed on full-stack open-source software.
......@@ -42,7 +42,7 @@ We **highly recommend** to run PaddleFL in Docker
```sh
#Pull and run the docker
docker pull hub.baidubce.com/paddlefl/paddle_fl:latest
docker run --name <docker_name> --net=host -it -v $PWD:/root <image id> /bin/bash
docker run --name <docker_name> --net=host -it -v $PWD:/paddle <image id> /bin/bash
#Install paddle_fl
pip install paddle_fl
......
......@@ -39,7 +39,7 @@ PaddleFL mainly provides two kinds of solutions: **Data Parallel** and **Federate
```sh
#Pull and run the docker
docker pull hub.baidubce.com/paddlefl/paddle_fl:latest
docker run --name <docker_name> --net=host -it -v $PWD:/root <image id> /bin/bash
docker run --name <docker_name> --net=host -it -v $PWD:/paddle <image id> /bin/bash
#Install paddle_fl
pip install paddle_fl
......
add_compile_options(-msse4.2 -maes)
set(PYBIND_SRCS
"./data_utils.cc"
)
......
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <atomic>
#include <set>
......@@ -21,8 +21,8 @@
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
#include "core/paddlefl_mpc/mpc_protocol/aby3_operators.h"
#include "core/privc3/fixedpoint_util.h"
#include "core/paddlefl_mpc/mpc_protocol/aby3_operators.h"
#include "core/psi/psi_api.h"
namespace py = pybind11;
......@@ -30,12 +30,13 @@ namespace py = pybind11;
namespace aby3 {
// split plaintext into three shares.
template <typename T, size_t N> py::array_t<T> share(double input) {
template<typename T, size_t N>
py::array_t<T> share(double input) {
size_t share_num = 3;
auto shares = py::array_t<T>(share_num);
py::buffer_info shares_buf = shares.request();
T *shares_buf_ptr = (T *)shares_buf.ptr;
T *ret_ptr[share_num];
T* shares_buf_ptr = (T*)shares_buf.ptr;
T* ret_ptr[share_num];
for (size_t i = 0; i < share_num; ++i) {
ret_ptr[i] = &shares_buf_ptr[i];
}
......@@ -46,10 +47,11 @@ template <typename T, size_t N> py::array_t<T> share(double input) {
}
// combine three shares to reveal plaintext.
template <typename T, size_t N> double reveal(py::array_t<T> shares) {
template<typename T, size_t N>
double reveal(py::array_t<T> shares) {
size_t share_num = 3;
py::buffer_info shares_buf = shares.request();
T *shares_buf_ptr = (T *)shares_buf.ptr;
T *shares_buf_ptr = (T *) shares_buf.ptr;
T *ret[share_num];
for (size_t idx = 0; idx < share_num; ++idx) {
......@@ -62,14 +64,15 @@ template <typename T, size_t N> double reveal(py::array_t<T> shares) {
}
// call psi_send
int send_psi(int port, const std::set<std::string> &input) {
int send_psi(int port, const std::set<std::string>& input) {
std::atomic<int> prog(0);
return psi::psi_send(port, input, &prog);
}
// call psi_recv
std::vector<std::string> recv_psi(const std::string &remote_ip, int port,
const std::set<std::string> &input) {
std::vector<std::string> recv_psi(const std::string &remote_ip,
int port,
const std::set<std::string>& input) {
std::vector<std::string> output;
std::atomic<int> prog(0);
int ret = psi::psi_recv(remote_ip, port, input, &output, &prog);
......@@ -80,7 +83,8 @@ std::vector<std::string> recv_psi(const std::string &remote_ip, int port,
return output;
}
PYBIND11_MODULE(mpc_data_utils, m) {
PYBIND11_MODULE(mpc_data_utils, m)
{
// optional module docstring
m.doc() = "pybind11 paddle-mpc plugin: data_utils (share, reveal, psi)";
......@@ -90,8 +94,11 @@ PYBIND11_MODULE(mpc_data_utils, m) {
"combine three shares to reveal plaintext.");
m.def("send_psi", &send_psi, "Send input in two party PSI.");
m.def("recv_psi", &recv_psi,
"Send input and return PSI result as output in two party PSI.");
m.def("recv_psi", &recv_psi, "Send input and return PSI result as output in two party PSI.");
m.attr("mpc_one_share") = (1 << paddle::mpc::ABY3_SCALING_FACTOR) / 3;
}
} // namespace aby3
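As a mental model for the `share`/`reveal` bindings above, here is a minimal sketch assuming simple additive fixed-point sharing over 64-bit integers with the scaling factor of 16 used elsewhere in this change; PaddleFL's actual ABY3 sharing is replicated and uses correlated randomness, so this is illustrative only. It also hints at why `mpc_one_share` is defined as `(1 << ABY3_SCALING_FACTOR) / 3`: three equal shares of that size recombine to roughly the fixed-point encoding of 1.

```cpp
// Illustrative additive fixed-point sharing over the ring Z_{2^64}
// (assumption: a simplified model, not PaddleFL's replicated ABY3 sharing).
#include <cstdint>
#include <iostream>
#include <random>

int main() {
    const size_t scaling_factor = 16;  // matches ABY3_SCALING_FACTOR above
    double plaintext = 3.14159;
    // Encode the plaintext as a fixed-point integer.
    uint64_t fixed = static_cast<uint64_t>(
        static_cast<int64_t>(plaintext * (1LL << scaling_factor)));

    // Split into three shares that sum to `fixed` modulo 2^64.
    std::mt19937_64 rng(42);
    uint64_t s0 = rng();
    uint64_t s1 = rng();
    uint64_t s2 = fixed - s0 - s1;

    // "Reveal": recombine the shares and decode the fixed-point value.
    int64_t revealed = static_cast<int64_t>(s0 + s1 + s2);
    std::cout << static_cast<double>(revealed) / (1LL << scaling_factor)
              << std::endl;  // prints ~3.14158
    return 0;
}
```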
add_compile_options(-msse4.2 -maes)
set(PROTO_SRCS
"./aby3_protocol.cc"
"./mesh_network.cc"
......@@ -17,3 +15,5 @@ target_link_libraries(mpc_protocol fluid_framework gloo hiredis privc3)
cc_test(mesh_network_test SRCS mesh_network_test.cc DEPS mpc_protocol)
cc_test(mpc_protocol_test SRCS mpc_protocol_test.cc DEPS mpc_protocol)
cc_test(mpc_instance_test SRCS mpc_instance_test.cc DEPS mpc_protocol)
......@@ -13,7 +13,6 @@
// limitations under the License.
#pragma once
#include <algorithm>
#include <algorithm>
#include <memory>
......@@ -30,13 +29,6 @@ using PseudorandomNumberGenerator = psi::PseudorandomNumberGenerator;
class AbstractContext {
public:
/*
AbstractContext(size_t party, std::shared_ptr<AbstractNetwork> network,
const block &seed = psi::g_zero_block,
const block &seed2 = psi::g_zero_block) {
init(party, network, seed, seed2);
}
*/
AbstractContext() = default;
AbstractContext(const AbstractContext &other) = delete;
......@@ -53,7 +45,7 @@ public:
}
void set_num_party(size_t num_party) {
PADDLE_ENFORCE_TRUE(num_party == 2 || num_party == 3,
PADDLE_ENFORCE_EQ(num_party == 2 || num_party == 3, true,
"2 or 3 party protocol is supported.");
_num_party = num_party;
}
......@@ -177,10 +169,9 @@ public:
private:
size_t _num_party;
size_t _party;
std::shared_ptr<AbstractNetwork> _network;
PseudorandomNumberGenerator _prng[3];
};
} // namespace mpc
......
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
// Description: implementations of each virtual op according to ABY3 protocol
......@@ -24,6 +24,7 @@
#include "core/privc3/boolean_tensor.h"
#include "core/privc3/aby3_context.h"
#include "core/privc3/fixedpoint_tensor.h"
#include "core/privc3/boolean_tensor.h"
#include "core/privc3/paddle_tensor.h"
namespace paddle {
......@@ -32,13 +33,14 @@ namespace mpc {
using paddle::framework::Tensor;
using aby3::ABY3Context;
// TODO: decide scaling factor
const size_t ABY3_SCALING_FACTOR = 16;
const size_t ABY3_SCALING_FACTOR = FIXED_POINTER_SCALING_FACTOR;
using FixedTensor = aby3::FixedPointTensor<int64_t, ABY3_SCALING_FACTOR>;
using BoolTensor = aby3::BooleanTensor<int64_t>;
using PaddleTensor = aby3::PaddleTensor<int64_t>;
class Aby3OperatorsImpl : public MpcOperators {
public:
void add(const Tensor *lhs, const Tensor *rhs, Tensor *out) override {
auto lhs_tuple = from_tensor(lhs);
......@@ -50,6 +52,7 @@ public:
auto out_ = std::get<0>(out_tuple).get();
lhs_->add(rhs_, out_);
}
// TODO: override
......@@ -122,8 +125,7 @@ public:
auto out_ = std::get<0>(out_tuple).get();
PaddleTensor scale_tensor(ContextHolder::device_ctx());
scale_tensor.from_float_point_scalar(factor, lhs_->shape(),
ABY3_SCALING_FACTOR);
scale_tensor.from_float_point_scalar(factor, lhs_->shape(), ABY3_SCALING_FACTOR);
lhs_->mul(&scale_tensor, out_);
}
......@@ -138,6 +140,18 @@ public:
op_->relu(out_);
}
void relu_with_derivative(const Tensor *op, Tensor *out, Tensor *derivative) override {
auto op_tuple = from_tensor(op);
auto out_tuple = from_tensor(out);
auto der_tuple = from_tensor<BoolTensor>(derivative);
auto op_ = std::get<0>(op_tuple).get();
auto out_ = std::get<0>(out_tuple).get();
auto der_ = std::get<0>(der_tuple).get();
op_->relu_with_derivative(out_, der_);
}
void sigmoid(const Tensor *op, Tensor *out) override {
auto op_tuple = from_tensor(op);
auto out_tuple = from_tensor(out);
......@@ -148,14 +162,34 @@ public:
op_->sigmoid(out_);
}
void softmax(const Tensor *op, Tensor *out) override {
void sigmoid_enhanced(const Tensor *op, Tensor *out) override {
auto op_tuple = from_tensor(op);
auto out_tuple = from_tensor(out);
auto op_ = std::get<0>(op_tuple).get();
auto out_ = std::get<0>(out_tuple).get();
op_->sigmoid_enhanced(out_);
}
void sigmoid_chebyshev(const Tensor *op, Tensor *out) override {
auto op_tuple = from_tensor(op);
auto out_tuple = from_tensor(out);
auto op_ = std::get<0>(op_tuple).get();
auto out_ = std::get<0>(out_tuple).get();
op_->sigmoid_chebyshev(out_);
}
void softmax(const Tensor *op, Tensor *out, bool use_relu, bool use_long_div) override {
auto op_tuple = from_tensor(op);
auto out_tuple = from_tensor(out);
auto op_ = std::get<0>(op_tuple).get();
auto out_ = std::get<0>(out_tuple).get();
op_->softmax(out_);
op_->softmax(out_, use_relu, use_long_div);
}
void gt(const Tensor *lhs, const Tensor *rhs, Tensor *out) override {
......@@ -239,8 +273,8 @@ public:
out->data<int64_t>(), [](int64_t b) { return 1 - b; });
}
void relu_grad(const Tensor *y, const Tensor *dy, Tensor *dx,
float point = 0.0f) override {
void relu_grad(const Tensor *y, const Tensor *dy,
Tensor *dx, float point = 0.0f) override {
auto y_tuple = from_tensor(y);
......@@ -248,8 +282,7 @@ public:
PaddleTensor point_(ContextHolder::device_ctx());
point_.from_float_point_scalar<float>(point, y_->shape(),
ABY3_SCALING_FACTOR);
point_.from_float_point_scalar<float>(point, y_->shape(), ABY3_SCALING_FACTOR);
auto tmp0 = ContextHolder::tensor_factory()->create_int64_t(y_->shape());
auto tmp1 = ContextHolder::tensor_factory()->create_int64_t(y_->shape());
......@@ -267,24 +300,77 @@ public:
bool_out.mul(dy_, out_);
}
void arith_bool_mul(const Tensor* op_a, const Tensor* op_b, Tensor* out) override {
auto a_tuple = from_tensor(op_a);
auto a_ = std::get<0>(a_tuple).get();
auto b_tuple = from_tensor<BoolTensor>(op_b);
auto b_ = std::get<0>(b_tuple).get();
auto out_tuple = from_tensor(out);
auto out_ = std::get<0>(out_tuple).get();
b_->mul(a_, out_);
}
void max_pooling(const Tensor* in, Tensor* out, Tensor* pos_info) override {
auto a_tuple = from_tensor(in);
auto a_ = std::get<0>(a_tuple).get();
auto b_tuple = from_tensor<BoolTensor>(pos_info);
auto b_ = std::get<0>(b_tuple).get();
auto out_tuple = from_tensor(out);
auto out_ = std::get<0>(out_tuple).get();
a_->max_pooling(out_, b_);
}
void inverse_square_root(const Tensor* in, Tensor* out) override {
auto x_tuple = from_tensor(in);
auto x_ = std::get<0>(x_tuple).get();
auto y_tuple = from_tensor(out);
auto y_ = std::get<0>(y_tuple).get();
x_->inverse_square_root(y_);
}
private:
std::tuple<std::shared_ptr<FixedTensor>, std::shared_ptr<PaddleTensor>,
std::shared_ptr<PaddleTensor>>
from_tensor(const Tensor *t) {
template <typename T>
std::tuple<
std::shared_ptr<T>,
std::shared_ptr<PaddleTensor>,
std::shared_ptr<PaddleTensor> > from_tensor(const Tensor* t) {
PADDLE_ENFORCE_EQ(t->dims()[0], 2);
auto pt0 = std::make_shared<PaddleTensor>(ContextHolder::device_ctx(),
t->Slice(0, 1));
auto pt1 = std::make_shared<PaddleTensor>(ContextHolder::device_ctx(),
t->Slice(1, 2));
auto pt0 = std::make_shared<PaddleTensor>(ContextHolder::device_ctx(), t->Slice(0, 1));
auto pt1 = std::make_shared<PaddleTensor>(ContextHolder::device_ctx(), t->Slice(1, 2));
aby3::TensorAdapter<int64_t> *pt_array[2] = {pt0.get(), pt1.get()};
// remove leading 1 in shape
auto shape = pt0->shape();
shape.erase(shape.begin());
pt0->reshape(shape);
pt1->reshape(shape);
auto ft = std::make_shared<FixedTensor>(pt_array);
aby3::TensorAdapter<int64_t>* pt_array[2] = {pt0.get(), pt1.get()};
auto ft = std::make_shared<T>(pt_array);
return std::make_tuple(ft, pt0, pt1);
}
std::tuple<
std::shared_ptr<FixedTensor>,
std::shared_ptr<PaddleTensor>,
std::shared_ptr<PaddleTensor> > from_tensor(const Tensor* t) {
return from_tensor<FixedTensor>(t);
}
};
} // mpc
......
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
// Description:
// abstract mpc operation interface
......@@ -24,6 +24,9 @@ namespace mpc {
using paddle::framework::Tensor;
// TODO: decide scaling factor
const size_t FIXED_POINTER_SCALING_FACTOR = 16;
class MpcOperators {
public:
virtual void add(const Tensor *lhs, const Tensor *rhs, Tensor *out) = 0;
......@@ -42,9 +45,16 @@ public:
virtual void relu(const Tensor *op, Tensor *out) = 0;
virtual void relu_with_derivative(const Tensor *op, Tensor *out,
Tensor *derivative) = 0;
virtual void sigmoid(const Tensor *op, Tensor *out) = 0;
virtual void softmax(const Tensor *op, Tensor *out) = 0;
virtual void sigmoid_enhanced(const Tensor *op, Tensor *out) = 0;
virtual void sigmoid_chebyshev(const Tensor *op, Tensor *out) = 0;
virtual void softmax(const Tensor *op, Tensor *out, bool use_relu, bool use_long_div) = 0;
virtual void gt(const Tensor *lhs, const Tensor *rhs, Tensor *out) = 0;
......@@ -58,9 +68,23 @@ public:
virtual void neq(const Tensor *lhs, const Tensor *rhs, Tensor *out) = 0;
virtual void relu_grad(const Tensor *y, const Tensor *dy, Tensor *dx,
const float point) = 0;
virtual void relu_grad(const Tensor *y, const Tensor *dy, Tensor *dx, const float point) = 0;
// arithmetic tensor mult boolean tensor, element-wisely
// see [ABY3, sec 5.4.1]
// for aby3 only
// example (in plaintext):
// [1, 2, 3, 4] * [0, 0, 1, 0] = [0, 0, 3, 0]
virtual void arith_bool_mul(const Tensor* op_a, const Tensor* op_b, Tensor* out) {}
// max pooling in which shape of filter is nx1
// pos_info keeps which element is max in a col, for backward grad
// for filter in other shape, reshape input first
virtual void max_pooling(const Tensor* in, Tensor* out, Tensor* pos_info) {}
virtual void inverse_square_root(const Tensor* in, Tensor* out) = 0;
};
} // mpc
} // paddle
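To make the contract of the new optional `arith_bool_mul` and `max_pooling` hooks concrete, here is a plaintext-level sketch; as an assumption it operates on clear values rather than shares, so it only illustrates the intended semantics.

```cpp
// Plaintext illustration of the arith_bool_mul and max_pooling contracts
// (assumption: simplified, works on clear values instead of secret shares).
#include <cstdint>
#include <iostream>
#include <vector>

int main() {
    // arith_bool_mul: [1, 2, 3, 4] * [0, 0, 1, 0] = [0, 0, 3, 0]
    std::vector<int64_t> a = {1, 2, 3, 4};
    std::vector<int64_t> b = {0, 0, 1, 0};
    for (size_t i = 0; i < a.size(); ++i) std::cout << a[i] * b[i] << " ";
    std::cout << "\n";

    // max_pooling over an n x 1 column: output is the max, pos_info marks it.
    std::vector<int64_t> col = {7, 2, 9, 4};
    size_t argmax = 0;
    for (size_t i = 1; i < col.size(); ++i) {
        if (col[i] > col[argmax]) argmax = i;
    }
    std::vector<int64_t> pos_info(col.size(), 0);
    pos_info[argmax] = 1;  // one-hot position kept for the backward grad
    std::cout << "max = " << col[argmax] << ", pos_info = ";
    for (auto v : pos_info) std::cout << v << " ";
    std::cout << "\n";
    return 0;
}
```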
add_compile_options(-msse4.2 -maes)
aux_source_directory(. DIR_SRCS)
add_library(mpc_ops_o OBJECT ${DIR_SRCS})
aux_source_directory(./math MATH_SRCS)
add_library(mpc_ops_o OBJECT ${DIR_SRCS} ${MATH_SRCS})
add_dependencies(mpc_ops_o fluid_framework gloo)
add_library(mpc_ops STATIC $<TARGET_OBJECTS:mpc_ops_o>)
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "./conv_op.h"
#include <memory>
#include <string>
#include <vector>
namespace paddle {
namespace operators {
std::vector<int64_t> ConvOp::ComputeOutputShape(
framework::InferShapeContext* ctx) const {
OP_INOUT_CHECK(ctx->HasInput("Input"), "Input", "Input", "Conv");
OP_INOUT_CHECK(ctx->HasInput("Filter"), "Input", "Filter", "Conv");
auto in_dims = ctx->GetInputDim("Input");
auto filter_dims = ctx->GetInputDim("Filter");
std::vector<int> strides = ctx->Attrs().Get<std::vector<int>>("strides");
std::vector<int> paddings = ctx->Attrs().Get<std::vector<int>>("paddings");
std::string padding_algorithm =
ctx->Attrs().Get<std::string>("padding_algorithm");
int groups = ctx->Attrs().Get<int>("groups");
std::vector<int> dilations = ctx->Attrs().Get<std::vector<int>>("dilations");
const std::string data_format = ctx->Attrs().Get<std::string>("data_format");
// MKL-DNN Kernels are using NCHW order of dims description
// so we ignore data_format consideration for MKL-DNN kernel
const bool channel_last = (this->IsMKLDNNType() == false) &&
(data_format == "NHWC" || data_format == "NDHWC");
PADDLE_ENFORCE_EQ(
// 1 for share dim
in_dims.size() == 4 + 1 || in_dims.size() == 5 + 1, true,
platform::errors::InvalidArgument(
"The input of Op(Conv) should be a 4-D or 5-D Tensor. But "
"received: input's dimension is %u, input's shape is [%s].",
in_dims.size(), in_dims));
PADDLE_ENFORCE_EQ(
in_dims.size(), filter_dims.size(),
platform::errors::InvalidArgument(
"The input's dimension and filter's dimension of "
"Op(Conv) should be equal. But received: the input's shape is [%s], "
"the input's dimension is %d; the filter's shape is [%s], "
"the filter's dimension is %d.",
in_dims, in_dims.size(), filter_dims, filter_dims.size()));
int in_sub_stride_size = in_dims.size() - strides.size();
PADDLE_ENFORCE_EQ(
in_dims.size(), strides.size() + 2U + 1,
platform::errors::InvalidArgument(
"The difference of input's dimension and Attr(strides)'s "
"length must be euqal to 2 for Op(Conv). "
"But received: input's dimension is %d, input's shape is [%s]; "
"Attr(stride)'s length is %d, Attr(stride) is [%s]; "
"difference of input's dimention and Attr(strides)'s length = %u.",
in_dims.size(), in_dims, strides.size(),
framework::make_ddim(strides), in_sub_stride_size));
const auto input_channels =
channel_last ? in_dims[in_dims.size() - 1] : in_dims[1 + 1];
PADDLE_ENFORCE_EQ(
input_channels, filter_dims[1 + 1] * groups,
platform::errors::InvalidArgument(
"The number of input's channels should be equal to filter's channels "
"* groups for Op(Conv). But received: the input's channels is %d, "
"the input's shape is [%s]; the filter's channels is %d, the "
"filter's shape is [%s]; the groups is %d, the data_format is %s. "
"The error may come from wrong data_format setting.",
input_channels, in_dims, filter_dims[1 + 1], filter_dims, groups,
data_format));
PADDLE_ENFORCE_EQ(
filter_dims[0 + 1] % groups, 0,
platform::errors::InvalidArgument(
"The number of output's channels (filter's first dimension) of "
"Op(Conv) should be divided by groups. But received: "
"the output channels is %d, the filter's shape is [%s], "
"the groups is %d.",
filter_dims[0 + 1], filter_dims, groups));
framework::DDim in_data_dims;
if (channel_last) {
in_data_dims = framework::slice_ddim(in_dims, 1 + 1, in_dims.size() - 1);
} else {
in_data_dims = framework::slice_ddim(in_dims, 2 + 1, in_dims.size());
}
framework::DDim filter_data_dims =
framework::slice_ddim(filter_dims, 2 + 1, filter_dims.size());
std::vector<int> ksize = framework::vectorize<int>(filter_data_dims);
UpdatePaddingAndDilation(&paddings, &dilations, padding_algorithm,
in_data_dims, strides, ksize);
std::vector<int64_t> output_shape({in_dims[0], in_dims[1]});
if (!channel_last) {
output_shape.push_back(filter_dims[0 + 1]);
}
for (int i = 0; i < in_data_dims.size(); ++i) {
if ((!ctx->IsRuntime()) &&
(in_data_dims[i] <= 0 || filter_dims[i + 2] <= 0)) {
output_shape.push_back(-1);
} else {
output_shape.push_back(
ConvOutputSize(in_data_dims[i], filter_data_dims[i], dilations[i],
paddings[2 * i], paddings[2 * i + 1], strides[i]));
}
}
if (channel_last) {
output_shape.push_back(filter_dims[1]);
}
return output_shape;
}
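Because every MPC tensor carries a leading share dimension, the `mpc_conv2d` input here is 5-D with layout (share, batch, channel, height, width) in the channel-first case rather than the usual 4-D NCHW, which is what the `+ 1` offsets above account for. A worked example of the resulting output-shape arithmetic (the concrete sizes below are hypothetical, for illustration only):

```cpp
// Worked example of the mpc_conv2d output-shape arithmetic with a leading
// share dim (sizes are hypothetical; single group, dilation 1).
#include <iostream>
#include <vector>

int conv_output_size(int in, int k, int dilation, int pad0, int pad1, int stride) {
    int dkernel = dilation * (k - 1) + 1;
    return (in + pad0 + pad1 - dkernel) / stride + 1;
}

int main() {
    // Input (S, N, C, H, W) = (2, 8, 3, 32, 32); filter (S, M, C, kH, kW) = (2, 16, 3, 3, 3).
    std::vector<int> in = {2, 8, 3, 32, 32};
    std::vector<int> filter = {2, 16, 3, 3, 3};
    int stride = 1, pad = 1, dilation = 1;

    std::vector<int> out = {in[0], in[1], filter[1]};  // share, batch, out channels
    for (int i = 3; i < 5; ++i) {
        out.push_back(conv_output_size(in[i], filter[i], dilation, pad, pad, stride));
    }
    // Expected: 2 8 16 32 32
    for (int d : out) std::cout << d << " ";
    std::cout << "\n";
    return 0;
}
```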
framework::OpKernelType ConvOp::GetExpectedKernelType(
const framework::ExecutionContext& ctx) const {
int customized_type_value =
framework::OpKernelType::kDefaultCustomizedTypeValue;
framework::LibraryType library{framework::LibraryType::kPlain};
// TODO(pzelazko-intel): enable MKLDNN layout when it's ready
auto input_data_type = OperatorWithKernel::IndicateVarDataType(ctx, "Input");
std::string data_format =
"AnyLayout"; // todo enable data layout when it's ready
framework::DataLayout layout = framework::StringToDataLayout(data_format);
if (input_data_type != framework::proto::VarType::INT8 &&
input_data_type != framework::proto::VarType::UINT8) {
auto filter_data_type = ctx.Input<Tensor>("Filter")->type();
PADDLE_ENFORCE_EQ(input_data_type, filter_data_type,
platform::errors::InvalidArgument(
"input and filter data type should be consistent"));
}
if (input_data_type == framework::proto::VarType::FP16) {
PADDLE_ENFORCE_EQ(library, framework::LibraryType::kCUDNN,
platform::errors::InvalidArgument(
"float16 can only be used when CUDNN is used"));
}
auto type = framework::OpKernelType(input_data_type, ctx.GetPlace(), layout,
library, customized_type_value);
return type;
}
framework::OpKernelType ConvOp::GetKernelTypeForVar(
const std::string& var_name, const Tensor& tensor,
const framework::OpKernelType& expected_kernel_type) const {
return framework::OpKernelType(expected_kernel_type.data_type_,
tensor.place(), tensor.layout());
}
void Conv2DOpMaker::Make() {
AddAttr<bool>("is_test",
"(bool, default false) Set to true for inference only, false "
"for training. Some layers may run faster when this is true.")
.SetDefault(false);
AddInput("Input",
"(Tensor) The input tensor of convolution operator. "
"The format of input tensor is NCHW or NHWC, where N is batch size, "
"C is the "
"number of channels, H is the height of the feature, "
"and W is the width of the feature.");
AddInput("Filter",
"(Tensor) The filter tensor of convolution operator. "
"The format of the filter tensor is MCHW, where M is the number of "
"output image channels, C is the number of input image channels, "
"H is the height of the filter, and W is the width of the filter. "
"If the groups attribute is greater than 1, C equals the number of "
"input image channels divided by the groups.");
AddInput("Bias",
"(Tensor) Bias to be added to each output of filter application."
"The format of output tensor is X (one-dimensional) of size equal"
"to the number of output channels. Only used with MKL-DNN.")
.AsDispensable();
AddOutput("Output",
"(Tensor) The output tensor of convolution operator. "
"It has same data fromat and data type as the Input.");
AddAttr<std::vector<int>>("strides",
"(vector<int> default:{1, 1}), the "
"strides(h_stride, w_stride) of "
"convolution operator.")
.SetDefault({1, 1});
AddAttr<std::vector<int>>("paddings",
"(vector<int> default:{0, 0}), the "
"paddings(pad_height_top, pad_height_bottom, "
"pad_width_left, pad_wifth_right) of "
"convolution operator.")
.SetDefault({0, 0});
AddAttr<std::string>(
"padding_algorithm",
"(string, default \"EXPLICIT\") An optional string from: \"EXPLICIT\","
"\"SAME\",\"VALID\". Set to \"EXPLICIT\" for explicit padding. "
"Set to \"SAME\" or \"VALID\" for algorithm of padding. ")
.SetDefault("EXPLICIT");
AddAttr<int>(
"groups",
"(int default:1), the groups number of the convolution operator. "
"According to grouped convolution in Alex Krizhevsky's Deep CNN paper: "
"when group=2, the first half of the filters is only connected to the "
"first half of the input channels, while the second half of the filters "
"is only connected to the second half of the input channels.")
.SetDefault(1);
AddAttr<std::vector<int>>("dilations",
"(vector<int> default:{1, 1}), the "
"dilations(h_dilation, w_dilation) of "
"convolution operator.")
.SetDefault({1, 1});
AddAttr<bool>("use_quantizer",
"(bool, default false) "
"Set to true for operators that should be quantized and use "
"int8 kernel. "
"Only used on CPU.")
.SetDefault(false);
AddAttr<float>("Scale_in",
"Scale_in to be used for int8 input data."
"Only used with MKL-DNN INT8.")
.SetDefault(1.0f);
AddAttr<float>("Scale_out",
"Scale_out to be used for int8 output data."
"Only used with MKL-DNN INT8.")
.SetDefault(1.0f);
AddAttr<float>("Scale_in_eltwise",
"Scale_in_eltwise to be used for int8 eltwise input data."
"Only used with MKL-DNN INT8.")
.SetDefault(1.0f);
AddAttr<std::vector<float>>("Scale_weights",
"Scale_weights to be used for int8 weights data."
"Only used with MKL-DNN INT8.")
.SetDefault({1.0f});
AddAttr<bool>("force_fp32_output",
"(bool, default false) Force INT8 kernel output FP32, only "
"used in MKL-DNN INT8")
.SetDefault(false);
AddAttr<std::string>(
"data_format",
"(string, default NCHW) Only used in "
"An optional string from: \"NHWC\", \"NCHW\". "
"Defaults to \"NHWC\". Specify the data format of the output data, "
"the input will be transformed automatically. ")
.SetDefault("NCHW");
// TODO(dzhwinter): need to registered layout transform function
AddAttr<bool>("exhaustive_search",
"(bool, default false) cuDNN has many algorithm to calculation "
"convolution, whether enable exhaustive search "
"for cuDNN convolution or not, default is False.")
.SetDefault(false);
AddComment(R"DOC(
Convolution Operator.
The convolution operation calculates the output based on the input, filter
and strides, paddings, dilations, groups parameters. The size of each dimension of the
parameters is checked in the infer-shape.
Input(Input) and Output(Output) are in NCHW or NHWC format. Where N is batch
size, C is the number of channels, H is the height of the feature, and W is
the width of the feature.
Filters(Input) is in MCHW format, where M is the number of output image channels, C is
the number of input image channels, H is the height of the filter, and W
is the width of the filter.
Parameters (strides, paddings, dilations) each contain two elements. These two elements represent
height and width, respectively.
The input(X) size and output(Out) size may be different.
Example:
Input:
Input shape: $(N, C_{in}, H_{in}, W_{in})$
Filter shape: $(C_{out}, C_{in}, H_f, W_f)$
Output:
Output shape: $(N, C_{out}, H_{out}, W_{out})$
Where
$$
H_{out}= \frac{(H_{in} + pad_height_top + pad_height_bottom - (dilations[0] * (H_f - 1) + 1))}{strides[0]}+ 1 \\
W_{out}= \frac{(W_{in} + pad_width_left + pad_width_right - (dilations[1] * (W_f - 1) + 1))}{strides[1]}+ 1
$$
)DOC");
Apply();
}
void ConvOpGrad::InferShape(framework::InferShapeContext* ctx) const {
auto in_dims = ctx->GetInputDim("Input");
auto filter_dims = ctx->GetInputDim("Filter");
if (ctx->HasOutput(framework::GradVarName("Input"))) {
ctx->SetOutputDim(framework::GradVarName("Input"), in_dims);
}
if (ctx->HasOutput(framework::GradVarName("Filter"))) {
ctx->SetOutputDim(framework::GradVarName("Filter"), filter_dims);
}
}
framework::OpKernelType ConvOpGrad::GetExpectedKernelType(
const framework::ExecutionContext& ctx) const {
int customized_type_value =
framework::OpKernelType::kDefaultCustomizedTypeValue;
framework::LibraryType library_{framework::LibraryType::kPlain};
// TODO(pzelazko-intel): enable MKLDNN layout when it's ready
std::string data_format = "AnyLayout";
framework::DataLayout layout_ = framework::StringToDataLayout(data_format);
auto type = framework::OpKernelType(
OperatorWithKernel::IndicateVarDataType(ctx, "Input"), ctx.GetPlace(),
layout_, library_, customized_type_value);
return type;
}
framework::OpKernelType ConvOpGrad::GetKernelTypeForVar(
const std::string& var_name, const Tensor& tensor,
const framework::OpKernelType& expected_kernel_type) const {
return framework::OpKernelType(expected_kernel_type.data_type_,
tensor.place(), tensor.layout());
}
template <typename T>
class Conv2DGradMaker : public framework::SingleGradOpMaker<T> {
public:
using framework::SingleGradOpMaker<T>::SingleGradOpMaker;
void Apply(GradOpPtr<T> op) const override {
op->SetType(this->ForwardOpType() + "_grad");
op->SetInput("Input", this->Input("Input"));
op->SetInput("Filter", this->Input("Filter"));
op->SetInput("Bias", this->Input("Bias"));
op->SetInput(framework::GradVarName("Output"), this->OutputGrad("Output"));
op->SetOutput(framework::GradVarName("Input"), this->InputGrad("Input"));
op->SetOutput(framework::GradVarName("Filter"), this->InputGrad("Filter"));
op->SetOutput(framework::GradVarName("Bias"), this->InputGrad("Bias"));
op->SetAttrMap(this->Attrs());
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OPERATOR(mpc_conv2d, ops::ConvOp, ops::Conv2DOpMaker,
ops::ConvOpInferVarType,
ops::Conv2DGradMaker<paddle::framework::OpDesc>,
ops::Conv2DGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(mpc_conv2d_grad, ops::ConvOpGrad);
REGISTER_OP_CPU_KERNEL(
mpc_conv2d, ops::GemmConvKernel<paddle::platform::CPUDeviceContext, int64_t>);
REGISTER_OP_CPU_KERNEL(
mpc_conv2d_grad,
ops::GemmConvGradKernel<paddle::platform::CPUDeviceContext, int64_t>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <algorithm>
#include <string>
#include <unordered_map>
#include <vector>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
#include "./math/im2col.h"
#include "./math/vol2col.h"
#include "./math/math_function.h"
#include "mpc_op.h"
namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
constexpr int kConvMKLDNNFP32 = 1;
constexpr int kConvMKLDNNINT8 = 2;
constexpr int MaxKeyLength = 256;
// Base convolution operator definitions for other conv
// like operators to reuse the implementation.
inline int ConvOutputSize(int input_size, int filter_size, int dilation,
int padding, int stride) {
const int dkernel = dilation * (filter_size - 1) + 1;
int output_size = (input_size + 2 * padding - dkernel) / stride + 1;
PADDLE_ENFORCE_GT(
output_size, 0,
platform::errors::InvalidArgument(
"The output's size is expected to be greater than 0. "
"But recieved: output's size is %d. The output's size is computed by "
"((input_size + 2 * padding - (dilation * (filter_size - 1) + 1)) / "
"stride + 1), where input_size is %d, padding is %d, "
"filter_size is %d, dilation is %d, stride is %d.",
output_size, input_size, padding, filter_size, dilation, stride));
return output_size;
}
inline int ConvOutputSize(int input_size, int filter_size, int dilation,
int padding_1, int padding_2, int stride) {
const int dkernel = dilation * (filter_size - 1) + 1;
int output_size = (input_size + padding_1 + padding_2 - dkernel) / stride + 1;
PADDLE_ENFORCE_GT(
output_size, 0,
platform::errors::InvalidArgument(
"The output's size is expected to be greater than 0. "
"But recieved: output's size is %d. The output's size is computed by "
"((input_size + padding_1 + padding_2 - (dilation * (filter_size - "
"1) + 1)) / stride + 1), where input_size is %d, padding is "
"(%d, %d), filter_size is %d, dilation is %d, stride is %d.",
output_size, input_size, padding_1, padding_2, filter_size, dilation,
stride));
return output_size;
}
template <typename T = int>
inline void UpdatePaddingAndDilation(std::vector<T>* paddings,
std::vector<T>* dilation,
const std::string& padding_algorithm,
const framework::DDim data_dims,
const std::vector<T>& strides,
const std::vector<T>& ksize) {
// set padding size == data_dims.size() * 2
auto data_shape = framework::vectorize<T>(data_dims);
if (static_cast<int>(paddings->size()) == data_dims.size()) {
for (int i = 0; i < data_dims.size(); ++i) {
T copy_pad = *(paddings->begin() + 2 * i);
paddings->insert(paddings->begin() + 2 * i + 1, copy_pad);
}
} else {
PADDLE_ENFORCE_EQ(
data_dims.size() * 2, paddings->size(),
platform::errors::InvalidArgument(
"Attribute padding's size should be the same or twice as the "
"input's dimension. "
"But recieved: padding's size is %d, padding is [%s]; input's "
"dimension is %d, input's shape is [%s].",
paddings->size(), framework::make_ddim(*paddings), data_dims.size(),
data_dims));
}
// when padding_algorithm is "VALID" or "SAME"
if (padding_algorithm == "SAME") {
for (int i = 0; i < data_dims.size(); ++i) {
T out_size = (data_dims[i] + strides[i] - 1) / strides[i];
T pad_sum =
std::max((out_size - 1) * strides[i] + ksize[i] - data_shape[i],
static_cast<T>(0));
T pad_0 = pad_sum / 2;
T pad_1 = pad_sum - pad_0;
*(paddings->begin() + i * 2) = pad_0;
*(paddings->begin() + i * 2 + 1) = pad_1;
// dilation
*(dilation->begin() + i) = 1;
}
} else if (padding_algorithm == "VALID") {
for (auto it = paddings->begin(); it != paddings->end(); it++) {
*it = 0;
}
}
}
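A small worked sketch of how the "SAME" branch above resolves the padding for a single spatial dimension (the sizes are hypothetical):

```cpp
// SAME-padding arithmetic from UpdatePaddingAndDilation, worked on one
// spatial dimension (hypothetical sizes; dilation is forced to 1, as above).
#include <algorithm>
#include <iostream>

int main() {
    int data = 13, stride = 2, ksize = 3;
    int out_size = (data + stride - 1) / stride;                        // ceil(13 / 2) = 7
    int pad_sum = std::max((out_size - 1) * stride + ksize - data, 0);  // 12 + 3 - 13 = 2
    int pad_0 = pad_sum / 2;                                            // 1
    int pad_1 = pad_sum - pad_0;                                        // 1
    std::cout << "out=" << out_size << " pad=(" << pad_0 << "," << pad_1 << ")\n";
    return 0;
}
```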
inline bool IsExpand(const std::vector<int64_t>& filter_dim,
const std::vector<int>& strides,
const std::vector<int>& paddings,
const std::vector<int>& dilations) {
bool filter_1 = true, strides_1 = true, padding_0 = true, dilation_1 = true;
for (size_t j = 0; j < strides.size(); ++j) {
// extra 1 for share dim
filter_1 = filter_1 && (static_cast<int>(filter_dim[j + 2 + 1]) == 1);
strides_1 = strides_1 && (strides[j] == 1);
padding_0 = padding_0 && (paddings[j] == 0);
dilation_1 = dilation_1 && (dilations[j] == 1);
}
if (paddings.size() != strides.size()) {
for (size_t j = 0; j < paddings.size(); ++j) {
padding_0 = padding_0 && (paddings[j] == 0);
}
}
return !(filter_1 && strides_1 && padding_0 && dilation_1);
}
template <typename DeviceContext, typename T>
inline void ResizeToChannelFirst(const framework::ExecutionContext& context,
const Tensor* input,
Tensor* transformed_input) {
// extra 1 for leading share dim S
int dim = input->dims().size() - 2 - 1;
if (dim == 3) {
// input
transformed_input->Resize(input->dims());
// SNDHWC -> NCSDHW
auto in_dims_vec = framework::vectorize(input->dims());
in_dims_vec[0] = input->dims()[1];
in_dims_vec[1] = input->dims()[5];
in_dims_vec[2] = input->dims()[0];
in_dims_vec[3] = input->dims()[2];
in_dims_vec[4] = input->dims()[3];
in_dims_vec[5] = input->dims()[4];
transformed_input->Resize(framework::make_ddim(in_dims_vec));
transformed_input->mutable_data<T>(context.GetPlace());
} else if (dim == 2) {
// input
transformed_input->Resize(input->dims());
// SNHWC -> NCSHW
auto in_dims_vec = framework::vectorize(input->dims());
in_dims_vec[0] = input->dims()[1];
in_dims_vec[1] = input->dims()[4];
in_dims_vec[2] = input->dims()[0];
in_dims_vec[3] = input->dims()[2];
in_dims_vec[4] = input->dims()[3];
transformed_input->Resize(framework::make_ddim(in_dims_vec));
transformed_input->mutable_data<T>(context.GetPlace());
}
}
template <typename DeviceContext, typename T>
inline void ResizeToChannelLast(const framework::ExecutionContext& context,
const Tensor* input,
Tensor* transformed_input) {
// extra 1 for leading share dim S
int dim = input->dims().size() - 2 - 1;
if (dim == 3) {
// input
transformed_input->Resize(input->dims());
// NCSDHW -> SNDHWC
auto in_dims_vec = framework::vectorize(input->dims());
in_dims_vec[0] = input->dims()[2];
in_dims_vec[1] = input->dims()[0];
in_dims_vec[2] = input->dims()[3];
in_dims_vec[3] = input->dims()[4];
in_dims_vec[4] = input->dims()[5];
in_dims_vec[5] = input->dims()[1];
transformed_input->Resize(framework::make_ddim(in_dims_vec));
transformed_input->mutable_data<T>(context.GetPlace());
} else if (dim == 2) {
// input
transformed_input->Resize(input->dims());
// NCSHW -> SNHWC
auto in_dims_vec = framework::vectorize(input->dims());
in_dims_vec[0] = input->dims()[2];
in_dims_vec[1] = input->dims()[0];
in_dims_vec[2] = input->dims()[3];
in_dims_vec[3] = input->dims()[4];
in_dims_vec[4] = input->dims()[1];
transformed_input->Resize(framework::make_ddim(in_dims_vec));
transformed_input->mutable_data<T>(context.GetPlace());
}
}
template <typename DeviceContext, typename T>
inline void ResizeToShareLast(const framework::ExecutionContext& context,
const Tensor* input,
Tensor* transformed_input) {
transformed_input->Resize(input->dims());
// SNC.. -> NCS..
auto in_dims_vec = framework::vectorize(input->dims());
in_dims_vec[0] = input->dims()[1];
in_dims_vec[1] = input->dims()[2];
in_dims_vec[2] = input->dims()[0];
transformed_input->Resize(framework::make_ddim(in_dims_vec));
transformed_input->mutable_data<T>(context.GetPlace());
}
template <typename DeviceContext, typename T>
inline void ResizeToShareFirst(const framework::ExecutionContext& context,
const Tensor* input,
Tensor* transformed_input) {
transformed_input->Resize(input->dims());
// NCS.. -> SNC..
auto in_dims_vec = framework::vectorize(input->dims());
in_dims_vec[0] = input->dims()[2];
in_dims_vec[1] = input->dims()[0];
in_dims_vec[2] = input->dims()[1];
transformed_input->Resize(framework::make_ddim(in_dims_vec));
transformed_input->mutable_data<T>(context.GetPlace());
}
template <typename DeviceContext, typename T>
inline void TransToChannelFirst(const framework::ExecutionContext& context,
const Tensor* input,
Tensor* transformed_input) {
// extra 1 for leading share dim
// swap share and batch_size
int dim = input->dims().size() - 2 - 1;
if (dim == 3) {
auto& dev_ctx = context.template device_context<DeviceContext>();
std::vector<int> axis{1, 5, 0, 2, 3, 4};
math::Transpose<DeviceContext, T, 6> trans6;
trans6(dev_ctx, *input, transformed_input, axis);
} else if (dim == 2) {
auto& dev_ctx = context.template device_context<DeviceContext>();
std::vector<int> axis{1, 4, 0, 2, 3};
math::Transpose<DeviceContext, T, 5> trans5;
trans5(dev_ctx, *input, transformed_input, axis);
}
}
template <typename DeviceContext, typename T>
inline void TransToChannelLast(const framework::ExecutionContext& context,
const Tensor* input, Tensor* transformed_input) {
// extra 1 for leading share dim
// swap share and batch_size
int dim = input->dims().size() - 2 - 1;
if (dim == 3) {
auto& dev_ctx = context.template device_context<DeviceContext>();
std::vector<int> axis{2, 0, 3, 4, 5, 1};
math::Transpose<DeviceContext, T, 6> trans6;
trans6(dev_ctx, *input, transformed_input, axis);
} else if (dim == 2) {
auto& dev_ctx = context.template device_context<DeviceContext>();
std::vector<int> axis{2, 0, 3, 4, 1};
math::Transpose<DeviceContext, T, 5> trans5;
trans5(dev_ctx, *input, transformed_input, axis);
}
}
template <typename DeviceContext, typename T>
inline void TransToShareFirst(const framework::ExecutionContext& context,
const Tensor* input, Tensor* transformed_input) {
int dim = input->dims().size();
PADDLE_ENFORCE_GT(
dim, 4,
platform::errors::InvalidArgument(
"The input's dim is expected to be greater than 4."));
std::vector<int> axis(dim);
for (size_t i = 3; i < dim; ++i) {
axis[i] = i;
}
// share
axis[0] = 2;
// N
axis[1] = 0;
// C
axis[2] = 1;
auto& dev_ctx = context.template device_context<DeviceContext>();
switch(dim) {
case 5:
math::Transpose<DeviceContext, T, 5> trans5;
trans5(dev_ctx, *input, transformed_input, axis);
break;
case 6:
math::Transpose<DeviceContext, T, 6> trans6;
trans6(dev_ctx, *input, transformed_input, axis);
break;
default:
PADDLE_ENFORCE_LT(
dim, 7, platform::errors::InvalidArgument(
"The input's dim greater than 6 not supported yet. "));
}
}
template <typename DeviceContext, typename T>
inline void TransToShareLast(const framework::ExecutionContext& context,
const Tensor* input, Tensor* transformed_input) {
int dim = input->dims().size();
PADDLE_ENFORCE_GT(
dim, 4,
platform::errors::InvalidArgument(
"The input's dim is expected to be greater than 4."));
std::vector<int> axis(dim);
for (size_t i = 3; i < dim; ++i) {
axis[i] = i;
}
// SNC -> NCS
axis[0] = 1;
axis[1] = 2;
axis[2] = 0;
auto& dev_ctx = context.template device_context<DeviceContext>();
switch(dim) {
case 5:
math::Transpose<DeviceContext, T, 5> trans5;
trans5(dev_ctx, *input, transformed_input, axis);
break;
case 6:
math::Transpose<DeviceContext, T, 6> trans6;
trans6(dev_ctx, *input, transformed_input, axis);
break;
default:
PADDLE_ENFORCE_LT(
dim, 7, platform::errors::InvalidArgument(
"The input's dim greater than 6 not supported yet. "));
}
}
template <typename DeviceContext, typename T>
inline void TransToBatchFirst(const framework::ExecutionContext& context,
const Tensor* input, Tensor* transformed_input) {
int dim = input->dims().size();
PADDLE_ENFORCE_GT(
dim, 4,
platform::errors::InvalidArgument(
"The input's dim is expected to be greater than 4."));
std::vector<int> axis(dim);
for (size_t i = 3; i < dim; ++i) {
axis[i] = i;
}
// N
axis[0] = 1;
// C
axis[1] = 2;
// share
axis[2] = 0;
auto& dev_ctx = context.template device_context<DeviceContext>();
switch(dim) {
case 5:
math::Transpose<DeviceContext, T, 5> trans5;
trans5(dev_ctx, *input, transformed_input, axis);
break;
case 6:
math::Transpose<DeviceContext, T, 6> trans6;
trans6(dev_ctx, *input, transformed_input, axis);
break;
default:
PADDLE_ENFORCE_LT(
dim, 7, platform::errors::InvalidArgument(
"The input's dim greater than 6 not supported yet. "));
}
}
template <typename DeviceContext, typename T>
inline void ResizeToSwapedLeadingDims(const framework::ExecutionContext& context,
const Tensor* input,
Tensor* transformed_input) {
transformed_input->Resize(input->dims());
// NS.. -> SN..
// or CS.. -> SC..
auto in_dims_vec = framework::vectorize(input->dims());
in_dims_vec[0] = input->dims()[1];
in_dims_vec[1] = input->dims()[0];
transformed_input->Resize(framework::make_ddim(in_dims_vec));
transformed_input->mutable_data<T>(context.GetPlace());
}
template <typename DeviceContext, typename T>
void TransToSwapedLeadingDims(const framework::ExecutionContext& context,
const Tensor* input,
Tensor* output){
output->Resize(input->dims());
auto in_dims_vec = framework::vectorize(input->dims());
in_dims_vec[0] = input->dims()[1];
in_dims_vec[1] = input->dims()[0];
output->Resize(framework::make_ddim(in_dims_vec));
output->mutable_data<T>(context.GetPlace());
const int dim = input->dims().size();
std::vector<int> axis(dim);
for (size_t i = 0; i < dim; ++i) {
axis[i] = i;
}
axis[0] = 1;
axis[1] = 0;
auto& dev_ctx = context.template device_context<DeviceContext>();
switch(dim) {
case 3:
math::Transpose<DeviceContext, T, 3> trans3;
trans3(dev_ctx, *input, output, axis);
break;
case 4:
math::Transpose<DeviceContext, T, 4> trans4;
trans4(dev_ctx, *input, output, axis);
break;
case 5:
math::Transpose<DeviceContext, T, 5> trans5;
trans5(dev_ctx, *input, output, axis);
break;
case 6:
math::Transpose<DeviceContext, T, 6> trans6;
trans6(dev_ctx, *input, output, axis);
break;
default:
PADDLE_ENFORCE_GT(
dim, 2, platform::errors::InvalidArgument(
"The input's dim less than 3 not supported yet. "));
PADDLE_ENFORCE_LT(
dim, 7, platform::errors::InvalidArgument(
"The input's dim greater than 6 not supported yet. "));
}
return;
}
template <typename DeviceContext, typename T, typename Func>
void SharesToCols(const framework::ExecutionContext& context,
const Tensor* input,
const std::vector<int>& dilations,
const std::vector<int>& strides,
const std::vector<int>& paddings,
Tensor* col, Func data2col) {
// input: CSHW or CSDHW, S for share dim
framework::DDim in_plain_dim =
framework::slice_ddim(input->dims(), 1, input->dims().size());
framework::DDim col_plain_dim =
framework::slice_ddim(col->dims(), 1, col->dims().size());
auto& dev_ctx = context.template device_context<DeviceContext>();
const int share_size = input->dims()[0];
for (size_t i = 0; i < share_size; ++i) {
Tensor share = input->Slice(i, i + 1).Resize(in_plain_dim);
Tensor col_share = col->Slice(i, i + 1).Resize(col_plain_dim);
data2col(dev_ctx, share, dilations, strides, paddings, &col_share);
}
}
template <typename DeviceContext, typename T>
Tensor SwapedLeadingDims(const framework::ExecutionContext& context,
const Tensor* input) {
Tensor output(input->type());
ResizeToSwapedLeadingDims<DeviceContext, T>(context, input,
&output);
TransToSwapedLeadingDims<DeviceContext, T>(context, input,
&output);
return output;
}
template <typename DeviceContext, typename T>
Tensor TransposeMpcMat(const framework::ExecutionContext& context,
const Tensor* input) {
Tensor output(input->type());
auto in_dims_vec = framework::vectorize(input->dims());
PADDLE_ENFORCE_EQ(
in_dims_vec.size(), 3, platform::errors::InvalidArgument(
"The input's dim should be 3. "));
in_dims_vec[0] = input->dims()[0];
in_dims_vec[1] = input->dims()[2];
in_dims_vec[2] = input->dims()[1];
output.Resize(framework::make_ddim(in_dims_vec));
output.mutable_data<T>(context.GetPlace());
std::vector<int> axis(3);
axis[0] = 0;
axis[1] = 2;
axis[2] = 1;
auto& dev_ctx = context.template device_context<DeviceContext>();
math::Transpose<DeviceContext, T, 3> trans3;
trans3(dev_ctx, *input, &output, axis);
return output;
}
// Define Op classes in .h file so that other conv
// operator implementations can reuse the code.
class Conv2DOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() final;
protected:
virtual void Apply() {}
};
class ConvOpInferVarType : public framework::PassInDtypeAndVarTypeToOutput {
protected:
std::unordered_map<std::string, std::string>& GetInputOutputWithSameType()
const override {
static std::unordered_map<std::string, std::string> m{
{"Input", /*->*/ "Output"}};
return m;
}
};
class ConvOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override {
std::vector<int64_t> output_shape = ComputeOutputShape(ctx);
OP_INOUT_CHECK(ctx->HasOutput("Output"), "Output", "Output", "Conv");
ctx->SetOutputDim("Output", framework::make_ddim(output_shape));
ctx->ShareLoD("Input", "Output");
}
protected:
std::vector<int64_t> ComputeOutputShape(
framework::InferShapeContext* ctx) const;
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override;
framework::OpKernelType GetKernelTypeForVar(
const std::string& var_name, const Tensor& tensor,
const framework::OpKernelType& expected_kernel_type) const override;
};
class ConvOpGrad : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override;
protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override;
framework::OpKernelType GetKernelTypeForVar(
const std::string& var_name, const Tensor& tensor,
const framework::OpKernelType& expected_kernel_type) const override;
};
// TODO: add conv double grad
template <typename DeviceContext, typename T>
class GemmConvKernel : public MpcOpKernel<T> {
public:
void ComputeImpl(const framework::ExecutionContext& context) const override {
const Tensor* input = context.Input<Tensor>("Input");
// The filter will be reshaped in the calculations,
// so here use an assignment operation,
// that avoids modifying the variable in the Scope.
Tensor filter = *context.Input<Tensor>("Filter");
Tensor* output = context.Output<Tensor>("Output");
output->mutable_data<T>(context.GetPlace());
const int groups = context.Attr<int>("groups");
const std::vector<int> strides = context.Attr<std::vector<int>>("strides");
std::vector<int> paddings = context.Attr<std::vector<int>>("paddings");
std::vector<int> dilations = context.Attr<std::vector<int>>("dilations");
const std::string padding_algorithm =
context.Attr<std::string>("padding_algorithm");
const std::string data_format = context.Attr<std::string>("data_format");
const bool channel_last = (data_format == "NHWC" || data_format == "NDHWC");
Tensor transformed_input(input->type());
Tensor transformed_output(output->type());
if (channel_last) {
ResizeToChannelFirst<DeviceContext, T>(context, input,
&transformed_input);
TransToChannelFirst<DeviceContext, T>(context, input, &transformed_input);
ResizeToChannelFirst<DeviceContext, T>(context, output,
&transformed_output);
} else {
ResizeToShareLast<DeviceContext, T>(context, input,
&transformed_input);
TransToShareLast<DeviceContext, T>(context, input, &transformed_input);
ResizeToShareLast<DeviceContext, T>(context, output,
&transformed_output);
}
// update padding and dilation
auto trans_in_dims = transformed_input.dims();
auto filter_dims = filter.dims();
// extra 1 for share dim
framework::DDim in_data_dims =
framework::slice_ddim(trans_in_dims, 2 + 1, trans_in_dims.size());
framework::DDim filter_data_dims =
framework::slice_ddim(filter_dims, 2 + 1, filter_dims.size());
std::vector<int> ksize = framework::vectorize<int>(filter_data_dims);
UpdatePaddingAndDilation(&paddings, &dilations, padding_algorithm,
in_data_dims, strides, ksize);
auto& dev_ctx = context.template device_context<DeviceContext>();
const int batch_size = static_cast<int>(transformed_input.dims()[0]);
// filter_shape_vec:
// {k_share, k_o, k_i, k_h, k_w} or {k_share, k_o, k_i, k_d, k_h, k_w}
std::vector<int64_t> filter_shape_vec(framework::vectorize(filter.dims()));
// output_shape_vec:
// {o_n, o_c, o_share, o_h, o_w} or {o_n, o_c, o_share, o_d, o_h, o_w}
std::vector<int64_t> output_shape_vec(
framework::vectorize(transformed_output.dims()));
// use col_shape in the im2col calculation
// col_shape_vec:
// {i_s, i_c/g, k_h, k_w, o_h, o_w} or {i_s, i_c/g, k_d, k_h, k_w,
// o_d, o_h, o_w}
size_t data_dim = filter_shape_vec.size() - 2 - 1;
std::vector<int64_t> col_shape_vec(2 + 2 * data_dim);
col_shape_vec[0] = trans_in_dims[2];
col_shape_vec[1] = trans_in_dims[1] / groups;
std::vector<int64_t> col_matrix_shape_vec(3);
col_matrix_shape_vec[0] = col_shape_vec[0];
col_matrix_shape_vec[1] = col_shape_vec[1];
col_matrix_shape_vec[2] = 1;
// use col_matrix_shape in the gemm calculation
// size:
// (i_c/g * k_h * k_w, o_h * o_w) or (i_c/g * k_d * k_h * k_w, o_d * o_h *
// o_w)
for (size_t j = 0; j < data_dim; ++j) {
col_shape_vec[j + 2] = filter_shape_vec[j + 3];
col_shape_vec[j + 2 + data_dim] = output_shape_vec[j + 3];
col_matrix_shape_vec[1] *= filter_shape_vec[j + 3];
col_matrix_shape_vec[2] *= output_shape_vec[j + 3];
}
framework::DDim col_shape(framework::make_ddim(col_shape_vec));
framework::DDim col_matrix_shape(framework::make_ddim(col_matrix_shape_vec));
bool is_expand = IsExpand(filter_shape_vec, strides, paddings, dilations);
Tensor col;
// col_matrix shares the same piece of data with col,
// but will be reshaped into a two-dimensional matrix shape
// to call the matrix multiplication interface.
Tensor col_matrix;
if (is_expand) {
col = context.AllocateTmpTensor<T, DeviceContext>(col_shape, dev_ctx);
col_matrix.ShareDataWith(col);
col_matrix.Resize(col_matrix_shape);
}
// with share dim
framework::DDim in_matrix_shape = framework::slice_ddim(
transformed_input.dims(), 1, transformed_input.dims().size());
// SOIHW or SOIDHW
framework::DDim filter_matrix_shape = {filter.dims()[0], filter.dims()[1],
filter.numel() / (filter.dims()[0] * filter.dims()[1]) };
filter.Resize(filter_matrix_shape);
// OSIHW or OSIDHW
Tensor filter_ = SwapedLeadingDims<DeviceContext, T>(context, &filter);
// CS(H * W) or CS(D * H * W)
framework::DDim output_matrix_shape = {
transformed_output.dims()[1],
transformed_output.dims()[2],
transformed_output.numel() /
(transformed_output.dims()[0]
* transformed_output.dims()[1]
* transformed_output.dims()[2])};
// convolution operator: im2col(or vol2col) + gemm
int in_step = static_cast<int>(transformed_input.dims()[1]) / groups;
int out_step = static_cast<int>(transformed_output.dims()[1]) / groups;
math::Vol2ColFunctor<DeviceContext, T> vol2col;
math::Im2ColFunctor<math::ColFormat::kCFO, DeviceContext, T> im2col;
for (int i = 0; i < batch_size; i++) {
Tensor in_batch =
transformed_input.Slice(i, i + 1).Resize(in_matrix_shape);
Tensor out_batch =
transformed_output.Slice(i, i + 1).Resize(output_matrix_shape);
for (int g = 0; g < groups; g++) {
Tensor in_slice = in_batch.Slice(g * in_step, (g + 1) * in_step);
Tensor in_slice_ = SwapedLeadingDims<DeviceContext, T>(context, &in_slice);
if (!is_expand) {
col.ShareDataWith(in_slice_);
col_matrix.ShareDataWith(col);
col_matrix.Resize(col_matrix_shape);
} else if (data_dim == 2U) {
SharesToCols<DeviceContext, T>(context, &in_slice_, dilations, strides,
std::vector<int>{paddings[0], paddings[2], paddings[1],
paddings[3]}, &col, im2col);
} else if (data_dim == 3U) {
SharesToCols<DeviceContext, T>(context, &in_slice_, dilations, strides, paddings, &col, vol2col);
}
// gemm
Tensor out_slice = out_batch.Slice(g * out_step, (g + 1) * out_step);
Tensor filter_slice = filter_.Slice(g * out_step, (g + 1) * out_step);
Tensor out_slice_ = SwapedLeadingDims<DeviceContext, T>(context, &out_slice);
Tensor filter_slice_ = SwapedLeadingDims<DeviceContext, T>(context, &filter_slice);
mpc::MpcInstance::mpc_instance()->mpc_protocol()->mpc_operators()->matmul(
&filter_slice_, &col_matrix, &out_slice_);
TransToSwapedLeadingDims<DeviceContext, T>(context, &out_slice_,
&out_slice);
}
}
if (channel_last) {
TransToChannelLast<DeviceContext, T>(context, &transformed_output,
output);
} else {
TransToShareFirst<DeviceContext, T>(context, &transformed_output,
output);
}
}
};
template <typename DeviceContext, typename T>
class GemmConvGradKernel : public MpcOpKernel<T> {
public:
void ComputeImpl(const framework::ExecutionContext& context) const override {
const Tensor* input = context.Input<Tensor>("Input");
const Tensor* output_grad =
context.Input<Tensor>(framework::GradVarName("Output"));
Tensor* input_grad =
context.Output<Tensor>(framework::GradVarName("Input"));
Tensor* filter_grad =
context.Output<Tensor>(framework::GradVarName("Filter"));
// The filter and filter_grad will be reshaped in the calculations,
// so here use an assignment operation,
// that avoids modifying the variable in the Scope.
Tensor filter = *context.Input<Tensor>("Filter");
if (!input_grad && !filter_grad) return;
int groups = context.Attr<int>("groups");
const std::vector<int> strides = context.Attr<std::vector<int>>("strides");
std::vector<int> paddings = context.Attr<std::vector<int>>("paddings");
std::vector<int> dilations = context.Attr<std::vector<int>>("dilations");
const std::string padding_algorithm =
context.Attr<std::string>("padding_algorithm");
const std::string data_format = context.Attr<std::string>("data_format");
const bool channel_last = (data_format == "NHWC" || data_format == "NDHWC");
Tensor transformed_input(input->type());
Tensor transformed_output_grad(output_grad->type());
if (channel_last) {
ResizeToChannelFirst<DeviceContext, T>(context, input,
&transformed_input);
TransToChannelFirst<DeviceContext, T>(context, input, &transformed_input);
ResizeToChannelFirst<DeviceContext, T>(context, output_grad,
&transformed_output_grad);
TransToChannelFirst<DeviceContext, T>(context, output_grad,
&transformed_output_grad);
} else {
ResizeToShareLast<DeviceContext, T>(context, input,
&transformed_input);
TransToShareLast<DeviceContext, T>(context, input, &transformed_input);
ResizeToShareLast<DeviceContext, T>(context, output_grad,
&transformed_output_grad);
TransToShareLast<DeviceContext, T>(context, output_grad, &transformed_output_grad);
}
// update padding and dilation
auto in_dims = transformed_input.dims();
auto filter_dims = filter.dims();
// extra 1 for share dim
framework::DDim in_data_dims =
framework::slice_ddim(in_dims, 2 + 1, in_dims.size());
framework::DDim filter_data_dims =
framework::slice_ddim(filter_dims, 2 + 1, filter_dims.size());
std::vector<int> ksize = framework::vectorize<int>(filter_data_dims);
UpdatePaddingAndDilation(&paddings, &dilations, padding_algorithm,
in_data_dims, strides, ksize);
const int batch_size = static_cast<int>(transformed_input.dims()[0]);
auto& dev_ctx = context.template device_context<DeviceContext>();
// filter_shape_vec: {k_share, k_o, k_i, k_h, k_w} or {k_share, k_o, k_i, k_d, k_h, k_w}
std::vector<int64_t> filter_shape_vec(framework::vectorize(filter.dims()));
// output_shape_vec: {o_n, o_c, o_share, o_h, o_w} or {o_n, o_c, o_share, o_d, o_h, o_w}
std::vector<int64_t> output_shape_vec(
framework::vectorize(transformed_output_grad.dims()));
// use col_shape in the im2col calculation
// col_shape_vec: {i_s, i_c/g, k_h, k_w, o_h, o_w} or
// {i_s, i_c/g, k_d, k_h, k_w, o_d, o_h, o_w}
size_t data_dim = filter_shape_vec.size() - 2 - 1;
std::vector<int64_t> col_shape_vec(2 + 2 * data_dim);
col_shape_vec[0] = in_dims[2];
col_shape_vec[1] = in_dims[1] / groups;
std::vector<int64_t> col_matrix_shape_vec(3);
col_matrix_shape_vec[0] = col_shape_vec[0];
col_matrix_shape_vec[1] = col_shape_vec[1];
col_matrix_shape_vec[2] = 1;
// use col_matrix_shape in the gemm calculation
// size:
// (i_c/g * k_h * k_w, o_h * o_w) or (i_c/g * k_d * k_h * k_w, o_d * o_h *
// o_w)
for (size_t j = 0; j < data_dim; ++j) {
col_shape_vec[j + 2] = filter_shape_vec[j + 3];
col_shape_vec[j + 2 + data_dim] = output_shape_vec[j + 3];
col_matrix_shape_vec[1] *= filter_shape_vec[j + 3];
col_matrix_shape_vec[2] *= output_shape_vec[j + 3];
}
framework::DDim col_shape(framework::make_ddim(col_shape_vec));
framework::DDim col_matrix_shape(framework::make_ddim(col_matrix_shape_vec));
// with share dim
framework::DDim input_shape = framework::slice_ddim(
transformed_input.dims(), 1, transformed_input.dims().size());
// SOIHW or SOIDHW
framework::DDim filter_matrix_shape = {filter.dims()[0], filter.dims()[1],
filter.numel() / (filter.dims()[0] * filter.dims()[1]) };
// OSIHW or OSIDHW
framework::DDim filter_matrix_shape_ = {filter.dims()[1], filter.dims()[0],
filter.numel() / (filter.dims()[0] * filter.dims()[1]) };
filter.Resize(filter_matrix_shape);
Tensor filter_ = SwapedLeadingDims<DeviceContext, T>(context, &filter);
// CS(H * W) or CS(D * H * W)
framework::DDim output_matrix_shape = {
transformed_output_grad.dims()[1],
transformed_output_grad.dims()[2],
transformed_output_grad.numel() /
(transformed_output_grad.dims()[0]
* transformed_output_grad.dims()[1]
* transformed_output_grad.dims()[2])};
// convolution backward input operator: gemm + col2im(or col2vol)
// convolution backward weight operator: im2col(or vol2col) + gemm
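// A minimal shape sketch (an assumed 2-D example, ignoring the share dim):
//   input grad:  filter^T (i_c/g * k_h * k_w, o_c/g) x out_grad
//                (o_c/g, o_h * o_w) -> col (i_c/g * k_h * k_w, o_h * o_w),
//                which col2im folds back onto the input-grad slice;
//   filter grad: out_grad (o_c/g, o_h * o_w) x col^T
//                (o_h * o_w, i_c/g * k_h * k_w) -> (o_c/g, i_c/g * k_h * k_w).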
int in_step = static_cast<int>(transformed_input.dims()[1]) / groups;
int out_step = static_cast<int>(transformed_output_grad.dims()[1]) / groups;
bool is_expand = IsExpand(filter_shape_vec, strides, paddings, dilations);
Tensor col;
// col_matrix shares the same piece of data with col,
// but will be reshaped into a two-dimensional matrix shape
// to call the matrix multiplication interface.
Tensor col_matrix;
if (is_expand) {
col = context.AllocateTmpTensor<T, DeviceContext>(col_shape, dev_ctx);
col_matrix.ShareDataWith(col);
col_matrix.Resize(col_matrix_shape);
}
math::SetConstant<DeviceContext, T> set_zero;
if (input_grad) {
input_grad->mutable_data<T>(context.GetPlace());
Tensor transformed_input_grad(input_grad->type());
if (channel_last) {
ResizeToChannelFirst<DeviceContext, T>(context, input_grad,
&transformed_input_grad);
} else {
ResizeToShareLast<DeviceContext, T>(context, input_grad,
&transformed_input_grad);
}
// if is_expand is false, the operation of set_zero is unnecessary,
// because math::matmul will reset input_grad.
if (is_expand) {
set_zero(dev_ctx, &transformed_input_grad, static_cast<T>(0));
}
math::Col2VolFunctor<DeviceContext, T> col2vol;
math::Col2ImFunctor<math::ColFormat::kCFO, DeviceContext, T> col2im;
for (int i = 0; i < batch_size; i++) {
Tensor out_grad_batch =
transformed_output_grad.Slice(i, i + 1).Resize(output_matrix_shape);
Tensor in_grad_batch =
transformed_input_grad.Slice(i, i + 1).Resize(input_shape);
for (int g = 0; g < groups; g++) {
// gemm
Tensor out_grad_slice =
out_grad_batch.Slice(g * out_step, (g + 1) * out_step);
Tensor filter_slice = filter_.Slice(g * out_step, (g + 1) * out_step);
Tensor in_grad_slice =
in_grad_batch.Slice(g * in_step, (g + 1) * in_step);
Tensor in_grad_slice_ = SwapedLeadingDims<DeviceContext, T>(context, &in_grad_slice);
if (!is_expand) {
col_matrix.ShareDataWith(in_grad_slice);
col_matrix.Resize(col_matrix_shape);
}
Tensor filter_slice_ = SwapedLeadingDims<DeviceContext, T>(context, &filter_slice);
Tensor out_grad_slice_ = SwapedLeadingDims<DeviceContext, T>(context, &out_grad_slice);
Tensor filter_slice_t = TransposeMpcMat<DeviceContext, T>(context, &filter_slice_);
mpc::MpcInstance::mpc_instance()->mpc_protocol()->mpc_operators()->matmul(
&filter_slice_t, &out_grad_slice_, &col_matrix);
if (is_expand && data_dim == 2U) {
SharesToCols<DeviceContext, T>(context, &col, dilations, strides,
std::vector<int>{paddings[0], paddings[2], paddings[1],
paddings[3]},
&in_grad_slice_, col2im);
} else if (is_expand && data_dim == 3U) {
SharesToCols<DeviceContext, T>(context, &col, dilations, strides, paddings, &in_grad_slice_, col2vol);
}
TransToSwapedLeadingDims<DeviceContext, T>(context, &in_grad_slice_,
&in_grad_slice);
}
}
if (channel_last) {
TransToChannelLast<DeviceContext, T>(context, &transformed_input_grad,
input_grad);
} else {
TransToShareFirst<DeviceContext, T>(context, &transformed_input_grad,
input_grad);
}
}
if (filter_grad) {
filter_grad->mutable_data<T>(context.GetPlace());
auto filter_grad_dims = filter_grad->dims();
Tensor filter_grad_ = SwapedLeadingDims<DeviceContext, T>(context, filter_grad);
filter_grad_.Resize(filter_matrix_shape_);
set_zero(dev_ctx, filter_grad, static_cast<T>(0));
math::Im2ColFunctor<math::ColFormat::kCFO, DeviceContext, T> im2col;
math::Vol2ColFunctor<DeviceContext, T> vol2col;
for (int i = 0; i < batch_size; i++) {
Tensor out_grad_batch =
transformed_output_grad.Slice(i, i + 1).Resize(output_matrix_shape);
Tensor in_batch = transformed_input.Slice(i, i + 1).Resize(input_shape);
for (int g = 0; g < groups; g++) {
// im2col
Tensor out_grad_slice =
out_grad_batch.Slice(g * out_step, (g + 1) * out_step);
Tensor in_slice = in_batch.Slice(g * in_step, (g + 1) * in_step);
Tensor in_slice_ = SwapedLeadingDims<DeviceContext, T>(context, &in_slice);
if (!is_expand) {
col.ShareDataWith(in_slice_);
col_matrix.ShareDataWith(col);
col_matrix.Resize(col_matrix_shape);
} else if (data_dim == 2U) {
SharesToCols<DeviceContext, T>(context, &in_slice_, dilations, strides,
std::vector<int>{paddings[0], paddings[2], paddings[1],
paddings[3]}, &col, im2col);
} else if (data_dim == 3U) {
SharesToCols<DeviceContext, T>(context, &in_slice_, dilations, strides, paddings, &col, vol2col);
}
Tensor out_grad_slice_ = SwapedLeadingDims<DeviceContext, T>(context, &out_grad_slice);
Tensor col_mat_t = TransposeMpcMat<DeviceContext, T>(context, &col_matrix);
// gemm
Tensor filter_grad_slice =
filter_grad_.Slice(g * out_step, (g + 1) * out_step);
Tensor filter_grad_slice_ = SwapedLeadingDims<DeviceContext, T>(context, &filter_grad_slice);
mpc::MpcInstance::mpc_instance()->mpc_protocol()->mpc_operators()->matmul(&out_grad_slice_, &col_mat_t, &filter_grad_slice_);
TransToSwapedLeadingDims<DeviceContext, T>(context, &filter_grad_slice_,
&filter_grad_slice);
}
}
TransToSwapedLeadingDims<DeviceContext, T>(context, &filter_grad_,
filter_grad);
filter_grad->Resize(filter_grad_dims);
}
}
};
} // namespace operators
} // namespace paddle
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "core/paddlefl_mpc/operators/math/concat_and_split.h"
#include <vector>
namespace paddle {
namespace operators {
namespace math {
/*
 * All tensors must have the same rank, and the extents of every dimension
 * must match, except along the axis dimension.
*/
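// A small worked example of the row/column decomposition used below (assumed
// values, not taken from this file): concatenating A = [[1,2],[3,4]] and
// B = [[5],[6]] along axis = 1 gives rows = 2, input_cols = {2, 1} and
// out_cols = 3; every output row is assembled by copying the 2-element row of
// A followed by the 1-element row of B, yielding [[1,2,5],[3,4,6]].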
template <typename T>
class ConcatFunctor<platform::CPUDeviceContext, T> {
public:
void operator()(const platform::CPUDeviceContext& context,
const std::vector<framework::Tensor>& input, int axis,
framework::Tensor* output) {
// TODO(zcd): Add input data validity checking
int num = input.size();
int rows = 1;
auto dim_0 = input[0].dims();
for (int i = 0; i < axis; ++i) {
rows *= dim_0[i];
}
int out_rows = rows, out_cols = 0;
std::vector<int64_t> input_cols(input.size());
for (int i = 0; i < num; ++i) {
int t_cols = input[i].numel() / rows;
out_cols += t_cols;
input_cols[i] = t_cols;
}
auto cpu_place = boost::get<platform::CPUPlace>(context.GetPlace());
// computation
auto output_data = output->data<T>();
int col_idx = 0;
for (int j = 0; j < num; ++j) {
int col_len = input_cols[j];
auto input_data = input[j].data<T>();
for (int k = 0; k < out_rows; ++k) {
memory::Copy(cpu_place, output_data + k * out_cols + col_idx, cpu_place,
input_data + k * col_len, sizeof(T) * col_len);
}
col_idx += col_len;
}
}
};
/*
 * All tensors must have the same rank, and the extents of every dimension
 * must match, except along the axis dimension.
*/
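// The inverse of the concat example above (assumed values): splitting
// [[1,2,5],[3,4,6]] along axis = 1 against reference shapes {2x2, 2x1}
// reproduces A = [[1,2],[3,4]] and B = [[5],[6]]; each input row is walked
// once and scattered into the per-output column blocks.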
template <typename T>
class SplitFunctor<platform::CPUDeviceContext, T> {
public:
void operator()(const platform::CPUDeviceContext& context,
const framework::Tensor& input,
const std::vector<const framework::Tensor*>& ref_inputs,
const int axis, std::vector<framework::Tensor*>* outputs) {
// TODO(zcd): Add input data validity checking
size_t num = outputs->size();
int input_rows = 1;
auto dim_0 = ref_inputs[0]->dims();
for (int i = 0; i < axis; ++i) {
input_rows *= dim_0[i];
}
int input_cols = 0;
std::vector<int64_t> output_cols(outputs->size());
for (size_t i = 0; i < num; ++i) {
int t_cols = ref_inputs[i]->numel() / input_rows;
input_cols += t_cols;
output_cols[i] = t_cols;
}
auto cpu_place = boost::get<platform::CPUPlace>(context.GetPlace());
// computation
for (int k = 0; k < input_rows; ++k) {
const T* src_ptr = input.data<T>() + k * input_cols;
int col_idx = 0;
for (size_t j = 0; j < num; ++j) {
int col_len = output_cols[j];
auto* out_tensor = outputs->at(j);
if (out_tensor != nullptr) {
T* dst_ptr = out_tensor->data<T>() + k * col_len;
memory::Copy(cpu_place, dst_ptr, cpu_place, src_ptr + col_idx,
sizeof(T) * col_len);
}
col_idx += col_len;
}
}
}
};
#define DEFINE_FUNCTOR(type) \
template class ConcatFunctor<platform::CPUDeviceContext, type>; \
template class SplitFunctor<platform::CPUDeviceContext, type>;
FOR_ALL_TYPES(DEFINE_FUNCTOR);
} // namespace math
} // namespace operators
} // namespace paddle
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <vector>
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/framework/lod_tensor.h"
namespace paddle {
namespace operators {
namespace math {
/*
* \brief Concatenate the input tensors along the dimension axis.
* TODO(zcd): maybe it needs to be more detailed.
* Examples:
* Input[0] = [[1,2],[3,4]]
* Input[1] = [[5,6]]
* axis = 0
*
* Output = [[1,2],
* [3,4],
* [5,6]]
*/
template <typename DeviceContext, typename T>
class ConcatFunctor {
public:
void operator()(const DeviceContext& context,
const std::vector<framework::Tensor>& input, int axis,
framework::Tensor* output);
};
/*
* \brief Split the input tensors along the dimension axis into outputs.
* TODO(zcd): maybe it needs to be more detailed.
* Examples:
* Input = [[1,2],
* [3,4],
* [5,6]]
* axis = 0
*
* Output[0] = [[1,2],[3,4]]
* Output[1] = [[5,6]]
*/
template <typename DeviceContext, typename T>
class SplitFunctor {
public:
void operator()(const DeviceContext& context, const framework::Tensor& input,
const std::vector<const framework::Tensor*>& ref_inputs,
int axis, std::vector<framework::Tensor*>* outputs);
};
} // namespace math
} // namespace operators
} // namespace paddle
#define FOR_ALL_TYPES(macro) \
  macro(int64_t);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "./im2col.h"
#include <vector>
#include "./im2col_cfo_cpu.h"
namespace paddle {
namespace operators {
namespace math {
/*
* im = [input_channels, input_height, input_width]
* col =
* [input_channels, filter_height, filter_width, output_height, output_width]
*/
template <class T>
class Im2ColFunctor<paddle::operators::math::ColFormat::kCFO,
platform::CPUDeviceContext, T> {
public:
void operator()(const platform::CPUDeviceContext& context,
const framework::Tensor& im, const std::vector<int>& dilation,
const std::vector<int>& stride,
const std::vector<int>& padding, framework::Tensor* col,
const DataLayout data_layout) {
PADDLE_ENFORCE_EQ(im.dims().size(), 3, "The dimension of im should be 3.");
PADDLE_ENFORCE_EQ(col->dims().size(), 5,
"The dimension of col should be 5.");
if (stride[0] == 1 && stride[1] == 1 && dilation[0] == 1 &&
dilation[1] == 1) {
if (padding[0] == 0 && padding[1] == 0 && padding[2] == 0 &&
padding[3] == 0) {
im2col_sh1sw1dh1dw1ph0pw0<T>(im, col, data_layout);
return;
} else if (padding[0] == 1 && padding[1] == 1 && padding[2] == 1 &&
padding[3] == 1) {
im2col_sh1sw1dh1dw1ph1pw1<T>(im, col, data_layout);
return;
}
// TODO(TJ): complete padding >=2
}
im2col_common<T>(im, dilation, stride, padding, col, data_layout);
}
};
/*
* im = [input_channels, input_height, input_width]
* col =
* [input_channels, filter_height, filter_width, output_height, output_width]
*/
template <class T>
class Col2ImFunctor<paddle::operators::math::ColFormat::kCFO,
platform::CPUDeviceContext, T> {
public:
void operator()(const platform::CPUDeviceContext& context,
const framework::Tensor& col,
const std::vector<int>& dilation,
const std::vector<int>& stride,
const std::vector<int>& padding, framework::Tensor* im,
const DataLayout data_layout) {
PADDLE_ENFORCE_EQ(im->dims().size(), 3, "The dimension of im should be 3.");
PADDLE_ENFORCE_EQ(col.dims().size(), 5,
"The dimension of col should be 5.");
int im_channels =
(data_layout != DataLayout::kNHWC ? im->dims()[0] : im->dims()[2]);
int im_height =
(data_layout != DataLayout::kNHWC ? im->dims()[1] : im->dims()[0]);
int im_width =
(data_layout != DataLayout::kNHWC ? im->dims()[2] : im->dims()[1]);
int filter_height = col.dims()[1];
int filter_width = col.dims()[2];
int col_height = col.dims()[3];
int col_width = col.dims()[4];
PADDLE_ENFORCE_EQ((im_height + padding[0] + padding[2] -
((dilation[0] * (filter_height - 1) + 1))) /
stride[0] +
1,
col_height,
"Output_height and padding(padding_up, padding_down) are "
"inconsistent.");
PADDLE_ENFORCE_EQ((im_width + padding[1] + padding[3] -
((dilation[1] * (filter_width - 1) + 1))) /
stride[1] +
1,
col_width,
"Output_height and padding(padding_up, padding_down) are "
"inconsistent.");
int channels_col = im_channels * filter_height * filter_width;
T* im_data = im->data<T>();
const T* col_data = col.data<T>();
for (int c = 0; c < channels_col; ++c) {
int w_offset = c % filter_width;
int h_offset = (c / filter_width) % filter_height;
int c_im = c / (filter_width * filter_height);
for (int h = 0; h < col_height; ++h) {
int im_row_idx = h * stride[0] - padding[0] + h_offset * dilation[0];
for (int w = 0; w < col_width; ++w) {
int im_col_idx = w * stride[1] - padding[1] + w_offset * dilation[1];
if ((im_row_idx) >= 0 && (im_row_idx) < im_height &&
(im_col_idx) >= 0 && (im_col_idx) < im_width) {
int im_offset;
if (data_layout != DataLayout::kNHWC) {
im_offset =
(c_im * im_height + im_row_idx) * im_width + im_col_idx;
} else {
im_offset =
(im_row_idx * im_width + im_col_idx) * im_channels + c_im;
}
im_data[im_offset] +=
col_data[(c * col_height + h) * col_width + w];
}
}
}
}
}
};
template class Im2ColFunctor<paddle::operators::math::ColFormat::kCFO,
platform::CPUDeviceContext, int64_t>;
template class Col2ImFunctor<paddle::operators::math::ColFormat::kCFO,
platform::CPUDeviceContext, int64_t>;
/*
* im = [input_channels, input_height, input_width]
* col =
* [output_height, output_width, input_channels, filter_height, filter_width]
*/
template <class T>
class Im2ColFunctor<paddle::operators::math::ColFormat::kOCF,
platform::CPUDeviceContext, T> {
public:
void operator()(const platform::CPUDeviceContext& context,
const framework::Tensor& im, const std::vector<int>& dilation,
const std::vector<int>& stride,
const std::vector<int>& padding, framework::Tensor* col,
const DataLayout data_layout) {
PADDLE_ENFORCE_EQ(im.dims().size(), 3, "The dimension of im should be 3.");
PADDLE_ENFORCE_EQ(col->dims().size(), 5,
"The dimension of col should be 5.");
int im_channels = im.dims()[0];
int im_height = im.dims()[1];
int im_width = im.dims()[2];
int filter_height = col->dims()[3];
int filter_width = col->dims()[4];
int col_height = col->dims()[0];
int col_width = col->dims()[1];
const T* im_data = im.data<T>();
T* col_data = col->data<T>();
for (int col_row_idx = 0; col_row_idx < col_height; ++col_row_idx) {
for (int col_col_idx = 0; col_col_idx < col_width; ++col_col_idx) {
for (int channel = 0; channel < im_channels; ++channel) {
for (int filter_row_idx = 0; filter_row_idx < filter_height;
++filter_row_idx) {
int im_row_offset =
col_row_idx * stride[0] + filter_row_idx - padding[0];
for (int filter_col_idx = 0; filter_col_idx < filter_width;
++filter_col_idx) {
int im_col_offset =
col_col_idx * stride[1] + filter_col_idx - padding[1];
int col_offset =
((((col_row_idx)*col_width + col_col_idx) * im_channels +
channel) *
filter_height +
filter_row_idx) *
filter_width +
filter_col_idx;
int im_offset = (channel * im_height + im_row_offset) * im_width +
im_col_offset;
col_data[col_offset] =
(im_row_offset < 0 || im_row_offset >= im_height ||
im_col_offset < 0 || im_col_offset >= im_width)
? static_cast<T>(0)
: im_data[im_offset];
}
}
}
}
}
}
};
/*
* im = [input_channels, input_height, input_width]
* col =
* [output_height, output_width, input_channels, filter_height, filter_width]
*/
template <class T>
class Col2ImFunctor<paddle::operators::math::ColFormat::kOCF,
platform::CPUDeviceContext, T> {
public:
void operator()(const platform::CPUDeviceContext& context,
const framework::Tensor& col,
const std::vector<int>& dilation,
const std::vector<int>& stride,
const std::vector<int>& padding, framework::Tensor* im,
const DataLayout data_layout) {
PADDLE_ENFORCE_EQ(im->dims().size(), 3, "The dimension of im should be 3.");
PADDLE_ENFORCE_EQ(col.dims().size(), 5,
"The dimension of col should be 5.");
int im_channels = im->dims()[0];
int im_height = im->dims()[1];
int im_width = im->dims()[2];
int filter_height = col.dims()[3];
int filter_width = col.dims()[4];
int col_height = col.dims()[0];
int col_width = col.dims()[1];
PADDLE_ENFORCE_EQ(
(im_height + padding[0] + padding[2] - filter_height) / stride[0] + 1,
col_height,
"Output_height and padding(padding_up, padding_down) are "
"inconsistent.");
PADDLE_ENFORCE_EQ(
(im_width + padding[1] + padding[3] - filter_width) / stride[1] + 1,
col_width,
"col_width and padding(padding_left, padding_right) are "
"inconsistent.");
T* im_data = im->data<T>();
const T* col_data = col.data<T>();
for (int col_row_idx = 0; col_row_idx < col_height; ++col_row_idx) {
for (int col_col_idx = 0; col_col_idx < col_width; ++col_col_idx) {
for (int channel = 0; channel < im_channels; ++channel) {
for (int filter_row_idx = 0; filter_row_idx < filter_height;
++filter_row_idx) {
int im_row_offset =
col_row_idx * stride[0] + filter_row_idx - padding[0];
for (int filter_col_idx = 0; filter_col_idx < filter_width;
++filter_col_idx) {
int im_col_offset =
col_col_idx * stride[1] + filter_col_idx - padding[1];
int col_offset =
(((col_row_idx * col_width + col_col_idx) * im_channels +
channel) *
filter_height +
filter_row_idx) *
filter_width +
filter_col_idx;
if (im_row_offset >= 0 && im_row_offset < im_height &&
im_col_offset >= 0 && im_col_offset < im_width) {
int im_offset =
(channel * im_height + im_row_offset) * im_width +
im_col_offset;
im_data[im_offset] += col_data[col_offset];
}
}
}
}
}
}
}
};
template class Im2ColFunctor<paddle::operators::math::ColFormat::kOCF,
platform::CPUDeviceContext, int64_t>;
template class Col2ImFunctor<paddle::operators::math::ColFormat::kOCF,
platform::CPUDeviceContext, int64_t>;
} // namespace math
} // namespace operators
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <vector>
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/platform/device_context.h"
namespace paddle {
namespace operators {
namespace math {
using DataLayout = framework::DataLayout;
/* The storage format of the colData in the Im2ColFunctor and Col2ImFunctor. */
enum class ColFormat { kCFO = 0, kOCF = 1 };
/*
 * \brief Converts image data of three dimensions (CHW) into colData of
 * five dimensions in the Im2ColFunctor calculation; the Col2ImFunctor
 * calculation reverses this.
*
* \param imData Image data.
* \param imShape The shape of imData,
* [input_channels, input_height, input_width].
* \param colData Column data.
* \param colShape The shape of colData.
*
 * \param dilations dilation data,
 *        2-dimension [dilation_height, dilation_width].
 *
 * \param strides stride data,
 *        2-dimension [stride_height, stride_width].
 *
 * \param paddings padding data,
 *        4-dimension [up_pad, left_pad, down_pad, right_pad].
*
* If the template argument Format is kCFO, the shape of colData is:
* [input_channels, filter_height, filter_width, output_height, output_width]
* So, it is easy to reshape into a convolution matrix for convolution
* calculation based on matrix multiplication.
 * The shape of the convolution matrix is [height, width], where height equals
 * input_channels * filter_height * filter_width, and width equals
 * output_height * output_width.
*
* Reshape:
* shape of colData shape of convolution matrix
* [input_channels,
* filter_height,
* filter_width, ======> [height, width]
* output_height,
* output_width]
*
* If the template argument Format is kOCF, the shape of colData is:
* [output_height, output_width, input_channels, filter_height, filter_width]
 * So, it is easy to reshape into a sequence matrix for RNN calculation.
 * The shape of the sequence matrix is [seq_length, step_size], where
 * seq_length equals output_height * output_width, and step_size equals
 * input_channels * filter_height * filter_width.
*
* Reshape:
* shape of colData shape of sequence matrix
* [output_height,
* output_width,
* input_channels, ======> [seqLength, stepSize]
* filter_height,
* filter_width]
*
* \note The caller needs to ensure that imShape.inputChannels is equal to
* colShape.inputChannels.
*/
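/*
 * A concrete kCFO example (assumed values): an im of shape [1, 3, 3] with a
 * 2x2 filter, stride 1 and no padding produces a col of shape
 * [1, 2, 2, 2, 2]; reshaped as the convolution matrix this is
 * [1 * 2 * 2, 2 * 2] = [4, 4].
 */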
template <ColFormat Format, typename DeviceContext, typename T>
class Im2ColFunctor {
public:
void operator()(const DeviceContext& context, const framework::Tensor& im,
const std::vector<int>& dilation,
const std::vector<int>& stride,
const std::vector<int>& padding, framework::Tensor* col,
const DataLayout data_layout = DataLayout::kNCHW);
};
template <ColFormat Format, typename DeviceContext, typename T>
class Col2ImFunctor {
public:
void operator()(const DeviceContext& context, const framework::Tensor& col,
const std::vector<int>& dilation,
const std::vector<int>& stride,
const std::vector<int>& padding, framework::Tensor* im,
const DataLayout data_layout = DataLayout::kNCHW);
};
} // namespace math
} // namespace operators
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <vector>
#include "paddle/fluid/framework/tensor.h"
namespace paddle {
namespace operators {
namespace math {
/**
* The most common im2col algorithm.
* Support dilation, stride and padding.
*/
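/**
 * A small worked example (assumed input): im = [1, 3, 3] holding 1..9 in
 * row-major order, filter 2x2, stride 1, padding 0, dilation 1 gives
 * channels_col = 4, and the four rows of col (one per (kh, kw) pair) are
 *   (0,0): {1,2,4,5}, (0,1): {2,3,5,6}, (1,0): {4,5,7,8}, (1,1): {5,6,8,9}.
 */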
template <typename T>
inline void im2col_common(const framework::Tensor& im,
const std::vector<int>& dilation,
const std::vector<int>& stride,
const std::vector<int>& padding,
framework::Tensor* col,
const DataLayout data_layout = DataLayout::kNCHW) {
int im_channels =
(data_layout != DataLayout::kNHWC ? im.dims()[0] : im.dims()[2]);
int im_height =
(data_layout != DataLayout::kNHWC ? im.dims()[1] : im.dims()[0]);
int im_width =
(data_layout != DataLayout::kNHWC ? im.dims()[2] : im.dims()[1]);
int filter_height = col->dims()[1];
int filter_width = col->dims()[2];
int output_height = col->dims()[3];
int output_width = col->dims()[4];
int channels_col = im_channels * filter_height * filter_width;
const T* im_data = im.data<T>();
T* col_data = col->data<T>();
for (int c = 0; c < channels_col; ++c) {
int w_offset = c % filter_width;
int h_offset = (c / filter_width) % filter_height;
int c_im = c / (filter_width * filter_height);
for (int h = 0; h < output_height; ++h) {
int im_row_idx = h * stride[0] - padding[0] + h_offset * dilation[0];
for (int w = 0; w < output_width; ++w) {
int im_col_idx = w * stride[1] - padding[1] + w_offset * dilation[1];
int im_idx;
if (data_layout != DataLayout::kNHWC) {
im_idx = (im_row_idx + c_im * im_height) * im_width + im_col_idx;
} else {
im_idx = (im_row_idx * im_width + im_col_idx) * im_channels + c_im;
}
int col_idx = (c * output_height + h) * output_width + w;
col_data[col_idx] = (im_row_idx < 0 || im_row_idx >= im_height ||
im_col_idx < 0 || im_col_idx >= im_width)
? static_cast<T>(0)
: im_data[im_idx];
}
}
}
}
/**
* im2col algorithm with strides == 1, dilations == 1, paddings == 0
*/
template <typename T>
inline void im2col_sh1sw1dh1dw1ph0pw0(
const framework::Tensor& im, framework::Tensor* col,
const DataLayout data_layout = DataLayout::kNCHW) {
int im_channels =
(data_layout != DataLayout::kNHWC ? im.dims()[0] : im.dims()[2]);
int im_height =
(data_layout != DataLayout::kNHWC ? im.dims()[1] : im.dims()[0]);
int im_width =
(data_layout != DataLayout::kNHWC ? im.dims()[2] : im.dims()[1]);
int filter_height = col->dims()[1];
int filter_width = col->dims()[2];
int output_height = col->dims()[3];
int output_width = col->dims()[4];
const T* im_data = im.data<T>();
T* col_data = col->data<T>();
int col_matrix_width = output_width * output_height;
int im_size = im_height * im_width;
size_t copy_size = sizeof(T) * output_width;
const T* im_data_oh = im_data;
T* dst_data_oh = col_data;
for (int oh = 0; oh < output_height; ++oh) {
const T* src_data_ic = im_data_oh;
T* dst_data = dst_data_oh;
for (int ic = 0; ic < im_channels; ++ic) {
const T* src_data = src_data_ic;
for (int kh = 0; kh < filter_height; ++kh) {
for (int kw = 0; kw < filter_width; ++kw) {
if (data_layout != DataLayout::kNHWC) {
std::memcpy(dst_data, src_data + kw, copy_size);
} else {
for (int kow = 0; kow < output_width; ++kow) {
dst_data[kow] =
im_data[((oh + kh) * im_width + kw + kow) * im_channels + ic];
}
}
dst_data = dst_data + col_matrix_width;
}
src_data = src_data + im_width;
}
src_data_ic = src_data_ic + im_size;
}
im_data_oh = im_data_oh + im_width;
dst_data_oh = dst_data_oh + output_width;
}
}
/**
 * im2col algorithm with strides == 1, dilations == 1, paddings == 1;
 * the filter_width == 1 case has a special implementation.
*/
template <typename T>
inline void im2col_sh1sw1dh1dw1ph1pw1(const framework::Tensor& im,
framework::Tensor* col,
const DataLayout data_layout) {
int im_channels =
(data_layout != DataLayout::kNHWC ? im.dims()[0] : im.dims()[2]);
int im_height =
(data_layout != DataLayout::kNHWC ? im.dims()[1] : im.dims()[0]);
int im_width =
(data_layout != DataLayout::kNHWC ? im.dims()[2] : im.dims()[1]);
int filter_height = col->dims()[1];
int filter_width = col->dims()[2];
int output_height = col->dims()[3];
int output_width = col->dims()[4];
constexpr int plh = 1;
constexpr int prh = 1;
constexpr int plw = 1;
constexpr int prw = 1;
const T* im_data = im.data<T>();
T* col_data = col->data<T>();
int im_size = im_height * im_width;
int col_matrix_width = output_width * output_height;
int col_block_fh = filter_width * col_matrix_width; // fw*oh*ow
int col_block_ic = filter_height * col_block_fh; // fh*fw*oh*ow
// fill height padding
{
size_t copy_size = sizeof(T) * output_width;
T* col_start_l = col_data;
T* col_start_r = col_data + (filter_height - 1) * col_block_fh +
col_matrix_width - output_width;
for (int ic = 0; ic < im_channels; ++ic) {
T* dst_data_l = col_start_l;
T* dst_data_r = col_start_r;
for (int kw = 0; kw < filter_width; ++kw) {
std::memset(dst_data_l, 0, copy_size);
std::memset(dst_data_r, 0, copy_size);
dst_data_l = dst_data_l + col_matrix_width;
dst_data_r = dst_data_r + col_matrix_width;
}
col_start_l = col_start_l + col_block_ic;
col_start_r = col_start_r + col_block_ic;
}
}
auto pad = static_cast<T>(0);
if (filter_width == 1) {
// fill width padding
T* dst_data_ic = col_data;
for (int ic = 0; ic < im_channels; ++ic) {
T* dst_data_kh = dst_data_ic;
for (int kh = 0; kh < filter_height; ++kh) {
T* dst_data = dst_data_kh;
for (int oh = 0; oh < output_height; ++oh) {
*dst_data = pad;
dst_data = dst_data + output_width - 1;
*dst_data = pad;
++dst_data;
}
dst_data_kh = dst_data_kh + col_block_fh;
}
dst_data_ic = dst_data_ic + col_block_ic;
}
// fill core
size_t copy_size = sizeof(T) * (output_width - plw - prw);
for (int oh = 0; oh < output_height; ++oh) {
const T* im_data_start =
im_data + (oh - plh > 0 ? oh - plh : 0) * im_width;
T* dst_data = col_data + oh * output_width;
for (int ic = 0; ic < im_channels; ++ic) {
const T* src_data = im_data_start + ic * im_size;
for (int kh = 0; kh < filter_height; ++kh) {
if ((oh < plh && kh < plh) || (oh > (output_height - prh - 1) &&
kh > (filter_height - prh - 1))) {
dst_data = dst_data + col_matrix_width;
continue;
}
if (data_layout != DataLayout::kNHWC) {
std::memcpy(dst_data + plw, src_data, copy_size);
} else {
for (int kow = 0; kow < output_width - plw - prw; ++kow) {
dst_data[plw + kow] =
im_data[(((oh - plh > 0 ? oh - plh : 0) + kh) * im_width +
kow) *
im_channels +
ic];
}
}
dst_data = dst_data + col_matrix_width;
src_data = src_data + im_width;
}
}
}
return;
}
// filter_width != 1
// fill width padding
T* dst_data_ic = col_data;
for (int ic = 0; ic < im_channels; ++ic) {
T* dst_data_kh = dst_data_ic;
for (int kh = 0; kh < filter_height; ++kh) {
for (T* dst_data :
{dst_data_kh, dst_data_kh + (filter_width - prw) * col_matrix_width +
output_width - 1}) {
// TODO(TJ): from plh, saving repeated assignment
for (int oh = 0; oh < output_height; ++oh) {
*dst_data = pad;
dst_data = dst_data + output_width;
}
}
dst_data_kh = dst_data_kh + col_block_fh;
}
dst_data_ic = dst_data_ic + col_block_ic;
}
// TODO(TJ): use array like: size_t copy_size[kw]={sizeof(T) *
// (output_width-1)}
// the length of copy_size is equal to kw.
for (int oh = 0; oh < output_height; ++oh) {
const T* im_data_start = im_data + (oh - plh > 0 ? oh - plh : 0) * im_width;
T* dst_data = col_data + oh * output_width;
for (int ic = 0; ic < im_channels; ++ic) {
const T* src_data = im_data_start + ic * im_size;
for (int kh = 0; kh < filter_height; ++kh) {
if ((oh < plh && kh < plh) || (oh > (output_height - prh - 1) &&
kh > (filter_height - prh - 1))) {
dst_data = dst_data + filter_width * col_matrix_width;
continue;
}
// TODO(TJ): reuse plw-kw outside this for
// try to unify
for (int kw = 0; kw < plw; ++kw) {
if (data_layout != DataLayout::kNHWC) {
std::memcpy(dst_data + (plw - kw), src_data,
sizeof(T) * (output_width - (plw - kw)));
} else {
for (int kow = 0; kow < output_width - (plw - kw); ++kow) {
dst_data[plw - kw + kow] =
im_data[(((oh - plh > 0 ? oh - plh : 0) + kh) * im_width +
kow) *
im_channels +
ic];
}
}
dst_data = dst_data + col_matrix_width;
}
for (int kw = plw; kw < filter_width - prw; ++kw) {
if (data_layout != DataLayout::kNHWC) {
std::memcpy(dst_data, src_data + (kw - plw),
sizeof(T) * output_width);
} else {
for (int kow = 0; kow < output_width; ++kow) {
dst_data[kow] =
im_data[(((oh - plh > 0 ? oh - plh : 0) + kh) * im_width +
kw - plw + kow) *
im_channels +
ic];
}
}
dst_data = dst_data + col_matrix_width;
}
int i = 1;
for (int kw = filter_width - prw; kw < filter_width; ++kw, ++i) {
if (data_layout != DataLayout::kNHWC) {
std::memcpy(dst_data, src_data + (kw - plw),
sizeof(T) * (output_width - i));
} else {
for (int kow = 0; kow < output_width - i; ++kow) {
dst_data[kow] =
im_data[(((oh - plh > 0 ? oh - plh : 0) + kh) * im_width +
kw - plw + kow) *
im_channels +
ic];
}
}
dst_data = dst_data + col_matrix_width;
}
src_data = src_data + im_width;
}
}
}
}
} // namespace math
} // namespace operators
} // namespace paddle
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "core/paddlefl_mpc/operators/math/math_function.h"
#include <vector>
#include "paddle/fluid/framework/data_type.h"
namespace paddle {
namespace operators {
namespace math {
template <typename T>
struct RowwiseAdd<platform::CPUDeviceContext, T> {
void operator()(const platform::CPUDeviceContext& context,
const framework::Tensor& input,
const framework::Tensor& vector, framework::Tensor* output) {
auto in_dims = input.dims();
auto size = input.numel() / in_dims[0];
PADDLE_ENFORCE_EQ(vector.numel(), size);
PADDLE_ENFORCE_EQ(output->dims(), in_dims);
auto in = framework::EigenMatrix<T>::From(input);
auto vec = framework::EigenVector<T>::Flatten(vector);
auto out = framework::EigenMatrix<T>::From(*output);
for (int64_t i = 0; i < in_dims[0]; ++i) {
out.chip(i, 0) = in.chip(i, 0) + vec;
}
}
};
template struct RowwiseAdd<platform::CPUDeviceContext, int64_t>;
using float16 = paddle::platform::float16;
template struct SetConstant<platform::CPUDeviceContext, platform::float16>;
template struct SetConstant<platform::CPUDeviceContext, float>;
template struct SetConstant<platform::CPUDeviceContext, double>;
template struct SetConstant<platform::CPUDeviceContext, int>;
template struct SetConstant<platform::CPUDeviceContext, int64_t>;
template struct SetConstant<platform::CPUDeviceContext, bool>;
template struct SetConstant<platform::CPUDeviceContext, uint8_t>;
#define DEFINE_CPU_TRANS(RANK) \
template struct Transpose<platform::CPUDeviceContext, platform::float16, \
RANK>; \
template struct Transpose<platform::CPUDeviceContext, float, RANK>; \
template struct Transpose<platform::CPUDeviceContext, double, RANK>; \
template struct Transpose<platform::CPUDeviceContext, int, RANK>; \
template struct Transpose<platform::CPUDeviceContext, int64_t, RANK>; \
template struct Transpose<platform::CPUDeviceContext, bool, RANK>; \
template struct Transpose<platform::CPUDeviceContext, int16_t, RANK>; \
template struct Transpose<platform::CPUDeviceContext, uint8_t, RANK>; \
template struct Transpose<platform::CPUDeviceContext, int8_t, RANK>;
DEFINE_CPU_TRANS(1);
DEFINE_CPU_TRANS(2);
DEFINE_CPU_TRANS(3);
DEFINE_CPU_TRANS(4);
DEFINE_CPU_TRANS(5);
DEFINE_CPU_TRANS(6);
} // namespace math
} // namespace operators
} // namespace paddle
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <cmath>
#include <vector>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/enforce.h"
namespace paddle {
namespace operators {
namespace math {
template <typename DeviceContext, typename T>
struct RowwiseAdd {
void operator()(const DeviceContext& context, const framework::Tensor& input,
const framework::Tensor& vec, framework::Tensor* output);
};
template <typename DeviceContext, typename T>
struct SetConstant {
void operator()(const DeviceContext& context, framework::Tensor* tensor,
T num);
};
template <typename DeviceContext, typename T, int Rank>
struct Transpose {
void operator()(const DeviceContext& context, const framework::Tensor& in,
framework::Tensor* out, const std::vector<int>& axis);
};
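// A minimal usage sketch (assumed example, not part of this header):
//   Transpose<platform::CPUDeviceContext, float, 3> trans;
//   trans(dev_ctx, in, &out, /*axis=*/{0, 2, 1});  // swap the last two dims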
template <typename DeviceContext, typename T>
struct ColwiseSum {
void operator()(const DeviceContext& context, const framework::Tensor& input,
framework::Tensor* vec);
};
} // namespace math
} // namespace operators
} // namespace paddle
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <vector>
#include "paddle/fluid/framework/data_type.h"
#include "core/paddlefl_mpc/operators/math/math_function.h"
namespace paddle {
namespace operators {
namespace math {
template <typename DeviceContext, typename T>
void SetConstant<DeviceContext, T>::operator()(const DeviceContext& context,
framework::Tensor* tensor,
T num) {
auto t = framework::EigenVector<T>::Flatten(*tensor);
t.device(*context.eigen_device()) = t.constant(static_cast<T>(num));
}
template <typename DeviceContext, typename T, int Rank>
void Transpose<DeviceContext, T, Rank>::operator()(
const DeviceContext& context, const framework::Tensor& in,
framework::Tensor* out, const std::vector<int>& axis) {
Eigen::array<int, Rank> permute;
for (int i = 0; i < Rank; i++) {
permute[i] = axis[i];
}
auto eigen_in = framework::EigenTensor<T, Rank>::From(in);
auto eigen_out = framework::EigenTensor<T, Rank>::From(*out);
auto* dev = context.eigen_device();
eigen_out.device(*dev) = eigen_in.shuffle(permute);
}
template <typename DeviceContext, typename T>
void ColwiseSum<DeviceContext, T>::operator()(const DeviceContext& context,
const framework::Tensor& input,
framework::Tensor* out) {
auto in_dims = input.dims();
auto size = input.numel() / in_dims[0];
PADDLE_ENFORCE_EQ(out->numel(), size);
auto in = framework::EigenMatrix<T>::From(input);
auto vec = framework::EigenVector<T>::Flatten(*out);
vec.device(*context.eigen_device()) = in.sum(Eigen::array<int, 1>({{0}}));
}
// Specialize for CPU, since Eigen implements only a general reduce, which
// has a large overhead on CPU, whereas a column-wise sum can be implemented
// directly and cheaply.
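// A minimal example (assumed values): input = [[1, 2], [3, 4]] gives
// out = [4, 6], i.e. each output element is the sum of one input column.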
template <typename T>
class ColwiseSum<platform::CPUDeviceContext, T> {
public:
void operator()(const platform::CPUDeviceContext& context,
const framework::Tensor& input, framework::Tensor* out) {
auto& in_dims = input.dims();
auto height = in_dims[0];
auto size = in_dims[1];
PADDLE_ENFORCE_EQ(out->numel(), size);
T* out_buf = out->mutable_data<T>(out->place());
const T* in_buf = input.data<T>();
for (size_t i = 0; i < static_cast<size_t>(height); ++i) {
for (size_t j = 0; j < static_cast<size_t>(size); ++j) {
if (i == 0) {
out_buf[j] = in_buf[i * size + j];
} else {
out_buf[j] += in_buf[i * size + j];
}
}
}
}
};
} // namespace math
} // namespace operators
} // namespace paddle
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "core/paddlefl_mpc/operators/math/sequence2batch.h"
namespace paddle {
namespace operators {
namespace math {
template <typename T>
class CopyMatrixRowsFunctor<platform::CPUDeviceContext, T> {
public:
void operator()(const platform::CPUDeviceContext& context,
const framework::Tensor& src,
framework::Vector<size_t> index_lod, framework::Tensor* dst,
bool is_src_index) {
size_t* index = index_lod.data();
auto src_dims = src.dims();
auto dst_dims = dst->dims();
    PADDLE_ENFORCE_EQ(src_dims.size(), 2UL,
                      "The src must be a matrix with rank 2.");
    PADDLE_ENFORCE_EQ(dst_dims.size(), 2UL,
                      "The dst must be a matrix with rank 2.");
    PADDLE_ENFORCE_EQ(src_dims[1], dst_dims[1],
                      "The width of src and dst must be the same.");
auto height = dst_dims[0];
auto width = dst_dims[1];
auto* src_data = src.data<T>();
auto* dst_data = dst->data<T>();
const int sz = width * sizeof(T);
if (is_src_index) {
for (int i = 0; i < height; ++i) {
memcpy(dst_data + i * width, src_data + index[i] * width, sz);
}
} else {
for (int i = 0; i < height; ++i) {
memcpy(dst_data + index[i] * width, src_data + i * width, sz);
}
}
}
};
template class CopyMatrixRowsFunctor<platform::CPUDeviceContext, int64_t>;
template class LoDTensor2BatchFunctor<platform::CPUDeviceContext, int64_t>;
template class Batch2LoDTensorFunctor<platform::CPUDeviceContext, int64_t>;
} // namespace math
} // namespace operators
} // namespace paddle
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <algorithm>
#include <vector>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/platform/device_context.h"
namespace paddle {
namespace operators {
namespace math {
template <typename T, int MajorType = Eigen::RowMajor,
typename IndexType = Eigen::DenseIndex>
using EigenMatrix = framework::EigenMatrix<T, MajorType, IndexType>;
template <typename DeviceContext, typename T>
class CopyMatrixRowsFunctor {
public:
// If is_src_index is true,
// copy the indexed rows of input src to the output dst.
// If is_src_index is false,
// copy the input src to the indexed rows of output dst.
// The indexed rows are based on the input index.
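  // A small worked example (assumed values): with index_lod = {2, 0, 1} and
  // is_src_index == true, src row 2 -> dst row 0, src row 0 -> dst row 1,
  // src row 1 -> dst row 2; with is_src_index == false the same index
  // scatters rows in the opposite direction.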
void operator()(const DeviceContext& context, const framework::Tensor& src,
framework::Vector<size_t> index_lod, framework::Tensor* dst,
bool is_src_index);
};
template <typename DeviceContext, typename T>
class LoDTensor2BatchFunctor {
// Calculate the length of each sequence and
// sort sequence index by the length.
// example: sequences = {s0, s1, s2}
// s0: 0 0 0 0, s1: 1 1 1 1 1, s2: 2 2 2
// seq_info[3] = {(4, 5, 1), (0, 4, 0), (9, 3, 2)}
//
struct SeqInfo {
SeqInfo(size_t start, size_t length, size_t seq_idx)
: start(start), length(length), seq_idx(seq_idx) {}
size_t start;
size_t length;
size_t seq_idx;
};
public:
void operator()(const DeviceContext& context,
const framework::LoDTensor& lod_tensor,
framework::LoDTensor* batch, bool is_cal_batch_lod,
bool is_reverse = false) const {
if (!is_cal_batch_lod) {
auto lods = batch->lod();
PADDLE_ENFORCE_GT(lods.size(), 2UL,
"The LoD of LoDTensor should inlcude at least 2-level "
"sequence information.");
PADDLE_ENFORCE_EQ(
lods[1].size(), static_cast<size_t>(lod_tensor.dims()[0]),
"The LoD information should be consistent with the dims.");
CopyMatrixRowsFunctor<DeviceContext, T> to_batch;
to_batch(context, lod_tensor, lods[1], batch, true);
return;
}
auto lods = lod_tensor.lod();
PADDLE_ENFORCE_EQ(lods.size(), 1UL, "Only support one level sequence now.");
const auto& lod = lods[0];
std::vector<SeqInfo> seq_info;
for (size_t seq_id = 0; seq_id < lod.size() - 1; ++seq_id) {
size_t length = lod[seq_id + 1] - lod[seq_id];
seq_info.emplace_back(lod[seq_id], length, seq_id);
}
std::sort(seq_info.begin(), seq_info.end(),
[](SeqInfo a, SeqInfo b) {
return a.length > b.length;
});
// Calculate the start position of each batch.
// example: sequences = {s0, s1, s2}
// s0: 0 0 0 0, s1: 1 1 1 1 1, s2: 2 2 2
// max_seqlen = 5,
// batchIndex = {b0, b1, b2, b3, b4}
// b0: 1 0 2, b1: 1 0 2, b2: 1 0 2, b3: 1 0, b4: 1
// batch_start_positions[6] = {0, 3, 6, 9, 11, 12}
// batch_start_positions[0] = len(b0)
// batch_start_positions[1] = len(b0) + len(b1)
// batch_start_positions[2] = len(b0) + len(b1) + len(b2)
// ...
// seq2batch_idx[12] = {4, 0, 9,
// 5, 1, 10,
// 6, 2, 11,
// 7, 3,
// 8}
// seq_order = {1, 0, 2}, the sort order.
// where 1 is the second sequence,
// 0 is the first sequence,
// 2 is the third sequence.
    // The max_seqlen represents the batch size after rearranging the
    // input LoDTensor. It is also the maximum length of the input sequences.
paddle::framework::LoD batch_lods;
batch_lods.emplace_back(std::vector<size_t> {0});
batch_lods.emplace_back(std::vector<size_t> {0});
batch_lods.emplace_back(std::vector<size_t> {0});
// batch_lods[0] is the start positions for batch LoDTensor
size_t max_seqlen = seq_info[0].length;
batch_lods[0].resize(max_seqlen + 1);
// batch_lods[1] is the raw index in the input LoDTensor
batch_lods[1].resize(static_cast<size_t>(lod_tensor.dims()[0]));
// batch_lods[2] is the sort order for the input LoDTensor.
batch_lods[2].resize(seq_info.size());
size_t* batch_starts = batch_lods[0].data();
size_t* seq2batch_idx = batch_lods[1].data();
batch_starts[0] = 0;
for (size_t n = 0; n < max_seqlen; n++) {
size_t batch_id = batch_starts[n];
for (size_t i = 0; i < seq_info.size(); ++i) {
size_t seq_len = seq_info[i].length;
size_t start = seq_info[i].start;
if (n < seq_len) {
seq2batch_idx[batch_id] =
is_reverse ? start + seq_len - 1 - n : start + n;
batch_id++;
} else {
break;
}
}
batch_starts[n + 1] = batch_id;
}
size_t* seq_order = batch_lods[2].data();
for (size_t i = 0; i < seq_info.size(); ++i) {
seq_order[i] = seq_info[i].seq_idx;
}
batch->set_lod(batch_lods);
CopyMatrixRowsFunctor<DeviceContext, T> to_batch;
to_batch(context, lod_tensor, batch_lods[1], batch, true);
}
};
template <typename DeviceContext, typename T>
class Batch2LoDTensorFunctor {
public:
void operator()(const DeviceContext& context,
const framework::LoDTensor& batch,
framework::LoDTensor* lod_tensor) const {
auto in_lod = batch.lod();
PADDLE_ENFORCE_GT(in_lod.size(), 2UL,
"The LoD of LoDTensor should inlcude at least 2-level "
"sequence information.");
PADDLE_ENFORCE_EQ(
in_lod[1].size(), static_cast<size_t>(lod_tensor->dims()[0]),
"The LoD information should be consistent with the dims.");
CopyMatrixRowsFunctor<DeviceContext, T> to_seq;
to_seq(context, batch, in_lod[1], lod_tensor, false);
}
};
} // namespace math
} // namespace operators
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "./vol2col.h"
#include <vector>
namespace paddle {
namespace operators {
namespace math {
/*
* vol = [input_channels, input_depth, input_height, input_width]
* col =
* [input_channels, filter_depth, filter_height, filter_width,
* output_depth, output_height, output_width]
*/
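/*
 * A concrete shape sketch (assumed values): vol = [2, 4, 4, 4] with a
 * 2x2x2 filter, stride 1 and no padding gives a 3x3x3 output, so col has
 * shape [2, 2, 2, 2, 3, 3, 3] and the implied gemm matrix is
 * [2 * 2 * 2 * 2, 3 * 3 * 3] = [16, 27].
 */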
template <class T>
class Vol2ColFunctor<platform::CPUDeviceContext, T> {
public:
void operator()(const platform::CPUDeviceContext& context,
const framework::Tensor& vol,
const std::vector<int>& dilations,
const std::vector<int>& strides,
const std::vector<int>& paddings, framework::Tensor* col,
const DataLayout data_layout) const {
PADDLE_ENFORCE_EQ(vol.dims().size(), 4,
"The dimension of vol should be 4.");
PADDLE_ENFORCE_EQ(col->dims().size(), 7,
"The dimension of col should be 7.");
int input_channels =
(data_layout != DataLayout::kNHWC ? vol.dims()[0] : vol.dims()[3]);
int input_depth =
(data_layout != DataLayout::kNHWC ? vol.dims()[1] : vol.dims()[0]);
int input_height =
(data_layout != DataLayout::kNHWC ? vol.dims()[2] : vol.dims()[1]);
int input_width =
(data_layout != DataLayout::kNHWC ? vol.dims()[3] : vol.dims()[2]);
int filter_depth = col->dims()[1];
int filter_height = col->dims()[2];
int filter_width = col->dims()[3];
int output_depth = col->dims()[4];
int output_height = col->dims()[5];
int output_width = col->dims()[6];
int channels_col =
input_channels * filter_depth * filter_height * filter_width;
    // paddings may hold 3 values ({d, h, w}, applied symmetrically) or
    // 6 values ({d_forth, d_back, h_up, h_down, w_left, w_right});
    // e.g. {1, 2, 3} expands to {1, 1, 2, 2, 3, 3}.
    bool paddings_size_is_6 = (paddings.size() == 6);
    int pad_d_forth = paddings[0];
int pad_d_back = paddings_size_is_6 ? paddings[1] : paddings[0];
int pad_h_up = paddings_size_is_6 ? paddings[2] : paddings[1];
int pad_h_down = paddings_size_is_6 ? paddings[3] : paddings[1];
int pad_w_left = paddings_size_is_6 ? paddings[4] : paddings[2];
int pad_w_right = paddings_size_is_6 ? paddings[5] : paddings[2];
PADDLE_ENFORCE_EQ((input_depth + pad_d_forth + pad_d_back -
((dilations[0] * (filter_depth - 1) + 1))) /
strides[0] +
1,
output_depth,
"input_depth and output_depth are "
"mismatching.");
PADDLE_ENFORCE_EQ((input_height + pad_h_up + pad_h_down -
((dilations[1] * (filter_height - 1) + 1))) /
strides[1] +
1,
output_height,
"input_height and output_height are "
"mismatching.");
PADDLE_ENFORCE_EQ((input_width + pad_w_left + pad_w_right -
((dilations[2] * (filter_width - 1) + 1))) /
strides[2] +
1,
output_width,
"input_width and output_width are "
"mismatching.");
const T* vol_data = vol.data<T>();
T* col_data = col->data<T>();
for (int c = 0; c < channels_col; ++c) {
int w_offset = c % filter_width;
int h_offset = (c / filter_width) % filter_height;
int d_offset = (c / filter_width / filter_height) % filter_depth;
int c_in = c / filter_width / filter_height / filter_depth;
for (int d = 0; d < output_depth; ++d) {
int d_pad = d * strides[0] - pad_d_forth + d_offset * dilations[0];
for (int h = 0; h < output_height; ++h) {
int h_pad = h * strides[1] - pad_h_up + h_offset * dilations[1];
for (int w = 0; w < output_width; ++w) {
int w_pad = w * strides[2] - pad_w_left + w_offset * dilations[2];
int col_idx =
((c * output_depth + d) * output_height + h) * output_width + w;
int vol_idx;
if (data_layout != DataLayout::kNHWC) {
vol_idx = ((c_in * input_depth + d_pad) * input_height + h_pad) *
input_width +
w_pad;
} else {
vol_idx = ((d_pad * input_height + h_pad) * input_width + w_pad) *
input_channels +
c_in;
}
col_data[col_idx] =
(h_pad < 0 || h_pad >= input_height || w_pad < 0 ||
w_pad >= input_width || d_pad < 0 || d_pad >= input_depth)
? static_cast<T>(0)
: vol_data[vol_idx];
}
}
}
}
}
};
/*
 * vol = [input_channels, input_depth, input_height, input_width]
* col =
* [input_channels, filter_depth, filter_height, filter_width,
* output_depth, output_height, output_width]
*/
template <class T>
class Col2VolFunctor<platform::CPUDeviceContext, T> {
public:
void operator()(const platform::CPUDeviceContext& context,
const framework::Tensor& col,
const std::vector<int>& dilations,
const std::vector<int>& strides,
const std::vector<int>& paddings, framework::Tensor* vol,
const DataLayout data_layout) const {
PADDLE_ENFORCE_EQ(vol->dims().size(), 4,
"The dimension of vol should be 4.");
PADDLE_ENFORCE_EQ(col.dims().size(), 7,
"The dimension of col should be 7.");
int input_channels =
(data_layout != DataLayout::kNHWC ? vol->dims()[0] : vol->dims()[3]);
int input_depth =
(data_layout != DataLayout::kNHWC ? vol->dims()[1] : vol->dims()[0]);
int input_height =
(data_layout != DataLayout::kNHWC ? vol->dims()[2] : vol->dims()[1]);
int input_width =
(data_layout != DataLayout::kNHWC ? vol->dims()[3] : vol->dims()[2]);
int filter_depth = col.dims()[1];
int filter_height = col.dims()[2];
int filter_width = col.dims()[3];
int output_depth = col.dims()[4];
int output_height = col.dims()[5];
int output_width = col.dims()[6];
int channels_col =
input_channels * filter_depth * filter_height * filter_width;
bool paddings_size_is_6 = (paddings.size() == 6);
int pad_d_forth = paddings_size_is_6 ? paddings[0] : paddings[0];
int pad_d_back = paddings_size_is_6 ? paddings[1] : paddings[0];
int pad_h_up = paddings_size_is_6 ? paddings[2] : paddings[1];
int pad_h_down = paddings_size_is_6 ? paddings[3] : paddings[1];
int pad_w_left = paddings_size_is_6 ? paddings[4] : paddings[2];
int pad_w_right = paddings_size_is_6 ? paddings[5] : paddings[2];
PADDLE_ENFORCE_EQ((input_depth + pad_d_forth + pad_d_back -
((dilations[0] * (filter_depth - 1) + 1))) /
strides[0] +
1,
output_depth,
"input_depth and output_depth are "
"mismatching.");
PADDLE_ENFORCE_EQ((input_height + pad_h_up + pad_h_down -
((dilations[1] * (filter_height - 1) + 1))) /
strides[1] +
1,
output_height,
"input_height and output_height are "
"mismatching.");
PADDLE_ENFORCE_EQ((input_width + pad_w_left + pad_w_right -
((dilations[2] * (filter_width - 1) + 1))) /
strides[2] +
1,
output_width,
"input_width and output_width are "
"mismatching.");
T* vol_data = vol->data<T>();
const T* col_data = col.data<T>();
for (int c = 0; c < channels_col; ++c) {
int w_offset = c % filter_width;
int h_offset = (c / filter_width) % filter_height;
int d_offset = (c / filter_width / filter_height) % filter_depth;
int cIm = c / filter_width / filter_height / filter_depth;
for (int d = 0; d < output_depth; ++d) {
int d_pad = d * strides[0] - pad_d_forth + d_offset * dilations[0];
for (int h = 0; h < output_height; ++h) {
int h_pad = h * strides[1] - pad_h_up + h_offset * dilations[1];
for (int w = 0; w < output_width; ++w) {
int w_pad = w * strides[2] - pad_w_left + w_offset * dilations[2];
if (h_pad >= 0 && h_pad < input_height && w_pad >= 0 &&
w_pad < input_width && d_pad >= 0 && d_pad < input_depth) {
int vol_idx;
if (data_layout != DataLayout::kNHWC) {
vol_idx = ((cIm * input_depth + d_pad) * input_height + h_pad) *
input_width +
w_pad;
} else {
vol_idx =
((d_pad * input_height + h_pad) * input_width + w_pad) *
input_channels +
cIm;
}
int col_idx =
((c * output_depth + d) * output_height + h) * output_width +
w;
vol_data[vol_idx] += col_data[col_idx];
}
}
}
}
}
}
};
template class Vol2ColFunctor<platform::CPUDeviceContext, int64_t>;
template class Col2VolFunctor<platform::CPUDeviceContext, int64_t>;
} // namespace math
} // namespace operators
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <vector>
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/platform/device_context.h"
namespace paddle {
namespace operators {
namespace math {
using DataLayout = framework::DataLayout;
/*
 * \brief Converts the feature data of four dimensions (CDHW) into a colData of
 * seven dimensions in the Vol2ColFunctor calculation,
 * and the Col2VolFunctor calculation reverses it.
*
* \param volData Vol data.
* \param volShape The shape of volData,
* [input_channels, input_depth, input_height, input_width].
* \param colData Column data.
* \param colShape The shape of colData.
*
* \param dilations dilation data.
* \param 3-dimension [dilation_depth, dilation_height, dilation_width].
*
* \param strides stride data.
* \param 3-dimension [stride_depth, stride_height, stride_width].
*
* \param paddings padding data.
* \param 3-dimension [d_pad, h_pad, w_pad].
*
* The shape of colData is:
* [input_channels, filter_depth, filter_height, filter_width, output_depth,
* output_height, output_width]
* So, it is easy to reshape into a convolution matrix for convolution
* calculation based on matrix multiplication.
 * The shape of the convolution matrix is [height, width], where the height equals
 * input_channels * filter_depth * filter_height * filter_width, and the width
 * equals output_depth * output_height * output_width.
*
* Reshape:
* shape of colData shape of convolution matrix
* [input_channels,
* filter_depth,
* filter_height,
* filter_width, ======> [height, width]
* output_depth,
* output_height,
* output_width]
*
* \note The caller needs to ensure that volShape.inputChannels is equal to
* colShape.inputChannels.
*/
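/*
 * A worked example (illustrative values, not taken from any particular model):
 * for vol of shape [input_channels = 3, input_depth = 8, input_height = 8,
 * input_width = 8] with a 3 x 3 x 3 filter, stride 1, dilation 1 and padding 1
 * on every side, each output extent is (8 + 1 + 1 - (1 * (3 - 1) + 1)) / 1 + 1 = 8,
 * so colData has shape [3, 3, 3, 3, 8, 8, 8] and the convolution matrix is
 * [3 * 3 * 3 * 3, 8 * 8 * 8] = [81, 512].
 */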
template <typename DeviceContext, typename T>
class Vol2ColFunctor {
public:
void operator()(const DeviceContext& context, const framework::Tensor& vol,
const std::vector<int>& dilations,
const std::vector<int>& strides,
const std::vector<int>& paddings, framework::Tensor* col,
const DataLayout data_layout = DataLayout::kNCHW) const;
};
template <typename DeviceContext, typename T>
class Col2VolFunctor {
public:
void operator()(const DeviceContext& context, const framework::Tensor& col,
const std::vector<int>& dilations,
const std::vector<int>& strides,
const std::vector<int>& paddings, framework::Tensor* vol,
const DataLayout data_layout = DataLayout::kNCHW) const;
};
} // namespace math
} // namespace operators
} // namespace paddle
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "mpc_adam_op.h"
#include "paddle/fluid/framework/op_registry.h"
#include <string>
namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
class MpcAdamOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override;
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override;
framework::OpKernelType GetKernelTypeForVar(
const std::string& var_name, const framework::Tensor& tensor,
const framework::OpKernelType& expected_kernel_type) const override;
};
void MpcAdamOp::InferShape(framework::InferShapeContext *ctx) const {
PADDLE_ENFORCE_EQ(
ctx->HasInput("Param"), true,
platform::errors::NotFound("Input(Param) of AdamOp should not be null."));
PADDLE_ENFORCE_EQ(
ctx->HasInput("Grad"), true,
platform::errors::NotFound("Input(Grad) of AdamOp should not be null."));
PADDLE_ENFORCE_EQ(ctx->HasInput("Moment1"), true,
platform::errors::NotFound(
"Input(Moment1) of AdamOp should not be null."));
PADDLE_ENFORCE_EQ(ctx->HasInput("Moment2"), true,
platform::errors::NotFound(
"Input(Moment2) of AdamOp should not be null."));
PADDLE_ENFORCE_EQ(ctx->HasInput("LearningRate"), true,
platform::errors::NotFound(
"Input(LearningRate) of AdamOp should not be null."));
PADDLE_ENFORCE_EQ(ctx->HasInput("Beta1Pow"), true,
platform::errors::NotFound(
"Input(Beta1Pow) of AdamOp should not be null."));
PADDLE_ENFORCE_EQ(ctx->HasInput("Beta2Pow"), true,
platform::errors::NotFound(
"Input(Beta2Pow) of AdamOp should not be null."));
PADDLE_ENFORCE_EQ(ctx->HasOutput("ParamOut"), true,
platform::errors::NotFound(
"Output(ParamOut) of AdamOp should not be null."));
PADDLE_ENFORCE_EQ(ctx->HasOutput("Moment1Out"), true,
platform::errors::NotFound(
"Output(Moment1Out) of AdamOp should not be null."));
PADDLE_ENFORCE_EQ(ctx->HasOutput("Moment2Out"), true,
platform::errors::NotFound(
"Output(Moment2Out) of AdamOp should not be null."));
auto lr_dims = ctx->GetInputDim("LearningRate");
PADDLE_ENFORCE_NE(
framework::product(lr_dims), 0,
platform::errors::InvalidArgument(
"The number of LearningRate shall not be 0, but received %d. Maybe "
"the Input variable LearningRate has not "
"been initialized. You may need to confirm "
"if you put exe.run(startup_program) "
"after optimizer.minimize function.",
framework::product(lr_dims)));
PADDLE_ENFORCE_EQ(
framework::product(lr_dims), 1,
platform::errors::InvalidArgument(
"Learning rate should have 1 dimension, but received %d",
framework::product(lr_dims)));
auto beta1_pow_dims = ctx->GetInputDim("Beta1Pow");
VLOG(3) << "dims of Beta1Pow : [" << beta1_pow_dims << "]";
PADDLE_ENFORCE_GE(framework::product(beta1_pow_dims), 1,
platform::errors::InvalidArgument(
"The size of Beta1 power accumulator should be greater "
"than 0, but received %d.",
framework::product(beta1_pow_dims)));
auto beta2_pow_dims = ctx->GetInputDim("Beta2Pow");
VLOG(3) << "dims of Beta2Pow : [" << beta2_pow_dims << "]";
PADDLE_ENFORCE_GE(framework::product(beta2_pow_dims), 1,
platform::errors::InvalidArgument(
"The size of Beta2 power accumulator should be greater "
"than 0, but received %d.",
framework::product(beta2_pow_dims)));
auto param_dims = ctx->GetInputDim("Param");
if (ctx->GetInputsVarType("Grad")[0] ==
framework::proto::VarType::LOD_TENSOR) {
PADDLE_ENFORCE_EQ(
param_dims, ctx->GetInputDim("Grad"),
platform::errors::InvalidArgument(
"Param and Grad input of AdamOp should have same dimension. But "
"received Param dims: [%s], Grad dims: [%s].",
param_dims, ctx->GetInputDim("Grad")));
}
PADDLE_ENFORCE_EQ(
param_dims, ctx->GetInputDim("Moment1"),
platform::errors::InvalidArgument(
"Param and Moment1 input of AdamOp should have same dimension. But "
"received Param dims: [%s], Moment1 dims: [%s].",
param_dims, ctx->GetInputDim("Moment1")));
PADDLE_ENFORCE_EQ(
param_dims, ctx->GetInputDim("Moment2"),
platform::errors::InvalidArgument(
"Param and Moment2 input of AdamOp should have same dimension. But "
"received Param dims: [%s], Moment2 dims: [%s].",
param_dims, ctx->GetInputDim("Moment2")));
ctx->SetOutputDim("ParamOut", param_dims);
ctx->SetOutputDim("Moment1Out", param_dims);
ctx->SetOutputDim("Moment2Out", param_dims);
ctx->SetOutputDim("Beta1PowOut", beta1_pow_dims);
ctx->SetOutputDim("Beta2PowOut", beta2_pow_dims);
}
framework::OpKernelType MpcAdamOp::GetExpectedKernelType(
const framework::ExecutionContext &ctx) const {
auto input_data_type = OperatorWithKernel::IndicateVarDataType(ctx, "Param");
return framework::OpKernelType(input_data_type, ctx.GetPlace());
}
framework::OpKernelType MpcAdamOp::GetKernelTypeForVar(
const std::string &var_name, const framework::Tensor &tensor,
const framework::OpKernelType &expected_kernel_type) const {
if (var_name == "Beta1Pow" || var_name == "Beta2Pow") {
return expected_kernel_type;
} else {
return framework::OpKernelType(expected_kernel_type.data_type_,
tensor.place(), tensor.layout());
}
}
class MpcAdamOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
AddInput("Param", "(Tensor) Input parameter");
AddInput("Grad", "(Tensor) Input gradient");
AddInput("LearningRate", "(Tensor) Learning rate");
AddInput("Moment1", "(Tensor) Input first moment");
AddInput("Moment2", "(Tensor) Input second moment");
AddInput("Beta1Pow", "(Tensor) Input beta1 power accumulator");
AddInput("Beta2Pow", "(Tensor) Input beta2 power accumulator");
AddInput("Beta1Tensor",
"(Tensor<float32>, optional) If provided, Adam will use this "
"as beta1, this has a higher priority than attr(beta1), the "
"shape of this tensor MUST BE [1].")
.AsDispensable();
AddInput("Beta2Tensor",
"(Tensor<float32>, optional) If provided, Adam will use this "
"as beta2, this has a higher priority than attr(beta2), the "
"shape of this tensor MUST BE [1].")
.AsDispensable();
AddOutput("ParamOut", "(Tensor) Output parameter");
AddOutput("Moment1Out", "(Tensor) Output first moment");
AddOutput("Moment2Out", "(Tensor) Output second moment");
AddOutput("Beta1PowOut", "(Tensor) Output beta1 power accumulator");
AddOutput("Beta2PowOut", "(Tensor) Output beta2 power accumulator");
AddAttr<float>("beta1",
"(float, default 0.9) "
"Exponential decay rate for the "
"first moment estimates.")
.SetDefault(0.9f);
AddAttr<float>("beta2",
"(float, default 0.999) "
"exponential decay rate for the "
"second moment estimates.")
.SetDefault(0.999f);
AddAttr<float>("epsilon",
"(float, default 1.0e-4) "
"Constant for numerical stability")
.SetDefault(1.0e-4f);
AddComment(R"DOC(
Adam Optimizer.
This implements the Adam optimizer from Section 2 of the Adam
paper : https://arxiv.org/abs/1412.6980.
Adam is a first-order gradient-based optimization method based on
adaptive estimates of lower-order moments.
Adam updates:
$$
moment\_1\_out = \beta_1 * moment\_1 + (1 - \beta_1) * grad \\
moment\_2\_out = \beta_2 * moment\_2 + (1 - \beta_2) * grad * grad \\
learning\_rate = learning\_rate *
\frac{\sqrt{1 - \beta_{2\_pow}}}{1 - \beta_{1\_pow}} \\
param\_out = param - learning\_rate * \frac{moment\_1}{\sqrt{moment\_2} + \epsilon}
$$
)DOC");
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OPERATOR(
mpc_adam, ops::MpcAdamOp, ops::MpcAdamOpMaker);
REGISTER_OP_CPU_KERNEL(
mpc_adam,
ops::MpcAdamOpKernel<paddle::platform::CPUDeviceContext, int64_t, float>);
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "mpc_op.h"
#include <math.h>
#include "./math/math_function.h"
#include "core/paddlefl_mpc/mpc_protocol/aby3_operators.h"
namespace paddle {
namespace operators {
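// Reads a scalar attribute from an attribute tensor; assumes the tensor already
// lives in host memory (mpc_adam only registers a CPU kernel), so the first
// element can be read directly.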
static inline float GetAttrFromTensor(const framework::Tensor* tensor) {
  const float* tensor_data = tensor->data<float>();
  return tensor_data[0];
}
template <typename DeviceContext, typename T, typename T1>
class MpcAdamOpKernel : public MpcOpKernel<T> {
public:
void ComputeImpl(const framework::ExecutionContext &ctx) const override{
const auto* param_var = ctx.InputVar("Param");
PADDLE_ENFORCE(param_var->IsType<framework::LoDTensor>(),
"The Var(%s)'s type should be LoDTensor, "
"but the received is %s",
ctx.InputNames("Param").front(),
framework::ToTypeName(param_var->Type()));
using paddle::framework::LoDTensor;
T1 epsilon = static_cast<T1>(ctx.Attr<float>("epsilon"));
auto* param = ctx.Input<LoDTensor>("Param");
auto* grad_var = ctx.InputVar("Grad");
auto* mom1 = ctx.Input<LoDTensor>("Moment1");
auto* mom2 = ctx.Input<LoDTensor>("Moment2");
auto* lr = ctx.Input<LoDTensor>("LearningRate");
auto* beta1_pow = ctx.Input<LoDTensor>("Beta1Pow");
auto* beta2_pow = ctx.Input<LoDTensor>("Beta2Pow");
auto* param_out = ctx.Output<LoDTensor>("ParamOut");
auto* mom1_out = ctx.Output<LoDTensor>("Moment1Out");
auto* mom2_out = ctx.Output<LoDTensor>("Moment2Out");
auto* beta1_pow_out = ctx.Output<LoDTensor>("Beta1PowOut");
auto* beta2_pow_out = ctx.Output<LoDTensor>("Beta2PowOut");
T1 beta1 = static_cast<T1>(ctx.Attr<float>("beta1"));
if (ctx.HasInput("Beta1Tensor")) {
auto* beta1_tensor = ctx.Input<framework::Tensor>("Beta1Tensor");
PADDLE_ENFORCE_EQ(beta1_tensor->numel(), 1,
platform::errors::InvalidArgument(
"Input(Beta1Tensor) size must be 1, but get %d",
beta1_tensor->numel()));
beta1 = static_cast<T1>(GetAttrFromTensor(beta1_tensor));
}
T1 beta2 = static_cast<T1>(ctx.Attr<float>("beta2"));
if (ctx.HasInput("Beta2Tensor")) {
auto* beta2_tensor = ctx.Input<framework::Tensor>("Beta2Tensor");
PADDLE_ENFORCE_EQ(beta2_tensor->numel(), 1,
platform::errors::InvalidArgument(
"Input(Beta2Tensor) size must be 1, but get %d",
beta2_tensor->numel()));
beta2 = static_cast<T1>(GetAttrFromTensor(beta2_tensor));
}
VLOG(3) << "beta1_pow.numel() : " << beta1_pow->numel()
<< "beta2_pow.numel() : " << beta2_pow->numel();
VLOG(3) << "param.numel(): " << param->numel();
PADDLE_ENFORCE_EQ(beta1_pow_out->numel(), 1,
platform::errors::InvalidArgument(
"beta1 pow output size should be 1, but received "
"value is:%d.",
beta1_pow_out->numel()));
PADDLE_ENFORCE_EQ(beta2_pow_out->numel(), 1,
platform::errors::InvalidArgument(
"beta2 pow output size should be 1, but received "
"value is:%d.",
beta2_pow_out->numel()));
if (grad_var->IsType<framework::LoDTensor>()) {
auto* grad = ctx.Input<LoDTensor>("Grad");
// AdamFunctor<T, CPUAdam> functor(
// beta1, beta2, epsilon, beta1_pow->data<T>(), beta2_pow->data<T>(),
// mom1->data<T>(), mom1_out->mutable_data<T>(ctx.GetPlace()),
// mom2->data<T>(), mom2_out->mutable_data<T>(ctx.GetPlace()),
// lr->data<T>(), grad->data<T>(), param->data<T>(),
// param_out->mutable_data<T>(ctx.GetPlace()));
// functor(param->numel());
T1 lr_value = *lr->template data<T1>();
T1 beta1_pow_ = *beta1_pow->template data<T1>();
T1 beta2_pow_ = *beta2_pow->template data<T1>();
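      // Bias-corrected learning rate, matching the op's DOC string:
      // lr * sqrt(1 - beta2^t) / (1 - beta1^t).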
double lr_ = lr_value * sqrt(1 - beta2_pow_) / (1 - beta1_pow_);
framework::Tensor temp;
temp.mutable_data<T>(param->dims(), ctx.GetPlace());
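      // Moment updates computed share-wise with the MPC operators:
      //   mom1_out = beta1 * mom1 + (1 - beta1) * grad
      //   mom2_out = beta2 * mom2 + (1 - beta2) * grad * grad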
mpc::MpcInstance::mpc_instance()->mpc_protocol()->mpc_operators()->scale(grad, (1 - beta1), &temp);
mpc::MpcInstance::mpc_instance()->mpc_protocol()->mpc_operators()->scale(mom1, beta1, mom1_out);
mpc::MpcInstance::mpc_instance()->mpc_protocol()->mpc_operators()->add(mom1_out, &temp, mom1_out);
mpc::MpcInstance::mpc_instance()->mpc_protocol()->mpc_operators()->scale(grad, (1 - beta2), &temp);
mpc::MpcInstance::mpc_instance()->mpc_protocol()->mpc_operators()->mul(grad, &temp, &temp);
mpc::MpcInstance::mpc_instance()->mpc_protocol()->mpc_operators()->scale(mom2, beta2, mom2_out);
mpc::MpcInstance::mpc_instance()->mpc_protocol()->mpc_operators()->add(mom2_out, &temp, mom2_out);
// mpc::MpcInstance::mpc_instance()->mpc_protocol()->mpc_operators()->scale(grad, lr[0], &temp);
// mpc::MpcInstance::mpc_instance()->mpc_protocol()->mpc_operators()->sub(param, &temp, param_out);
math::SetConstant<DeviceContext, T> set_const;
auto& dev_ctx = ctx.template device_context<DeviceContext>();
set_const(
dev_ctx,
&temp,
T(epsilon * pow(2, mpc::ABY3_SCALING_FACTOR) / 3));
// temp = epsilon + mom2_out
mpc::MpcInstance::mpc_instance()->mpc_protocol()->mpc_operators()->add(mom2_out, &temp, &temp);
// temp = 1 / sqrt(epsilon + mom2_out)
mpc::MpcInstance::mpc_instance()->mpc_protocol()->mpc_operators()->inverse_square_root(&temp, &temp);
// temp = mom1_out / sqrt(epsilon + mom2_out)
mpc::MpcInstance::mpc_instance()->mpc_protocol()->mpc_operators()->mul(mom1_out, &temp, &temp);
// temp = lr * mom1_out / sqrt(epsilon + mom2_out)
mpc::MpcInstance::mpc_instance()->mpc_protocol()->mpc_operators()->scale(&temp, lr_, &temp);
mpc::MpcInstance::mpc_instance()->mpc_protocol()->mpc_operators()->sub(param, &temp, param_out);
beta1_pow_out->mutable_data<T1>(ctx.GetPlace())[0] =
beta1 * beta1_pow->template data<T1>()[0];
beta2_pow_out->mutable_data<T1>(ctx.GetPlace())[0] =
beta2 * beta2_pow->template data<T1>()[0];
} else {
PADDLE_THROW("Variable type not supported by adam_op");
}
}
};
} // namespace operators
} // namespace paddle
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/data_layout.h"
#include <memory>
#include <string>
#include <unordered_map>
#include "mpc_batch_norm_op.h"
#ifdef PADDLE_WITH_MKLDNN
#include "paddle/fluid/platform/mkldnn_helper.h"
#endif
namespace paddle {
namespace operators {
class MpcBatchNormOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override{
OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "BatchNorm");
OP_INOUT_CHECK(ctx->HasInput("Scale"), "Input", "Scale", "BatchNorm");
OP_INOUT_CHECK(ctx->HasInput("Bias"), "Input", "Bias", "BatchNorm");
OP_INOUT_CHECK(ctx->HasInput("Mean"), "Input", "Mean", "BatchNorm");
OP_INOUT_CHECK(ctx->HasInput("Variance"), "Input", "Variance", "BatchNorm");
OP_INOUT_CHECK(ctx->HasOutput("Y"), "Output", "Y", "BatchNorm");
bool is_test = ctx->Attrs().Get<bool>("is_test");
bool trainable_stats = ctx->Attrs().Get<bool>("trainable_statistics");
bool test_mode = is_test && (!trainable_stats);
if (!test_mode) {
OP_INOUT_CHECK(ctx->HasOutput("MeanOut"), "Output", "MeanOut", "BatchNorm");
OP_INOUT_CHECK(ctx->HasOutput("VarianceOut"), "Output", "VarianceOut",
"BatchNorm");
OP_INOUT_CHECK(ctx->HasOutput("SavedMean"), "Output", "SavedMean",
"BatchNorm");
OP_INOUT_CHECK(ctx->HasOutput("SavedVariance"), "Output", "SavedVariance",
"BatchNorm");
}
// make sure Mean/MeanOut and Variance/VarianceOut share memory in Python
PADDLE_ENFORCE_EQ(ctx->Inputs("Mean")[0], ctx->Outputs("MeanOut")[0],
platform::errors::InvalidArgument(
"Mean and MeanOut should share the same memory"));
PADDLE_ENFORCE_EQ(
ctx->Inputs("Variance")[0], ctx->Outputs("VarianceOut")[0],
platform::errors::InvalidArgument(
"Variance and VarianceOut should share the same memory"));
const auto x_dims = ctx->GetInputDim("X");
const DataLayout data_layout = framework::StringToDataLayout(
ctx->Attrs().Get<std::string>("data_layout"));
if (ctx->IsRuntime() && ctx->HasInput("MomentumTensor")) {
auto mom = ctx->Inputs("MomentumTensor");
PADDLE_ENFORCE_EQ(mom.size(), 1,
platform::errors::InvalidArgument(
"The input tensor MomentumTensor's size must be 1"
"But received: MomentumTensor's size is [%d]",
mom.size()));
}
PADDLE_ENFORCE_GE(
x_dims.size(), 3,
platform::errors::InvalidArgument(
"ShapeError: the dimension of input "
"X must greater than or equal to 3. But received: the shape of input "
"X = [%s], the dimension of input X =[%d]",
x_dims, x_dims.size()));
PADDLE_ENFORCE_LE(
x_dims.size(), 6,
platform::errors::InvalidArgument(
"ShapeError: the dimension of input X "
"must smaller than or equal to 6. But received: the shape of input X "
"= [%s], the dimension of input X = [%d]",
x_dims, x_dims.size()));
const int64_t C =
((this->IsMKLDNNType() == true) || (data_layout == DataLayout::kNCHW)
? x_dims[2]
: x_dims[x_dims.size() - 1]);
auto scale_dim = ctx->GetInputDim("Scale");
auto bias_dim = ctx->GetInputDim("Bias");
VLOG(3) << "*** scale_dims: " << scale_dim;
VLOG(3) << "*** bias_dims: " << bias_dim;
VLOG(3) << "*** mean_dims: " << ctx->GetInputDim("Mean");
VLOG(3) << "*** variance_dims: " << ctx->GetInputDim("Variance");
//VLOG(3) << "*** Y_dims: " << ctx->GetInputDim("Y");
PADDLE_ENFORCE_EQ(
scale_dim.size(), 2UL,
platform::errors::InvalidArgument(
"ShapeError: the dimension of scale must equal to 2."
"But received: the shape of scale is [%s], the dimension "
"of scale is [%d]",
scale_dim, scale_dim.size()));
PADDLE_ENFORCE_EQ(bias_dim.size(), 2UL,
platform::errors::InvalidArgument(
"ShapeError: the dimension of bias must equal to 2."
"But received: the shape of bias is [%s],the dimension "
"of bias is [%d]",
bias_dim, bias_dim.size()));
bool check = true;
if ((!ctx->IsRuntime()) && (framework::product(scale_dim) <= 0 ||
framework::product(bias_dim) <= 0)) {
check = false;
}
if (check) {
PADDLE_ENFORCE_EQ(scale_dim[1], C,
platform::errors::InvalidArgument(
"ShapeError: the shape of scale must equal to [%d]"
"But received: the shape of scale is [%d]",
C, scale_dim[1]));
PADDLE_ENFORCE_EQ(bias_dim[1], C,
platform::errors::InvalidArgument(
"ShapeError: the shape of bias must equal to [%d]"
"But received: the shape of bias is [%d]",
C, bias_dim[1]));
}
ctx->SetOutputDim("Y", x_dims);
ctx->SetOutputDim("MeanOut", {2, C}); // 2: share_num
ctx->SetOutputDim("VarianceOut", {2, C});
ctx->SetOutputDim("SavedMean", {2, C});
ctx->SetOutputDim("SavedVariance", {2, C});
ctx->ShareLoD("X", "Y");
}
protected:
framework::OpKernelType GetExpectedKernelType(const framework::ExecutionContext& ctx) const {
framework::LibraryType library_{framework::LibraryType::kPlain};
std::string data_format = "AnyLayout";
framework::DataLayout layout_ = framework::StringToDataLayout(data_format);
return framework::OpKernelType(
OperatorWithKernel::IndicateVarDataType(ctx, "X"), ctx.GetPlace(),
layout_, library_);
}
framework::OpKernelType GetKernelTypeForVar(
const std::string& var_name, const Tensor& tensor,
const framework::OpKernelType& expected_kernel_type) const {
return framework::OpKernelType(expected_kernel_type.data_type_,
tensor.place(), tensor.layout());
}
};
class MpcBatchNormGradOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override{
// check input
OP_INOUT_CHECK(ctx->HasInput("Scale"), "Input", "Scale", "BatchNormGrad");
OP_INOUT_CHECK(ctx->HasInput(framework::GradVarName("Y")), "Input",
framework::GradVarName("Y"), "BatchNormGrad");
OP_INOUT_CHECK(ctx->HasInput("SavedMean"), "Input", "SavedMean",
"BatchNormGrad");
OP_INOUT_CHECK(ctx->HasInput("SavedVariance"), "Input", "SavedVariance",
"BatchNormGrad");
// check output
OP_INOUT_CHECK(ctx->HasOutput(framework::GradVarName("X")), "Output",
framework::GradVarName("X"), "BatchNormGrad");
const bool has_scale_grad = ctx->HasOutput(framework::GradVarName("Scale"));
const bool has_bias_grad = ctx->HasOutput(framework::GradVarName("Bias"));
PADDLE_ENFORCE_EQ((has_scale_grad == has_bias_grad), true,
platform::errors::NotFound(
"Output(Scale@GRAD) and Output(Bias@GRAD) must be null "
"or not be null at same time. But now, "
"has Scale@Grad=[%d], has Bias@GRAD=[%d]",
has_scale_grad, has_bias_grad));
const bool use_global_stats = ctx->Attrs().Get<bool>("use_global_stats");
if (use_global_stats) {
PADDLE_ENFORCE_EQ(
!ctx->Attrs().Get<bool>("use_mkldnn"), true,
platform::errors::InvalidArgument(
"Using global stats during training is not supported "
"in gradient op kernel of batch_norm_mkldnn_op now."));
}
OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "BatchNormGrad");
const auto x_dims = ctx->GetInputDim("X");
const DataLayout data_layout = framework::StringToDataLayout(
ctx->Attrs().Get<std::string>("data_layout"));
const int C =
((this->IsMKLDNNType() == true) || (data_layout == DataLayout::kNCHW)
? x_dims[2]
: x_dims[x_dims.size() - 1]);
ctx->SetOutputDim(framework::GradVarName("X"), x_dims);
// has_scale_grad == has_bias_grad, judge has_scale_grad is enough
if (has_scale_grad) {
ctx->SetOutputDim(framework::GradVarName("Scale"), {2, C}); // 2: share_num
ctx->SetOutputDim(framework::GradVarName("Bias"), {2, C});
}
}
protected:
framework::OpKernelType GetExpectedKernelType(const framework::ExecutionContext& ctx) const {
framework::LibraryType library_{framework::LibraryType::kPlain};
std::string data_format = "AnyLayout";
framework::DataLayout layout_ = framework::StringToDataLayout(data_format);
auto input_data_type = OperatorWithKernel::IndicateVarDataType(ctx, "X");
return framework::OpKernelType(input_data_type, ctx.GetPlace(), layout_, library_);
}
framework::OpKernelType GetKernelTypeForVar(
const std::string& var_name, const Tensor& tensor,
const framework::OpKernelType& expected_kernel_type) const {
return framework::OpKernelType(expected_kernel_type.data_type_,
tensor.place(), tensor.layout());
}
};
class MpcBatchNormOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() {
AddAttr<bool>("is_test",
"(bool, default false) Set to true for inference only, false "
"for training. Some layers may run faster when this is true.")
.SetDefault(false);
AddAttr<float>("momentum", "").SetDefault(0.9);
AddAttr<float>("epsilon", "")
.SetDefault(1e-5)
.AddCustomChecker([](const float &epsilon) {
PADDLE_ENFORCE_GE(
epsilon, 0.0f,
platform::errors::InvalidArgument(
"'epsilon' should be greater or equal than 0.0."));
PADDLE_ENFORCE_LE(epsilon, 0.001f,
platform::errors::InvalidArgument(
"'epsilon' should be less or equal than 0.001."));
});
AddAttr<std::string>("data_layout", "").SetDefault("NCHW");
AddInput("X", "The input tensor");
AddInput("Scale",
"Scale is a 1-dimensional tensor of size C "
"that is applied to the output");
AddInput("Bias",
"Bias is a 1-dimensional tensor of size C "
"that is applied to the output");
AddInput("Mean",
"The global mean (for training) or "
"estimated mean (for testing)");
AddInput("Variance",
"The global variance (for training) "
"or estimated Variance (for testing)");
AddInput("MomentumTensor",
"(Tensor<float32>, optional) If provided, batch_norm will "
"use this as momentum, this has a higher priority than "
"attr(momentum), the shape of this tensor MUST BE [1].")
.AsDispensable();
AddOutput("Y", "result after normalization");
AddOutput("MeanOut",
"Share memory with Mean. "
"Store the global mean when training");
AddOutput("VarianceOut",
"Share memory with Variance. "
"Store the global Variance when training");
AddOutput("SavedMean",
"Mean of the current mini batch, "
"will apply to output when training")
.AsIntermediate();
AddOutput("SavedVariance",
"Variance of the current mini batch, "
"will apply to output when training")
.AsIntermediate();
AddOutput("ReserveSpace",
"Reserve GPU space for triggering the new semi-persistent "
"NHWC kernel")
.AsDispensable();
AddAttr<bool>("use_mkldnn",
"(bool, default false) Only used in mkldnn kernel")
.SetDefault(false);
AddAttr<bool>("fuse_with_relu",
"(bool, default false) Only used in mkldnn kernel")
.SetDefault(false);
AddAttr<bool>("use_global_stats",
"(bool, default false) Whether to use global mean and "
"variance. In inference or test mode, set use_global_stats "
"to true or is_test true. the behavior is equivalent. "
"In train mode, when setting use_global_stats True, the "
"global mean and variance are also used during train time, "
"the BN acts as scaling and shiffting.")
.SetDefault(false);
AddAttr<bool>("trainable_statistics",
"(bool, default false) Whether to calculate mean and variance "
"in test mode. If setting true in test mode, mean and variace "
"will be calculated by current batch statistics.")
.SetDefault(false);
AddComment(R"DOC(
Batch Normalization.
Batch Norm has been implemented as discussed in the paper:
https://arxiv.org/pdf/1502.03167.pdf
Can be used as a normalizer function for conv2d and fully_connected operations.
The required data format for this layer is one of the following:
1. NHWC `[batch, in_height, in_width, in_channels]`
2. NCHW `[batch, in_channels, in_height, in_width]`
)DOC");
}
};
template <typename T>
class MpcBatchNormGradOpMaker : public framework::SingleGradOpMaker<T> {
public:
using framework::SingleGradOpMaker<T>::SingleGradOpMaker;
protected:
void Apply(GradOpPtr<T> op) const {
op->SetType(this->ForwardOpType() + "_grad");
op->SetInput("X", this->Input("X"));
op->SetInput(framework::GradVarName("Y"), this->OutputGrad("Y"));
op->SetInput("Scale", this->Input("Scale"));
op->SetInput("Bias", this->Input("Bias"));
op->SetInput("SavedMean", this->Output("SavedMean"));
op->SetInput("SavedVariance", this->Output("SavedVariance"));
if (this->HasOutput("ReserveSpace")) {
op->SetInput("ReserveSpace", this->Output("ReserveSpace"));
}
// used when setting use_global_stats True during training
if (boost::get<bool>(this->GetAttr("use_global_stats"))) {
op->SetInput("Mean", this->Output("MeanOut"));
op->SetInput("Variance", this->Output("VarianceOut"));
}
op->SetAttrMap(this->Attrs());
op->SetOutput(framework::GradVarName("X"), this->InputGrad("X"));
op->SetOutput(framework::GradVarName("Scale"), this->InputGrad("Scale"));
op->SetOutput(framework::GradVarName("Bias"), this->InputGrad("Bias"));
}
};
class MpcBatchNormOpInferVarType : public framework::PassInDtypeAndVarTypeToOutput {
protected:
std::unordered_map<std::string, std::string>& GetInputOutputWithSameType() const override {
static std::unordered_map<std::string, std::string> m{{"X", /*->*/ "Y"}};
return m;
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OPERATOR(
mpc_batch_norm, ops::MpcBatchNormOp, ops::MpcBatchNormOpMaker,
ops::MpcBatchNormOpInferVarType,
ops::MpcBatchNormGradOpMaker<paddle::framework::OpDesc>,
ops::MpcBatchNormGradOpMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(mpc_batch_norm_grad, ops::MpcBatchNormGradOp);
REGISTER_OP_CPU_KERNEL(
mpc_batch_norm, ops::MpcBatchNormKernel<paddle::platform::CPUDeviceContext, int64_t>);
REGISTER_OP_CPU_KERNEL(
mpc_batch_norm_grad, ops::MpcBatchNormGradKernel<paddle::platform::CPUDeviceContext, int64_t>);
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <algorithm>
#include <string>
#include <vector>
#include "mpc_op.h"
#include "./math/math_function.h"
#include "core/paddlefl_mpc/mpc_protocol/mpc_operators.h"
namespace paddle {
namespace operators {
using DDim = framework::DDim;
using Tensor = framework::Tensor;
using LoDTensor = framework::LoDTensor;
using DataLayout = framework::DataLayout;
std::shared_ptr<mpc::MpcOperators> mpc_operators;
// TODO: remove dependency on aby3 protocol
const int MPC_ONE_SHARE = (1 << paddle::mpc::FIXED_POINTER_SCALING_FACTOR) / 3;
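// Presumably one party's additive share of the fixed-point encoding of 1.0
// (the three shares sum to 1.0 in fixed point); used to inject public
// constants such as epsilon into secret-shared tensors.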
template <typename T>
void Expand(const Tensor* input, Tensor* output, int S, int N, int C, int sample_size) {
// Expand tensor into specified shape
// input shape: {S, C}
    // output shape: {S, N, C, H, W}, sample_size = H * W
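    // e.g. with S = 2, N = 2, C = 3, sample_size = 4, input element (s, c) is
    // copied into the 4 contiguous slots of every (s, n, c) block of output.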
const T* input_data = input->data<T>();
T* output_data = output->data<T>();
int input_share_offset = C;
int output_share_offset = N * C * sample_size;
for (int nc = 0; nc < N * C; ++nc) {
int nc_offset = nc * sample_size;
std::fill(output_data + nc_offset, output_data + nc_offset + sample_size, *(input_data + nc % C));
std::fill(output_data + nc_offset + output_share_offset,
output_data + nc_offset + output_share_offset + sample_size,
*(input_data + nc % C + input_share_offset));
}
}
template <typename DeviceContext, typename T>
void TransToChannelFirst(const Tensor* input, Tensor* output, const framework::ExecutionContext &ctx) {
// Transpose tensor
// input shape: {S, N, C, H, W}
// output shape: {C, S, N, H, W}
    // H and W are optional
auto& dev_ctx = ctx.template device_context<DeviceContext>();
auto input_dims = input->dims();
switch (input_dims.size()) {
case 3: {
std::vector<int> axis{2, 0, 1};
output->mutable_data<T>({input_dims[2], input_dims[0], input_dims[1]}, ctx.GetPlace());
math::Transpose<DeviceContext, T, 3> trans3;
trans3(dev_ctx, *input, output, axis);
break;
}
case 4: {
std::vector<int> axis{2, 0, 1, 3};
output->mutable_data<T>({input_dims[2], input_dims[0], input_dims[1], input_dims[3]}, ctx.GetPlace());
math::Transpose<DeviceContext, T, 4> trans4;
trans4(dev_ctx, *input, output, axis);
break;
}
case 5: {
std::vector<int> axis{2, 0, 1, 3, 4};
output->mutable_data<T>({input_dims[2], input_dims[0], input_dims[1], input_dims[3], input_dims[4]},
ctx.GetPlace());
math::Transpose<DeviceContext, T, 5> trans5;
trans5(dev_ctx, *input, output, axis);
break;
}
default:
PADDLE_THROW("The size of input X's dimensions should be larger than 2, less than 6.");
}
}
template <typename DeviceContext, typename T>
void ComputeSum(const Tensor* input, int C, Tensor* sum, const framework::ExecutionContext &ctx) {
// Compute sum of each channel
// input shape: {S, N, C, H, W}
// output shape: {S, C}
    // H and W are optional; compute the sum of each channel.
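    // Implementation note: the input is transposed to channel-first and each
    // channel slice is reduced with mpc_operators->sum(); the share-0 result is
    // stored at sum[i] and the share-1 result at sum[i + C].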
Tensor input_trans;
TransToChannelFirst<DeviceContext, T>(input, &input_trans, ctx);
Tensor input_slice;
Tensor sum_slice;
auto sum_slice_data = sum_slice.mutable_data<T>(framework::make_ddim({2, 1}), ctx.GetPlace());
auto sum_data = sum->data<T>();
for (size_t i = 0; i < C; ++i) {
input_slice = input_trans.Slice(i, i + 1);
auto shape = paddle::framework::vectorize<size_t>(input_slice.dims());
shape.erase(shape.begin());
std::vector<int64_t> shape_(shape.cbegin(), shape.cend());
DDim dim(shape_.data(), shape_.size());
input_slice.Resize(dim);
mpc_operators->sum(&input_slice, &sum_slice);
sum_data[i] = sum_slice_data[0];
sum_data[i + C] = sum_slice_data[1];
}
}
template <typename DeviceContext, typename T>
void ComputeMeanVariance(const Tensor* input, int S, int N, int C, int sample_size,
Tensor* saved_mean_e, Tensor* saved_variance_e,
const framework::ExecutionContext &ctx) {
// Compute mean and variance of each channel
// input shape: {S, N, C, H, W}
// output shape: {S, C}
    // H and W are optional
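    // mean = sum(x) / (N * sample_size), var = sum((x - mean)^2) / (N * sample_size)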
VLOG(3) << "Compute the mean and variance of each channel";
Tensor input_trans;
TransToChannelFirst<DeviceContext, T>(input, &input_trans, ctx);
ComputeSum<DeviceContext, T>(input, C, saved_mean_e, ctx);
mpc_operators->scale(saved_mean_e, 1.0 / (N * sample_size), saved_mean_e); // scale
Tensor saved_mean_e_expand;
T* saved_mean_e_expand_data = saved_mean_e_expand.mutable_data<T>(input->dims(), ctx.GetPlace());
Expand<T>(saved_mean_e, &saved_mean_e_expand, S, N, C, sample_size);
mpc_operators->sub(input, &saved_mean_e_expand, &saved_mean_e_expand);
mpc_operators->mul(&saved_mean_e_expand, &saved_mean_e_expand, &saved_mean_e_expand);
ComputeSum<DeviceContext, T>(&saved_mean_e_expand, C, saved_variance_e, ctx);
mpc_operators->scale(saved_variance_e, 1.0 / (N * sample_size), saved_variance_e); // scale
}
template <typename DeviceContext, typename T>
class MpcBatchNormKernel : public MpcOpKernel<T> {
public:
void ComputeImpl(const framework::ExecutionContext &ctx) const override {
mpc_operators = mpc::MpcInstance::mpc_instance()->mpc_protocol()->mpc_operators();
VLOG(3) << "Start MpcBatchNormKernel.";
const float epsilon = ctx.Attr<float>("epsilon");
float momentum = ctx.Attr<float>("momentum");
const bool is_test = ctx.Attr<bool>("is_test");
const bool use_global_stats = ctx.Attr<bool>("use_global_stats");
const bool trainable_stats = ctx.Attr<bool>("trainable_statistics");
bool test_mode = is_test && (!trainable_stats);
bool global_stats = test_mode || use_global_stats;
const std::string data_layout_str = ctx.Attr<std::string>("data_layout");
const DataLayout data_layout =
framework::StringToDataLayout(data_layout_str);
const Tensor *x = ctx.Input<Tensor>("X");
const DDim x_dims = x->dims();
PADDLE_ENFORCE_GE(
x_dims.size(), 3,
platform::errors::InvalidArgument(
"The size of input X's dimensions should be larger than 2."
"But received: the size of input X's dimensions is [%d]",
x_dims.size()));
PADDLE_ENFORCE_LE(
x_dims.size(), 6,
platform::errors::InvalidArgument(
"The size of input X's dimensions should be less than 6."
"But received: the size of input X's dimensionss is [%d]",
x_dims.size()));
const int S = 2; // share number
const int N = x_dims[1];
const int C = (data_layout == DataLayout::kNCHW ? x_dims[2] : x_dims[x_dims.size() - 1]);
const int sample_size = x->numel() / S / N / C;
auto *y = ctx.Output<Tensor>("Y");
auto *mean_out = ctx.Output<Tensor>("MeanOut");
auto *variance_out = ctx.Output<Tensor>("VarianceOut");
auto *saved_mean = ctx.Output<Tensor>("SavedMean");
auto *saved_variance = ctx.Output<Tensor>("SavedVariance");
// alloc memory
y->mutable_data<T>(ctx.GetPlace());
mean_out->mutable_data<T>(ctx.GetPlace());
variance_out->mutable_data<T>(ctx.GetPlace());
saved_mean->mutable_data<T>(ctx.GetPlace());
saved_variance->mutable_data<T>(ctx.GetPlace());
if (!global_stats) {
if ((N * sample_size) == 1) {
// Only 1 element in normalization dimension,
// we skip the batch norm calculation, let y = x.
framework::TensorCopy(*x, ctx.GetPlace(), y);
return;
}
      // saved_xx is used only for this batch of data
// compute mean and variance
switch (data_layout) {
case DataLayout::kNCHW: {
ComputeMeanVariance<DeviceContext, T>(x, S, N, C, sample_size, saved_mean, saved_variance, ctx);
break;
}
default:
PADDLE_THROW("Unknown storage order: %s", data_layout_str);
}
      // update global mean and variance, for prediction
if (ctx.HasInput("MomentumTensor")) {
const auto *mom_tensor = ctx.Input<Tensor>("MomentumTensor");
momentum = mom_tensor->data<float>()[0];
}
Tensor saved_mean_scale;
Tensor mean_out_scale;
saved_mean_scale.mutable_data<T>(saved_mean->dims(), ctx.GetPlace());
mean_out_scale.mutable_data<T>(mean_out->dims(), ctx.GetPlace());
mpc_operators->scale(mean_out, momentum, &mean_out_scale);
mpc_operators->scale(saved_mean, 1.0 - momentum, &saved_mean_scale);
mpc_operators->add(&mean_out_scale, &saved_mean_scale, mean_out);
mpc_operators->scale(variance_out, momentum, &mean_out_scale);
mpc_operators->scale(saved_variance, 1.0 - momentum, &saved_mean_scale);
mpc_operators->add(&mean_out_scale, &saved_mean_scale, variance_out);
}
// use SavedMean and SavedVariance to do normalize
// compute output y
Tensor inv_std;
Tensor mean_arr;
inv_std.mutable_data<T>({S, C}, ctx.GetPlace());
Tensor epsilon_expand;
    T* epsilon_expand_data = epsilon_expand.mutable_data<T>({S, C}, ctx.GetPlace());
std::fill(epsilon_expand_data, epsilon_expand_data + S * C, MPC_ONE_SHARE * epsilon); // todo
// inv_std = 1 / sqrt(variance + epsilon)
if (global_stats) {
const Tensor* variance = ctx.Input<Tensor>("Variance");
Tensor var_plus_epsilon;
var_plus_epsilon.mutable_data<T>({S, C}, ctx.GetPlace());
mpc_operators->add(variance, &epsilon_expand, &var_plus_epsilon);
mpc_operators->inverse_square_root(&var_plus_epsilon, &inv_std);
mean_arr.ShareDataWith(*ctx.Input<Tensor>("Mean"));
} else {
Tensor var_plus_epsilon;
var_plus_epsilon.mutable_data<T>({S, C}, ctx.GetPlace());
mpc_operators->add(saved_variance, &epsilon_expand, &var_plus_epsilon);
mpc_operators->inverse_square_root(&var_plus_epsilon, &inv_std);
mean_arr.ShareDataWith(*saved_mean);
}
// ((x - est_mean) * (inv_var) * scale + bias
// formula transform ====>
// (x * inv_var * scale) + (bias - est_mean * inv_var * scale)
const auto *scale = ctx.Input<Tensor>("Scale");
const auto *bias = ctx.Input<Tensor>("Bias");
const T* scale_data = scale->data<T>();
const T* bias_data = bias->data<T>();
Tensor scale_expand;
auto* scale_expand_data = scale_expand.mutable_data<T>({S, C}, ctx.GetPlace());
std::fill(scale_expand_data, scale_expand_data + C, scale_data[0]);
std::fill(scale_expand_data + C, scale_expand_data + C + C, scale_data[1]);
Tensor bias_expand;
auto* bias_expand_data = bias_expand.mutable_data<T>({S, C}, ctx.GetPlace());
std::fill(bias_expand_data, bias_expand_data + C, bias_data[0]);
std::fill(bias_expand_data + C, bias_expand_data + C + C, bias_data[1]);
Tensor new_scale;
Tensor new_bias;
Tensor new_bias_tmp;
new_scale.mutable_data<T>(scale_expand.dims(), ctx.GetPlace());
new_bias.mutable_data<T>(scale_expand.dims(), ctx.GetPlace());
new_bias_tmp.mutable_data<T>(scale_expand.dims(), ctx.GetPlace());
mpc_operators->mul(&inv_std, &scale_expand, &new_scale);
mpc_operators->mul(&mean_arr, &new_scale, &new_bias_tmp);
mpc_operators->sub(&bias_expand, &new_bias_tmp, &new_bias);
switch (data_layout) {
case DataLayout::kNCHW: {
Tensor x_new_scale;
x_new_scale.mutable_data<T>(y->dims(), ctx.GetPlace());
Tensor new_scale_expand;
new_scale_expand.mutable_data<T>(x->dims(), ctx.GetPlace());
Expand<T>(&new_scale, &new_scale_expand, S, N, C, sample_size);
Tensor new_bias_expand;
new_bias_expand.mutable_data<T>(x->dims(), ctx.GetPlace());
Expand<T>(&new_bias, &new_bias_expand, S, N, C, sample_size);
mpc_operators->mul(x, &new_scale_expand, &x_new_scale);
mpc_operators->add(&x_new_scale, &new_bias_expand, y);
break;
}
default:
PADDLE_THROW("Unknown storage order: %d", data_layout);
}
}
};
template <typename DeviceContext, typename T>
class MpcBatchNormGradKernel : public MpcOpKernel<T> {
public:
void ComputeImpl(const framework::ExecutionContext &ctx) const override {
mpc_operators = mpc::MpcInstance::mpc_instance()->mpc_protocol()->mpc_operators();
VLOG(3) << "Start MpcBatchNormGradKernel.";
const auto *d_y = ctx.Input<Tensor>(framework::GradVarName("Y"));
const auto *scale = ctx.Input<Tensor>("Scale");
const auto *bias = ctx.Input<Tensor>("Bias");
const auto *saved_mean = ctx.Input<Tensor>("SavedMean");
// SavedVariance have been reverted in forward operator
const auto *saved_inv_variance = ctx.Input<Tensor>("SavedVariance");
const std::string data_layout_str = ctx.Attr<std::string>("data_layout");
const bool use_global_stats = ctx.Attr<bool>("use_global_stats");
const bool is_test = ctx.Attr<bool>("is_test");
const float epsilon = ctx.Attr<float>("epsilon");
const DataLayout data_layout =
framework::StringToDataLayout(data_layout_str);
auto *d_x = ctx.Output<Tensor>(framework::GradVarName("X"));
auto *d_scale = ctx.Output<Tensor>(framework::GradVarName("Scale"));
auto *d_bias = ctx.Output<Tensor>(framework::GradVarName("Bias"));
// batch_norm with inplace as false will take X as grad input, which
// is same as cuDNN batch_norm backward calculation, batch_norm
// with inplace as true only take Y as input and X should be calculate
// by inverse operation of batch_norm on Y
const Tensor *x;
x = ctx.Input<Tensor>("X");
PADDLE_ENFORCE_EQ(
is_test, false,
platform::errors::InvalidArgument(
"`is_test = True` CANNOT be used in train program. If "
"you want to use global status in pre_train model, "
"please set `use_global_stats = True`"));
// Get the size for each dimension.
// NCHW [batch_size, in_channels, in_height, in_width]
const auto &x_dims = x->dims();
PADDLE_ENFORCE_GE(
x_dims.size(), 3,
platform::errors::InvalidArgument(
"The size of input X's dimensions should be larger than 2."
"But received: the size of input X's dimensions is [%d]",
x_dims.size()));
PADDLE_ENFORCE_LE(
x_dims.size(), 6,
platform::errors::InvalidArgument(
"The size of input X's dimensions should be less than 6."
"But received: the size of input X's dimensionss is [%d]",
x_dims.size()));
const int S = 2; // share number
const int N = x_dims[1];
const int C = (data_layout == DataLayout::kNCHW ? x_dims[2] : x_dims[x_dims.size() - 1]);
const int sample_size = x->numel() / S / N / C;
d_x->mutable_data<T>(ctx.GetPlace());
const T *mean_data = saved_mean->data<T>();
Tensor inv_var_tensor;
inv_var_tensor.ShareDataWith(*saved_inv_variance); // local variance
// update mean_data, compute inv_var = 1 / sqrt(variance + epsilon)
if (use_global_stats) {
const auto *running_mean = ctx.Input<Tensor>("Mean");
const auto *running_variance = ctx.Input<Tensor>("Variance");
mean_data = running_mean->data<T>();
Tensor inv_var_tmp;
inv_var_tmp.Resize({S, C});
Tensor var_plus_epsilon;
var_plus_epsilon.mutable_data<T>(running_variance->dims(), ctx.GetPlace());
Tensor epsilon_expand;
T* epsilon_expand_data = epsilon_expand.mutable_data<T>({S, C}, ctx.GetPlace());
std::fill(epsilon_expand_data, epsilon_expand_data + S * C, MPC_ONE_SHARE * epsilon);
mpc_operators->add(running_variance, &epsilon_expand, &var_plus_epsilon);
mpc_operators->inverse_square_root(&var_plus_epsilon, &inv_var_tmp);
framework::TensorCopy(inv_var_tmp, ctx.GetPlace(), &inv_var_tensor);
}
if (d_scale && d_bias) {
d_scale->mutable_data<T>(ctx.GetPlace());
d_bias->mutable_data<T>(ctx.GetPlace());
}
// d_bias = np.sum(d_y, axis=0)
// d_scale = np.sum((X - mean) / inv_std * dy, axis=0)
if ((N * sample_size) == 1 && !use_global_stats) {
framework::TensorCopy(*d_y, ctx.GetPlace(), d_x);
return;
}
switch (data_layout) {
case DataLayout::kNCHW: {
// d_bias = np.sum(d_y, axis=0)
Tensor dy_sum;
dy_sum.Resize({S, C});
dy_sum.mutable_data<T>(ctx.GetPlace());
ComputeSum<DeviceContext, T>(d_y, C, &dy_sum, ctx); // dy_sum
// d_scale = np.sum((X - mean) / inv_std * dy, axis=0)
// = [np.sum(X * dy) - mean * dy_sum] * inv_std
Tensor x_mul_dy;
x_mul_dy.mutable_data<T>(x->dims(), ctx.GetPlace());
const DDim d_y_dim = d_y->dims();
mpc_operators->mul(x, d_y, &x_mul_dy); // X * dy
Tensor dy_mul_x_sub_mean_mul_invstd_sum;
dy_mul_x_sub_mean_mul_invstd_sum.mutable_data<T>({S, C}, ctx.GetPlace());
ComputeSum<DeviceContext, T>(&x_mul_dy, C, &dy_mul_x_sub_mean_mul_invstd_sum, ctx); // sum(X * dy)
Tensor dy_sum_mul_mean;
dy_sum_mul_mean.mutable_data<T>({S, C}, ctx.GetPlace());
mpc_operators->mul(&dy_sum, saved_mean, &dy_sum_mul_mean); // mean * dy_sum
Tensor tmp;
tmp.mutable_data<T>({S, C}, ctx.GetPlace());
// [np.sum(X * dy) - mean * dy_sum]
mpc_operators->sub(&dy_mul_x_sub_mean_mul_invstd_sum, &dy_sum_mul_mean, &tmp);
// [np.sum(X * dy) - mean * dy_sum] * inv_std
mpc_operators->mul(&tmp, saved_inv_variance, &dy_mul_x_sub_mean_mul_invstd_sum);
if (d_scale && d_bias) {
framework::TensorCopy(dy_sum, ctx.GetPlace(), d_bias);
framework::TensorCopy(dy_mul_x_sub_mean_mul_invstd_sum, ctx.GetPlace(), d_scale);
}
// d_x = (1. / N) * scale * inv_var * (N * d_y - np.sum(d_y, axis=0)
// - (X - mean) * inv_var * inv_var * np.sum(d_y * (X - mean), axis=0))
      int scale_coeff = use_global_stats ? 1 : N * sample_size;
Tensor scale_inv_var_nhw;
T* scale_inv_var_nhw_data = scale_inv_var_nhw.mutable_data<T>({S, C}, ctx.GetPlace());
// scale * inv_var
mpc_operators->mul(scale, saved_inv_variance, &scale_inv_var_nhw);
// (1. / N) * scale * inv_var
      mpc_operators->scale(&scale_inv_var_nhw, 1.0 / scale_coeff, &scale_inv_var_nhw);
Tensor scale_inv_var_nhw_expand;
scale_inv_var_nhw_expand.mutable_data<T>(d_y_dim, ctx.GetPlace());
Expand<T>(&scale_inv_var_nhw, &scale_inv_var_nhw_expand, S, N, C, sample_size);
if (!use_global_stats) {
Tensor dy_scale;
dy_scale.mutable_data<T>(d_y_dim, ctx.GetPlace());
// N * dy
mpc_operators->scale(d_y, N * sample_size, &dy_scale);
Tensor dy_sum_expand;
dy_sum_expand.mutable_data<T>(d_y_dim, ctx.GetPlace());
Expand<T>(&dy_sum, &dy_sum_expand, S, N, C, sample_size);
Tensor dy_scale_minus_dy;
dy_scale_minus_dy.mutable_data<T>(d_y_dim, ctx.GetPlace());
// N * dy - np.sum(d_y, axis=0)
mpc_operators->sub(&dy_scale, &dy_sum_expand, &dy_scale_minus_dy);
Tensor mean_expand;
mean_expand.mutable_data<T>(d_y_dim, ctx.GetPlace());
Expand<T>(saved_mean, &mean_expand, S, N, C, sample_size);
Tensor x_minus_mean;
x_minus_mean.mutable_data<T>(d_y_dim, ctx.GetPlace());
// (X - mean)
mpc_operators->sub(x, &mean_expand, &x_minus_mean);
// inv_var * inv_var * np.sum(d_y * (X - mean), axis=0))
mpc_operators->mul(&dy_mul_x_sub_mean_mul_invstd_sum, saved_inv_variance, &tmp);
Tensor tmp_expand;
tmp_expand.mutable_data<T>(d_y_dim, ctx.GetPlace());
Expand<T>(&tmp, &tmp_expand, S, N, C, sample_size);
Tensor tmp_expand2;
tmp_expand2.mutable_data<T>(d_y_dim, ctx.GetPlace());
// (X - mean) * inv_var * inv_var * np.sum(d_y * (X - mean), axis=0)
mpc_operators->mul(&tmp_expand, &x_minus_mean, &tmp_expand2);
mpc_operators->sub(&dy_scale_minus_dy, &tmp_expand2, &dy_scale);
mpc_operators->mul(&scale_inv_var_nhw_expand, &dy_scale, d_x);
} else {
mpc_operators->mul(&scale_inv_var_nhw_expand, d_y, d_x);
}
break;
}
default:
PADDLE_THROW("Unknown storage order: %s", data_layout_str);
} // switch
} // void ComputeImpl
}; // class MpcBatchNormGradKernel
} // namespace operators
} // namespace paddle
......@@ -69,6 +69,119 @@ private:
int64_t n_;
};
template <typename T, typename DeviceContext>
class MidWiseTransformIterator;
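// CPU iterator that replays a length-n vector over a [pre, n, post] layout:
// each element is repeated `post` times and the index wraps after n elements,
// broadcasting the vector along the pre and post dimensions.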
template <typename T>
class MidWiseTransformIterator<T, platform::CPUDeviceContext>
: public std::iterator<std::random_access_iterator_tag, T, std::ptrdiff_t,
T *, T &> {
public:
MidWiseTransformIterator(const T *ptr, int n, int post)
: ptr_(ptr), i_(0), j_(0), n_(n), post_(post) {}
MidWiseTransformIterator<T, platform::CPUDeviceContext> &operator++() {
++j_;
if (UNLIKELY(j_ == post_)) {
++i_;
j_ = 0;
if (UNLIKELY(i_ == n_)) {
i_ = 0;
}
}
return *this;
}
MidWiseTransformIterator<T, platform::CPUDeviceContext> &operator+(int n) {
while (n-- > 0) {
++j_;
if (UNLIKELY(j_ == post_)) {
++i_;
j_ = 0;
if (UNLIKELY(i_ == n_)) {
i_ = 0;
}
}
}
return *this;
}
bool operator==(const MidWiseTransformIterator<T, platform::CPUDeviceContext>
&rhs) const {
return (ptr_ + i_) == &(*rhs);
}
bool operator!=(const MidWiseTransformIterator<T, platform::CPUDeviceContext>
&rhs) const {
return (ptr_ + i_) != &(*rhs);
}
const T &operator*() { return ptr_[i_]; }
private:
const T *ptr_;
int64_t i_;
int64_t j_;
int64_t n_;
int64_t post_;
};
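// Applies `func_` elementwise over x and a (possibly broadcast) y, writing the
// result into z; RunRowWise/RunMidWise wrap the smaller operand in the
// broadcast iterators defined above.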
template <typename Functor, typename T, typename DeviceContext,
typename OutType = T>
class TransformFunctor {
public:
TransformFunctor(const framework::Tensor *x, const framework::Tensor *y,
framework::Tensor *z, const DeviceContext &ctx, Functor func,
const bool is_xsize_larger = true)
: x_(x->data<T>()),
y_(y->data<T>()),
z_(z->mutable_data<OutType>(ctx.GetPlace())),
nx_(x->numel()),
ctx_(ctx),
func_(func),
is_xsize_larger_(is_xsize_larger) {
if (is_xsize_larger_ == false) {
nx_ = y->numel();
}
}
inline void Run() const {
platform::Transform<DeviceContext> trans;
trans(ctx_, x_, x_ + nx_, y_, z_, func_);
}
inline void RunRowWise(int n, int pre) const {
platform::Transform<DeviceContext> trans;
if (is_xsize_larger_) {
trans(ctx_, x_, x_ + nx_,
RowwiseTransformIterator<T, DeviceContext>(y_, n), z_, func_);
} else {
trans(ctx_, y_, y_ + nx_,
RowwiseTransformIterator<T, DeviceContext>(x_, n), z_, func_);
}
}
inline void RunMidWise(int n, int pre, int post) const {
platform::Transform<DeviceContext> trans;
if (is_xsize_larger_) {
trans(ctx_, x_, x_ + nx_,
MidWiseTransformIterator<T, DeviceContext>(y_, n, post), z_, func_);
} else {
trans(ctx_, y_, y_ + nx_,
MidWiseTransformIterator<T, DeviceContext>(x_, n, post), z_, func_);
}
}
private:
const T *x_;
const T *y_;
OutType *z_;
int64_t nx_;
const DeviceContext &ctx_;
Functor func_;
bool is_xsize_larger_;
};
template <typename T>
struct AddFunctor {
inline HOSTDEVICE T operator()(T x, T y) { return x + y; }
......@@ -123,27 +236,34 @@ public:
in_y_t_slice = in_y_t->Slice(i, i + 1);
out_t_slice = out_t->Slice(i, i + 1);
auto x_dims = in_x_t_slice.dims();
auto y_dims = in_y_t_slice.dims();
axis = (axis == -1 ? x_dims.size() - y_dims.size() : axis);
PADDLE_ENFORCE(axis >= 0 && axis < x_dims.size(),
"Axis should be in range [0, x_dims)");
int pre, n, post;
GetMidDims get_mid_dims;
get_mid_dims(x_dims, y_dims, axis, &pre, &n, &post);
auto x_ = in_x_t_slice.data<T>();
auto y_ = in_y_t_slice.data<T>();
auto out_ = out_t_slice.data<T>();
auto nx_ = in_x_t_slice.numel();
paddle::platform::Transform<DeviceContext> trans;
if (post == 1) {
trans(ctx.template device_context<DeviceContext>(), x_, x_ + nx_,
RowwiseTransformIterator<T, DeviceContext>(y_, n),
out_, AddFunctor<T>());
} else {
trans(ctx.template device_context<DeviceContext>(), x_, x_ + nx_,
MidWiseTransformIterator<T, DeviceContext>(y_, n, post),
out_, AddFunctor<T>());
}
}
}
}
......@@ -185,17 +305,15 @@ public:
int pre, n, post;
GetMidDims get_mid_dims;
get_mid_dims(x_dims, y_dims, axis, &pre, &n, &post);
PADDLE_ENFORCE_EQ(post, 1,
                  "post should be equal to 1, but received post is [%d]", post);
std::fill(dy_data, dy_data + dy->numel(), static_cast<T>(0));
for (size_t i = 0; i < SHARE_NUM; ++i) {
int y_offset = i * n;
for (size_t j = 0; j < pre; ++j) {
for (size_t k = 0; k < n; ++k) {
int out_offset = i * pre * n + j * n + k;
if (0 == j) {
dy_data[k + y_offset] = dout_data[out_offset];
} else {
for (size_t m = 0; m < post; ++m) {
int out_offset = i * pre * n * post + j * n * post + k * post + m;
dy_data[k + y_offset] += dout_data[out_offset];
}
}
......
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "mpc_gru_op.h"
#include "paddle/fluid/framework/ddim.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/platform/device_context.h"
#include "mpc_op.h"
#include <memory>
#include <string>
#include "core/paddlefl_mpc/operators/math/math_function.h"
namespace paddle
{
namespace operators
{
using framework::DDim;
using framework::Tensor;
using framework::LoD;
class MpcGRUOp : public framework::OperatorWithKernel
{
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext *ctx) const override
{
PADDLE_ENFORCE(ctx->HasInput("Input"),
"Input(%s) of MpcGRUOp should not be null.", "Input");
PADDLE_ENFORCE(ctx->HasInput("Weight"),
"Input(%s) of MpcGRUOp should not be null.", "Weight");
PADDLE_ENFORCE(ctx->HasOutput("BatchGate"),
"Output(%s) of MpcGRUOp should not be null.", "BatchGate");
PADDLE_ENFORCE(ctx->HasOutput("BatchResetHiddenPrev"),
"Output(%s) of MpcGRUOp should not be null.",
"BatchResetHiddenPrev");
PADDLE_ENFORCE(ctx->HasOutput("BatchHidden"),
"Output(%s) of MpcGRUOp should not be null.", "BatchHidden");
PADDLE_ENFORCE(ctx->HasOutput("Hidden"),
"Output(%s) of MpcGRUOp should not be null.", "Hidden");
auto input_dims_trans = ctx->GetInputDim("Input");
auto input_dims = framework::make_ddim({input_dims_trans[1],
input_dims_trans[0], input_dims_trans[2]});
auto weight_dims = ctx->GetInputDim("Weight");
int input_size = input_dims[2];
int frame_size = weight_dims[1];
if (ctx->IsRuntime())
{
PADDLE_ENFORCE_EQ(
input_size, frame_size * 3,
"The input_size must be 3 times frame_size in MpcGRUOp.");
}
PADDLE_ENFORCE_EQ(
weight_dims[2], frame_size * 3,
"The shape of mpc Weight matrix must be [frame_size, frame_size * 3].");
if (ctx->HasInput("H0"))
{
auto h0_dims = ctx->GetInputDim("H0");
PADDLE_ENFORCE_EQ(h0_dims[2], frame_size,
"The width of H0 must be equal to frame_size.");
}
if (ctx->HasInput("Bias"))
{
auto bias_dims = ctx->GetInputDim("Bias");
int bias_height = bias_dims[1];
int bias_width = bias_dims[2];
PADDLE_ENFORCE_EQ(bias_height, 1,
"The shape of Bias must be [1, frame_size * 3].");
PADDLE_ENFORCE_EQ(bias_width, frame_size * 3,
"The shape of Bias must be [1, frame_size * 3].");
}
ctx->SetOutputDim("BatchGate", input_dims);
ctx->SetOutputDim("BatchResetHiddenPrev", {2, input_dims[1], frame_size});
ctx->SetOutputDim("BatchHidden", {2, input_dims[1], frame_size});
ctx->SetOutputDim("Hidden", {2, input_dims[1], frame_size});
ctx->ShareLoD("Input", "Hidden");
}
};
class MpcGRUOpMaker : public framework::OpProtoAndCheckerMaker
{
public:
void Make() override
{
AddInput("Input",
"(LoDTensor) The first input is a LodTensor, which supports "
"variable-time length input sequence. The underlying tensor in "
"this LoDTenosr is a matrix with shape (T x 2 x 3D), where, T is the "
"total time steps in this mini-batch, D is the hidden size."
"Note: before call this OP, "
"Yout must transpose input shape of mini-batch dim to first dim,"
"that is, (2, T, 3D) is transpose to (T, 2, 3D), "
"so that its lod information of shares can be set correctly");
AddInput("H0",
"(Tensor, optional) The initial hidden state is an optional "
"input. This is a tensor with shape (2 x N x D), where N is the "
"batch size, D is the hidden size.")
.AsDispensable();
AddInput(
"Weight",
"(Tensor) The learnable hidden-hidden weight matrix with shape "
"(2 x D x 3D), where D is the hidden size. The elements continuous in "
"memory can be divided into two parts. The first part are weights of "
"the update gate and reset gate with shape (2 x D x 2D), and the second "
"part are weights of output candidate with shape (2 x D x D).");
AddInput("Bias",
"(Tensor, optional) Bias vector with shape (2 x 1 x 3D) concating "
"bias of the update gate, reset gate and output candidate.")
.AsDispensable();
AddOutput("BatchGate",
"(LoDTensor) To compute with batches, sequence data will be "
"reorganized into several successive batches each containing "
"data from the same time step. The LoDTensor BatchGate contains "
"the update gate, reset gate and output candidate values "
"organized in batches. The LoD size is 2. The first LoD contains "
"the batch offsets and the second LoD contains the indexes in "
"the raw sequence data.")
.AsIntermediate();
AddOutput(
"BatchResetHiddenPrev",
"(LoDTensor) The reset hidden state LoDTensor organized in batches. "
"This LoDTensor is a matrix with shape (2 x T x D) and has the same LoD "
"with `BatchGate`.")
.AsIntermediate();
AddOutput(
"BatchHidden",
"(LoDTensor) The hidden state LoDTensor organized in batches. "
"This LoDTensor is a matrix with shape (2 x T x D) and has the same LoD "
"with `BatchGate`.")
.AsIntermediate();
AddOutput(
"Hidden",
"(LoDTensor) the hidden state LoDTensor organized in sequences. "
"This LoDTensor is a matrix with shape (2 x T x D) and has the same LoD "
"with `BatchGate`.");
AddAttr<std::string>("activation",
"(string, default tanh) "
"The activation type used for output candidate {h}_t.")
.SetDefault("relu");
AddAttr<std::string>(
"gate_activation",
"(string, default sigmoid) "
"The activation type used in update gate and reset gate.")
.SetDefault("sigmoid");
AddAttr<bool>("is_reverse",
"(bool, default: False) "
"whether to compute reversed GRU.")
.SetDefault(false);
AddAttr<bool>("origin_mode",
"bool"
"use origin mode in article https://arxiv.org/abs/1412.3555")
.SetDefault(false);
AddComment(R"DOC(
GRU Operator implements part calculations of the complete GRU as following:
$$
update\_gate: u_t = actGate(xu_t + W_u * h_{t-1} + b_u) \\
reset\_gate: r_t = actGate(xr_t + W_r * h_{t-1} + b_r) \\
output\_candidate: {h}_t = actNode(xc_t + W_c * dot(r_t, h_{t-1}) + b_c) \\
output: h_t = dot((1 - u_t), h_{t-1}) + dot(u_t, {h}_t)
$$
@note To implement the complete GRU, a fully-connected operator must be applied
beforehand to feed xu, xr and xc as the Input of the GRU operator.
)DOC");
}
};
class MpcGRUGradOp : public framework::OperatorWithKernel
{
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext *ctx) const override
{
PADDLE_ENFORCE(ctx->HasInput("Input"),
"Input(%s) of MpcGRUGradOp should not be null.", "Input");
PADDLE_ENFORCE(ctx->HasInput("Weight"),
"Input(%s) of MpcGRUGradOp should not be null.", "Weight");
PADDLE_ENFORCE(ctx->HasInput("BatchGate"),
"Input(%s) of MpcGRUGradOp should not be null.", "BatchGate");
PADDLE_ENFORCE(ctx->HasInput("BatchResetHiddenPrev"),
"Input(%s) of MpcGRUGradOp should not be null.",
"BatchResetHiddenPrev");
PADDLE_ENFORCE(ctx->HasInput("BatchHidden"),
"Input(%s) of MpcGRUOp should not be null.", "BatchHidden");
PADDLE_ENFORCE(ctx->HasInput("Hidden"),
"Input(%s) of MpcGRUGradOp should not be null.", "Hidden");
PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Hidden")),
"Input(%s@GRAD) of MpcGRUGradOp should not be null.", "Hidden");
auto input_dims_trans = ctx->GetInputDim("Input");
auto input_dims = framework::make_ddim({input_dims_trans[1],
input_dims_trans[0], input_dims_trans[2]});
auto weight_dims = ctx->GetInputDim("Weight");
int input_size = input_dims[2];
int frame_size = weight_dims[1];
int weight_height = weight_dims[1];
int weight_width = weight_dims[2];
PADDLE_ENFORCE_EQ(input_size, frame_size * 3,
                  "The input_size must be 3 times frame_size in MpcGRUOp.");
PADDLE_ENFORCE_EQ(
weight_height, frame_size,
"The shape of Weight matrix must be [frame_size, frame_size * 3].");
PADDLE_ENFORCE_EQ(
weight_width, frame_size * 3,
"The shape of Weight matrix must be [frame_size, frame_size * 3].");
if (ctx->HasInput("H0"))
{
auto h0_dims = ctx->GetInputDim("H0");
PADDLE_ENFORCE_EQ(h0_dims[2], frame_size,
"The width of H0 must be equal to frame_size.");
auto h0_grad_name = framework::GradVarName("H0");
if (ctx->HasOutput(h0_grad_name))
ctx->SetOutputDim(h0_grad_name, h0_dims);
}
if (ctx->HasInput("Bias"))
{
auto bias_dims = ctx->GetInputDim("Bias");
int bias_height = bias_dims[1];
int bias_width = bias_dims[2];
PADDLE_ENFORCE_EQ(bias_height, 1,
"The shape of Bias must be [1, frame_size * 3].");
PADDLE_ENFORCE_EQ(bias_width, frame_size * 3,
"The shape of Bias must be [1, frame_size * 3].");
auto bias_grad_name = framework::GradVarName("Bias");
if (ctx->HasOutput(bias_grad_name))
ctx->SetOutputDim(bias_grad_name, bias_dims);
}
auto input_grad_name = framework::GradVarName("Input");
if (ctx->HasOutput(input_grad_name))
//transpose input's shape
ctx->SetOutputDim(input_grad_name, input_dims);
auto weight_grad_name = framework::GradVarName("Weight");
if (ctx->HasOutput(weight_grad_name))
ctx->SetOutputDim(weight_grad_name, weight_dims);
}
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext &ctx) const override
{
return framework::OpKernelType(OperatorWithKernel::IndicateVarDataType(
ctx, framework::GradVarName("Hidden")),
ctx.device_context());
}
};
template <typename T>
class MpcGRUCPUKernel : public MpcOpKernel<T> {
public:
void BatchCompute(const framework::ExecutionContext& context) const {
using DeviceContext = paddle::platform::CPUDeviceContext;
bool origin_mode = context.Attr<bool>("origin_mode");
auto* input_trans = context.Input<LoDTensor>("Input");
auto* h0 = context.Input<Tensor>("H0");
auto* weight = context.Input<Tensor>("Weight");
const T* weight_data = weight->data<T>();
auto* bias = context.Input<Tensor>("Bias");
auto* batch_gate = context.Output<LoDTensor>("BatchGate");
batch_gate->mutable_data<T>(context.GetPlace());
auto* batch_reset_hidden_prev =
context.Output<LoDTensor>("BatchResetHiddenPrev");
batch_reset_hidden_prev->mutable_data<T>(context.GetPlace());
auto* batch_hidden = context.Output<LoDTensor>("BatchHidden");
batch_hidden->mutable_data<T>(context.GetPlace());
auto* hidden = context.Output<LoDTensor>("Hidden");
hidden->mutable_data<T>(context.GetPlace());
auto hidden_dims = hidden->dims();
const auto place = context.GetPlace();
bool is_reverse = context.Attr<bool>("is_reverse");
math::LoDTensor2BatchFunctor<DeviceContext, T> to_batch;
auto& dev_ctx = context.template device_context<DeviceContext>();
// get input lod
auto input_lod = input_trans->lod();
LoD gate_lod;
// transpose input to corrected mpc_input
// (T, 2, 3D) to (2, T, 3D)
math::Transpose<DeviceContext, T, 3> transpose;
Tensor input;
auto input_dim = input_trans->dims();
auto in_dim = framework::make_ddim({input_dim[1], input_dim[0], input_dim[2]});
input.mutable_data<T>(
in_dim,
context.GetPlace());
transpose(dev_ctx, *input_trans, &input, {1, 0, 2});
for (int i = 0; i < 2; ++i) {
// mpc LoDTensor to Batch
Tensor input_s;
Tensor batch_gate_s;
SliceAndReshape(&input, input_s, i);
SliceAndReshape(batch_gate, batch_gate_s, i);
LoDTensor lod_input_s;
LoDTensor lod_batch_gate_s;
lod_input_s.ShareBufferWith(input_s);
lod_input_s.mutable_data<T>(input_s.dims(), place);
lod_batch_gate_s.ShareBufferWith(batch_gate_s);
lod_batch_gate_s.mutable_data<T>(batch_gate_s.dims(), place);
lod_input_s.set_lod(input_lod);
to_batch(dev_ctx, lod_input_s, &lod_batch_gate_s, true, is_reverse);
gate_lod = lod_batch_gate_s.lod();
}
if (bias) {
// add mpc bias
math::RowwiseAdd<DeviceContext, T> add_bias;
for (int i = 0; i < 2; ++i) {
Tensor batch_gate_s;
Tensor bias_s;
SliceAndReshape(batch_gate, batch_gate_s, i);
SliceAndReshape(bias, bias_s, i);
add_bias(dev_ctx, batch_gate_s, bias_s, &batch_gate_s);
}
}
// split mpc weight from shape (2, D, 3D) to 3 * (2, D, D)
std::vector<Tensor> mpc_splitted_weights_t;
//Split3Dim<DeviceContext, T>(context, &mpc_splitted_weights_t, *weight);
SplitWeight<DeviceContext, T>(context, mpc_splitted_weights_t, *weight);
Tensor ordered_h0;
framework::Vector<size_t> order((gate_lod)[2]);
Tensor mpc_hidden_prev_t;
bool has_hidden_prev = false;
if (h0) {
// reordered h0 based on lod
ordered_h0.Resize(h0->dims());
for (int i = 0; i < 2; ++i) {
Tensor h0_s;
Tensor ordered_h0_s;
SliceAndReshape(h0, h0_s, i);
SliceAndReshape(&ordered_h0, ordered_h0_s, i);
ReorderInitState<DeviceContext, T>(
context.template device_context<DeviceContext>(), h0_s, order,
&ordered_h0_s, true);
}
// copy ordered_h0 to mpc_hidden_prev_t
mpc_hidden_prev_t = ordered_h0;
has_hidden_prev = true;
}
auto batch_starts = (gate_lod)[0];
size_t seq_len = batch_starts.size() - 1;
std::vector<Tensor> mpc_gate_t_list;
std::vector<Tensor> mpc_reset_hidden_prev_t_list;
std::vector<Tensor> mpc_hidden_t_list;
// compute gru
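// Each step n covers rows [bstart, bend) of the batched gates; the hidden
// state produced here is copied into mpc_hidden_prev_t for the next step.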
for (size_t n = 0; n < seq_len; n++) {
int bstart = static_cast<int>(batch_starts[n]);
int bend = static_cast<int>(batch_starts[n + 1]);
int cur_batch_size = bend - bstart;
std::vector<Tensor> mpc_splitted_gate_t;
Tensor mpc_batch_gate_t;
Tensor mpc_reset_hidden_prev_t;
Tensor mpc_hidden_t;
ToMpcBatchTensor<DeviceContext, T>(context, mpc_batch_gate_t, *batch_gate, bstart, bend);
Split3Dim<DeviceContext, T>(context, mpc_splitted_gate_t, mpc_batch_gate_t);
ToMpcBatchTensor<DeviceContext, T>(context, mpc_reset_hidden_prev_t, *batch_reset_hidden_prev, bstart, bend);
ToMpcBatchTensor<DeviceContext, T>(context, mpc_hidden_t, *batch_hidden, bstart, bend);
ComputGRUUint<DeviceContext, T>(context, mpc_splitted_gate_t, mpc_splitted_weights_t, mpc_reset_hidden_prev_t,
mpc_hidden_t, mpc_hidden_prev_t, origin_mode, has_hidden_prev);
Tensor mpc_gate_t;
Concat3Dim<DeviceContext, T>(context, &mpc_gate_t, mpc_splitted_gate_t);
//mpc_hidden_prev_t = mpc_hidden_t;
mpc_hidden_prev_t.mutable_data<T>(mpc_hidden_t.dims(), place);
framework::TensorCopy(mpc_hidden_t, context.GetPlace(), &mpc_hidden_prev_t);
mpc_gate_t_list.emplace_back(mpc_gate_t);
mpc_reset_hidden_prev_t_list.emplace_back(mpc_reset_hidden_prev_t);
mpc_hidden_t_list.emplace_back(mpc_hidden_t);
}
// Concat output variables
ConcatBatchAll<DeviceContext, T>(context, batch_gate, mpc_gate_t_list);
ConcatBatchAll<DeviceContext, T>(context, batch_reset_hidden_prev, mpc_reset_hidden_prev_t_list);
ConcatBatchAll<DeviceContext, T>(context, batch_hidden, mpc_hidden_t_list);
// mpc batch tensor to mpc LoDTensor
for (int i = 0; i < 2; ++i)
{
Tensor batch_hidden_s;
SliceAndReshape(batch_hidden, batch_hidden_s, i);
Tensor hidden_s;
SliceAndReshape(hidden, hidden_s, i);
LoDTensor lod_batch_hidden_s;
LoDTensor lod_hidden_s;
lod_batch_hidden_s.ShareBufferWith(batch_hidden_s);
lod_batch_hidden_s.mutable_data<T>(batch_hidden_s.dims(), place);
lod_hidden_s.ShareBufferWith(hidden_s);
lod_hidden_s.mutable_data<T>(hidden_s.dims(), place);
math::Batch2LoDTensorFunctor<DeviceContext, T> to_seq;
lod_batch_hidden_s.set_lod(gate_lod);
lod_hidden_s.set_lod(gate_lod);
to_seq(dev_ctx, lod_batch_hidden_s, &lod_hidden_s);
}
// set batch_gate_lod for grad op
batch_gate->set_lod(gate_lod);
}
void ComputeImpl(const framework::ExecutionContext& context) const override {
BatchCompute(context);
}
};
template <typename T>
class MpcGRUGradOpMaker : public framework::SingleGradOpMaker<T>
{
public:
using framework::SingleGradOpMaker<T>::SingleGradOpMaker;
protected:
void Apply(GradOpPtr<T> grad_op) const override
{
grad_op->SetType("mpc_gru_grad");
grad_op->SetInput("Input", this->Input("Input"));
grad_op->SetInput("H0", this->Input("H0"));
grad_op->SetInput("Bias", this->Input("Bias"));
grad_op->SetInput("Weight", this->Input("Weight"));
grad_op->SetInput("BatchGate", this->Output("BatchGate"));
grad_op->SetInput("BatchResetHiddenPrev",
this->Output("BatchResetHiddenPrev"));
grad_op->SetInput("BatchHidden", this->Output("BatchHidden"));
grad_op->SetInput("Hidden", this->Output("Hidden"));
grad_op->SetInput(framework::GradVarName("Hidden"),
this->OutputGrad("Hidden"));
grad_op->SetOutput(framework::GradVarName("H0"), this->InputGrad("H0"));
grad_op->SetOutput(framework::GradVarName("Input"),
this->InputGrad("Input"));
grad_op->SetOutput(framework::GradVarName("Weight"),
this->InputGrad("Weight"));
grad_op->SetOutput(framework::GradVarName("Bias"), this->InputGrad("Bias"));
grad_op->SetAttrMap(this->Attrs());
}
};
DECLARE_NO_NEED_BUFFER_VARS_INFERER(MpcGRUGradOpNoNeedBufferVarInference, "Input",
"Bias");
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OPERATOR(mpc_gru, ops::MpcGRUOp, ops::MpcGRUOpMaker,
ops::MpcGRUGradOpMaker<paddle::framework::OpDesc>,
ops::MpcGRUGradOpMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(mpc_gru_grad, ops::MpcGRUGradOp,
ops::MpcGRUGradOpNoNeedBufferVarInference);
REGISTER_OP_CPU_KERNEL(mpc_gru, ops::MpcGRUCPUKernel<int64_t>);
REGISTER_OP_CPU_KERNEL(
mpc_gru_grad, ops::MpcGRUGradKernel<paddle::platform::CPUDeviceContext, int64_t>);
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <string>
#include <functional>
#include <glog/logging.h>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "core/paddlefl_mpc/operators/math/sequence2batch.h"
#include "core/paddlefl_mpc/operators/math/concat_and_split.h"
#include "core/paddlefl_mpc/operators/math/math_function.h"
#include "mpc_op.h"
namespace paddle {
namespace operators {
using LoDTensor = framework::LoDTensor;
using Tensor = framework::Tensor;
typedef std::function<void(const Tensor*, Tensor*)> GateActivation;
template<typename T>
inline void ComputeSigmoidGrad(const framework::ExecutionContext& context,
Tensor& dy, Tensor& y, Tensor& dx);
template<typename DeviceContext, typename T>
inline void BackwardStateGrad(const framework::ExecutionContext& context,
std::vector<Tensor>& mpc_splitted_gate_t,
std::vector<Tensor>& mpc_splitted_gate_grad_t,
Tensor& mpc_hidden_prev_t, Tensor& mpc_hidden_prev_grad_t,
Tensor& mpc_hidden_grad_t,
bool origin_mode, bool has_hidden_prev,
bool has_hidden_prev_grad);
template<typename DeviceContext, typename T>
inline void BackwarsResetGrad(const framework::ExecutionContext& context,
std::vector<Tensor>& mpc_splitted_gate_t,
std::vector<Tensor>& mpc_splitted_gate_grad_t,
Tensor& mpc_hidden_prev_t, Tensor& mpc_hidden_prev_grad_t,
Tensor& mpc_reset_hidden_prev_grad_t,
bool has_hidden_prev, bool has_hidden_prev_grad);
template <typename DeviceContext, typename T>
inline void ReorderInitState(const DeviceContext& ctx,
const framework::Tensor& src,
framework::Vector<size_t> index_lod,
framework::Tensor* dst, bool indexed_src) {
math::CopyMatrixRowsFunctor<DeviceContext, T> row_shuffle;
dst->mutable_data<T>(src.dims(), ctx.GetPlace());
row_shuffle(ctx, src, index_lod, dst, indexed_src);
}
template<typename DeviceContext, typename T>
inline void ComputGRUUint(const framework::ExecutionContext& context,
std::vector<Tensor>& gate_t,
std::vector<Tensor>& weight_t,
Tensor &reset_hidden_prev_t,
Tensor &hidden_t,
Tensor &hidden_prev_t,
bool origin_mode,
bool& has_hidden_prev) {
// compute GRUUnit
Tensor u_h_t;
Tensor r_h_t;
// gate_t[x] shape (2, B, D)
// weight_t[x] shape (2, D, D)
// hidden_prev_t shape (2, B, D)
// hidden_t shape (2, B, D)
u_h_t.mutable_data<T>(gate_t[0].dims(), context.GetPlace());
r_h_t.mutable_data<T>(gate_t[1].dims(), context.GetPlace());
auto mpc_operator = mpc::MpcInstance::mpc_instance()->mpc_protocol()->mpc_operators();
if (has_hidden_prev) {
// compute update gate and reset gate: gate_t += hidden_prev_t matmul gate_weight
mpc_operator->matmul(&hidden_prev_t, &weight_t[0], &u_h_t);
mpc_operator->add(&u_h_t, &gate_t[0], &gate_t[0]);
mpc_operator->matmul(&hidden_prev_t, &weight_t[1], &r_h_t);
mpc_operator->add(&r_h_t, &gate_t[1], &gate_t[1]);
}
auto GateActProcess = [&gate_t](const GateActivation fun) {
fun(&gate_t[0], &gate_t[0]);
fun(&gate_t[1], &gate_t[1]);
};
GateActivation activ_functor;
std::string active_gate = context.Attr<std::string>("gate_activation");
if (active_gate == "sigmoid_chebyshev") {
activ_functor = std::bind(&paddle::mpc::MpcOperators::sigmoid_chebyshev,
mpc_operator.get(),
std::placeholders::_1,
std::placeholders::_2);
} else if (active_gate == "sigmoid") {
activ_functor = std::bind(&paddle::mpc::MpcOperators::sigmoid,
mpc_operator.get(),
std::placeholders::_1,
std::placeholders::_2);
} else if (active_gate == "sigmoid_enhanced") {
activ_functor = std::bind(&paddle::mpc::MpcOperators::sigmoid_enhanced,
mpc_operator.get(),
std::placeholders::_1,
std::placeholders::_2);
} else {
PADDLE_THROW("gate activation of %s is not implemented yet.", active_gate);
}
GateActProcess(activ_functor);
if (has_hidden_prev) {
// reset_hidden_prev_t = gate[1] * hidden_prev_t
// compute candidate gate: gate_t[2] += reset_hidden_prev_t matmul state_weight
Tensor r_h_tmp;
r_h_tmp.mutable_data<T>(gate_t[2].dims(), context.GetPlace());
mpc_operator->mul(&gate_t[1], &hidden_prev_t, &reset_hidden_prev_t);
mpc_operator->matmul(&reset_hidden_prev_t, &weight_t[2], &r_h_tmp);
mpc_operator->add(&r_h_tmp, &gate_t[2], &gate_t[2]);
} else {
//initialize reset_hidden_prev_t and hidden_prev_t as 0
math::SetConstant<DeviceContext, T> zero;
auto& dev_ctx = context.template device_context<DeviceContext>();
reset_hidden_prev_t.mutable_data<T>(gate_t[0].dims(), context.GetPlace());
hidden_prev_t.mutable_data<T>(gate_t[0].dims(), context.GetPlace());
zero(dev_ctx, &reset_hidden_prev_t, static_cast<T>(0));
zero(dev_ctx, &hidden_prev_t, static_cast<T>(0));
has_hidden_prev = true;
}
mpc_operator->relu(&gate_t[2], &gate_t[2]);
Tensor u_h_tmp;
Tensor ops_u_h_tmp;
u_h_tmp.mutable_data<T>(hidden_t.dims(), context.GetPlace());
ops_u_h_tmp.mutable_data<T>(hidden_t.dims(), context.GetPlace());
if (origin_mode) {
// compute output hidden_t = (gate[0] * hidden_prev_t + gate[2] - gate[0] * gate[2])
mpc_operator->mul(&gate_t[0], &hidden_prev_t, &u_h_tmp);
mpc_operator->add(&gate_t[2], &u_h_tmp, &u_h_tmp);
mpc_operator->mul(&gate_t[0], &gate_t[2], &ops_u_h_tmp);
mpc_operator->sub(&u_h_tmp, &ops_u_h_tmp, &hidden_t);
} else {
// compute output hidden_t = (gate[0] * gate[2] + hidden_prev_t - gate[0] * hidden_prev_t)
mpc_operator->mul(&gate_t[0], &gate_t[2], &u_h_tmp);
mpc_operator->add(&hidden_prev_t, &u_h_tmp, &u_h_tmp);
mpc_operator->mul(&gate_t[0], &hidden_prev_t, &ops_u_h_tmp);
mpc_operator->sub(&u_h_tmp, &ops_u_h_tmp, &hidden_t);
}
}
inline void SliceAndReshape(const Tensor* input, Tensor &output, int i) {
// Slice mpc tensor to share[i]
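// e.g. an mpc tensor with dims (2, T, 3D) yields a (T, 3D) view of share i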
output = input->Slice(i, i + 1);
auto dims = output.dims();
output.Resize(paddle::framework::slice_ddim(dims, 1, dims.size()));
}
template<typename DeviceContext, typename T>
inline void ToMpcBatchTensor(const framework::ExecutionContext& context,
Tensor& output, const Tensor& input,
int start, int end) {
//input : (2 , T, x) -> output: (2, end - start, x)
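// Tensor::Slice only cuts along dim 0, so swap the share and row dims,
// slice rows [start, end), then transpose back to put the share dim first.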
auto dims = input.dims();
auto& dev_ctx = context. template device_context<DeviceContext>();
math::Transpose<DeviceContext, T, 3> transpose;
Tensor tmp;
tmp.mutable_data<T>(framework::make_ddim({dims[1], dims[0], dims[2]}), context.GetPlace());
transpose(dev_ctx, input, &tmp, {1, 0, 2});
Tensor tmp_slice = tmp.Slice(start, end);
output.mutable_data<T>(framework::make_ddim({dims[0], end - start, dims[2]}), context.GetPlace());
transpose(dev_ctx, tmp_slice, &output, {1, 0, 2});
}
template<typename DeviceContext, typename T>
inline void Split3Dim(const framework::ExecutionContext& context,
std::vector<Tensor>& output,
const Tensor& input) {
// input : (2, x, 3D) -> output : 3 * (2, x, D)
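// Move the last (3D) dim to the front so each contiguous D-sized block can be
// sliced, then transpose every block back to the (2, x, D) layout.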
auto& dev_ctx = context. template device_context<DeviceContext>();
Tensor tmp_trans;
auto dims = input.dims();
int frame_size = dims[2] / 3;
tmp_trans.mutable_data<T>(framework::make_ddim({dims[2], dims[0], dims[1]}), context.GetPlace());
math::Transpose<DeviceContext, T, 3> transpose;
transpose(dev_ctx, input, &tmp_trans, {2, 0, 1});
for (int i = 0; i < 3; ++i) {
Tensor tmp_slice = tmp_trans.Slice(i * frame_size, (i + 1) * frame_size);
Tensor tmp_re_trans;
tmp_re_trans.mutable_data<T>(framework::make_ddim({dims[0], dims[1], dims[2] / 3}),
context.GetPlace());
transpose(dev_ctx, tmp_slice, &tmp_re_trans, {1, 2, 0});
output.emplace_back(tmp_re_trans);
}
}
template<typename DeviceContext, typename T>
inline void Concat3Dim(const framework::ExecutionContext& context,
Tensor* output,
std::vector<Tensor>& input) {
// input 3 * (2, x, D) -> (2, x, 3D)
math::ConcatFunctor<DeviceContext, T> concat;
auto& input_dims = input[0].dims();
std::vector<int64_t> output_dim{input_dims[0], input_dims[1], input_dims[2] * 3};
output->mutable_data<T>(framework::make_ddim(output_dim), context.GetPlace());
auto& dev_ctx = context. template device_context<DeviceContext>();
concat(dev_ctx, input, 3, output);
}
template<typename DeviceContext, typename T>
inline void SplitWeight(const framework::ExecutionContext& context,
std::vector<Tensor>& splitted_weights,
const Tensor& weight) {
// split weight[0], weight[1], weight[2] with shape (2, D, D) from weight (2, D, 3D)
// note that weight[2]'s data start at offset 2 * D * D of weight's data
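// i.e. each share stores the update/reset gate weights (D x 2D) first,
// followed by the candidate (state) weights (D x D)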
auto& dev_ctx = context. template device_context<DeviceContext>();
auto dims = weight.dims();
auto frame_size = dims[2] / 3;
splitted_weights.resize(3);
auto place = context.GetPlace();
// copy weight[0] weight[1] from weight
Tensor update_weight;
update_weight.mutable_data<T>(framework::make_ddim({2, frame_size, 2 * frame_size}),
place);
//splitted_weights->at(2) = new Tensor();
splitted_weights[2].mutable_data<T>(framework::make_ddim({2, frame_size, frame_size}),
place);
for (int i = 0; i < 2; ++i) {
Tensor weight_s;
Tensor update_weight_s;
Tensor weight_3_s;
SliceAndReshape(&weight, weight_s, i);
SliceAndReshape(&update_weight, update_weight_s, i);
SliceAndReshape(&splitted_weights[2], weight_3_s, i);
T* update_s_data = update_weight_s.mutable_data<T>(place);
T* weight_s_data = weight_s.data<T>();
memcpy(update_s_data, weight_s_data, update_weight_s.numel() * sizeof(T));
// weight[3]
memcpy(weight_3_s.mutable_data<T>(place), weight_s_data + 2 * frame_size * frame_size,
weight_3_s.numel() * sizeof(T));
}
// split update_weight to weight[0] and weight[1]
math::Transpose<DeviceContext, T, 3> transpose;
Tensor weight_trans;
weight_trans.mutable_data<T>(framework::make_ddim({2 * frame_size, 2, frame_size}), place);
transpose(dev_ctx, update_weight, &weight_trans, {2, 0, 1});
for (int i = 0; i < 2; ++i) {
//splitted_weights->at(i) = new Tensor();
splitted_weights[i].mutable_data<T>(framework::make_ddim({2, frame_size, frame_size}), place);
transpose(dev_ctx, weight_trans.Slice(frame_size * i, frame_size * (i + 1)),
&splitted_weights[i], {1, 2, 0});
}
}
template<typename DeviceContext, typename T>
inline void ConcatWeight(const framework::ExecutionContext& context,
Tensor* weight,
std::vector<Tensor>& splitted_weights) {
// concat weight[0], weight[1], weight[2] with shape (2, D, D) to weight (2, D, 3D)
// note that weight[2]'s data append after weight[0] and weight[1]
// weight[0] and weight[1] are concat as shape (2, D, 2D) in axis 2
math::ConcatFunctor<DeviceContext, T> concat;
std::vector<Tensor> update_weight_list;
update_weight_list.resize(2);
auto place = context.GetPlace();
auto& splitted_weights_dims = splitted_weights[0].dims();
std::vector<int64_t> weight_dim{splitted_weights_dims[0], splitted_weights_dims[1],
splitted_weights_dims[2] * 3};
weight->mutable_data<T>(framework::make_ddim(weight_dim), context.GetPlace());
for (int i = 0; i < 2; ++i) {
update_weight_list[i] = splitted_weights[i];
}
auto& dev_ctx = context. template device_context<DeviceContext>();
// Concat update weight and reset weight as update weights
Tensor update_weights;
update_weights.mutable_data<T>(
framework::make_ddim({splitted_weights_dims[0],
splitted_weights_dims[1],
splitted_weights_dims[2] * 2}),
place);
concat(dev_ctx, update_weight_list, 3, &update_weights);
// Concat candidate weight
for (int i = 0; i < 2; ++i) {
Tensor weight_s = weight->Slice(i, i + 1);
Tensor update_weights_s = update_weights.Slice(i, i + 1);
Tensor reset_weight_s = splitted_weights[2].Slice(i, i + 1);
T* weight_s_data = weight_s.mutable_data<T>(place);
T* update_weights_s_data = update_weights_s.data<T>();
T* reset_weight_s_data = reset_weight_s.data<T>();
size_t numel_update = update_weights_s.numel();
memcpy(weight_s_data, update_weights_s_data, numel_update * sizeof(T));
memcpy(weight_s_data + numel_update, reset_weight_s_data, reset_weight_s.numel() * sizeof(T));
}
}
template<typename DeviceContext, typename T>
inline void ConcatBatchOne(const framework::ExecutionContext& context,
Tensor* output,
Tensor& input,
int start,
int end) {
// replace output[2, start:end, x] with input (2, end - start, x)
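// Done by moving the row dim to the front, slicing off the untouched prefix
// [0, start) and suffix [end, dims[1]), and concatenating
// [prefix, input, suffix] back along dim 1.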
auto& dev_ctx = context. template device_context<DeviceContext>();
Tensor tmp_trans;
auto dims = output->dims();
tmp_trans.mutable_data<T>(framework::make_ddim({dims[1], dims[0], dims[2]}), context.GetPlace());
math::Transpose<DeviceContext, T, 3> transpose;
transpose(dev_ctx, *output, &tmp_trans, {1, 0, 2});
Tensor splitted_t0;
Tensor splitted_t2;
Tensor splitted_t0_rec;
Tensor splitted_t2_rec;
std::vector<Tensor> concat_in;
if (start > 0) {
splitted_t0 = tmp_trans.Slice(0, start);
auto t0_dims = splitted_t0.dims();
splitted_t0_rec.mutable_data<T>(framework::make_ddim({t0_dims[1], t0_dims[0], t0_dims[2]}),
context.GetPlace());
transpose(dev_ctx, splitted_t0, &splitted_t0_rec, {1, 0, 2});
concat_in.emplace_back(splitted_t0_rec);
}
concat_in.emplace_back(input);
if (end < dims[1]) {
splitted_t2 = tmp_trans.Slice(end, dims[1]);
auto t2_dims = splitted_t2.dims();
splitted_t2_rec.mutable_data<T>(framework::make_ddim({t2_dims[1], t2_dims[0], t2_dims[2]}),
context.GetPlace());
transpose(dev_ctx, splitted_t2, &splitted_t2_rec, {1, 0, 2});
concat_in.emplace_back(splitted_t2_rec);
}
math::ConcatFunctor<DeviceContext, T> concat;
concat(dev_ctx, concat_in, 1, output);
}
template<typename DeviceContext, typename T>
inline void ConcatBatchAll(const framework::ExecutionContext& context,
Tensor* output,
std::vector<Tensor>& input) {
// Concat all input tensors in dims[1]
math::ConcatFunctor<DeviceContext, T> concat;
auto& dev_ctx = context. template device_context<DeviceContext>();
concat(dev_ctx, input, 1, output);
}
template<typename DeviceContext, typename T>
inline void GRUUnitGradCompute(const framework::ExecutionContext& context,
std::vector<Tensor>& mpc_splitted_gate_t,
std::vector<Tensor>& mpc_splitted_gate_grad_t,
Tensor& mpc_hidden_prev_t, Tensor& mpc_hidden_prev_grad_t,
std::vector<Tensor>& mpc_splitted_weights_t,
std::vector<Tensor>& mpc_splitted_weights_grad_t,
Tensor& mpc_reset_hidden_prev_t, Tensor& mpc_reset_hidden_prev_grad_t,
Tensor& mpc_hidden_grad_t, bool origin_mode,
bool& has_hidden_prev, bool& has_hidden_prev_grad,
bool& has_weight_grad) {
// compute GRUUnitGrad
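// Order: BackwardStateGrad (update/candidate gate grads), then the grads
// w.r.t. reset_hidden_prev and the state weight, then BackwarsResetGrad
// (reset gate grad plus the sigmoid gradients), and finally the grads
// w.r.t. hidden_prev and the gate weights.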
BackwardStateGrad<DeviceContext, T>(context,
mpc_splitted_gate_t, mpc_splitted_gate_grad_t,
mpc_hidden_prev_t, mpc_hidden_prev_grad_t,
mpc_hidden_grad_t,
origin_mode, has_hidden_prev, has_hidden_prev_grad);
PADDLE_ENFORCE_NOT_NULL(mpc::MpcInstance::mpc_protocol,
"Protocol %s is not yet created in MPC Protocol.");
auto mpc_operator = mpc::MpcInstance::mpc_instance()->mpc_protocol()->mpc_operators();
math::Transpose<DeviceContext, T, 3> transpose;
auto& dev_ctx = context. template device_context<DeviceContext>();
std::vector<int> trans_axis{0, 2, 1};
if (has_hidden_prev && has_hidden_prev_grad) {
auto res_hidden_dims = mpc_reset_hidden_prev_grad_t.dims();
// (B, D) * (D, D)^T + (B, D) :
//reset_hidden_prev_grad = batch_gate_grad[2] * state_weight[2] + reset_hidden_prev_grad
Tensor weight_trans, tmp;
weight_trans.mutable_data<T>(mpc_splitted_weights_t[2].dims(), context.GetPlace());
tmp.mutable_data<T>(res_hidden_dims, context.GetPlace());
transpose(dev_ctx, mpc_splitted_weights_t[2], &weight_trans, trans_axis);
mpc_operator->matmul(&mpc_splitted_gate_grad_t[2], &weight_trans, &tmp);
mpc_operator->add(&mpc_reset_hidden_prev_grad_t, &tmp, &mpc_reset_hidden_prev_grad_t);
if (has_weight_grad) {
// (B, D)^T * (B, D) + (D, D)
// state_weight_grad[2] = reset_hidden_prev * batch_gate_grad[2] + state_weight_grad[2]
Tensor tmp1, tmp2;
tmp1.mutable_data<T>(
framework::make_ddim(
std::vector<int64_t>({res_hidden_dims[0], res_hidden_dims[2], res_hidden_dims[1]})),
context.GetPlace());
tmp2.mutable_data<T>(mpc_splitted_weights_t[2].dims(), context.GetPlace());
transpose(dev_ctx, mpc_reset_hidden_prev_t, &tmp1, trans_axis);
mpc_operator->matmul(&tmp1, &mpc_splitted_gate_grad_t[2], &tmp2);
mpc_operator->add(&mpc_splitted_weights_grad_t[2], &tmp2, &mpc_splitted_weights_grad_t[2]);
}
}
BackwarsResetGrad<DeviceContext, T>(context,
mpc_splitted_gate_t, mpc_splitted_gate_grad_t,
mpc_hidden_prev_t, mpc_hidden_prev_grad_t,
mpc_reset_hidden_prev_grad_t,
has_hidden_prev, has_hidden_prev_grad);
if (has_hidden_prev && has_hidden_prev_grad) {
// (B, 2D) * (D, 2D)^T + (B, D)
// hidden_prev_grad = batch_gate_grad * gate_weight + hidden_prev_grad
// block matrix multiplication: A=[block_A1, block_A2], B^T=[block_B1, block_B2]
// A*B = block_A1*block_B1 + block_A2*block_B2
Tensor tmp1, tmp2;
tmp1.mutable_data<T>(mpc_splitted_weights_t[0].dims(), context.GetPlace());
tmp2.mutable_data<T>(mpc_hidden_prev_t.dims(), context.GetPlace());
transpose(dev_ctx, mpc_splitted_weights_t[0], &tmp1, trans_axis);
mpc_operator->matmul(&mpc_splitted_gate_grad_t[0], &tmp1, &tmp2);
mpc_operator->add(&mpc_hidden_prev_grad_t, &tmp2, &mpc_hidden_prev_grad_t);
transpose(dev_ctx, mpc_splitted_weights_t[1], &tmp1, trans_axis);
mpc_operator->matmul(&mpc_splitted_gate_grad_t[1], &tmp1, &tmp2);
mpc_operator->add(&mpc_hidden_prev_grad_t, &tmp2, &mpc_hidden_prev_grad_t);
if (has_weight_grad) {
// (B, D)^T * (B, 2D) + (D, 2D)
// gate_weight_grad = hidden_prev * batch_gate_grad + gate_weight_grad
auto hid_dims = mpc_hidden_prev_t.dims();
Tensor tmp3, tmp4;
tmp3.mutable_data<T>(
framework::make_ddim({hid_dims[0], hid_dims[2], hid_dims[1]}),
context.GetPlace());
tmp4.mutable_data<T>(mpc_splitted_weights_t[0].dims(), context.GetPlace());
transpose(dev_ctx, mpc_hidden_prev_t, &tmp3, trans_axis);
mpc_operator->matmul(&tmp3, &mpc_splitted_gate_grad_t[0], &tmp4);
mpc_operator->add(&mpc_splitted_weights_grad_t[0], &tmp4, &mpc_splitted_weights_grad_t[0]);
mpc_operator->matmul(&tmp3, &mpc_splitted_gate_grad_t[1], &tmp4);
mpc_operator->add(&mpc_splitted_weights_grad_t[1], &tmp4, &mpc_splitted_weights_grad_t[1]);
}
}
}
template<typename DeviceContext, typename T>
inline void BackwardStateGrad(const framework::ExecutionContext& context,
std::vector<Tensor>& mpc_splitted_gate_t,
std::vector<Tensor>& mpc_splitted_gate_grad_t,
Tensor& mpc_hidden_prev_t, Tensor& mpc_hidden_prev_grad_t,
Tensor& mpc_hidden_grad_t,
bool origin_mode, bool has_hidden_prev,
bool has_hidden_prev_grad) {
PADDLE_ENFORCE_NOT_NULL(mpc::MpcInstance::mpc_protocol,
"Protocol %s is not yet created in MPC Protocol.");
auto mpc_operator = mpc::MpcInstance::mpc_instance()->mpc_protocol()->mpc_operators();
math::SetConstant<DeviceContext, T> zero;
auto& dev_ctx = context.template device_context<DeviceContext>();
if (!has_hidden_prev) {
zero(dev_ctx, &mpc_hidden_prev_t, static_cast<T>(0));
}
if (!has_hidden_prev_grad) {
zero(dev_ctx, &mpc_hidden_prev_grad_t, static_cast<T>(0));
}
if (origin_mode) {
// batch_gate_grad[0] = hidden_grad * (hidden_prev - batch_gate[2])
mpc_operator->sub(&mpc_hidden_prev_t, &mpc_splitted_gate_t[2], &mpc_splitted_gate_grad_t[0]);
mpc_operator->mul(&mpc_hidden_grad_t, &mpc_splitted_gate_grad_t[0], &mpc_splitted_gate_grad_t[0]);
// hidden_prev_grad += hidden_grad * batch_gate[0]
Tensor tmp;
tmp.mutable_data<T>(mpc_hidden_prev_grad_t.dims(), context.GetPlace());
mpc_operator->mul(&mpc_hidden_grad_t, &mpc_splitted_gate_t[0], &tmp);
mpc_operator->add(&mpc_hidden_prev_grad_t, &tmp, &mpc_hidden_prev_grad_t);
// batch_gate_grad[2] = activation(hidden_grad * (1-batch_gate[0]), batch_gate[2])
// activation = grad_relu (return a * (b > 0.0 ? 1.0 : 0.0);)
Tensor tmp1;
tmp1.mutable_data<T>(mpc_splitted_gate_grad_t[2].dims(), context.GetPlace());
mpc_operator->mul(&mpc_hidden_grad_t, &mpc_splitted_gate_t[0], &tmp1);
mpc_operator->sub(&mpc_hidden_grad_t, &tmp1, &tmp1);
mpc_operator->relu_grad(&mpc_splitted_gate_t[2], &tmp1, &mpc_splitted_gate_grad_t[2], 0);
} else {
// batch_gate_grad[0] = hidden_grad * (batch_gate[2] - hidden_prev)
mpc_operator->sub(&mpc_splitted_gate_t[2], &mpc_hidden_prev_t, &mpc_splitted_gate_grad_t[0]);
mpc_operator->mul(&mpc_hidden_grad_t, &mpc_splitted_gate_grad_t[0], &mpc_splitted_gate_grad_t[0]);
// hidden_prev_grad += hidden_grad * (1 - batch_gate[0])
Tensor tmp;
tmp.mutable_data<T>(mpc_hidden_prev_grad_t.dims(), context.GetPlace());
mpc_operator->mul(&mpc_hidden_grad_t, &mpc_splitted_gate_t[0], &tmp);
mpc_operator->sub(&mpc_hidden_grad_t, &tmp, &tmp);
mpc_operator->add(&mpc_hidden_prev_grad_t, &tmp, &mpc_hidden_prev_grad_t);
// batch_gate_grad[2] = activation(hidden_grad*batch_gate[0], batch_gate[2])
// activation = grad_relu
Tensor tmp1;
tmp1.mutable_data<T>(mpc_splitted_gate_grad_t[2].dims(), context.GetPlace());
mpc_operator->mul(&mpc_hidden_grad_t, &mpc_splitted_gate_t[0], &tmp1);
mpc_operator->relu_grad(&mpc_splitted_gate_t[2], &tmp1, &mpc_splitted_gate_grad_t[2], 0);
}
}
template<typename DeviceContext, typename T>
inline void BackwarsResetGrad(const framework::ExecutionContext& context,
std::vector<Tensor>& mpc_splitted_gate_t,
std::vector<Tensor>& mpc_splitted_gate_grad_t,
Tensor& mpc_hidden_prev_t, Tensor& mpc_hidden_prev_grad_t,
Tensor& mpc_reset_hidden_prev_grad_t,
bool has_hidden_prev, bool has_hidden_prev_grad) {
PADDLE_ENFORCE_NOT_NULL(mpc::MpcInstance::mpc_protocol,
"Protocol %s is not yet created in MPC Protocol.");
auto mpc_operator = mpc::MpcInstance::mpc_instance()->mpc_protocol()->mpc_operators();
math::SetConstant<DeviceContext, T> zero;
auto& dev_ctx = context.template device_context<DeviceContext>();
if (!has_hidden_prev) {
zero(dev_ctx, &mpc_hidden_prev_t, static_cast<T>(0));
}
if (!has_hidden_prev_grad) {
zero(dev_ctx, &mpc_hidden_prev_grad_t, static_cast<T>(0));
}
if (!has_hidden_prev || !has_hidden_prev_grad) {
zero(dev_ctx, &mpc_reset_hidden_prev_grad_t, static_cast<T>(0));
}
// batch_gate_grad[1] = reset_hidden_grad * hidden_prev
mpc_operator->mul(&mpc_reset_hidden_prev_grad_t, &mpc_hidden_prev_t, &mpc_splitted_gate_grad_t[1]);
// hidden_prev_grad += reset_hidden_grad * batch_gate[1]
Tensor tmp;
tmp.mutable_data<T>(mpc_hidden_prev_grad_t.dims(), context.GetPlace());
mpc_operator->mul(&mpc_reset_hidden_prev_grad_t, &mpc_splitted_gate_t[1], &tmp);
mpc_operator->add(&mpc_hidden_prev_grad_t, &tmp, &mpc_hidden_prev_grad_t);
// batch_gate_grad[0] = sigmoid_grad(batch_gate_grad[0], batch_gate[0])
ComputeSigmoidGrad<T>(context, mpc_splitted_gate_grad_t[0],
mpc_splitted_gate_t[0], mpc_splitted_gate_grad_t[0]);
// batch_gate_grad[1] = sigmoid_grad(batch_gate_grad[1], batch_gate[1])
ComputeSigmoidGrad<T>(context, mpc_splitted_gate_grad_t[1],
mpc_splitted_gate_t[1], mpc_splitted_gate_grad_t[1]);
}
template<typename T>
inline void ComputeSigmoidGrad(const framework::ExecutionContext& context,
Tensor& dy, Tensor& y, Tensor& dx) {
// dx = dy * y * (1.0 - y);
PADDLE_ENFORCE_NOT_NULL(mpc::MpcInstance::mpc_protocol,
                        "MPC protocol is not yet created.");
auto mpc_operator = mpc::MpcInstance::mpc_instance()->mpc_protocol()->mpc_operators();
Tensor tmp;
tmp.mutable_data<T>(dx.dims(), context.GetPlace());
mpc_operator->mul(&y, &y, &tmp);
mpc_operator->sub(&y, &tmp, &tmp);
mpc_operator->mul(&dy, &tmp, &dx);
}
template <typename DeviceContext, typename T>
class MpcGRUGradKernel : public MpcOpKernel<T> {
public:
void BatchCompute(const framework::ExecutionContext& context) const {
bool origin_mode = context.Attr<bool>("origin_mode");
auto* h0 = context.Input<Tensor>("H0");
auto* weight = context.Input<Tensor>("Weight");
const T* weight_data = weight->data<T>();
auto* batch_gate = context.Input<LoDTensor>("BatchGate");
auto* batch_reset_hidden_prev =
context.Input<LoDTensor>("BatchResetHiddenPrev");
auto* batch_hidden = context.Input<LoDTensor>("BatchHidden");
auto* hidden = context.Input<LoDTensor>("Hidden");
auto* hidden_grad =
context.Input<LoDTensor>(framework::GradVarName("Hidden"));
auto* input_grad =
context.Output<LoDTensor>(framework::GradVarName("Input"));
auto* h0_grad = context.Output<Tensor>(framework::GradVarName("H0"));
auto* weight_grad =
context.Output<Tensor>(framework::GradVarName("Weight"));
auto* bias_grad = context.Output<Tensor>(framework::GradVarName("Bias"));
auto gate_dims = batch_gate->dims();
auto hidden_dims = hidden->dims();
auto gate_lod = batch_gate->lod();
const auto& place = context.GetPlace();
bool has_hidden_prev = false;
bool has_hidden_prev_grad = false;
bool has_weight_grad = false;
math::LoDTensor2BatchFunctor<DeviceContext, T> to_batch;
LoDTensor batch_hidden_grad, batch_gate_grad, batch_reset_hidden_prev_grad;
batch_hidden_grad.mutable_data<T>(hidden_dims, context.GetPlace());
batch_gate_grad.mutable_data<T>(gate_dims, context.GetPlace());
batch_reset_hidden_prev_grad.mutable_data<T>(hidden_dims,
context.GetPlace());
math::SetConstant<DeviceContext, T> zero;
auto& dev_ctx = context.template device_context<DeviceContext>();
zero(dev_ctx, &batch_hidden_grad, static_cast<T>(0));
zero(dev_ctx, &batch_gate_grad, static_cast<T>(0));
zero(dev_ctx, &batch_reset_hidden_prev_grad, static_cast<T>(0));
Tensor ordered_h0, ordered_h0_grad;
framework::Vector<size_t> order(gate_lod[2]);
if (h0) {
// Reorder mpc h0
ordered_h0.mutable_data<T>(h0->dims(), place);
for (int i = 0; i < 2; ++i) {
Tensor h0_s;
SliceAndReshape(h0, h0_s, i);
Tensor ordered_h0_s;
SliceAndReshape(&ordered_h0, ordered_h0_s, i);
ReorderInitState<DeviceContext, T>(dev_ctx, h0_s, order, &ordered_h0_s,
true);
}
}
if (h0_grad) {
ordered_h0_grad.mutable_data<T>(h0_grad->dims(), context.GetPlace());
zero(context.template device_context<DeviceContext>(), &ordered_h0_grad,
static_cast<T>(0));
}
bool is_reverse = context.Attr<bool>("is_reverse");
for (int i = 0; i < 2; ++i) {
// mpc LoDTensor to mpc batch
Tensor batch_hidden_grad_s;
SliceAndReshape(&batch_hidden_grad, batch_hidden_grad_s, i);
Tensor hidden_grad_s;
SliceAndReshape(hidden_grad, hidden_grad_s, i);
LoDTensor lod_batch_hidden_grad_s;
LoDTensor lod_hidden_grad_s;
lod_batch_hidden_grad_s.ShareBufferWith(batch_hidden_grad_s);
lod_batch_hidden_grad_s.mutable_data<T>(batch_hidden_grad_s.dims(), place);
lod_hidden_grad_s.ShareBufferWith(hidden_grad_s);
lod_hidden_grad_s.mutable_data<T>(hidden_grad_s.dims(), place);
lod_hidden_grad_s.set_lod(gate_lod);
lod_batch_hidden_grad_s.set_lod(gate_lod);
to_batch(dev_ctx, lod_hidden_grad_s, &lod_batch_hidden_grad_s, false, is_reverse);
}
if (weight_grad) {
T* gate_weight_grad =
weight_grad->mutable_data<T>(context.GetPlace());
zero(dev_ctx, weight_grad, static_cast<T>(0));
has_weight_grad = true;
}
// split weights
std::vector<Tensor> mpc_splitted_weights_t;
SplitWeight<DeviceContext, T>(context, mpc_splitted_weights_t, *weight);
auto batch_starts = gate_lod[0];
size_t num_batch = batch_starts.size() - 1;
for (int n = static_cast<int>(num_batch) - 1; n >= 0; n--) {
int bstart = static_cast<int>(batch_starts[n]);
int bend = static_cast<int>(batch_starts[n + 1]);
int cur_batch_size = bend - bstart;
int bstart_pre = (n == 0) ? 0 : static_cast<int>(batch_starts[n - 1]);
// Split mpc tensors
Tensor mpc_hidden_grad_t;
Tensor mpc_hidden_prev_t;
Tensor mpc_hidden_prev_grad_t;
Tensor mpc_reset_hidden_prev_t;
Tensor mpc_reset_hidden_prev_grad_t;
std::vector<Tensor> splitted_batch_gate_t;
std::vector<Tensor> mpc_splitted_gate_t;
std::vector<Tensor> splitted_batch_gate_grad_t;
std::vector<Tensor> mpc_splitted_gate_grad_t;
std::vector<Tensor> mpc_splitted_weights_grad_t;
if (weight_grad) {
SplitWeight<DeviceContext, T>(context, mpc_splitted_weights_grad_t, *weight_grad);
}
ToMpcBatchTensor<DeviceContext, T>(context, mpc_hidden_grad_t, batch_hidden_grad, bstart, bend);
ToMpcBatchTensor<DeviceContext, T>(context, mpc_reset_hidden_prev_t, *batch_reset_hidden_prev, bstart, bend);
ToMpcBatchTensor<DeviceContext, T>(context, mpc_reset_hidden_prev_grad_t,
batch_reset_hidden_prev_grad, bstart, bend);
Split3Dim<DeviceContext, T>(context, splitted_batch_gate_grad_t, batch_gate_grad);
Split3Dim<DeviceContext, T>(context, splitted_batch_gate_t, *batch_gate);
mpc_splitted_gate_t.resize(3);
mpc_splitted_gate_grad_t.resize(3);
for (int i = 0; i < 3; ++i) {
ToMpcBatchTensor<DeviceContext, T>(context, mpc_splitted_gate_grad_t[i],
splitted_batch_gate_grad_t[i], bstart, bend);
ToMpcBatchTensor<DeviceContext, T>(context, mpc_splitted_gate_t[i],
splitted_batch_gate_t[i], bstart, bend);
}
if (n == 0) {
if (h0) {
// hidden_prev_t = ordered_h0
mpc_hidden_prev_t.mutable_data<T>(
ordered_h0.dims(), place);
framework::TensorCopy(ordered_h0, place, &mpc_hidden_prev_t);
has_hidden_prev = true;
if (h0_grad) {
// hidden_prev_grad_t = ordered_h0_grad
mpc_hidden_prev_grad_t.mutable_data<T>(
ordered_h0_grad.dims(), place);
framework::TensorCopy(ordered_h0_grad, place, &mpc_hidden_prev_grad_t);
has_hidden_prev_grad = true;
}
}
} else {
ToMpcBatchTensor<DeviceContext, T>(context, mpc_hidden_prev_t, *batch_hidden, bstart_pre, bstart);
ToMpcBatchTensor<DeviceContext, T>(context, mpc_hidden_prev_grad_t, batch_hidden_grad, bstart_pre, bstart);
}
// compute GRUUnitGrad
GRUUnitGradCompute<DeviceContext, T>(context,
mpc_splitted_gate_t, mpc_splitted_gate_grad_t,
mpc_hidden_prev_t, mpc_hidden_prev_grad_t,
mpc_splitted_weights_t, mpc_splitted_weights_grad_t,
mpc_reset_hidden_prev_t, mpc_reset_hidden_prev_grad_t,
mpc_hidden_grad_t, origin_mode, has_hidden_prev,
has_hidden_prev_grad, has_weight_grad);
// cancat mpc tensor to gru_grad output variables
if (weight_grad) {
ConcatWeight<DeviceContext, T>(context, weight_grad, mpc_splitted_weights_grad_t);
}
Tensor mpc_batch_gate_grad_t;
Concat3Dim<DeviceContext, T>(context, &mpc_batch_gate_grad_t, mpc_splitted_gate_grad_t);
ConcatBatchOne<DeviceContext, T>(context, &batch_gate_grad, mpc_batch_gate_grad_t, bstart, bend);
if (n > 0) {
    ConcatBatchOne<DeviceContext, T>(context, &batch_hidden_grad, mpc_hidden_prev_grad_t, bstart_pre, bstart);
} else if (h0 && h0_grad) {
    // the first step's hidden_prev is ordered_h0, so its grad is written back to ordered_h0_grad
    framework::TensorCopy(mpc_hidden_prev_grad_t, place, &ordered_h0_grad);
}
ConcatBatchOne<DeviceContext, T>(context, &batch_reset_hidden_prev_grad, mpc_reset_hidden_prev_grad_t, bstart, bend);
}
if (input_grad) {
// batch to lodTensor for mpc input_grad
input_grad->mutable_data<T>(context.GetPlace());
math::Batch2LoDTensorFunctor<DeviceContext, T> to_seq;
batch_gate_grad.set_lod(gate_lod);
for (int i = 0; i < 2; ++i) {
Tensor batch_gate_grad_s;
SliceAndReshape(&batch_gate_grad, batch_gate_grad_s, i);
Tensor input_grad_s;
SliceAndReshape(input_grad, input_grad_s, i);
LoDTensor lod_batch_gate_grad_s;
LoDTensor lod_input_grad_s;
lod_batch_gate_grad_s.ShareBufferWith(batch_gate_grad_s);
lod_batch_gate_grad_s.mutable_data<T>(batch_gate_grad_s.dims(), place);
lod_batch_gate_grad_s.set_lod(gate_lod);
lod_input_grad_s.ShareBufferWith(input_grad_s);
lod_input_grad_s.mutable_data<T>(input_grad_s.dims(), place);
to_seq(dev_ctx, lod_batch_gate_grad_s, &lod_input_grad_s);
}
}
if (bias_grad) {
// col_sum mpc bias_grad
bias_grad->mutable_data<T>(context.GetPlace());
math::ColwiseSum<DeviceContext, T> col_sum;
for (int i = 0; i < 2; ++i) {
Tensor batch_gate_grad_s;
SliceAndReshape(&batch_gate_grad, batch_gate_grad_s, i);
Tensor bias_grad_s;
SliceAndReshape(bias_grad, bias_grad_s, i);
col_sum(dev_ctx, batch_gate_grad_s, &bias_grad_s);
}
}
if (h0 && h0_grad) {
// Reorder mpc h0_grad
for (int i = 0; i < 2; ++i) {
Tensor ordered_h0_grad_s;
SliceAndReshape(&ordered_h0_grad, ordered_h0_grad_s, i);
Tensor h0_grad_s;
SliceAndReshape(h0_grad, h0_grad_s, i);
ReorderInitState<DeviceContext, T>(dev_ctx, ordered_h0_grad_s, order,
&h0_grad_s, false);
}
}
}
void ComputeImpl(const framework::ExecutionContext& context) const override {
BatchCompute(context);
}
};
} // namespace operators
} // namespace paddle
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <algorithm>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#include "paddle/fluid/framework/no_need_buffer_vars_inference.h"
#include "paddle/fluid/framework/var_type_inference.h"
#include "paddle/fluid/framework/op_registry.h"
#include "mpc_lookup_table_v2_op.h"
namespace paddle {
namespace operators {
class MpcLookupTableV2Op : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE_EQ(ctx->HasInput("W"), true,
"Input(W) of LookupTableV2Op should not be null.");
PADDLE_ENFORCE_EQ(ctx->HasInput("Ids"), true,
"Input(Ids) of LookupTableV2Op should not be null.");
PADDLE_ENFORCE_EQ(ctx->HasOutput("Out"), true,
"Output(Out) of LookupTableV2Op should not be null.");
auto table_dims = ctx->GetInputDim("W");
auto ids_dims = ctx->GetInputDim("Ids");
int ids_rank = ids_dims.size();
VLOG(5) << "ids rank is " << ids_rank << std::endl;
PADDLE_ENFORCE_EQ(
table_dims.size(), 3,
"ShapeError: The dimensions of the 'mpc lookup table' must be 3. "
"But received lookup table's dimensions = %d, "
"lookup table's shape = [%s].",
table_dims.size(), table_dims);
PADDLE_ENFORCE_EQ(
ids_dims.size(), 3,
"ShapeError: The dimensions of the 'idexes' must be 3, "
"Other dimensions are not supported temporarily. "
"Received idexes' dimensions = %d, "
"idexes's shape = [%s].",
table_dims.size(), table_dims);
PADDLE_ENFORCE_EQ(
table_dims[0], 2,
"ShapeError: The first dimensions of the 'mpc lookup table' must be 2. "
"But received lookup table's first dimensions = %d.",
table_dims[0]);
PADDLE_ENFORCE_EQ(
ids_dims[0], 2,
"ShapeError: The first dimensions of the 'indexes' must be 2. "
"But received indexes' first dimensions = %d.",
ids_dims[0]);
auto output_dims = framework::vectorize(ids_dims);
output_dims[output_dims.size() - 1] = table_dims[2];
auto out_dims = framework::make_ddim(output_dims);
ctx->SetOutputDim("Out", out_dims);
if (ctx->GetOutputsVarType("Out")[0] ==
framework::proto::VarType::LOD_TENSOR) {
ctx->ShareLoD("Ids", /*->*/ "Out");
}
}
protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override {
auto data_type = OperatorWithKernel::IndicateVarDataType(ctx, "W");
return framework::OpKernelType(data_type, ctx.device_context());
}
};
class MpcLookupTableV2OpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
AddInput("W",
"(Tensor) The input represents embedding tensors, "
"which is a learnable parameter.");
AddInput("Ids",
"An input with type int64 "
"contains the ids to be looked up in W.");
AddOutput("Out", "The lookup results, which have the same type as W.");
AddAttr<bool>("is_sparse",
"(boolean, default false) "
"Sparse update.")
.SetDefault(false);
AddAttr<bool>("is_distributed",
"(boolean, default false) distributed lookup table.")
.SetDefault(false);
AddAttr<int64_t>("padding_idx",
"(int64, default -1) "
"If the value is -1, it makes no effect to lookup. "
"Otherwise the given value indicates padding the output "
"with zeros whenever lookup encounters it in Ids.")
.SetDefault(kNoPadding);
// for parameter prefetch
AddAttr<bool>("remote_prefetch", "").SetDefault(false);
AddAttr<int>("trainer_id", "trainer id from 0 ~ worker_num.").SetDefault(0);
AddAttr<std::vector<int64_t>>("height_sections",
"Height for each output SelectedRows.")
.SetDefault(std::vector<int64_t>({}));
AddAttr<std::vector<std::string>>(
"epmap",
"(string vector, default 127.0.0.1:6164)"
"Server endpoints in the order of input variables for mapping")
.SetDefault({});
AddAttr<std::vector<std::string>>(
"table_names",
"(string vector, the splited table names that will be fetched from "
"parameter server)"
"in the order of input variables for mapping")
.SetDefault({});
AddComment(R"DOC(
Lookup Table V2 Operator.
This operator is used to perform lookups on the parameter W;
the looked-up results are then concatenated into a dense tensor.
The input Ids can carry the LoD (Level of Details) information,
or not. And the output only shares the LoD information with input Ids.
)DOC");
}
};
DECLARE_NO_NEED_BUFFER_VARS_INFERER(MpcLookupTableV2GradOpNoBuffer, "W");
template <typename T>
class MpcLookupTableV2GradOpMaker : public framework::SingleGradOpMaker<T> {
public:
using framework::SingleGradOpMaker<T>::SingleGradOpMaker;
protected:
void Apply(GradOpPtr<T> op) const override {
op->SetType("mpc_lookup_table_v2_grad");
op->SetInput("W", this->Input("W"));
op->SetInput("Ids", this->Input("Ids"));
op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out"));
op->SetOutput(framework::GradVarName("W"), this->InputGrad("W"));
op->SetAttrMap(this->Attrs());
}
};
class MpcLookupTableV2OpGrad : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override {
auto table_dims = ctx->GetInputDim("W");
ctx->SetOutputDim(framework::GradVarName("W"), table_dims);
}
protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override {
auto data_type = OperatorWithKernel::IndicateVarDataType(
ctx, framework::GradVarName("Out"));
return framework::OpKernelType(data_type, ctx.device_context());
}
};
class MpcLookupTableV2OpGradVarTypeInference : public framework::VarTypeInference {
public:
void operator()(framework::InferVarTypeContext* ctx) const override {
auto out_var_name = framework::GradVarName("W");
auto attr = ctx->GetAttr("is_sparse");
bool is_sparse = boost::get<bool>(attr);
if (is_sparse) {
VLOG(3) << "mpc_lookup_table_v2_grad op " << framework::GradVarName("W")
<< " is set to SelectedRows";
ctx->SetOutputType(out_var_name,
framework::proto::VarType::SELECTED_ROWS);
} else {
VLOG(3) << "mpc_lookup_table_v2_grad op " << framework::GradVarName("W")
<< " is set to LoDTensor";
ctx->SetOutputType(out_var_name, framework::proto::VarType::LOD_TENSOR);
}
ctx->SetOutputDataType(out_var_name, ctx->GetInputDataType("W"));
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OPERATOR(mpc_lookup_table_v2, ops::MpcLookupTableV2Op,
ops::MpcLookupTableV2OpMaker,
ops::MpcLookupTableV2GradOpMaker<paddle::framework::OpDesc>,
ops::MpcLookupTableV2GradOpMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(mpc_lookup_table_v2_grad, ops::MpcLookupTableV2OpGrad,
ops::MpcLookupTableV2GradOpNoBuffer,
ops::MpcLookupTableV2OpGradVarTypeInference);
REGISTER_OP_CPU_KERNEL(mpc_lookup_table_v2, ops::MpcLookupTableV2Kernel<int64_t>);
REGISTER_OP_CPU_KERNEL(mpc_lookup_table_v2_grad,
ops::MpcLookupTableV2GradKernel<int64_t>);
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "mpc_op.h"
#include <string>
#include <vector>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/platform/device_context.h"
#include "core/paddlefl_mpc/operators/math/math_function_impl.h"
namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
using LoDTensor = framework::LoDTensor;
using SelectedRows = framework::SelectedRows;
using DDim = framework::DDim;
constexpr int64_t kNoPadding = -1;
template <typename T>
class MpcLookupTableV2Kernel : public MpcOpKernel<T> {
public:
void ComputeImpl(const framework::ExecutionContext &context) const override {
auto *ids_t = context.Input<Tensor>("Ids"); // int tensor
auto *output_t = context.Output<Tensor>("Out"); // float tensor
auto *table_var = context.Input<Tensor>("W");
auto *ids = ids_t->data<T>();
auto *table = table_var->data<T>();
auto *output = output_t->mutable_data<T>(context.GetPlace());
PADDLE_ENFORCE_NOT_NULL(mpc::MpcInstance::mpc_protocol,
"Protocol %s is not yet created in MPC Protocol.");
mpc::MpcInstance::mpc_instance()->mpc_protocol()->
mpc_operators()->matmul(ids_t, table_var, output_t);
}
};
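// Illustrative forward sketch (plaintext view, assuming Ids holds a
// secret-shared one-hot encoding): with vocab_size = 4, emb_dim = 3 and an
// Ids row [0, 1, 0, 0], Out = Ids x W selects the second row of W, i.e. the
// embedding lookup is realized as a secure matmul over the shares.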
template <typename T>
class MpcLookupTableV2GradKernel : public MpcOpKernel<T> {
public:
void ComputeImpl(const framework::ExecutionContext &context) const override {
auto *ids_t = context.Input<Tensor>("Ids");
auto id_dim = ids_t->dims();
auto col_width = id_dim[1];
auto row_width = id_dim[2];
auto *d_output_t = context.Input<Tensor>(framework::GradVarName("Out"));
auto *d_table_t = context.Output<Tensor>(framework::GradVarName("W"));
// transpose ids_t
auto *ids = ids_t->data<T>();
auto *table = d_table_t->mutable_data<T>(context.GetPlace());
auto *output = d_output_t->data<T>();
Tensor ids_trans_t;
auto *ids_trans = ids_trans_t.mutable_data<T>({2, row_width, col_width}, context.GetPlace());
math::Transpose<platform::CPUDeviceContext, T, 3> transpose;
auto& dev_ctx = context. template device_context<platform::CPUDeviceContext>();
transpose(dev_ctx, *ids_t, &ids_trans_t, {0, 2, 1});
PADDLE_ENFORCE_NOT_NULL(mpc::MpcInstance::mpc_protocol, "MPC protocol has not been created in MpcInstance yet.");
mpc::MpcInstance::mpc_instance()->mpc_protocol()->mpc_operators()->matmul(&ids_trans_t, d_output_t, d_table_t);
}
};
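// Backward sketch (plaintext view): dW = Ids^T x dOut. The shared one-hot
// Ids of shape [2, col_width, row_width] is transposed over its last two
// dims (the leading share dim is kept) and fed to the same secure matmul.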
} // namespace operators
} // namespace paddle
......@@ -150,6 +150,7 @@ public:
if (dx) {
dx->mutable_data<T>(ctx.GetPlace());
auto dx_dim = dx->dims();
if (dx->dims().size() > 3) {
dx->Resize({2, x_mat_width, x_mat_height});
}
......@@ -160,7 +161,6 @@ public:
// dx = dout * y'. dx: M x K, dout : M x N, y : K x N
mpc::MpcInstance::mpc_instance()->mpc_protocol()->mpc_operators()->matmul(
&dout_matrix, &y_matrix_trans, dx);
auto dx_dim = dx->dims();
if (dx_dim.size() > 3) {
dx->Resize(dx_dim);
}
......@@ -168,6 +168,7 @@ public:
if (dy) {
dy->mutable_data<T>(ctx.GetPlace());
auto dy_dim = dy->dims();
if (dy->dims().size() > 3) {
dy->Resize({2, y_mat_width, y_mat_height});
}
......@@ -179,7 +180,6 @@ public:
// dy = x' * dout. dy K x N, dout : M x N, x : M x K
mpc::MpcInstance::mpc_instance()->mpc_protocol()->mpc_operators()->matmul(
&x_matrix_trans, &dout_matrix, dy);
auto dy_dim = dy->dims();
if (dy_dim.size() > 3) {
dy->Resize(dy_dim);
}
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/op_registry.h"
#include <unordered_map>
#include "mpc_pool_op.h"
namespace paddle {
namespace operators {
int PoolOutputSize(int input_size, int filter_size, int padding_1,
int padding_2, int stride, bool ceil_mode) {
int output_size;
if (!ceil_mode) {
output_size = (input_size - filter_size + padding_1 + padding_2) / stride + 1;
} else {
output_size = (input_size - filter_size + padding_1 + padding_2 + stride - 1) / stride + 1;
}
PADDLE_ENFORCE_GT(
output_size, 0,
"ShapeError: the output size must be greater than 0. But received: "
"output_size = %d due to the settings of input_size(%d), padding(%d,%d), "
"k_size(%d) and stride(%d). Please check again!",
output_size, input_size, padding_1, padding_2, filter_size, stride);
return output_size;
}
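// Worked example (illustrative values): for input_size = 7, filter_size = 2,
// paddings = 0, stride = 2:
//   ceil_mode = false: (7 - 2 + 0 + 0) / 2 + 1 = 3
//   ceil_mode = true : (7 - 2 + 0 + 0 + 2 - 1) / 2 + 1 = 4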
class MpcPoolOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override{
PADDLE_ENFORCE_EQ(ctx->HasInput("X"), true,
"X(Input) of Pooling should not be null.");
PADDLE_ENFORCE_EQ(ctx->HasOutput("Out"), true,
"Out(Output) of Pooling should not be null.");
std::string pooling_type = ctx->Attrs().Get<std::string>("pooling_type");
std::vector<int> ksize = ctx->Attrs().Get<std::vector<int>>("ksize");
std::vector<int> strides = ctx->Attrs().Get<std::vector<int>>("strides");
std::vector<int> paddings = ctx->Attrs().Get<std::vector<int>>("paddings");
bool ceil_mode = ctx->Attrs().Get<bool>("ceil_mode");
// bool adaptive = ctx->Attrs().Get<bool>("adaptive");
bool global_pooling = ctx->Attrs().Get<bool>("global_pooling");
std::string data_format = ctx->Attrs().Get<std::string>("data_format");
std::string padding_algorithm = ctx->Attrs().Get<std::string>("padding_algorithm");
auto in_x_dims = ctx->GetInputDim("X");
PADDLE_ENFORCE_EQ(in_x_dims.size(), 5,
"ShapeError: the input of Op(pool) should be 5-D Tensor (ciphertext). "
"But received: %u-D Tensor and it's shape is [%s].",
in_x_dims.size(), in_x_dims);
PADDLE_ENFORCE_EQ(in_x_dims.size() - ksize.size(), 3U,
"ShapeError: the dimension of input(ciphertext) minus the size of "
"Attr(ksize)(plaintext) must be euqal to 3 in Op(pool). "
"But received: the dimension of input minus the size "
"of Attr(ksize) is %d, the "
"input's dimension is %d, the shape of input "
"is [%s], the Attr(ksize)'s size is %d, the Attr(ksize) is [%s].",
in_x_dims.size() - ksize.size(), in_x_dims.size(), in_x_dims,
ksize.size(), framework::make_ddim(ksize));
PADDLE_ENFORCE_EQ(ksize.size(), strides.size(),
"ShapeError: the size of Attr(ksize) and Attr(strides) in "
"Op(pool) must be equal. "
"But received: Attr(ksize)'s size is %d, Attr(strides)'s "
"size is %d, Attr(ksize) is [%s], Attr(strides)is [%s].",
ksize.size(), strides.size(), framework::make_ddim(ksize),
framework::make_ddim(strides));
PADDLE_ENFORCE_EQ(data_format, "NCHW",
"data format can only be 'NCHW' ",
in_x_dims.size(), in_x_dims);
// update paddings if "SAME" or global_pooling
framework::DDim data_dims;
data_dims = framework::slice_ddim(in_x_dims, 3, in_x_dims.size());
UpdatePadding(&paddings, global_pooling, padding_algorithm,
data_dims, strides, ksize);
if (global_pooling) {
UpdateKsize(&ksize, data_dims);
}
std::vector<int64_t> output_shape;
std::vector<int64_t> one_hot_tensor_shape;
for (int i = 0; i < data_dims.size(); ++i) {
if ((!ctx->IsRuntime()) && (data_dims[i] < 0)) {
output_shape.push_back(data_dims[i]);
} else {
output_shape.push_back(
PoolOutputSize(data_dims[i], ksize[i], paddings[2 * i],
paddings[2 * i + 1], strides[i], ceil_mode));
}
}
output_shape.insert(output_shape.begin(), in_x_dims[0]); // share size
output_shape.insert(output_shape.begin() + 1, in_x_dims[1]); // output_N = input_N
output_shape.insert(output_shape.begin() + 2, in_x_dims[2]); // output_C = input_C
one_hot_tensor_shape.push_back(in_x_dims[0]); // share size
one_hot_tensor_shape.push_back(in_x_dims[1]); // input_N
one_hot_tensor_shape.push_back(in_x_dims[2]); // input_C
one_hot_tensor_shape.push_back(ksize[0] * ksize[1]);
one_hot_tensor_shape.push_back(output_shape[3] * output_shape[4]);
ctx->SetOutputDim("Out", framework::make_ddim(output_shape));
ctx->ShareLoD("X", "Out");
ctx->SetOutputDim("One_hot_tensor", framework::make_ddim(one_hot_tensor_shape));
ctx->ShareLoD("X", "One_hot_tensor");
}
protected:
framework::OpKernelType GetExpectedKernelType(const framework::ExecutionContext& ctx) const {
framework::LibraryType library_{framework::LibraryType::kPlain};
std::string data_format = "AnyLayout";
framework::DataLayout layout_ = framework::StringToDataLayout(data_format);
return framework::OpKernelType(
OperatorWithKernel::IndicateVarDataType(ctx, "X"), ctx.GetPlace(),
layout_, library_);
}
framework::OpKernelType GetKernelTypeForVar(
const std::string& var_name, const Tensor& tensor,
const framework::OpKernelType& expected_kernel_type) const {
return framework::OpKernelType(expected_kernel_type.data_type_,
tensor.place(), tensor.layout());
}
};
class MpcPoolOpGrad : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override{
PADDLE_ENFORCE_EQ(ctx->HasInput("X"), true, "Input(X) must not be null.");
PADDLE_ENFORCE_EQ(ctx->HasOutput(framework::GradVarName("X")), true,
"Input(X@GRAD) should not be null.");
ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X"));
}
protected:
framework::OpKernelType GetExpectedKernelType(const framework::ExecutionContext& ctx) const {
framework::LibraryType library_{framework::LibraryType::kPlain};
std::string data_format = "AnyLayout";
framework::DataLayout layout_ = framework::StringToDataLayout(data_format);
auto input_data_type = OperatorWithKernel::IndicateVarDataType(ctx, "X");
return framework::OpKernelType(input_data_type, ctx.GetPlace(), layout_, library_);
}
framework::OpKernelType GetKernelTypeForVar(
const std::string& var_name, const Tensor& tensor,
const framework::OpKernelType& expected_kernel_type) const {
return framework::OpKernelType(expected_kernel_type.data_type_,
tensor.place(), tensor.layout());
}
};
class MpcPool2dOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override{
AddInput("X",
"(Tensor) The input tensor of pooling operator. "
"The format of input tensor is NCHW, where N is batch size, C is the "
"number of channels, H is the height of the feature, "
"and W is the width of the feature.");
AddOutput("Out",
"(Tensor) The output tensor of pooling operator. "
"The format of output tensor is also NCHW, "
"where N is batch size, C is the number of channels, "
"H is the height of the feature, "
"and W is the width of the feature.");
AddOutput("One_hot_tensor",
"one hot tensor");
AddAttr<std::string>("pooling_type",
"(string), pooling type, can be \"max\" for max-pooling "
"and \"avg\" for average-pooling.")
.InEnum({"max", "avg"});
AddAttr<std::vector<int>>("ksize",
"(vector<int>) The pooling window "
"size(height, width) of the pooling operator. "
"If global_pooling = true, ksize and paddings will "
"be ignored.");
AddAttr<bool>("global_pooling",
"(bool) Whether to use the global pooling. "
"If global_pooling = true, kernel size and paddings will be ignored. "
"Default False.")
.SetDefault(false);
AddAttr<std::vector<int>>("strides",
"(vector<int>, default {1, 1}), strides(height, "
"width) of pooling operator.")
.SetDefault({1, 1});
AddAttr<std::vector<int>>("paddings",
"(vector<int>, default {0,0}), paddings(height_top, height_bottom, "
"width_left, wifth_right) of pooling operator."
"If global_pooling = true, paddings and kernel size will be ignored.")
.SetDefault({0, 0});
AddAttr<bool>("exclusive",
"(bool) When true, will exclude the zero-padding in the "
"averaging calculating, otherwise, include the zero-padding. Note, it "
"is only used when pooling_type is avg. The default is True. "
"Default True.")
.SetDefault(true);
AddAttr<bool>("ceil_mode",
"(bool) Whether to use the ceil function to calculate "
"output height and width. False is the default. If it is set to False, "
"the floor function will be used. Default False")
.SetDefault(false);
AddAttr<std::string>("data_format",
"(string, default NCHW) Only used in "
"An optional string from: \"NHWC\", \"NCHW\". "
"Defaults to \"NHWC\". Specify the data format of the output data, "
"the input will be transformed automatically. ")
.SetDefault("NCHW");
AddAttr<bool>("is_test",
"(bool, default false) Set to true for inference only, false "
"for training. Some layers may run faster when this is true.")
.SetDefault(false);
AddAttr<std::string>("padding_algorithm",
"(string, default \"EXPLICIT\") An optional string from: \"EXPLICIT\","
"\"SAME\",\"VALID\". Set to \"EXPLICIT\" for explicit padding. "
"Set to \"SAME\" or \"VALID\" for algorithm of padding. ")
.SetDefault("EXPLICIT");
AddComment(R"DOC(
This operation calculates the pooling output based on
the input, pooling_type and pool_size, pool_stride, pool_padding parameters.
Input(X) and Output(Out) are in NCHW or NHWC format, where N is batch size, C is the
number of channels, H is the height of the feature, and W is the width of the feature.
Parameters(pool_size, pool_stride, pool_padding) hold two integer elements.
These two elements represent height and width, respectively.
The input(X) size and output(Out) size may be different.
)DOC");
}
};
class MpcPoolOpInferVarType : public framework::PassInDtypeAndVarTypeToOutput {
protected:
std::unordered_map<std::string, std::string>& GetInputOutputWithSameType() const override {
static std::unordered_map<std::string, std::string> m{{"X", /*->*/ "Out"}};
return m;
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OPERATOR(
mpc_pool2d, ops::MpcPoolOp, ops::MpcPool2dOpMaker, ops::MpcPoolOpInferVarType,
paddle::framework::DefaultGradOpMaker<paddle::framework::OpDesc, true>,
paddle::framework::DefaultGradOpMaker<paddle::imperative::OpBase, true>);
REGISTER_OPERATOR(mpc_pool2d_grad, ops::MpcPoolOpGrad);
REGISTER_OP_CPU_KERNEL(
mpc_pool2d, ops::MpcPoolKernel<paddle::platform::CPUDeviceContext, int64_t>);
REGISTER_OP_CPU_KERNEL(
mpc_pool2d_grad, ops::MpcPoolGradKernel<paddle::platform::CPUDeviceContext, int64_t>);
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <algorithm>
#include <string>
#include <vector>
#include "mpc_op.h"
namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
using DDim = framework::DDim;
template <typename T = int>
inline void UpdatePadding(std::vector<T>* paddings, const bool global_pooling,
const std::string& padding_algorithm,
const framework::DDim data_dims,
const std::vector<T>& strides,
const std::vector<T>& ksize) {
// set padding size == data_dims.size() * 2
auto data_shape = framework::vectorize<T>(data_dims);
if (static_cast<int>(paddings->size()) == data_dims.size()) {
for (int i = 0; i < data_dims.size(); ++i) {
T copy_pad = *(paddings->begin() + 2 * i);
paddings->insert(paddings->begin() + 2 * i + 1, copy_pad);
}
} else {
PADDLE_ENFORCE_EQ(data_dims.size() * 2, paddings->size(),
"Paddings size should be the same or twice as the pooling size.");
}
// when padding_algorithm is "VALID" or "SAME"
if (padding_algorithm == "SAME") {
for (int i = 0; i < data_dims.size(); ++i) {
T out_size = (data_dims[i] + strides[i] - 1) / strides[i];
T pad_sum = std::max((out_size - 1) * strides[i] + ksize[i] - data_shape[i], static_cast<T>(0));
T pad_0 = pad_sum / 2;
T pad_1 = pad_sum - pad_0;
*(paddings->begin() + i * 2) = pad_0;
*(paddings->begin() + i * 2 + 1) = pad_1;
}
} else if (padding_algorithm == "VALID") {
for (auto it = paddings->begin(); it != paddings->end(); it++) {
*it = 0;
}
}
// if global_pooling == true, paddings will be ignored
if (global_pooling) {
for (auto it = paddings->begin(); it != paddings->end(); it++) {
*it = 0;
}
}
}
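// Worked example (illustrative values) for padding_algorithm == "SAME":
// with data_dims[i] = 5, strides[i] = 2, ksize[i] = 3:
//   out_size = (5 + 2 - 1) / 2 = 3
//   pad_sum  = max((3 - 1) * 2 + 3 - 5, 0) = 2, so pad_0 = 1, pad_1 = 1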
template <typename T = int>
inline void UpdateKsize(std::vector<T>* ksize,
const framework::DDim data_dims) {
ksize->resize(static_cast<size_t>(data_dims.size()));
for (size_t i = 0; i < ksize->size(); ++i) {
*(ksize->begin() + i) = static_cast<T>(data_dims[i]);
}
}
template <typename T, typename Func>
void VisitDataStrideWise(DDim in_dims, DDim out_dims,
std::vector<int>& ksize, std::vector<int>& strides, std::vector<int>& paddings,
const T* src, T* target, int src_stride, int target_stride, Func visitor) {
const int share_size = in_dims[0];
const int batch_size = in_dims[1];
const int channel_size = in_dims[2];
const int input_height = in_dims[3];
const int input_width = in_dims[4];
const int out_height = out_dims[3];
const int out_width = out_dims[4];
const int out_mat_numel = out_height * out_width;
const int ksize_height = ksize[0];
const int ksize_width = ksize[1];
const int filter_numel = ksize_height * ksize_width;
const int stride_height = strides[0];
const int stride_width = strides[1];
const int padding_height = paddings[0];
const int padding_width = paddings[1];
int hstart, hend;
int wstart, wend;
int idx = 0;
while (idx++ < batch_size * channel_size) {
for (size_t ph = 0; ph < out_height; ++ph) {
hstart = ph * stride_height - padding_height;
hend = std::min(hstart + ksize_height, input_height);
hstart = std::max(hstart, 0);
for (size_t pw = 0; pw < out_width; ++pw) {
wstart = pw * stride_width - padding_width;
wend = std::min(wstart + ksize_width, input_width);
wstart = std::max(wstart, 0);
visitor(ph, pw, input_height, input_width, out_height, out_width, hstart, hend,
wstart, wend, src, target);
}
}
src += src_stride;
target += target_stride;
}
}
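// Worked example (illustrative values): input_height = input_width = 4,
// ksize = {2, 2}, strides = {2, 2}, paddings = {0, 0} gives a 2 x 2 output.
// At (ph, pw) = (1, 1) the visitor is called with hstart = 2, hend = 4,
// wstart = 2, wend = 4, i.e. the bottom-right 2 x 2 window of the input.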
template <typename DeviceContext, typename T>
class MpcPoolKernel : public MpcOpKernel<T> {
public:
void ComputeImpl(const framework::ExecutionContext &context) const override {
const Tensor* in_x = context.Input<Tensor>("X");
Tensor* out = context.Output<Tensor>("Out");
Tensor* out_one_hot_tensor = context.Output<Tensor>("One_hot_tensor");
std::vector<int> ksize = context.Attr<std::vector<int>>("ksize");
std::vector<int> strides = context.Attr<std::vector<int>>("strides");
std::vector<int> paddings = context.Attr<std::vector<int>>("paddings");
std::string data_format = context.Attr<std::string>("data_format"); // NCHW
bool global_pooling = context.Attr<bool>("global_pooling");
std::string padding_algorithm =
context.Attr<std::string>("padding_algorithm");
const T* in_x_data = in_x->data<T>();
T* output_data = out->mutable_data<T>(context.GetPlace());
T* one_hot_tensor_data = out_one_hot_tensor->mutable_data<T>(context.GetPlace());
// update paddings
auto in_x_dims = in_x->dims();
auto out_dims = out->dims();
const int input_stride = in_x_dims[3] * in_x_dims[4];
const int output_stride = out_dims[3] * out_dims[4];
const int one_hot_tensor_stride = ksize[0] * ksize[1] * out_dims[3] * out_dims[4];
// create temp tensor
auto& dev_ctx = context.template device_context<DeviceContext>();
Tensor input2col = context.AllocateTmpTensor<T, DeviceContext>(out_one_hot_tensor->dims(), dev_ctx);
T* input2col_data = input2col.data<T>();
std::fill(input2col_data, input2col_data + input2col.numel(), static_cast<T>(0));
framework::DDim data_dims;
data_dims = framework::slice_ddim(in_x_dims, 3, in_x_dims.size());
// update padding => h, w
UpdatePadding(&paddings, global_pooling, padding_algorithm,
data_dims, strides, ksize);
if (data_dims.size() * 2 == static_cast<int>(paddings.size())) {
for (int i = 0; i < data_dims.size(); ++i) {
paddings.erase(paddings.begin() + i + 1);
}
}
if (global_pooling) {
UpdateKsize(&ksize, data_dims);
}
// share0, share1
const int input_plaintext_size = in_x->numel() / 2;
const int input2col_plaintext_size = out_one_hot_tensor->numel() / 2;
// im2col
auto get_im2col = [=] (int ph, int pw, int input_height, int input_width, int out_height, int out_width,
int hstart, int hend, int wstart, int wend, const T* src, T* target) {
size_t out_index = ph * out_width + pw;
size_t offset = out_height * out_width;
size_t index = 0;
for (int h = hstart; h < hend; ++h) {
for (int w = wstart; w < wend; ++w) {
target[out_index + index * offset] = src[h * input_width + w]; // share0
target[out_index + index * offset + input2col_plaintext_size] =
src[h * input_width + w + input_plaintext_size]; // share1
++index;
}
}
};
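// Illustrative layout (hypothetical sizes): with ksize = {2, 2} and an
// output of 3 x 3, each (b, c) slice of input2col is a (2*2) x (3*3)
// matrix; column (ph * 3 + pw) holds the 4 input values covered by the
// pooling window at (ph, pw), and share1 values are stored at an offset of
// input2col_plaintext_size from share0.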
// input2col
// convert in_x_data (S, B, C, H, W) into (S, B, C, filter_size * filter_size, H_output * W_output)
VisitDataStrideWise(in_x_dims, out_dims, ksize, strides, paddings, in_x_data, input2col_data, input_stride, one_hot_tensor_stride, get_im2col);
const T* input2col_data2 = input2col.data<T>();
// maxpooling(input2col_trans), return(max2col, out_one_hot_tensor_trans)
// input2col_trans: (S, filter_size * filter_size, B, C, H_output * W_output)
// max2col: (S, 1, B, C, H_output * W_output)
// out_one_hot_tensor_trans: (S, filter_size * filter_size, B, C, H_output * W_output)
Tensor input2col_trans;
DDim in2col_dims = input2col.dims();
T* input2col_trans_data = input2col_trans.mutable_data<T>(in2col_dims, context.GetPlace());
input2col_trans.Resize({in2col_dims[0], in2col_dims[3], in2col_dims[1], in2col_dims[2], in2col_dims[4]});
Tensor max2col;
max2col.ShareDataWith(*out);
max2col.Resize({in2col_dims[0], 1, in2col_dims[1], in2col_dims[2], in2col_dims[4]});
Tensor out_one_hot_tensor_trans;
out_one_hot_tensor_trans.mutable_data<T>(out_one_hot_tensor->dims(), context.GetPlace());
out_one_hot_tensor_trans.Resize({in2col_dims[0], in2col_dims[3], in2col_dims[1], in2col_dims[2], in2col_dims[4]});
// convert input2col (S, B, C, filter_size * filter_size, H_output * W_output)
// into input2col_trans (S, filter_size * filter_size, B, C, H_output * W_output)
const int Rank = 5;
Eigen::array<int, Rank> permute;
permute = {0, 3, 1, 2, 4};
auto eigen_in = framework::EigenTensor<T, Rank>::From(input2col);
auto eigen_out = framework::EigenTensor<T, Rank>::From(input2col_trans);
auto* dev = dev_ctx.eigen_device();
eigen_out.device(*dev) = eigen_in.shuffle(permute);
// maxpooling
mpc::MpcInstance::mpc_instance()->mpc_protocol()->mpc_operators()->max_pooling(
&input2col_trans, &max2col, &out_one_hot_tensor_trans);
permute = {0, 2, 3, 1, 4};
// convert out_one_hot_tensor_trans: (S, filter_size * filter_size, B, C, H_output * W_output)
// into out_one_hot_tensor (S, B, C, filter_size * filter_size, H_output * W_output)
auto eigen_in2 = framework::EigenTensor<T, Rank>::From(out_one_hot_tensor_trans);
auto eigen_out2 = framework::EigenTensor<T, Rank>::From(*out_one_hot_tensor);
eigen_out2.device(*dev) = eigen_in2.shuffle(permute);
// convert max2col: (S, 1, B, C, H_output * W_output)
// into out_one_hot_tensor (S, B, C, 1, H_output * W_output)
auto eigen_in3 = framework::EigenTensor<T, Rank>::From(max2col);
// flatten height & width
auto flatten_out_dims = out_dims;
flatten_out_dims[3] = 1;
flatten_out_dims[4] = out_dims[3] * out_dims[4];
out->Resize(flatten_out_dims);
auto eigen_out3 = framework::EigenTensor<T, Rank>::From(*out);
eigen_out3.device(*dev) = eigen_in3.shuffle(permute);
// reshape out (S, 1, B, C, H_output * W_output)
// into (S, B, C, H_output * W_output)
out->Resize(out_dims);
}
};
template <typename DeviceContext, typename T>
class MpcPoolGradKernel : public MpcOpKernel<T> {
public:
void ComputeImpl(const framework::ExecutionContext &context) const override {
const Tensor* one_hot_tensor = context.Input<Tensor>("One_hot_tensor");
const Tensor* out_grad = context.Input<Tensor>(framework::GradVarName("Out"));
Tensor* in_x_grad = context.Output<Tensor>(framework::GradVarName("X"));
std::vector<int> ksize = context.Attr<std::vector<int>>("ksize");
std::vector<int> strides = context.Attr<std::vector<int>>("strides");
std::vector<int> paddings = context.Attr<std::vector<int>>("paddings");
std::string data_format = context.Attr<std::string>("data_format"); // NCHW
bool global_pooling = context.Attr<bool>("global_pooling");
std::string padding_algorithm =
context.Attr<std::string>("padding_algorithm");
if (in_x_grad) {
// update padding => h, w
auto in_x_dims = in_x_grad->dims();
auto out_dims = out_grad->dims();
framework::DDim data_dims;
data_dims = framework::slice_ddim(in_x_dims, 3, in_x_dims.size());
UpdatePadding(&paddings, global_pooling, padding_algorithm,
data_dims, strides, ksize);
if (data_dims.size() * 2 == static_cast<int>(paddings.size())) {
for (int i = 0; i < data_dims.size(); ++i) {
paddings.erase(paddings.begin() + i + 1);
}
}
if (global_pooling) {
UpdateKsize(&ksize, data_dims);
}
// create temp tensor
auto& dev_ctx = context.template device_context<DeviceContext>();
Tensor expanded_out_grad_tensor =
context.AllocateTmpTensor<T, DeviceContext>(one_hot_tensor->dims(), dev_ctx);
Tensor mul_result_tensor =
context.AllocateTmpTensor<T, DeviceContext>(one_hot_tensor->dims(), dev_ctx);
// create data var of input and output variable
T* in_x_grad_data = in_x_grad->mutable_data<T>(context.GetPlace());
std::fill(in_x_grad_data, in_x_grad_data + in_x_grad->numel(), static_cast<T>(0));
const T* one_hot_tensor_data = one_hot_tensor->data<T>();
const T* out_grad_data = out_grad->data<T>();
T* expanded_out_grad_data = expanded_out_grad_tensor.data<T>();
T* mul_result_data = mul_result_tensor.data<T>();
const int filter_numel = ksize[0] * ksize[1];
// stride = h * w
const int input_stride = in_x_dims[3] * in_x_dims[4];
const int output_stride = out_dims[3] * out_dims[4];
const int one_hot_tensor_stride = ksize[0] * ksize[1] * out_dims[3] * out_dims[4];
// stride: share0, share1
const int input_plaintext_size = in_x_grad->numel() / 2;
const int output_plaintext_size = out_grad->numel() / 2;
const int one_hot_tensor_plaintext_size = one_hot_tensor->numel() / 2;
// expand out grad
auto get_expand_out_grad = [=] (int ph, int pw, int input_height, int input_width,
int out_height, int out_width, int hstart, int hend,
int wstart, int wend, const T* src, T* target) {
size_t out_grad_index = ph * out_width + pw;
size_t offset = out_height * out_width;
for (size_t index = 0; index < filter_numel; ++index) {
target[out_grad_index + index * offset] = src[out_grad_index]; //share0
target[out_grad_index + index * offset + one_hot_tensor_plaintext_size] =
src[out_grad_index + output_plaintext_size]; // share1
}
};
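// Illustrative example (hypothetical sizes): with ksize = {2, 2}
// (filter_numel = 4), every output-grad element is replicated into the 4
// rows of its column, so multiplying by the one-hot tensor afterwards routes
// the gradient only to the window position that produced the max.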
// expand [S, B, C, H_poolout, W_poolout] into [S, B, C, ksize * ksize, H_poolout*W_poolout]
VisitDataStrideWise(in_x_dims, out_dims, ksize, strides, paddings, out_grad_data,
expanded_out_grad_data, output_stride, one_hot_tensor_stride, get_expand_out_grad);
// compute mul result = out_grad.expand * one_hot_tensor
mpc::MpcInstance::mpc_instance()->mpc_protocol()->mpc_operators()->arith_bool_mul(
&expanded_out_grad_tensor, one_hot_tensor, &mul_result_tensor);
// update input X's grad
auto update_in_grad = [=] (int ph, int pw,
int input_height, int input_width,
int out_height, int out_width,
int hstart, int hend, int wstart, int wend,
const T* src, T* target) {
size_t index = 0;
size_t in_pos = 0;
size_t out_grad_index = ph * out_width + pw;
size_t res_offset = out_height * out_width;
for (int h = hstart; h < hend; ++h) {
for (int w = wstart; w < wend; ++w) {
in_pos = h * input_width + w;
target[in_pos] += src[out_grad_index + index * res_offset]; // share0
target[in_pos + input_plaintext_size] +=
src[out_grad_index + index * res_offset + one_hot_tensor_plaintext_size]; // share1
++index;
}
}
};
// convert [S, B, C, filter_size * filter_size, H_output * W_output] into [S, B, C, H, W]
VisitDataStrideWise(in_x_dims, out_dims, ksize, strides, paddings, mul_result_data,
in_x_grad_data, one_hot_tensor_stride, input_stride, update_in_grad);
} //if (in_x_grad)
} // void ComputeImpl
}; // class MpcPoolGradKernel
} // namespace operators
} // namespace paddle
......@@ -25,7 +25,8 @@ class MpcReluOp : public framework::OperatorWithKernel {
void InferShape(framework::InferShapeContext* ctx) const override {
auto in_dims = ctx->GetInputDim("X");
ctx->SetOutputDim("Y", in_dims);
ctx->SetOutputDim("Out", in_dims);
ctx->SetOutputDim("Derivative", in_dims);
}
};
......@@ -34,7 +35,8 @@ class MpcReluOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
AddInput("X", "The input tensor.");
AddOutput("Y", "Output of relu_op");
AddOutput("Out", "Output of relu_op");
AddOutput("Derivative", "Derivative of relu_op");
AddComment(R"DOC(
Mpc Relu Operator.
)DOC");
......@@ -47,7 +49,7 @@ class MpcReluGradOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override {
auto in_dims = ctx->GetInputDim(framework::GradVarName("Y"));
auto in_dims = ctx->GetInputDim(framework::GradVarName("Out"));
ctx->SetOutputDim(framework::GradVarName("X"), in_dims);
}
};
......@@ -61,8 +63,9 @@ public:
protected:
void Apply(GradOpPtr<T> grad) const override {
grad->SetType("mpc_relu_grad");
grad->SetInput("Y", this->Output("Y"));
grad->SetInput(framework::GradVarName("Y"), this->OutputGrad("Y"));
grad->SetInput("Out", this->Output("Out"));
grad->SetInput("Derivative", this->Output("Derivative"));
grad->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out"));
grad->SetAttrMap(this->Attrs());
grad->SetOutput(framework::GradVarName("X"), this->InputGrad("X"));
}
......
......@@ -25,11 +25,14 @@ class MpcReluKernel : public MpcOpKernel<T> {
public:
void ComputeImpl(const framework::ExecutionContext& ctx) const override {
const Tensor* in_t = ctx.Input<Tensor>("X");
Tensor* out_t = ctx.Output<Tensor>("Y");
Tensor* out_t = ctx.Output<Tensor>("Out");
Tensor* der_t = ctx.Output<Tensor>("Derivative");
auto x = in_t->data<T>();
auto y = out_t->mutable_data<T>(ctx.GetPlace());
auto der = der_t->mutable_data<T>(ctx.GetPlace());
PADDLE_ENFORCE_NOT_NULL(mpc::MpcInstance::mpc_protocol, "MPC protocol has not been created in MpcInstance yet.");
mpc::MpcInstance::mpc_instance()->mpc_protocol()->mpc_operators()->relu(in_t,out_t);
mpc::MpcInstance::mpc_instance()->mpc_protocol()->mpc_operators()
->relu_with_derivative(in_t,out_t, der_t);
}
};
......@@ -38,11 +41,12 @@ template <typename DeviceContext, typename T>
class MpcReluGradKernel : public MpcOpKernel<T> {
public:
void ComputeImpl(const framework::ExecutionContext& ctx) const override {
auto* dy_t = ctx.Input<Tensor>(framework::GradVarName("Y"));
auto* y_t = ctx.Input<Tensor>("Y");
auto* dy_t = ctx.Input<Tensor>(framework::GradVarName("Out"));
auto* y_t = ctx.Input<Tensor>("Out");
auto* der_t = ctx.Input<Tensor>("Derivative");
auto* dx_t = ctx.Output<Tensor>(framework::GradVarName("X"));
auto dx = dx_t->mutable_data<T>(ctx.GetPlace());
mpc::MpcInstance::mpc_instance()->mpc_protocol()->mpc_operators()->relu_grad(y_t, dy_t, dx_t, 0.0);
mpc::MpcInstance::mpc_instance()->mpc_protocol()->mpc_operators()->arith_bool_mul(dy_t, der_t, dx_t);
}
};
......
......@@ -72,12 +72,6 @@ public:
" but the received var(%s)'s type is %s",
ctx->InputVarName("Param"), in_var_type);
ctx->SetOutputType("ParamOut", in_var_type);
//for (auto &out_var_n : framework::StaticGraphVarTypeInference::Output(ctx, "ParamOut")) {
// if (ctx->GetVarType(out_var_n) != in_var_type) {
// ctx->SetType(out_var_n, in_var_type);
//}
//}
}
};
......@@ -111,4 +105,4 @@ REGISTER_OPERATOR(
ops::MpcSGDOpInferVarType);
REGISTER_OP_CPU_KERNEL(
mpc_sgd,
ops::MpcSGDOpKernel<paddle::platform::CPUDeviceContext, int64_t>);
ops::MpcSGDOpKernel<paddle::platform::CPUDeviceContext, int64_t, float>);
......@@ -19,7 +19,7 @@
namespace paddle {
namespace operators {
template <typename DeviceContext, typename T>
template <typename DeviceContext, typename T, typename T1>
class MpcSGDOpKernel : public MpcOpKernel<T> {
public:
void ComputeImpl(const framework::ExecutionContext &ctx) const override{
......@@ -47,14 +47,14 @@ class MpcSGDOpKernel : public MpcOpKernel<T> {
PADDLE_ENFORCE_EQ(param->numel(), sz);
PADDLE_ENFORCE_EQ(grad->numel(), sz);
const double *lr = learning_rate->data<double>();
double lr = *learning_rate->data<T1>();
param_out->mutable_data<T>(ctx.GetPlace());
PADDLE_ENFORCE_NOT_NULL(mpc::MpcInstance::mpc_protocol, "MPC protocol has not been created in MpcInstance yet.");
// update parameters
framework::Tensor temp;
temp.mutable_data<T>(param->dims(), ctx.GetPlace());
mpc::MpcInstance::mpc_instance()->mpc_protocol()->mpc_operators()->scale(grad, lr[0], &temp);
mpc::MpcInstance::mpc_instance()->mpc_protocol()->mpc_operators()->scale(grad, lr, &temp);
mpc::MpcInstance::mpc_instance()->mpc_protocol()->mpc_operators()->sub(param, &temp, param_out);
}
};
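// SGD update sketch (plaintext view, illustrative learning rate lr = 0.1):
// temp = lr * grad via the protocol's scale op, then
// param_out = param - temp, i.e. the usual step param - lr * grad.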
......
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <memory>
#include <string>
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
#include "mpc_softmax_with_cross_entropy_op.h"
namespace paddle {
namespace operators {
class MpcSoftmaxWithCrossEntropyOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE_EQ(
ctx->HasInput("Logits"), true,
platform::errors::InvalidArgument("Input(Logits) should be not null."));
PADDLE_ENFORCE_EQ(
ctx->HasInput("Label"), true,
platform::errors::InvalidArgument("Input(Label) should be not null."));
PADDLE_ENFORCE_EQ(ctx->HasOutput("Softmax"), true,
platform::errors::InvalidArgument(
"Output(Softmax) should be not null."));
PADDLE_ENFORCE_EQ(
ctx->HasOutput("Loss"), true,
platform::errors::InvalidArgument("Output(Loss) should be not null."));
auto axis = ctx->Attrs().Get<int>("axis");
auto logits_dims = ctx->GetInputDim("Logits");
auto labels_dims = ctx->GetInputDim("Label");
auto logits_rank = logits_dims.size();
axis = CanonicalAxis(axis, logits_rank);
PADDLE_ENFORCE_GE(axis, logits_rank - 1,
platform::errors::InvalidArgument(
"Attr(axis) value should be -1 or R-1, "
"R is the rank of Input(Logits)."));
for (int i = 0; i < logits_rank; i++) {
if (i != axis) {
if (ctx->IsRuntime() || (logits_dims[i] > 0 && labels_dims[i] > 0)) {
PADDLE_ENFORCE_EQ(logits_dims[i], labels_dims[i],
platform::errors::InvalidArgument(
"Input(Logits) and Input(Label) should in "
"same shape in dimensions except axis."));
}
}
}
bool soft_label = ctx->Attrs().Get<bool>("soft_label");
PADDLE_ENFORCE_EQ(soft_label, true,
platform::errors::InvalidArgument(
"soft_label can only be true! "));
if (soft_label) {
if (ctx->IsRuntime() ||
(logits_dims[axis] > 0 && labels_dims[axis] > 0)) {
PADDLE_ENFORCE_EQ(logits_dims[axis], labels_dims[axis],
platform::errors::InvalidArgument(
"If Attr(soft_label) == true, "
"the axis dimension of "
"Input(X) and Input(Label) should be equal."));
}
}
ctx->SetOutputDim("Softmax", logits_dims);
logits_dims[axis] = 1;
ctx->SetOutputDim("Loss", logits_dims);
ctx->ShareLoD("Logits", /*->*/ "Softmax");
ctx->ShareLoD("Logits", /*->*/ "Loss");
}
};
class MpcSoftmaxWithCrossEntropyOpGrad : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE_EQ(ctx->HasInput(framework::GradVarName("Loss")), true,
platform::errors::InvalidArgument(
"Input(Loss@Grad) should not be null."));
PADDLE_ENFORCE_EQ(ctx->HasInput("Softmax"), true,
platform::errors::InvalidArgument(
"Input(Softmax) should be not null."));
PADDLE_ENFORCE_EQ(
ctx->HasInput("Label"), true,
platform::errors::InvalidArgument("Input(Label) should be not null."));
PADDLE_ENFORCE_EQ(ctx->HasOutput(framework::GradVarName("Logits")), true,
platform::errors::InvalidArgument(
"Output(Logits@Grad) should be not null."));
auto axis = ctx->Attrs().Get<int>("axis");
auto softmax_dims = ctx->GetInputDim("Softmax");
auto labels_dims = ctx->GetInputDim("Label");
auto softmax_rank = softmax_dims.size();
axis = CanonicalAxis(axis, softmax_rank);
PADDLE_ENFORCE_GE(axis, softmax_rank - 1,
platform::errors::InvalidArgument(
"Attr(axis) value should be -1 or R-1, "
"R is the rank of Input(Logits)."));
for (int i = 0; i < softmax_rank; i++) {
if (i != axis) {
if (ctx->IsRuntime() || (softmax_dims[i] > 0 && labels_dims[i] > 0)) {
PADDLE_ENFORCE_EQ(
softmax_dims[i], labels_dims[i],
platform::errors::InvalidArgument(
"Input(Logits) and Input(Label) should in same shape in "
"dimensions except axis."));
}
}
}
bool soft_label = ctx->Attrs().Get<bool>("soft_label");
PADDLE_ENFORCE_EQ(soft_label, true,
platform::errors::InvalidArgument(
"soft_label can only be true! "));
if (soft_label) {
if (ctx->IsRuntime() || (softmax_dims[axis] > 0 && labels_dims[axis] > 0)) {
PADDLE_ENFORCE_EQ(softmax_dims[axis], labels_dims[axis],
platform::errors::InvalidArgument(
"If Attr(soft_label) == true, "
"the axis dimension of "
"Input(X) and Input(Label) should be equal."));
}
}
ctx->SetOutputDim(framework::GradVarName("Logits"),
ctx->GetInputDim("Softmax"));
}
};
class MpcSoftmaxWithCrossEntropyOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
AddInput("Logits",
"(Tensor, default: Tensor<float>), The input tensor of unscaled "
"log probabilities, whose dimension :attr:`axis` should be scaled "
"by softmax.");
AddInput(
"Label",
"(Tensor) The input tensor of groud truth label. If :attr:`soft_label` "
"is set to false, Label is a Tensor<int64> in same shape with "
"Input(Logits) except the shape in dimension :attr:`axis` as 1. If "
"soft_label is set to true, Label is a Tensor<float/double> in same "
"shape with Input(Logits).");
AddOutput(
"Softmax",
"(Tensor, default: Tensor<float>), A tensor in same shape with "
"Input(Logits). "
"The outputs value of softmax activation by given the input batch, "
"which will be used in backward calculation.")
.AsIntermediate();
AddOutput("Loss",
"(Tensor, default: Tensor<float>), A tensor in same shape with "
"Input(Logits) "
"except the shape in dimension :attr:`axis` as 1. The cross "
"entropy loss.");
AddAttr<bool>(
"soft_label",
"(bool, default: false), A flag to indicate whether to interpretant "
"the given labels as soft labels.")
.SetDefault(false);
AddAttr<int>("axis",
"The dimension index of Input(Logits) to perform softmax,"
"default -1 for last dimension")
.SetDefault(-1);
AddAttr<bool>("use_relu", "").SetDefault(false);
AddAttr<bool>("use_long_div", "").SetDefault(true);
AddComment(R"DOC(
Softmax With Cross Entropy Operator.
Cross entropy loss with softmax is used as the output layer extensively. This
operator computes the softmax normalized values for each row of the input
tensor.
Computing the cross-entropy loss is not supported yet.
Currently, only soft_label=true and axis=-1 (or rank-1) are supported.
Forward: out = softmax(x). TODO: add cross_entropy.
Backward: dx = dout.expand * (softmax(x) - label)
)DOC");
}
};
template <typename T>
class MpcSoftmaxGradMaker : public framework::SingleGradOpMaker<T> {
public:
using framework::SingleGradOpMaker<T>::SingleGradOpMaker;
protected:
void Apply(GradOpPtr<T> grad_op) const override {
grad_op->SetType("mpc_softmax_with_cross_entropy_grad");
grad_op->SetInput("Label", this->Input("Label"));
grad_op->SetInput("Softmax", this->Output("Softmax"));
grad_op->SetInput(framework::GradVarName("Loss"), this->OutputGrad("Loss"));
grad_op->SetOutput(framework::GradVarName("Logits"),
this->InputGrad("Logits"));
grad_op->SetAttrMap(this->Attrs());
}
};
DECLARE_INPLACE_OP_INFERER(MpcSoftmaxWithCrossEntropyInplaceInference,
{"Logits", "Softmax"});
DECLARE_INPLACE_OP_INFERER(MpcSoftmaxWithCrossEntropyGradInplaceInference,
{"Softmax", framework::GradVarName("Logits")});
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OPERATOR(mpc_softmax_with_cross_entropy, ops::MpcSoftmaxWithCrossEntropyOp,
ops::MpcSoftmaxWithCrossEntropyOpMaker,
ops::MpcSoftmaxGradMaker<paddle::framework::OpDesc>,
ops::MpcSoftmaxGradMaker<paddle::imperative::OpBase>,
ops::MpcSoftmaxWithCrossEntropyInplaceInference);
REGISTER_OPERATOR(mpc_softmax_with_cross_entropy_grad,
ops::MpcSoftmaxWithCrossEntropyOpGrad,
ops::MpcSoftmaxWithCrossEntropyGradInplaceInference);
REGISTER_OP_CPU_KERNEL(mpc_softmax_with_cross_entropy,
ops::MpcSoftmaxWithCrossEntropyKernel<paddle::platform::CPUDeviceContext, int64_t>);
REGISTER_OP_CPU_KERNEL(mpc_softmax_with_cross_entropy_grad,
ops::MpcSoftmaxWithCrossEntropyGradKernel<paddle::platform::CPUDeviceContext, int64_t>);
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "mpc_op.h"
namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
using DDim = framework::DDim;
static inline int CanonicalAxis(const int axis, const int rank) {
if (axis < 0) {
return axis + rank;
}
return axis;
}
static inline int SizeToAxis(const int axis, DDim dims) {
int size = 1;
for (int i = 0; i < axis; i++) {
size *= dims[i];
}
return size;
}
static inline int SizeFromAxis(const int axis, DDim dims) {
int size = 1;
for (int i = axis; i < dims.size(); i++) {
size *= dims[i];
}
return size;
}
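// Worked example (illustrative shape): for a shared logits tensor with
// dims = [2, 4, 10] (share, batch, class) and axis = -1:
//   CanonicalAxis(-1, 3) = 2, SizeToAxis(2, dims) = 2 * 4 = 8 (n),
//   SizeFromAxis(2, dims) = 10 (d).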
// Out = softmax(Logits) = relu(Logits_i) / sum(relu(Logits_i)): prediction of input.
// todo: loss=?
template <typename DeviceContext, typename T>
class MpcSoftmaxWithCrossEntropyKernel : public MpcOpKernel<T> {
public:
void ComputeImpl(const framework::ExecutionContext &ctx) const override {
auto *in_x_t = ctx.Input<Tensor>("Logits");
auto *out_softmax_t = ctx.Output<Tensor>("Softmax");
auto *out_loss_t = ctx.Output<Tensor>("Loss");
out_softmax_t->mutable_data<T>(ctx.GetPlace());
out_loss_t->mutable_data<T>(ctx.GetPlace());
bool use_relu = ctx.Attr<bool>("use_relu");
bool use_long_div = ctx.Attr<bool>("use_long_div");
mpc::MpcInstance::mpc_instance()->mpc_protocol()->mpc_operators()->softmax(
in_x_t, out_softmax_t, use_relu, use_long_div);
}
};
// dx = dout.expand * (softmax(x) - labels)
template <typename DeviceContext, typename T>
class MpcSoftmaxWithCrossEntropyGradKernel : public MpcOpKernel<T> {
public:
void ComputeImpl(const framework::ExecutionContext &ctx) const override {
auto *dout = ctx.Input<Tensor>(framework::GradVarName("Loss"));
auto *in_label_t = ctx.Input<Tensor>("Label");
auto *in_softmax_t = ctx.Input<Tensor>("Softmax");
auto *dx = ctx.Output<Tensor>(framework::GradVarName("Logits"));
const bool soft_label = ctx.Attr<bool>("soft_label");
PADDLE_ENFORCE_EQ(soft_label, true, "soft_label can only be true.");
const int rank = dx->dims().size();
const int axis = CanonicalAxis(ctx.Attr<int>("axis"), rank);
int axis_dim = dx->dims()[axis];
const int n = SizeToAxis(axis, dx->dims());
const int d = SizeFromAxis(axis, dx->dims());
T* dx_data = dx->mutable_data<T>(ctx.GetPlace());
const T* dout_data = dout->data<T>();
// expand dout
Tensor dout_expand;
T* dout_expand_data = dout_expand.mutable_data<T>(dx->dims(), ctx.GetPlace());
for (size_t i = 0; i < n; ++i) {
for (size_t j = 0; j < d; ++j) {
dout_expand_data[i * d + j] = dout_data[i];
}
}
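// e.g. (illustrative) n = 2, d = 3: dout = [g0, g1] is expanded to
// dout_expand = [g0, g0, g0, g1, g1, g1] so it can be multiplied
// element-wise with (softmax - label) below.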
// dx = dout.expand * (softmax - label)
Tensor softmax_minus_label;
T* softmax_minus_label_data = softmax_minus_label.mutable_data<T>(dx->dims(), ctx.GetPlace());
mpc::MpcInstance::mpc_instance()->mpc_protocol()->mpc_operators()->sub(in_softmax_t, in_label_t, &softmax_minus_label);
mpc::MpcInstance::mpc_instance()->mpc_protocol()->mpc_operators()->mul(&dout_expand, &softmax_minus_label, dx);
}
};
} // namespace operators
} // namespace paddle
......@@ -39,9 +39,9 @@ public:
void init(size_t party, std::shared_ptr<AbstractNetwork> network, block seed,
block seed2) override {
set_num_party(2);
set_party(party);
set_network(network);
set_num_party(2);
if (psi::equals(seed2, psi::g_zero_block)) {
seed2 = psi::block_from_dev_urandom();
......
add_compile_options(-msse4.2 -maes)
set(PRIVC3_SRCS
"./aes.cc"
"./paddle_tensor.cc"
......@@ -8,16 +6,23 @@ set(PRIVC3_SRCS
"./tensor_adapter_factory.cc"
)
if (USE_AES_NI)
add_compile_definitions(USE_AES_NI)
endif (USE_AES_NI)
add_library(privc3_o OBJECT ${PRIVC3_SRCS})
add_dependencies(privc3_o fluid_framework)
add_library(privc3 STATIC $<TARGET_OBJECTS:privc3_o>)
target_link_libraries(privc3 fluid_framework)
if (USE_OPENMP)
target_link_libraries(privc3 fluid_framework OpenMP::OpenMP_CXX OpenMP::OpenMP_C crypto)
else()
target_link_libraries(privc3 fluid_framework crypto)
endif (USE_OPENMP)
cc_test(fixedpoint_util_test SRCS fixedpoint_util_test.cc DEPS privc3)
cc_test(paddle_tensor_test SRCS paddle_tensor_test.cc DEPS privc3)
cc_test(boolean_tensor_test SRCS boolean_tensor_test.cc DEPS privc3)
cc_test(fixedpoint_tensor_test SRCS fixedpoint_tensor_test.cc DEPS privc3)
#set(CMAKE_BUILD_TYPE "Debug")
......@@ -40,9 +40,9 @@ public:
void init(size_t party, std::shared_ptr<AbstractNetwork> network, block seed,
block seed2) override {
set_num_party(3);
set_party(party);
set_network(network);
set_num_party(3);
if (psi::equals(seed, psi::g_zero_block)) {
seed = psi::block_from_dev_urandom();
......
......@@ -23,33 +23,35 @@
namespace aby3 {
template <typename T, size_t N> class FixedPointTensor;
template<typename T, size_t N>
class FixedPointTensor;
template <typename T> class BooleanTensor {
template<typename T>
class BooleanTensor {
public:
BooleanTensor(TensorAdapter<T> *share_tensor[2]);
BooleanTensor(TensorAdapter<T>* share_tensor[2]);
BooleanTensor(TensorAdapter<T> *tensor0, TensorAdapter<T> *tensor1);
BooleanTensor(TensorAdapter<T>* tensor0, TensorAdapter<T>* tensor1);
BooleanTensor();
// ABY3 a2b
template <size_t N>
BooleanTensor &operator=(const FixedPointTensor<T, N> *other);
template<size_t N>
BooleanTensor& operator=(const FixedPointTensor<T, N>* other);
~BooleanTensor() {}
// get share
TensorAdapter<T> *share(size_t idx);
//get share
TensorAdapter<T>* share(size_t idx);
const TensorAdapter<T> *share(size_t idx) const;
const TensorAdapter<T>* share(size_t idx) const;
// reveal boolean tensor to one party
void reveal_to_one(size_t party_num, TensorAdapter<T> *ret) const;
void reveal_to_one(size_t party_num, TensorAdapter<T>* ret) const;
// reveal boolean tensor to all parties
void reveal(TensorAdapter<T> *ret) const;
void reveal(TensorAdapter<T>* ret) const;
const std::vector<size_t> shape() const;
......@@ -61,59 +63,63 @@ public:
// const std::string& rnd_seed = "");
// element-wise xor with BooleanTensor
void bitwise_xor(const BooleanTensor *rhs, BooleanTensor *ret) const;
void bitwise_xor(const BooleanTensor* rhs, BooleanTensor* ret) const;
// element-wise xor with TensorAdapter
void bitwise_xor(const TensorAdapter<T> *rhs, BooleanTensor *ret) const;
void bitwise_xor(const TensorAdapter<T>* rhs, BooleanTensor* ret) const;
// element-wise and with BooleanTensor
void bitwise_and(const BooleanTensor *rhs, BooleanTensor *ret) const;
void bitwise_and(const BooleanTensor* rhs, BooleanTensor* ret) const;
// element-wise and with TensorAdapter
void bitwise_and(const TensorAdapter<T> *rhs, BooleanTensor *ret) const;
void bitwise_and(const TensorAdapter<T>* rhs, BooleanTensor* ret) const;
// element-wise or with BooleanTensor
void bitwise_or(const BooleanTensor *rhs, BooleanTensor *ret) const;
// element-wise or with TensorAdapter
void bitwise_or(const TensorAdapter<T> *rhs, BooleanTensor *ret) const;
// element-wise or
// for both tensor adapter and boolean tensor
template<template<typename U> class CTensor>
void bitwise_or(const CTensor<T>* rhs, BooleanTensor* ret) const;
// element-wise not
void bitwise_not(BooleanTensor *ret) const;
void bitwise_not(BooleanTensor* ret) const;
// element-wise lshift
void lshift(size_t rhs, BooleanTensor *ret) const;
void lshift(size_t rhs, BooleanTensor* ret) const;
// element-wise rshift
void rshift(size_t rhs, BooleanTensor *ret) const;
void rshift(size_t rhs, BooleanTensor* ret) const;
// element-wise logical_rshift
void logical_rshift(size_t rhs, BooleanTensor *ret) const;
void logical_rshift(size_t rhs, BooleanTensor* ret) const;
// element-wise ppa with BooleanTensor
void ppa(const BooleanTensor *rhs, BooleanTensor *ret, size_t nbits) const;
void ppa(const BooleanTensor* rhs, BooleanTensor*ret , size_t nbits) const;
// ABY3 b2a
template <size_t N> void b2a(FixedPointTensor<T, N> *ret) const;
template<size_t N>
void b2a(FixedPointTensor<T, N>* ret) const;
// ABY3 ab mul
// this is a one-bit boolean share
template <size_t N>
void mul(const TensorAdapter<T> *rhs, FixedPointTensor<T, N> *ret,
size_t rhs_party) const;
template<size_t N>
void mul(const TensorAdapter<T>* rhs, FixedPointTensor<T, N>* ret, size_t rhs_party) const;
// ABY3 ab mul
// this is a one-bit boolean share
template <size_t N>
void mul(const FixedPointTensor<T, N> *rhs,
FixedPointTensor<T, N> *ret) const;
template<size_t N>
void mul(const FixedPointTensor<T, N>* rhs, FixedPointTensor<T, N>* ret) const;
// extract to this
template <size_t N>
void bit_extract(size_t i, const FixedPointTensor<T, N> *in);
template<size_t N>
void bit_extract(size_t i, const FixedPointTensor<T, N>* in);
// extract from this to ret
void bit_extract(size_t i, BooleanTensor *ret) const;
void bit_extract(size_t i, BooleanTensor* ret) const;
// turn all 1s to 0s except the last 1 in a col
// given cmp result from max pooling, generate one hot tensor
// indicating which element is max
// inplace transform
void onehot_from_cmp();
private:
static inline std::shared_ptr<AbstractContext> aby3_ctx() {
......@@ -131,9 +137,10 @@ private:
size_t party() const;
private:
TensorAdapter<T> *_share[2];
TensorAdapter<T>* _share[2];
};
} // namespace aby3
} //namespace aby3
#include "boolean_tensor_impl.h"
......@@ -18,49 +18,54 @@
namespace aby3 {
template <typename T> size_t BooleanTensor<T>::pre_party() const {
template<typename T>
size_t BooleanTensor<T>::pre_party() const {
return aby3_ctx()->pre_party();
}
template <typename T> size_t BooleanTensor<T>::next_party() const {
template<typename T>
size_t BooleanTensor<T>::next_party() const {
return aby3_ctx()->next_party();
}
template <typename T> size_t BooleanTensor<T>::party() const {
template<typename T>
size_t BooleanTensor<T>::party() const {
return aby3_ctx()->party();
}
template <typename T>
BooleanTensor<T>::BooleanTensor(TensorAdapter<T> *tensor[2]) {
template<typename T>
BooleanTensor<T>::BooleanTensor(TensorAdapter<T>* tensor[2]) {
// TODO: check if tensor shape equal
_share[0] = tensor[0];
_share[1] = tensor[1];
}
template <typename T>
BooleanTensor<T>::BooleanTensor(TensorAdapter<T> *tensor0,
TensorAdapter<T> *tensor1) {
template<typename T>
BooleanTensor<T>::BooleanTensor(TensorAdapter<T>* tensor0,
TensorAdapter<T>* tensor1) {
// TODO: check if tensor shape equal
_share[0] = tensor0;
_share[1] = tensor1;
}
template <typename T> BooleanTensor<T>::BooleanTensor() {}
template<typename T>
BooleanTensor<T>::BooleanTensor() {
}
template <typename T> TensorAdapter<T> *BooleanTensor<T>::share(size_t idx) {
template<typename T>
TensorAdapter<T>* BooleanTensor<T>::share(size_t idx) {
// TODO: check if idx < 2
return _share[idx];
}
template <typename T>
const TensorAdapter<T> *BooleanTensor<T>::share(size_t idx) const {
template<typename T>
const TensorAdapter<T>* BooleanTensor<T>::share(size_t idx) const {
// TODO: check if idx < 2
return _share[idx];
}
template <typename T>
void BooleanTensor<T>::reveal_to_one(size_t party_num,
TensorAdapter<T> *ret) const {
template<typename T>
void BooleanTensor<T>::reveal_to_one(size_t party_num, TensorAdapter<T>* ret) const {
if (party_num == party()) {
// TODO: check if tensor shape equal
......@@ -75,50 +80,54 @@ void BooleanTensor<T>::reveal_to_one(size_t party_num,
} else if (party_num == next_party()) {
aby3_ctx()->network()->template send(party_num, *share(0));
}
}
template <typename T>
void BooleanTensor<T>::reveal(TensorAdapter<T> *ret) const {
template<typename T>
void BooleanTensor<T>::reveal(TensorAdapter<T>* ret) const {
for (size_t idx = 0; idx < 3; ++idx) {
reveal_to_one(idx, ret);
}
}
template <typename T>
template<typename T>
const std::vector<size_t> BooleanTensor<T>::shape() const {
if (share(0)) {
return share(0)->shape();
} else {
}
else {
return std::vector<size_t>();
}
}
template <typename T> size_t BooleanTensor<T>::numel() const {
template<typename T>
size_t BooleanTensor<T>::numel() const {
if (share(0)) {
return share(0)->numel();
} else {
}
else {
return 0;
}
}
template <typename T>
void BooleanTensor<T>::bitwise_xor(const BooleanTensor *rhs,
BooleanTensor *ret) const {
template<typename T>
void BooleanTensor<T>::bitwise_xor(const BooleanTensor* rhs,
BooleanTensor* ret) const {
share(0)->bitwise_xor(rhs->share(0), ret->share(0));
share(1)->bitwise_xor(rhs->share(1), ret->share(1));
}
template <typename T>
void BooleanTensor<T>::bitwise_xor(const TensorAdapter<T> *rhs,
BooleanTensor *ret) const {
template<typename T>
void BooleanTensor<T>::bitwise_xor(const TensorAdapter<T>* rhs,
BooleanTensor* ret) const {
share(0)->bitwise_xor(rhs, ret->share(0));
share(1)->bitwise_xor(rhs, ret->share(1));
}
template <typename T>
void BooleanTensor<T>::bitwise_and(const BooleanTensor *rhs,
BooleanTensor *ret) const {
template<typename T>
void BooleanTensor<T>::bitwise_and(const BooleanTensor* rhs,
BooleanTensor* ret) const {
auto tmp_zero = tensor_factory()->template create<T>(ret->shape());
auto tmp0 = tensor_factory()->template create<T>(ret->shape());
......@@ -149,37 +158,36 @@ void BooleanTensor<T>::bitwise_and(const BooleanTensor *rhs,
}
}
template <typename T>
void BooleanTensor<T>::bitwise_and(const TensorAdapter<T> *rhs,
BooleanTensor *ret) const {
template<typename T>
void BooleanTensor<T>::bitwise_and(const TensorAdapter<T>* rhs,
BooleanTensor* ret) const {
share(0)->bitwise_and(rhs, ret->share(0));
share(1)->bitwise_and(rhs, ret->share(1));
}
template <typename T>
void BooleanTensor<T>::bitwise_or(const BooleanTensor *rhs,
BooleanTensor *ret) const {
// ret = x & y
bitwise_and(rhs, ret);
// ret = x & y ^ x
bitwise_xor(ret, ret);
// ret = x & y ^ x ^ y
rhs->bitwise_xor(ret, ret);
}
template<typename T>
template<template<typename U> class CTensor>
void BooleanTensor<T>::bitwise_or(const CTensor<T>* rhs,
BooleanTensor* ret) const {
template <typename T>
void BooleanTensor<T>::bitwise_or(const TensorAdapter<T> *rhs,
BooleanTensor *ret) const {
std::vector<std::shared_ptr<TensorAdapter<T>>> tmp;
for (int i = 0; i < 2; ++i) {
tmp.emplace_back(
tensor_factory()->template create<T>(shape()));
}
BooleanTensor buffer(tmp[0].get(), tmp[1].get());
// ret = x & y
bitwise_and(rhs, ret);
bitwise_and(rhs, &buffer);
// ret = x & y ^ x
bitwise_xor(ret, ret);
bitwise_xor(&buffer, &buffer);
// ret = x & y ^ x ^ y
ret->bitwise_xor(rhs, ret);
buffer.bitwise_xor(rhs, ret);
}
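// Sanity check on plaintext bits (illustrative): x = 0b1100, y = 0b1010:
// x & y = 0b1000, (x & y) ^ x = 0b0100, ((x & y) ^ x) ^ y = 0b1110 = x | y,
// which is the identity realized above on shares.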
template <typename T>
void BooleanTensor<T>::bitwise_not(BooleanTensor *ret) const {
template<typename T>
void BooleanTensor<T>::bitwise_not(BooleanTensor* ret) const {
if (party() == 0) {
share(0)->bitwise_not(ret->share(0));
share(1)->copy(ret->share(1));
......@@ -192,26 +200,27 @@ void BooleanTensor<T>::bitwise_not(BooleanTensor *ret) const {
}
}
template <typename T>
void BooleanTensor<T>::lshift(size_t rhs, BooleanTensor *ret) const {
template<typename T>
void BooleanTensor<T>::lshift(size_t rhs, BooleanTensor* ret) const {
share(0)->lshift(rhs, ret->share(0));
share(1)->lshift(rhs, ret->share(1));
}
template <typename T>
void BooleanTensor<T>::rshift(size_t rhs, BooleanTensor *ret) const {
template<typename T>
void BooleanTensor<T>::rshift(size_t rhs, BooleanTensor* ret) const {
share(0)->rshift(rhs, ret->share(0));
share(1)->rshift(rhs, ret->share(1));
}
template <typename T>
void BooleanTensor<T>::logical_rshift(size_t rhs, BooleanTensor *ret) const {
template<typename T>
void BooleanTensor<T>::logical_rshift(size_t rhs, BooleanTensor* ret) const {
share(0)->logical_rshift(rhs, ret->share(0));
share(1)->logical_rshift(rhs, ret->share(1));
}
template <typename T>
void BooleanTensor<T>::ppa(const BooleanTensor *rhs, BooleanTensor *ret,
template<typename T>
void BooleanTensor<T>::ppa(const BooleanTensor* rhs,
BooleanTensor* ret,
size_t n_bits) const {
// Kogge-Stone adder, adapted from tfe
// https://github.com/tf-encrypted
......@@ -219,11 +228,11 @@ void BooleanTensor<T>::ppa(const BooleanTensor *rhs, BooleanTensor *ret,
const size_t k = std::ceil(std::log2(n_bits));
std::vector<T> keep_masks(k);
for (size_t i = 0; i < k; ++i) {
keep_masks[i] = (T(1) << (T)std::exp2(i)) - 1;
keep_masks[i] = (T(1) << (T) std::exp2(i)) - 1;
}
std::shared_ptr<TensorAdapter<T>> tmp[11];
for (auto &ti : tmp) {
for (auto& ti: tmp) {
ti = tensor_factory()->template create<T>(ret->shape());
}
BooleanTensor<T> g(tmp[0].get(), tmp[1].get());
......@@ -245,6 +254,7 @@ void BooleanTensor<T>::ppa(const BooleanTensor *rhs, BooleanTensor *ret,
g.lshift(std::exp2(i), &g1);
p.lshift(std::exp2(i), &p1);
p1.bitwise_xor(k_mask, &p1);
g1.bitwise_and(&p, &c);
......@@ -257,12 +267,15 @@ void BooleanTensor<T>::ppa(const BooleanTensor *rhs, BooleanTensor *ret,
c.bitwise_xor(&p, ret);
}
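// A minimal plaintext reference sketch of the Kogge-Stone addition realized
// above; `plain_kogge_stone_add` is a hypothetical helper on ordinary
// integers (no secret sharing), shown only to make the g/p recursion and the
// keep-mask handling concrete.
static inline unsigned long long plain_kogge_stone_add(unsigned long long a,
unsigned long long b) {
unsigned long long g = a & b; // generate bits
unsigned long long p = a ^ b; // propagate bits
for (int i = 0; i < 6; ++i) { // log2(64) rounds
unsigned long long s = 1ull << i;
unsigned long long g1 = g << s;
unsigned long long p1 = (p << s) | ((1ull << s) - 1); // keep mask fills low bits
g ^= p & g1; // xor equals or here, since g & p == 0 bitwise
p &= p1;
}
unsigned long long c = g << 1; // carry into each bit position
return a ^ b ^ c; // equals a + b (mod 2^64)
}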
template <typename T, size_t N>
void a2b(AbstractContext *aby3_ctx, TensorAdapterFactory *tensor_factory,
const FixedPointTensor<T, N> *a, BooleanTensor<T> *b, size_t n_bits) {
template<typename T, size_t N>
void a2b(AbstractContext* aby3_ctx,
TensorAdapterFactory* tensor_factory,
const FixedPointTensor<T, N>* a,
BooleanTensor<T>* b,
size_t n_bits) {
std::shared_ptr<TensorAdapter<T>> tmp[4];
for (auto &ti : tmp) {
for (auto& ti: tmp) {
ti = tensor_factory->template create<T>(a->shape());
// set 0
std::transform(ti->data(), ti->data() + ti->numel(), ti->data(),
......@@ -305,42 +318,41 @@ void a2b(AbstractContext *aby3_ctx, TensorAdapterFactory *tensor_factory,
lhs->ppa(rhs.get(), b, n_bits);
}
template <typename T>
template <size_t N>
BooleanTensor<T> &BooleanTensor<T>::
operator=(const FixedPointTensor<T, N> *other) {
template<typename T>
template<size_t N>
BooleanTensor<T>& BooleanTensor<T>::operator=(const FixedPointTensor<T, N>* other) {
a2b(aby3_ctx().get(), tensor_factory().get(), other, this, sizeof(T) * 8);
return *this;
}
template <typename T>
void tensor_rshift_transform(const TensorAdapter<T> *lhs, size_t rhs,
TensorAdapter<T> *ret) {
const T *begin = lhs->data();
void tensor_rshift_transform(const TensorAdapter<T>* lhs,
size_t rhs, TensorAdapter<T>* ret) {
const T* begin = lhs->data();
std::transform(begin, begin + lhs->numel(), ret->data(),
[rhs](T in) { return (in >> rhs) & 1; });
};
template <typename T>
template <size_t N>
void BooleanTensor<T>::bit_extract(size_t i, const FixedPointTensor<T, N> *in) {
template<typename T>
template<size_t N>
void BooleanTensor<T>::bit_extract(size_t i, const FixedPointTensor<T, N>* in) {
a2b(aby3_ctx().get(), tensor_factory().get(), in, this, i + 1);
tensor_rshift_transform(share(0), i, share(0));
tensor_rshift_transform(share(1), i, share(1));
}
template <typename T>
void BooleanTensor<T>::bit_extract(size_t i, BooleanTensor *ret) const {
template<typename T>
void BooleanTensor<T>::bit_extract(size_t i, BooleanTensor* ret) const {
tensor_rshift_transform(share(0), i, ret->share(0));
tensor_rshift_transform(share(1), i, ret->share(1));
}
template <typename T>
template <size_t N>
void BooleanTensor<T>::b2a(FixedPointTensor<T, N> *ret) const {
template<typename T>
template<size_t N>
void BooleanTensor<T>::b2a(FixedPointTensor<T, N>* ret) const {
std::shared_ptr<TensorAdapter<T>> tmp[2];
for (auto &ti : tmp) {
for (auto& ti: tmp) {
ti = tensor_factory()->template create<T>(shape());
// set 0
std::transform(ti->data(), ti->data() + ti->numel(), ti->data(),
......@@ -364,7 +376,7 @@ void BooleanTensor<T>::b2a(FixedPointTensor<T, N> *ret) const {
bt.ppa(this, &bt, sizeof(T) * 8);
TensorAdapter<T> *dest = nullptr;
TensorAdapter<T>* dest = nullptr;
if (party() == 0) {
dest = ret->mutable_share(0);
}
......@@ -381,10 +393,10 @@ void BooleanTensor<T>::b2a(FixedPointTensor<T, N> *ret) const {
}
}
template <typename T>
template <size_t N>
void BooleanTensor<T>::mul(const TensorAdapter<T> *rhs,
FixedPointTensor<T, N> *ret,
template<typename T>
template<size_t N>
void BooleanTensor<T>::mul(const TensorAdapter<T>* rhs,
FixedPointTensor<T, N>* ret,
size_t rhs_party) const {
// ot sender
size_t idx0 = rhs_party;
......@@ -396,19 +408,21 @@ void BooleanTensor<T>::mul(const TensorAdapter<T> *rhs,
auto tmp0 = tensor_factory()->template create<T>(ret->shape());
auto tmp1 = tensor_factory()->template create<T>(ret->shape());
TensorAdapter<T> *tmp[2] = {tmp0.get(), tmp1.get()};
TensorAdapter<T>* tmp[2] = {tmp0.get(), tmp1.get()};
TensorAdapter<T> *null_arg[2] = {nullptr, nullptr};
TensorAdapter<T>* null_arg[2] = {nullptr, nullptr};
if (party() == idx0) {
// use ret as buffer
TensorAdapter<T> *m[2] = {ret->mutable_share(0), ret->mutable_share(1)};
TensorAdapter<T>* m[2] = {ret->mutable_share(0), ret->mutable_share(1)};
aby3_ctx()->template gen_zero_sharing_arithmetic(*tmp[0]);
// m0 = a * (b0 ^ b1) + s0
// m1 = a * (1 ^ b0 ^ b1) + s0
share(0)->bitwise_xor(share(1), m[0]);
std::transform(m[0]->data(), m[0]->data() + m[0]->numel(), m[0]->data(),
[](T in) { return 1 & in; });
std::transform(m[0]->data(), m[0]->data() + m[0]->numel(), m[1]->data(),
[](T in) { return 1 ^ in; });
......@@ -419,8 +433,8 @@ void BooleanTensor<T>::mul(const TensorAdapter<T> *rhs,
m[1]->add(tmp[0], m[1]);
aby3_ctx()->template ot(idx0, idx1, idx2, null_arg[0],
const_cast<const aby3::TensorAdapter<T> **>(m), tmp,
null_arg[0]);
const_cast<const aby3::TensorAdapter<T>**>(m),
tmp, null_arg[0]);
// ret0 = s2
// ret1 = s1
......@@ -431,20 +445,18 @@ void BooleanTensor<T>::mul(const TensorAdapter<T> *rhs,
// ret0 = s1
aby3_ctx()->template gen_zero_sharing_arithmetic(*(ret->mutable_share(0)));
// ret1 = a * b + s0
aby3_ctx()->template ot(
idx0, idx1, idx2, share(1),
const_cast<const aby3::TensorAdapter<T> **>(null_arg), tmp,
ret->mutable_share(1));
aby3_ctx()->template ot(idx0, idx1, idx2, share(1),
const_cast<const aby3::TensorAdapter<T>**>(null_arg),
tmp, ret->mutable_share(1));
aby3_ctx()->network()->template send(idx0, *(ret->share(0)));
aby3_ctx()->network()->template send(idx2, *(ret->share(1)));
} else if (party() == idx2) {
// ret0 = a * b + s0
aby3_ctx()->template gen_zero_sharing_arithmetic(*(ret->mutable_share(1)));
// ret1 = s2
aby3_ctx()->template ot(
idx0, idx1, idx2, share(0),
const_cast<const aby3::TensorAdapter<T> **>(null_arg), tmp,
null_arg[0]);
aby3_ctx()->template ot(idx0, idx1, idx2, share(0),
const_cast<const aby3::TensorAdapter<T>**>(null_arg),
tmp, null_arg[0]);
aby3_ctx()->network()->template send(idx0, *(ret->share(1)));
......@@ -452,31 +464,68 @@ void BooleanTensor<T>::mul(const TensorAdapter<T> *rhs,
}
}
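// Reading of the OT step above (a sketch, assuming standard 1-out-of-2 OT
// semantics): for a value a known to party idx0 and a boolean-shared bit
// b = b0 ^ b1 ^ b2, the sender prepares
// m0 = a * (b0 ^ b1) + s0 and m1 = a * (1 ^ b0 ^ b1) + s0,
// so the receiver, choosing with its remaining bit share, obtains
// a * (b0 ^ b1 ^ b2) + s0 = a * b + s0,
// and the zero sharing s0 + s1 + s2 = 0 turns the three outputs into a fresh
// arithmetic sharing of a * b.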
template <typename T>
template <size_t N>
void BooleanTensor<T>::mul(const FixedPointTensor<T, N> *rhs,
FixedPointTensor<T, N> *ret) const {
auto tmp0 = tensor_factory()->template create<T>(ret->shape());
auto tmp1 = tensor_factory()->template create<T>(ret->shape());
auto tmp2 = tensor_factory()->template create<T>(ret->shape());
template<typename T>
template<size_t N>
void BooleanTensor<T>::mul(const FixedPointTensor<T, N>* rhs,
FixedPointTensor<T, N>* ret) const {
std::vector<std::shared_ptr<TensorAdapter<T>>> tmp;
for (int i = 0; i < 4; ++i) {
tmp.emplace_back(
tensor_factory()->template create<T>(ret->shape()));
}
FixedPointTensor<T, N> tmp(tmp0.get(), tmp1.get());
FixedPointTensor<T, N> tmp0(tmp[0].get(), tmp[1].get());
FixedPointTensor<T, N> tmp1(tmp[2].get(), tmp[3].get());
if (party() == 0) {
mul(nullptr, ret, 1);
mul(rhs->share(0), &tmp, 0);
ret->add(&tmp, ret);
mul(nullptr, &tmp0, 1);
mul(rhs->share(0), &tmp1, 0);
} else if (party() == 1) {
rhs->share(0)->add(rhs->share(1), tmp2.get());
mul(tmp2.get(), ret, 1);
mul(nullptr, &tmp, 0);
ret->add(&tmp, ret);
rhs->share(0)->add(rhs->share(1), tmp[2].get());
mul(tmp[2].get(), &tmp0, 1);
mul(nullptr, &tmp1, 0);
} else { // party() == 2
mul(nullptr, ret, 1);
mul(nullptr, &tmp, 0);
ret->add(&tmp, ret);
mul(nullptr, &tmp0, 1);
mul(nullptr, &tmp1, 0);
}
tmp0.add(&tmp1, ret);
}
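// Sketch of the decomposition above, assuming the usual ABY3 replicated
// layout where party i holds shares (x_i, x_{i+1}) of x = x0 + x1 + x2:
// b * x = b * (x1 + x2) + b * x0,
// so the first mul() call (rhs_party = 1, party 1 supplies x1 + x2) produces
// tmp0, the second (rhs_party = 0, party 0 supplies x0) produces tmp1,
// and the two partial products are added into ret at the end.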
template<typename T>
void BooleanTensor<T>::onehot_from_cmp() {
// cmp is done slice by slice
// suppose that shape = [k, m, n, ...]
// then the shape of all slices and tmp tensors is [1, m, n, ...]
auto shape_ = shape();
size_t len = shape_[0];
shape_[0] = 1;
std::vector<std::shared_ptr<TensorAdapter<T>>> tmp;
for (int i = 0; i < 4; ++i) {
tmp.emplace_back(
tensor_factory()->template create<T>(shape_));
}
tmp.emplace_back(tensor_factory()->template create<T>());
tmp.emplace_back(tensor_factory()->template create<T>());
BooleanTensor found(tmp[0].get(), tmp[1].get());
assign_to_tensor(tmp[0].get(), T(0));
assign_to_tensor(tmp[1].get(), T(0));
BooleanTensor not_found(tmp[2].get(), tmp[3].get());
// res[i] = !found & input[i]
// found = found | res[i]
// to find last 1, we search backward
for (size_t i = len; i > 0; --i) {
share(0)->slice(i - 1, i, tmp[4].get());
share(1)->slice(i - 1, i, tmp[5].get());
BooleanTensor cmp_i(tmp[4].get(), tmp[5].get());
found.bitwise_not(&not_found);
not_found.bitwise_and(&cmp_i, &cmp_i);
cmp_i.bitwise_or(&found, &found);
}
}
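// Worked example: for a column whose plaintext along dim 0 is [1, 0, 1, 0],
// the backward scan flips `found` at index 2 (the last 1) and `not_found`
// masks out the earlier 1 at index 0, so the result is the one-hot [0, 0, 1, 0]
// (this matches the expectation checked in onehot_from_cmp_test).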
} // namespace aby3
......@@ -1215,9 +1215,9 @@ TEST_F(BooleanTensorTest, abmul_test) {
gen1(), gen1(), gen1()};
// lhs = 1
sl[0]->data()[0] = 1;
sl[1]->data()[0] = 0;
sl[2]->data()[0] = 0;
sl[0]->data()[0] = -1;
sl[1]->data()[0] = -3;
sl[2]->data()[0] = 3;
BTensor b0(sl[0].get(), sl[1].get());
BTensor b1(sl[1].get(), sl[2].get());
......@@ -1274,9 +1274,9 @@ TEST_F(BooleanTensorTest, abmul2_test) {
gen1(), gen1(), gen1()};
// lhs = 1
sl[0]->data()[0] = 1;
sl[1]->data()[0] = 0;
sl[2]->data()[0] = 0;
sl[0]->data()[0] = -3;
sl[1]->data()[0] = -1;
sl[2]->data()[0] = 3;
// rhs = 12 = 3 + 4 + 5
sr[0]->data()[0] = 3;
......@@ -1331,4 +1331,197 @@ TEST_F(BooleanTensorTest, abmul2_test) {
}
EXPECT_EQ(1 * 12, p->data()[0]);
}
TEST_F(BooleanTensorTest, abmul3_test) {
std::shared_ptr<TensorAdapter<int64_t>> sl[3] = { gen1(), gen1(), gen1() };
std::shared_ptr<TensorAdapter<int64_t>> sr[3] = { gen1(), gen1(), gen1() };
std::shared_ptr<TensorAdapter<int64_t>> sout[6] = { gen1(), gen1(), gen1(),
gen1(), gen1(), gen1()};
// lhs = 0
sl[0]->data()[0] = 373964488827046757;
sl[1]->data()[0] = -2697357730885869060;
sl[2]->data()[0] = -2332413979122373991;
// rhs = -1
sr[0]->data()[0] = 8388121746490115866;
sr[1]->data()[0] = 5851959018403668595;
sr[2]->data()[0] = 4206663308815767154;
BTensor bl0(sl[0].get(), sl[1].get());
BTensor bl1(sl[1].get(), sl[2].get());
BTensor bl2(sl[2].get(), sl[0].get());
FTensor fr0(sr[0].get(), sr[1].get());
FTensor fr1(sr[1].get(), sr[2].get());
FTensor fr2(sr[2].get(), sr[0].get());
FTensor fout0(sout[0].get(), sout[1].get());
FTensor fout1(sout[2].get(), sout[3].get());
FTensor fout2(sout[4].get(), sout[5].get());
auto p = gen1();
_t[0] = std::thread(
[&] () {
ContextHolder::template run_with_context(
_exec_ctx.get(), _mpc_ctx[0], [&](){
bl0.mul(&fr0, &fout0);
fout0.reveal_to_one(0, p.get());
});
}
);
_t[1] = std::thread(
[&] () {
ContextHolder::template run_with_context(
_exec_ctx.get(), _mpc_ctx[1], [&](){
bl1.mul(&fr1, &fout1);
fout1.reveal_to_one(0, nullptr);
});
}
);
_t[2] = std::thread(
[&] () {
ContextHolder::template run_with_context(
_exec_ctx.get(), _mpc_ctx[2], [&](){
bl2.mul(&fr2, &fout2);
fout2.reveal_to_one(0, nullptr);
});
}
);
for (auto &t: _t) {
t.join();
}
EXPECT_EQ(0, p->data()[0]);
}
TEST_F(BooleanTensorTest, abmul4_test) {
std::shared_ptr<TensorAdapter<int64_t>> sl[3] = { gen1(), gen1(), gen1() };
std::shared_ptr<TensorAdapter<int64_t>> sr[3] = { gen1(), gen1(), gen1() };
std::shared_ptr<TensorAdapter<int64_t>> sout[6] = { gen1(), gen1(), gen1(),
gen1(), gen1(), gen1()};
// lhs = 1
sl[0]->data()[0] = 373964488827046757;
sl[1]->data()[0] = -2697357730885869060;
sl[2]->data()[0] = -2332413979122373992;
// rhs = -1
sr[0]->data()[0] = 8388121746490115866;
sr[1]->data()[0] = 5851959018403668595;
sr[2]->data()[0] = 4206663308815767154;
BTensor bl0(sl[0].get(), sl[1].get());
BTensor bl1(sl[1].get(), sl[2].get());
BTensor bl2(sl[2].get(), sl[0].get());
FTensor fr0(sr[0].get(), sr[1].get());
FTensor fr1(sr[1].get(), sr[2].get());
FTensor fr2(sr[2].get(), sr[0].get());
FTensor fout0(sout[0].get(), sout[1].get());
FTensor fout1(sout[2].get(), sout[3].get());
FTensor fout2(sout[4].get(), sout[5].get());
auto p = gen1();
_t[0] = std::thread(
[&] () {
ContextHolder::template run_with_context(
_exec_ctx.get(), _mpc_ctx[0], [&](){
bl0.mul(&fr0, &fout0);
fout0.reveal_to_one(0, p.get());
});
}
);
_t[1] = std::thread(
[&] () {
ContextHolder::template run_with_context(
_exec_ctx.get(), _mpc_ctx[1], [&](){
bl1.mul(&fr1, &fout1);
fout1.reveal_to_one(0, nullptr);
});
}
);
_t[2] = std::thread(
[&] () {
ContextHolder::template run_with_context(
_exec_ctx.get(), _mpc_ctx[2], [&](){
bl2.mul(&fr2, &fout2);
fout2.reveal_to_one(0, nullptr);
});
}
);
for (auto &t: _t) {
t.join();
}
EXPECT_EQ(-1, p->data()[0]);
}
TEST_F(BooleanTensorTest, onehot_from_cmp_test) {
std::vector<size_t> shape = {4, 1};
std::shared_ptr<TensorAdapter<int64_t>> sout[6] =
{ gen(shape), gen(shape), gen(shape), gen(shape), gen(shape), gen(shape)};
for (auto& ptr: sout) {
assign_to_tensor(ptr.get(), 0l);
}
sout[0].get()->data()[0] = 1;
sout[0].get()->data()[2] = 1;
sout[5].get()->data()[0] = 1;
sout[5].get()->data()[2] = 1;
// input plaintext [1010]
BTensor bout0(sout[0].get(), sout[1].get());
BTensor bout1(sout[2].get(), sout[3].get());
BTensor bout2(sout[4].get(), sout[5].get());
auto p = gen(shape);
_t[0] = std::thread(
[&] () {
ContextHolder::template run_with_context(
_exec_ctx.get(), _mpc_ctx[0], [&](){
bout0.onehot_from_cmp();
bout0.reveal_to_one(0, p.get());
});
}
);
_t[1] = std::thread(
[&] () {
ContextHolder::template run_with_context(
_exec_ctx.get(), _mpc_ctx[1], [&](){
bout1.onehot_from_cmp();
bout1.reveal_to_one(0, nullptr);
});
}
);
_t[2] = std::thread(
[&] () {
ContextHolder::template run_with_context(
_exec_ctx.get(), _mpc_ctx[2], [&](){
bout2.onehot_from_cmp();
bout2.reveal_to_one(0, nullptr);
});
}
);
for (auto &t: _t) {
t.join();
}
EXPECT_EQ(0, p->data()[0]);
EXPECT_EQ(0, p->data()[1]);
EXPECT_EQ(1, p->data()[2]);
EXPECT_EQ(0, p->data()[3]);
}
} // namespace aby3
......@@ -20,123 +20,178 @@
#include "aby3_context.h"
#include "core/paddlefl_mpc/mpc_protocol/context_holder.h"
#include "paddle_tensor.h"
#include "boolean_tensor.h"
#include "core/paddlefl_mpc/mpc_protocol/context_holder.h"
namespace aby3 {
template <typename T, size_t N> class FixedPointTensor {
template<typename T, size_t N>
class FixedPointTensor {
public:
explicit FixedPointTensor(TensorAdapter<T> *share_tensor[2]);
explicit FixedPointTensor(TensorAdapter<T>* share_tensor[2]);
explicit FixedPointTensor(TensorAdapter<T> *share_tensor_0,
TensorAdapter<T> *share_tensor_1);
explicit FixedPointTensor(TensorAdapter<T>* share_tensor_0,
TensorAdapter<T>* share_tensor_1);
~FixedPointTensor(){};
~FixedPointTensor() {};
// get mutable share of tensor
TensorAdapter<T> *mutable_share(size_t idx);
//get mutable share of tensor
TensorAdapter<T>* mutable_share(size_t idx);
const TensorAdapter<T> *share(size_t idx) const;
const TensorAdapter<T>* share(size_t idx) const;
size_t numel() const { return _share[0]->numel(); }
size_t numel() const {
return _share[0]->numel();
}
// reveal fixedpointtensor to one party
void reveal_to_one(size_t party, TensorAdapter<T> *ret) const;
void reveal_to_one(size_t party, TensorAdapter<T>* ret) const;
// reveal fixedpointtensor to all parties
void reveal(TensorAdapter<T> *ret) const;
void reveal(TensorAdapter<T>* ret) const;
const std::vector<size_t> shape() const;
// convert TensorAdapter to shares
static void share(const TensorAdapter<T> *input,
TensorAdapter<T> *output_shares[3],
//convert TensorAdapter to shares
static void share(const TensorAdapter<T>* input,
TensorAdapter<T>* output_shares[3],
block seed = g_zero_block);
// element-wise add with FixedPointTensor
void add(const FixedPointTensor *rhs, FixedPointTensor *ret) const;
void add(const FixedPointTensor* rhs, FixedPointTensor* ret) const;
// element-wise add with TensorAdapter
void add(const TensorAdapter<T> *rhs, FixedPointTensor *ret) const;
void add(const TensorAdapter<T>* rhs, FixedPointTensor* ret) const;
// element-wise sub with FixedPointTensor
void sub(const FixedPointTensor *rhs, FixedPointTensor *ret) const;
void sub(const FixedPointTensor* rhs, FixedPointTensor* ret) const;
// element-wise sub with TensorAdapter
void sub(const TensorAdapter<T> *rhs, FixedPointTensor *ret) const;
void sub(const TensorAdapter<T>* rhs, FixedPointTensor* ret) const;
// negative
void negative(FixedPointTensor *ret) const;
void negative(FixedPointTensor* ret) const;
// element-wise mul with FixedPointTensor using truncate1
void mul(const FixedPointTensor *rhs, FixedPointTensor *ret) const;
void mul(const FixedPointTensor* rhs, FixedPointTensor* ret) const;
// element-wise mul with TensorAdapter
void mul(const TensorAdapter<T> *rhs, FixedPointTensor *ret) const;
void mul(const TensorAdapter<T>* rhs, FixedPointTensor* ret) const;
// div by TensorAdapter
void div(const TensorAdapter<T> *rhs, FixedPointTensor *ret) const;
void div(const TensorAdapter<T>* rhs, FixedPointTensor* ret) const;
// div by FixedPointedTensor
// TODO@yqy : operator rhs <= 0 is not supported yet
void div(const FixedPointTensor* rhs, FixedPointTensor* ret,
size_t iter = 16, double x0 = pow(2, -15)) const;
// long div by boolean circuit
// res_int_len: estimated bit len of the integer part of result
void long_div(const FixedPointTensor* rhs,
FixedPointTensor* ret, size_t res_int_len = 20) const;
// element-wise mul, use trunc2
void mul2(const FixedPointTensor *rhs, FixedPointTensor *ret) const;
void inverse_square_root(FixedPointTensor* ret,
size_t iter = 16, double x0 = 0x1p-10) const;
// dot_mul
template <template <typename U, size_t...> class CTensor, size_t... N1>
void dot_mul(const CTensor<T, N1...> *rhs, FixedPointTensor *ret) const;
template<template<typename U, size_t...> class CTensor,
size_t... N1>
void dot_mul(const CTensor<T, N1...>* rhs, FixedPointTensor* ret) const;
// sum all element
void sum(FixedPointTensor *ret) const;
//sum all element
void sum(FixedPointTensor* ret) const;
// mat_mul with FixedPointTensor
void mat_mul(const FixedPointTensor *rhs, FixedPointTensor *ret) const;
void mat_mul(const FixedPointTensor* rhs, FixedPointTensor* ret) const;
// mat_mul with TensorAdapter
void mat_mul(const TensorAdapter<T> *rhs, FixedPointTensor *ret) const;
void mat_mul(const TensorAdapter<T>* rhs, FixedPointTensor* ret) const;
void exp(FixedPointTensor *ret, size_t iter = 8) const;
// exp approximate: exp(x) = \lim_{n->inf} (1+x/n)^n
// where n = 2^iter
void exp(FixedPointTensor* ret, size_t iter = 8) const;
// element-wise relu
void relu(FixedPointTensor *ret) const;
void relu(FixedPointTensor* ret) const;
// element-wise sigmoid
void sigmoid(FixedPointTensor *ret) const;
// element-wise relu with relu'
void relu_with_derivative(FixedPointTensor* ret, BooleanTensor<T>* derivative) const;
// element-wise sigmoid using a 3-piece piecewise polynomial
void sigmoid(FixedPointTensor* ret) const;
// element-wise sigmoid using a 5-piece piecewise polynomial
// see paper [Privacy-preserving collaborative machine learning
// on genomic data using TensorFlow]
void sigmoid_enhanced(FixedPointTensor* ret) const;
// element-wise sigmoid using Chebyshev polynomial approximation
// implemented with ref to tfe[https://github.com/tf-encrypted/tf-encrypted]
void sigmoid_chebyshev(FixedPointTensor* ret) const;
// softmax axis = -1
void softmax(FixedPointTensor *ret) const;
void softmax(FixedPointTensor* ret,
bool use_relu = false,
bool use_long_div = true) const;
// element-wise polynomial
void polynomial(const TensorAdapter<T> *coeff, FixedPointTensor *ret) const;
void polynomial(const TensorAdapter<T>* coeff,
FixedPointTensor* ret) const;
// element-wise piecewise polynomial
void polynomial_piecewise(const TensorAdapter<T> *coeff,
const TensorAdapter<T> *break_point,
FixedPointTensor *ret) const;
void polynomial_piecewise(
const TensorAdapter<T>* coeff,
const TensorAdapter<T>* break_point,
FixedPointTensor* ret) const;
// element-wise compare
// <
template <template <typename U, size_t...> class CTensor, size_t... N1>
void lt(const CTensor<T, N1...> *rhs, BooleanTensor<T> *ret) const;
template<template<typename U, size_t...> class CTensor,
size_t... N1>
void lt(const CTensor<T, N1...>* rhs, BooleanTensor<T>* ret) const;
// <=
template <template <typename U, size_t...> class CTensor, size_t... N1>
void leq(const CTensor<T, N1...> *rhs, BooleanTensor<T> *ret) const;
template<template<typename U, size_t...> class CTensor,
size_t... N1>
void leq(const CTensor<T, N1...>* rhs, BooleanTensor<T>* ret) const;
// >
template <template <typename U, size_t...> class CTensor, size_t... N1>
void gt(const CTensor<T, N1...> *rhs, BooleanTensor<T> *ret) const;
template<template<typename U, size_t...> class CTensor,
size_t... N1>
void gt(const CTensor<T, N1...>* rhs, BooleanTensor<T>* ret) const;
// >=
template <template <typename U, size_t...> class CTensor, size_t... N1>
void geq(const CTensor<T, N1...> *rhs, BooleanTensor<T> *ret) const;
template<template<typename U, size_t...> class CTensor,
size_t... N1>
void geq(const CTensor<T, N1...>* rhs, BooleanTensor<T>* ret) const;
// ==
template <template <typename U, size_t...> class CTensor, size_t... N1>
void eq(const CTensor<T, N1...> *rhs, BooleanTensor<T> *ret) const;
template<template<typename U, size_t...> class CTensor,
size_t... N1>
void eq(const CTensor<T, N1...>* rhs, BooleanTensor<T>* ret) const;
// !=
template <template <typename U, size_t...> class CTensor, size_t... N1>
void neq(const CTensor<T, N1...> *rhs, BooleanTensor<T> *ret) const;
template<template<typename U, size_t...> class CTensor,
size_t... N1>
void neq(const CTensor<T, N1...>* rhs, BooleanTensor<T>* ret) const;
// element-wise max
// if not null, cmp stores true if rhs is bigger
template<template<typename U, size_t...> class CTensor,
size_t... N1>
void max(const CTensor<T, N1...>* rhs,
FixedPointTensor* ret,
BooleanTensor<T>* cmp = nullptr) const;
// for tensor with shape like [k, n, m, ...]
// ret shape is [1, n, m, ...], in which every element is the largest of the k elements
// pos shape is [k, n, m, ...], each column of pos is a one-hot tensor
// indicating the max element's position
void max_pooling(FixedPointTensor* ret,
BooleanTensor<T>* pos = nullptr) const;
private:
static inline std::shared_ptr<AbstractContext> aby3_ctx() {
......@@ -147,21 +202,38 @@ private:
return paddle::mpc::ContextHolder::tensor_factory();
}
static void truncate1(FixedPointTensor *op, FixedPointTensor *ret,
static void truncate(const FixedPointTensor* op, FixedPointTensor* ret,
size_t scaling_factor);
template<typename MulFunc>
static void mul_trunc(const FixedPointTensor<T, N>* lhs,
const FixedPointTensor<T, N>* rhs,
FixedPointTensor<T, N>* ret,
MulFunc mul_func);
// the truncate3 protocol avoids the msb-loss error during truncation
// at the cost of an acceptable security compromise
static void truncate3(const FixedPointTensor* op, FixedPointTensor* ret,
size_t scaling_factor);
// reduce last dim
static void reduce(FixedPointTensor<T, N> *input,
FixedPointTensor<T, N> *ret);
static void reduce(FixedPointTensor<T, N>* input,
FixedPointTensor<T, N>* ret);
static size_t party() { return aby3_ctx()->party(); }
static size_t party() {
return aby3_ctx()->party();
}
static size_t pre_party() { return aby3_ctx()->pre_party(); }
static size_t pre_party() {
return aby3_ctx()->pre_party();
}
static size_t next_party() { return aby3_ctx()->next_party(); }
static size_t next_party() {
return aby3_ctx()->next_party();
}
static void reshare(const TensorAdapter<T> *send_val,
TensorAdapter<T> *recv_val) {
static void reshare(const TensorAdapter<T>* send_val,
TensorAdapter<T>* recv_val) {
if (party() == 0) {
aby3_ctx()->network()->template recv(next_party(), *recv_val);
aby3_ctx()->network()->template send(pre_party(), *send_val);
......@@ -171,9 +243,17 @@ private:
}
}
TensorAdapter<T> *_share[2];
static void reciprocal(const FixedPointTensor* op, FixedPointTensor* ret,
size_t iter, double x0);
static void inverse_square_root(const FixedPointTensor* op,
FixedPointTensor* ret,
size_t iter, double x0);
TensorAdapter<T>* _share[2];
};
} // namespace aby3
} //namespace aby3
#include "fixedpoint_tensor_imp.h"
......@@ -14,45 +14,45 @@
#pragma once
#include <algorithm>
#include <memory>
#include <algorithm>
#include "paddle/fluid/platform/enforce.h"
#include "prng.h"
namespace aby3 {
template <typename T, size_t N>
FixedPointTensor<T, N>::FixedPointTensor(TensorAdapter<T> *share_tensor[2]) {
template<typename T, size_t N>
FixedPointTensor<T, N>::FixedPointTensor(TensorAdapter<T>* share_tensor[2]) {
// TODO: check tensors' shapes
_share[0] = share_tensor[0];
_share[1] = share_tensor[1];
}
template <typename T, size_t N>
FixedPointTensor<T, N>::FixedPointTensor(TensorAdapter<T> *share_tensor_0,
TensorAdapter<T> *share_tensor_1) {
template<typename T, size_t N>
FixedPointTensor<T, N>::FixedPointTensor(TensorAdapter<T>* share_tensor_0,
TensorAdapter<T>* share_tensor_1) {
// TODO: check tensors' shapes
_share[0] = share_tensor_0;
_share[1] = share_tensor_1;
}
template <typename T, size_t N>
TensorAdapter<T> *FixedPointTensor<T, N>::mutable_share(size_t idx) {
template<typename T, size_t N>
TensorAdapter<T>* FixedPointTensor<T, N>::mutable_share(size_t idx) {
PADDLE_ENFORCE_LT(idx, 2, "Input should be less than 2.");
return _share[idx];
}
template <typename T, size_t N>
const TensorAdapter<T> *FixedPointTensor<T, N>::share(size_t idx) const {
template<typename T, size_t N>
const TensorAdapter<T>* FixedPointTensor<T, N>::share(size_t idx) const {
PADDLE_ENFORCE_LT(idx, 2, "Input should be less than 2.");
return _share[idx];
}
// reveal fixedpointtensor to one party
template <typename T, size_t N>
template<typename T, size_t N>
void FixedPointTensor<T, N>::reveal_to_one(size_t party,
TensorAdapter<T> *ret) const {
TensorAdapter<T>* ret) const {
if (party == this->party()) {
// TODO: check if tensor shape equal
......@@ -71,28 +71,28 @@ void FixedPointTensor<T, N>::reveal_to_one(size_t party,
}
// reveal fixedpointtensor to all parties
template <typename T, size_t N>
void FixedPointTensor<T, N>::reveal(TensorAdapter<T> *ret) const {
template<typename T, size_t N>
void FixedPointTensor<T, N>::reveal(TensorAdapter<T>* ret) const {
for (size_t i = 0; i < 3; ++i) {
reveal_to_one(i, ret);
}
}
template <typename T, size_t N>
template<typename T, size_t N>
const std::vector<size_t> FixedPointTensor<T, N>::shape() const {
return _share[0]->shape();
}
// convert TensorAdapter to shares
template <typename T, size_t N>
void FixedPointTensor<T, N>::share(const TensorAdapter<T> *input,
TensorAdapter<T> *output_shares[3],
//convert TensorAdapter to shares
template<typename T, size_t N>
void FixedPointTensor<T, N>::share(const TensorAdapter<T>* input,
TensorAdapter<T>* output_shares[3],
block seed) {
if (equals(seed, g_zero_block)) {
seed = block_from_dev_urandom();
}
// set seed of prng[2]
//set seed of prng[2]
aby3_ctx()->set_random_seed(seed, 2);
aby3_ctx()->template gen_random_private(*output_shares[0]);
......@@ -106,17 +106,18 @@ void FixedPointTensor<T, N>::share(const TensorAdapter<T> *input,
}
}
template <typename T, size_t N>
void FixedPointTensor<T, N>::add(const FixedPointTensor<T, N> *rhs,
FixedPointTensor<T, N> *ret) const {
template<typename T, size_t N>
void FixedPointTensor<T, N>::add(const FixedPointTensor<T, N>* rhs,
FixedPointTensor<T, N>* ret) const {
_share[0]->add(rhs->_share[0], ret->_share[0]);
_share[1]->add(rhs->_share[1], ret->_share[1]);
}
template <typename T, size_t N>
void FixedPointTensor<T, N>::add(const TensorAdapter<T> *rhs,
FixedPointTensor<T, N> *ret) const {
PADDLE_ENFORCE_EQ(N, rhs->scaling_factor(), "no match scaling factor");
template<typename T, size_t N>
void FixedPointTensor<T, N>::add(const TensorAdapter<T>* rhs,
FixedPointTensor<T, N>* ret) const {
PADDLE_ENFORCE_EQ(N, rhs->scaling_factor(),
"no match scaling factor");
if (party() == 0) {
_share[0]->add(rhs, ret->_share[0]);
_share[1]->copy(ret->_share[1]);
......@@ -129,17 +130,18 @@ void FixedPointTensor<T, N>::add(const TensorAdapter<T> *rhs,
}
}
template <typename T, size_t N>
void FixedPointTensor<T, N>::sub(const FixedPointTensor<T, N> *rhs,
FixedPointTensor<T, N> *ret) const {
template<typename T, size_t N>
void FixedPointTensor<T, N>::sub(const FixedPointTensor<T, N>* rhs,
FixedPointTensor<T, N>* ret) const {
_share[0]->sub(rhs->_share[0], ret->_share[0]);
_share[1]->sub(rhs->_share[1], ret->_share[1]);
}
template <typename T, size_t N>
void FixedPointTensor<T, N>::sub(const TensorAdapter<T> *rhs,
FixedPointTensor<T, N> *ret) const {
PADDLE_ENFORCE_EQ(N, rhs->scaling_factor(), "no match scaling factor");
template<typename T, size_t N>
void FixedPointTensor<T, N>::sub(const TensorAdapter<T>* rhs,
FixedPointTensor<T, N>* ret) const {
PADDLE_ENFORCE_EQ(N, rhs->scaling_factor(),
"no match scaling factor");
if (party() == 0) {
_share[0]->sub(rhs, ret->_share[0]);
_share[1]->copy(ret->_share[1]);
......@@ -152,51 +154,26 @@ void FixedPointTensor<T, N>::sub(const TensorAdapter<T> *rhs,
}
}
template <typename T, size_t N>
void FixedPointTensor<T, N>::negative(FixedPointTensor<T, N> *ret) const {
template<typename T, size_t N>
void FixedPointTensor<T, N>::negative(FixedPointTensor<T, N>* ret) const {
_share[0]->negative(ret->_share[0]);
_share[1]->negative(ret->_share[1]);
}
template <typename T, size_t N>
void FixedPointTensor<T, N>::mul(const FixedPointTensor<T, N> *rhs,
FixedPointTensor<T, N> *ret) const {
auto r_zero = tensor_factory()->template create<T>(this->shape());
aby3_ctx()->gen_zero_sharing_arithmetic(*r_zero.get());
// temp = _share[0] * rhs->_share[0] +
// _share[0] * rhs->_share[1] +
// _share[1] * rhs->_share[0] +
// r_zero
auto temp = tensor_factory()->template create<T>(this->shape());
auto temp1 = tensor_factory()->template create<T>(this->shape());
_share[0]->mul(rhs->_share[0], temp.get());
_share[0]->mul(rhs->_share[1], temp1.get());
temp1->add(temp.get(), temp1.get());
_share[1]->mul(rhs->_share[0], temp.get());
temp1->add(r_zero.get(), temp1.get());
temp->add(temp1.get(), temp.get());
auto temp2 = tensor_factory()->template create<T>(this->shape());
auto temp3 = tensor_factory()->template create<T>(this->shape());
TensorAdapter<int64_t> *temp_array[2] = {temp2.get(), temp3.get()};
std::shared_ptr<FixedPointTensor<T, N>> ret_no_trunc =
std::make_shared<FixedPointTensor<T, N>>(temp_array);
temp->copy(ret_no_trunc->_share[0]);
reshare(temp.get(), ret_no_trunc->_share[1]);
truncate1(ret_no_trunc.get(), ret, N);
template<typename T, size_t N>
void FixedPointTensor<T, N>::mul(const FixedPointTensor<T, N>* rhs,
FixedPointTensor<T, N>* ret) const {
mul_trunc(this, rhs, ret, &TensorAdapter<T>::mul);
}
template <typename T, size_t N>
void FixedPointTensor<T, N>::truncate1(FixedPointTensor<T, N> *op,
FixedPointTensor<T, N> *ret,
template<typename T, size_t N>
void FixedPointTensor<T, N>::truncate(const FixedPointTensor<T, N>* op,
FixedPointTensor<T, N>* ret,
size_t scaling_factor) {
if (scaling_factor == 0) {
op->share(0)->copy(ret->mutable_share(0));
op->share(1)->copy(ret->mutable_share(1));
return;
}
// implement ABY3's truncate1 algorithm
if (party() == 0) {
// party0
......@@ -209,7 +186,10 @@ void FixedPointTensor<T, N>::truncate1(FixedPointTensor<T, N> *op,
aby3_ctx()->template gen_random(*r_12.get(), true);
op->_share[0]->add(op->_share[1], ret->_share[0]);
// trunc from [SecureML, Thm.1]
ret->_share[0]->negative(ret->_share[0]);
ret->_share[0]->rshift(scaling_factor, ret->_share[0]);
ret->_share[0]->negative(ret->_share[0]);
ret->_share[0]->sub(r_12.get(), ret->_share[0]);
aby3_ctx()->network()->template send(0, *(ret->_share[0]));
......@@ -224,68 +204,153 @@ void FixedPointTensor<T, N>::truncate1(FixedPointTensor<T, N> *op,
r_21->copy(ret->_share[0]);
}
return;
}
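// Worked fixed-point example for the right shift above: with scaling factor
// N = 16, a real value v is encoded as v * 2^16, so a product carries 2^32:
// 1.5 -> 98304, 2.0 -> 131072
// 98304 * 131072 = 12884901888, and 12884901888 >> 16 = 196608 = 3.0 * 2^16.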
template <typename T, size_t N>
void FixedPointTensor<T, N>::mul2(const FixedPointTensor<T, N> *rhs,
FixedPointTensor<T, N> *ret) const {
// element-wise mul implemented by ABY3's truncate2 algorithm
// Protocol. `truncate3`
// P2 randomly generates r' \in (-2^62, 2^62) and r'_0, r_0, r_1 in Z_{2^64},
// P2 computes r'_1 = r' - r'_0 and r_2 = r'/2^N - r_0 - r_1, and lets x2 = r_2
// P2 sends r_0, r'_0 to P0 and r_1, r'_1 to P1
// P1 and P0 execute "reveal x - r' to P1"
// P1 computes x1 = (x - r') / 2^N + r_1
// P0 sets x0 = r_0
// P0, P1, P2 invoke reshare() with inputs x0, x1, x2 respectively.
template<typename T, size_t N>
void FixedPointTensor<T, N>::truncate3(const FixedPointTensor<T, N>* op,
FixedPointTensor<T, N>* ret,
size_t scaling_factor) {
if (scaling_factor == 0) {
op->share(0)->copy(ret->mutable_share(0));
op->share(1)->copy(ret->mutable_share(1));
return;
}
std::vector<std::shared_ptr<TensorAdapter<T>>> temp;
for (int i = 0; i < 12; ++i) {
temp.emplace_back(tensor_factory()->template create<T>(this->shape()));
}
// gen boolean random share
aby3_ctx()->template gen_random(*temp[0], 0);
aby3_ctx()->template gen_random(*temp[1], 1);
std::shared_ptr<BooleanTensor<T>> r =
std::make_shared<BooleanTensor<T>>(temp[0].get(), temp[1].get());
std::shared_ptr<BooleanTensor<T>> r_integer =
std::make_shared<BooleanTensor<T>>(temp[4].get(), temp[5].get());
r->rshift(N, r_integer.get());
std::shared_ptr<FixedPointTensor<T, N>> r_fixed =
std::make_shared<FixedPointTensor<T, N>>(temp[6].get(), temp[7].get());
std::shared_ptr<FixedPointTensor<T, N>> r_integer_fixed =
std::make_shared<FixedPointTensor<T, N>>(temp[8].get(), temp[9].get());
r->b2a(r_fixed.get());
// r'
r_integer->b2a(r_integer_fixed.get());
// r_zero = gen_zero_share(_shape[0]->shape)
auto r_zero = tensor_factory()->template create<T>(this->shape());
aby3_ctx()->template gen_zero_sharing_arithmetic(*r_zero);
// temp[10] = _share[0] * rhs->_share[0] +
// _share[0] * rhs->_share[1] +
// _share[1] * rhs->_share[0] +
// r_zero - r[0]
_share[0]->mul(rhs->_share[0], temp[11].get());
_share[0]->mul(rhs->_share[1], temp[10].get());
temp[11]->add(temp[10].get(), temp[11].get());
_share[1]->mul(rhs->_share[0], temp[10].get());
temp[11]->add(temp[10].get(), temp[11].get());
r_zero->sub(r_fixed->_share[0], temp[10].get());
temp[10]->add(temp[11].get(), temp[10].get());
// ret = reshare
temp[10]->copy(ret->_share[0]);
reshare(temp[10].get(), ret->_share[1]);
// ret = reconstruct(ret).rshift(N)
// ret = ret + r'
ret->reveal(temp[10].get());
temp[10]->rshift(N, temp[10].get());
r_integer_fixed->add(temp[10].get(), ret);
if (party() == 2) {
for (int i = 0; i < 7; ++i) {
temp.emplace_back(
tensor_factory()->template create<T>(op->shape()));
}
// r', constrained to (-2^62, 2^62)
// notice: when r' is constrained to (-2^62, 2^62),
// the SD (statistical distance) of x - r' between this case
// and r' taken over Z_{2^64} is equal to |X| / (2^63 + |X|)
// according to http://yuyu.hk/files/ho2.pdf
aby3_ctx()->template gen_random_private(*temp[0]);
int64_t contraint_upper = ~((uint64_t) 1 << 62);
int64_t contraint_low = (uint64_t) 1 << 62;
std::for_each(temp[0]->data(), temp[0]->data() + temp[0]->numel(),
[&contraint_upper, &contraint_low] (T& a) {
// constrain to -2^62 < a < 2^62
if (a >= 0) {
a &= contraint_upper;
} else {
a |= contraint_low;
}
});
//r'_0, r'_1
aby3_ctx()->template gen_random_private(*temp[1]);
temp[0]->sub(temp[1].get(), temp[2].get());
// r, r_0, r_1
temp[0]->rshift(scaling_factor, temp[3].get());
aby3_ctx()->template gen_random_private(*temp[4]);
aby3_ctx()->template gen_random_private(*temp[5]);
// r_2
temp[3]->sub(temp[4].get(), temp[6].get());
temp[6]->sub(temp[5].get(), temp[6].get());
aby3_ctx()->network()->template send(1, *temp[2]);
aby3_ctx()->network()->template send(1, *temp[5]);
aby3_ctx()->network()->template send(0, *temp[1]);
aby3_ctx()->network()->template send(0, *temp[4]);
temp[6]->copy(ret->mutable_share(0));
} else if (party() == 1) {
for (int i = 0; i < 4; ++i) {
temp.emplace_back(
tensor_factory()->template create<T>(op->shape()));
}
// r'_1, r_1
aby3_ctx()->network()->template recv(2, *temp[0]);
aby3_ctx()->network()->template recv(2, *temp[1]);
// recv x0 - r'_0 from party 0
aby3_ctx()->network()->template recv(0, *temp[2]);
//reveal x - r' to party 1
op->share(0)->add(op->share(1), temp[3].get());
temp[3]->add(temp[2].get(), temp[3].get());
temp[3]->sub(temp[0].get(), temp[3].get());
// truncate x-r'
temp[3]->rshift(scaling_factor, temp[3].get());
temp[3]->add(temp[1].get(), ret->mutable_share(0));
} else {
for (int i = 0; i < 3; ++i) {
temp.emplace_back(
tensor_factory()->template create<T>(op->shape()));
}
// r'_0, r_0
aby3_ctx()->network()->template recv(2, *temp[0]);
aby3_ctx()->network()->template recv(2, *temp[1]);
//send x0 - r'_0 to party 1
op->share(0)->sub(temp[0].get(), temp[2].get());
aby3_ctx()->network()->template send(1, *temp[2]);
temp[1]->copy(ret->mutable_share(0));
}
reshare(ret->share(0), ret->mutable_share(1));
// compensation for carry in
auto tensor_carry_in = tensor_factory()->template create<T>(ret->shape());
assign_to_tensor(tensor_carry_in.get(), (T)1);
tensor_carry_in->scaling_factor() = N;
ret->add(tensor_carry_in.get(), ret);
}
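// Plaintext walk-through of the protocol comment above (randomness written
// out, wrap-around ignored): with x = x0 + x1 + x2 to be truncated,
// P2's share: r'/2^N - r_0 - r_1
// P1's share: (x - r')/2^N + r_1
// P0's share: r_0
// sum: (x - r')/2^N + r'/2^N ~= x / 2^N,
// so the randomness cancels and only the truncated value remains, up to the
// rounding handled by the carry-in compensation added at the end.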
template <typename T, size_t N>
void FixedPointTensor<T, N>::mul(const TensorAdapter<T> *rhs,
FixedPointTensor<T, N> *ret) const {
template<typename T, size_t N>
template<typename MulFunc>
void FixedPointTensor<T, N>::mul_trunc(const FixedPointTensor<T, N>* lhs,
const FixedPointTensor<T, N>* rhs,
FixedPointTensor<T, N>* ret,
MulFunc mul_func) {
auto r_zero = tensor_factory()->template create<T>(ret->shape());
aby3_ctx()->gen_zero_sharing_arithmetic(*r_zero.get());
// temp = _share[0]->mul(rhs->_share[0]) +
// _share[0]->mul(rhs->_share[1]) +
// _share[1]->mul(rhs->_share[0]) +
// r_zero
auto temp = tensor_factory()->template create<T>(ret->shape());
auto temp1 = tensor_factory()->template create<T>(ret->shape());
// use mul_func to fit both element_wise mul and mat mul
(lhs->share(0)->*mul_func)(rhs->share(0), temp.get());
(lhs->share(0)->*mul_func)(rhs->share(1), temp1.get());
temp1->add(temp.get(), temp1.get());
(lhs->share(1)->*mul_func)(rhs->share(0), temp.get());
temp1->add(r_zero.get(), temp1.get());
temp->add(temp1.get(), temp.get());
auto temp2 = tensor_factory()->template create<T>(ret->shape());
auto temp3 = tensor_factory()->template create<T>(ret->shape());
TensorAdapter<int64_t>* temp_array[2] = {temp2.get(), temp3.get()};
std::shared_ptr<FixedPointTensor<T, N>> ret_no_trunc =
std::make_shared<FixedPointTensor<T, N>>(temp_array);
temp->copy(ret_no_trunc->_share[0]);
reshare(temp.get(), ret_no_trunc->_share[1]);
truncate3(ret_no_trunc.get(), ret, N);
}
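// `mul_func` is a pointer to a TensorAdapter<T> member function, hence the
// ->* call syntax above; the two existing call sites are
// mul_trunc(this, rhs, ret, &TensorAdapter<T>::mul); // element-wise mul
// mul_trunc(this, rhs, ret, &TensorAdapter<T>::mat_mul); // mat_mul
// which lets one body serve both products before the shared truncate3 step.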
template<typename T, size_t N>
void FixedPointTensor<T, N>::mul(const TensorAdapter<T>* rhs,
FixedPointTensor<T, N>* ret) const {
// PADDLE_ENFORCE_EQ(N, rhs->scaling_factor(),
// "no match scaling factor");
auto temp0 = tensor_factory()->template create<T>(this->shape());
......@@ -295,16 +360,16 @@ void FixedPointTensor<T, N>::mul(const TensorAdapter<T> *rhs,
_share[0]->mul(rhs, temp->_share[0]);
_share[1]->mul(rhs, temp->_share[1]);
truncate1(temp.get(), ret, rhs->scaling_factor());
truncate3(temp.get(), ret, rhs->scaling_factor());
}
template <typename T, size_t N>
void FixedPointTensor<T, N>::sum(FixedPointTensor<T, N> *ret) const {
template<typename T, size_t N>
void FixedPointTensor<T, N>::sum(FixedPointTensor<T, N>* ret) const {
PADDLE_ENFORCE_EQ(ret->numel(), 1, "output size should be 1.");
T sum1 = (T)0;
T sum2 = (T)0;
T *iter_0 = _share[0]->data();
T *iter_1 = _share[1]->data();
T sum1 = (T) 0;
T sum2 = (T) 0;
T* iter_0 = _share[0]->data();
T* iter_1 = _share[1]->data();
for (int i = 0; i < this->numel(); ++i) {
sum1 += *(iter_0 + i);
sum2 += *(iter_1 + i);
......@@ -313,10 +378,11 @@ void FixedPointTensor<T, N>::sum(FixedPointTensor<T, N> *ret) const {
assign_to_tensor(ret->_share[1], sum2);
}
template <typename T, size_t N>
template <template <typename U, size_t...> class CTensor, size_t... N1>
void FixedPointTensor<T, N>::dot_mul(const CTensor<T, N1...> *rhs,
FixedPointTensor<T, N> *ret) const {
template<typename T, size_t N>
template<template<typename U, size_t...> class CTensor,
size_t... N1>
void FixedPointTensor<T, N>::dot_mul(const CTensor<T, N1...>* rhs,
FixedPointTensor<T, N>* ret) const {
PADDLE_ENFORCE_EQ(ret->numel(), 1, "output size should be 1.");
auto temp0 = tensor_factory()->template create<T>(this->shape());
......@@ -327,76 +393,61 @@ void FixedPointTensor<T, N>::dot_mul(const CTensor<T, N1...> *rhs,
temp->sum(ret);
}
template <typename T, size_t N>
void FixedPointTensor<T, N>::mat_mul(const FixedPointTensor<T, N> *rhs,
FixedPointTensor<T, N> *ret) const {
auto r_zero = tensor_factory()->template create<T>(ret->shape());
aby3_ctx()->gen_zero_sharing_arithmetic(*r_zero.get());
// temp = _share[0]->mat_mul(rhs->_share[0]) +
// _share[0]->mat_mul(rhs->_share[1]) +
// _share[1]->mat_mul(rhs->_share[0]) +
// r_zero
auto temp = tensor_factory()->template create<T>(ret->shape());
auto temp1 = tensor_factory()->template create<T>(ret->shape());
_share[0]->mat_mul(rhs->_share[0], temp.get());
_share[0]->mat_mul(rhs->_share[1], temp1.get());
temp1->add(temp.get(), temp1.get());
_share[1]->mat_mul(rhs->_share[0], temp.get());
temp1->add(r_zero.get(), temp1.get());
temp->add(temp1.get(), temp.get());
auto temp2 = tensor_factory()->template create<T>(ret->shape());
auto temp3 = tensor_factory()->template create<T>(ret->shape());
TensorAdapter<int64_t> *temp_array[2] = {temp2.get(), temp3.get()};
std::shared_ptr<FixedPointTensor<T, N>> ret_no_trunc =
std::make_shared<FixedPointTensor<T, N>>(temp_array);
temp->copy(ret_no_trunc->_share[0]);
reshare(temp.get(), ret_no_trunc->_share[1]);
truncate1(ret_no_trunc.get(), ret, N);
template<typename T, size_t N>
void FixedPointTensor<T, N>::mat_mul(const FixedPointTensor<T, N>* rhs,
FixedPointTensor<T, N>* ret) const {
mul_trunc(this, rhs, ret, &TensorAdapter<T>::mat_mul);
}
template <typename T, size_t N>
void FixedPointTensor<T, N>::mat_mul(const TensorAdapter<T> *rhs,
FixedPointTensor<T, N> *ret) const {
template<typename T, size_t N>
void FixedPointTensor<T, N>::mat_mul(const TensorAdapter<T>* rhs,
FixedPointTensor<T, N>* ret) const {
_share[0]->mat_mul(rhs, ret->_share[0]);
_share[1]->mat_mul(rhs, ret->_share[1]);
truncate1(ret, ret, rhs->scaling_factor());
truncate3(ret, ret, rhs->scaling_factor());
}
template <typename T, size_t N>
void FixedPointTensor<T, N>::div(const TensorAdapter<T> *rhs,
FixedPointTensor<T, N> *ret) const {
PADDLE_ENFORCE_EQ(N, rhs->scaling_factor(), "no match scaling factor");
template< typename T, size_t N>
void FixedPointTensor<T, N>::div(const TensorAdapter<T>* rhs,
FixedPointTensor<T, N>* ret) const {
PADDLE_ENFORCE_EQ(N, rhs->scaling_factor(),
"no match scaling factor");
auto temp = tensor_factory()->template create<T>(this->shape());
double scale = std::pow(2, rhs->scaling_factor());
auto inverse = [scale](T d) -> T { return 1.0 * scale / d * scale; };
std::transform(rhs->data(), rhs->data() + rhs->numel(), temp->data(),
inverse);
auto inverse = [scale](T d) -> T {
return 1.0 * scale / d * scale; };
std::transform(rhs->data(), rhs->data() + rhs->numel(),
temp->data(), inverse);
temp->scaling_factor() = rhs->scaling_factor();
this->mul(temp.get(), ret);
}
template <typename T, size_t N>
void FixedPointTensor<T, N>::exp(FixedPointTensor<T, N> *ret,
template<typename T, size_t N>
void FixedPointTensor<T, N>::div(const FixedPointTensor<T, N>* rhs,
FixedPointTensor<T, N>* ret,
size_t iter, double x0) const {
auto temp0 = tensor_factory()->template create<T>(ret->shape());
auto temp1 = tensor_factory()->template create<T>(ret->shape());
std::shared_ptr<FixedPointTensor<T, N>> temp =
std::make_shared<FixedPointTensor<T, N>>(temp0.get(), temp1.get());
reciprocal(rhs, temp.get(), iter, x0);
this->mul(temp.get(), ret);
}
template<typename T, size_t N>
void FixedPointTensor<T, N>::exp(FixedPointTensor<T, N>* ret,
size_t iter) const {
// exp approximate: exp(x) = \lim_{n->inf} (1+x/n)^n
// where n = 2^iter
auto pow_iter = tensor_factory()->template create<T>(this->shape());
assign_to_tensor(pow_iter.get(), (T)(pow(2, N - iter)));
assign_to_tensor(pow_iter.get(), (T) (pow(2, N -iter)));
pow_iter->scaling_factor() = N;
auto tensor_one = tensor_factory()->template create<T>(this->shape());
assign_to_tensor(tensor_one.get(), (T)1 << N);
assign_to_tensor(tensor_one.get(), (T) 1 << N);
tensor_one->scaling_factor() = N;
this->mul(pow_iter.get(), ret);
......@@ -408,79 +459,130 @@ void FixedPointTensor<T, N>::exp(FixedPointTensor<T, N> *ret,
}
}
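// Worked numbers for the approximation comment above: with the default
// iter = 8, n = 2^8 = 256, so exp(1) ~= (1 + 1/256)^256 ~= 2.713 versus
// e ~= 2.71828; each extra iteration halves x/n and adds one more squaring.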
template <typename T, size_t N>
void FixedPointTensor<T, N>::relu(FixedPointTensor<T, N> *ret) const {
// utilize polynomial_piecewise
template< typename T, size_t N>
void FixedPointTensor<T, N>::relu(FixedPointTensor<T, N>* ret) const {
//utilize polynomial_piecewise
// break_point = {0}, coeff[0] = {0, 0}, coeff[1] = {0, 1}
// break_point.shape = {1, this->shape}, coeff.shape = {2, 2, this->shape}
auto shape_ = shape();
// construct break_point
//construct break_point
auto b_shape = shape_;
b_shape.insert(b_shape.begin(), 1);
auto break_point = tensor_factory()->template create<T>(b_shape);
T *b_ptr = break_point->data();
T* b_ptr = break_point->data();
for (size_t i = 0; i < break_point->numel(); ++i) {
b_ptr[i] = 0;
}
break_point->scaling_factor() = N;
// construct coeff
std::vector<size_t> c_shape = {2, 2};
c_shape.insert(c_shape.end(), shape_.begin(), shape_.end());
auto coeff = tensor_factory()->template create<T>(c_shape);
T *c_ptr = coeff->data();
T* c_ptr = coeff->data();
for (size_t i = 0; i < 3 * this->numel(); ++i) {
c_ptr[i] = 0;
}
for (size_t i = 3 * this->numel(); i < 4 * this->numel(); ++i) {
c_ptr[i] = (T)1 << N;
c_ptr[i] = (T) 1 << N;
}
coeff->scaling_factor() = N;
this->polynomial_piecewise(coeff.get(), break_point.get(), ret);
}
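// With break_point = {0}, coeff[0] = {0, 0} and coeff[1] = {0, 1} (the 1
// stored as (T)1 << N in fixed point), polynomial_piecewise evaluates
// relu(x) = 0 + 0 * x for x < 0
// relu(x) = 0 + 1 * x for x >= 0,
// i.e. exactly max(x, 0).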
template <typename T, size_t N>
void FixedPointTensor<T, N>::sigmoid(FixedPointTensor<T, N> *ret) const {
// utilize polynomial_piecewise
template< typename T, size_t N>
void FixedPointTensor<T, N>::relu_with_derivative(
FixedPointTensor<T, N>* ret, BooleanTensor<T>* derivative) const {
auto shape_ = shape();
auto zero = tensor_factory()->template create<T>(shape_);
assign_to_tensor(zero.get(), (T)0);
zero->scaling_factor() = N;
auto tmp0 = tensor_factory()->template create<T>(shape_);
auto tmp1 = tensor_factory()->template create<T>(shape_);
BooleanTensor<T> der(tmp0.get(), tmp1.get());
gt(zero.get(), &der);
der.mul(this, ret);
if (derivative) {
der.share(0)->copy(derivative->share(0));
der.share(1)->copy(derivative->share(1));
}
}
template< typename T, size_t N>
void FixedPointTensor<T, N>::sigmoid_chebyshev(FixedPointTensor<T, N>* ret) const {
//utilize Chebyshev polynomial approximation
// more accurate in small range, such as [-4, 4]
auto shape = ret->shape();
std::vector<size_t> shape_ = shape;
shape_.insert(shape_.begin(), 10);
auto numel = ret->numel();
auto coeff = tensor_factory()->template create<T>(shape_);
std::vector<double> w;
w.resize(10, 0.0f);
w[0] = 0.5;
w[1] = 0.2159198015;
w[3] = -0.0082176259;
w[5] = 0.0001825597;
w[7] = -0.0000018848;
w[9] = 0.0000000072;
for (int i = 0; i < 10; ++i) {
for (int j = 0; j < numel; ++j) {
*(coeff->data() + i * numel + j) = (T) (w[i] * pow(2, N));
}
}
coeff->scaling_factor() = N;
polynomial(coeff.get(), ret);
}
template< typename T, size_t N>
void FixedPointTensor<T, N>::sigmoid(FixedPointTensor<T, N>* ret) const {
//utilize polynomial_piecewise
// break_point = {-2.5, 2.5}
// coeff[0] = {10^-4, 0}, coeff[1] = {0.5, 0.17}
// coeff[2] = {1 - 10^-4, 0}
// break_point.shape = {2, this->shape}, coeff.shape = {3, 2, this->shape}
// construct break_point
//construct break_point
auto shape_ = shape();
// construct break_point
//construct break_point
auto b_shape = shape_;
b_shape.insert(b_shape.begin(), 2);
auto break_point = tensor_factory()->template create<T>(b_shape);
T *b_ptr = break_point->data();
T* b_ptr = break_point->data();
for (size_t i = 0; i < break_point->numel(); ++i) {
b_ptr[i] = 0;
}
for (size_t i = 0; i < break_point->numel() / 2; ++i) {
b_ptr[i] = (T)(-2.5 * pow(2, N));
b_ptr[i] = (T) (-2.5 * pow(2, N));
}
for (size_t i = break_point->numel() / 2; i < break_point->numel(); ++i) {
b_ptr[i] = (T)(2.5 * pow(2, N));
b_ptr[i] = (T) (2.5 * pow(2, N));
}
break_point->scaling_factor() = N;
// construct coeff
std::vector<size_t> c_shape = {3, 2};
c_shape.insert(c_shape.end(), shape_.begin(), shape_.end());
auto coeff = tensor_factory()->template create<T>(c_shape);
T *c_ptr = coeff->data();
T* c_ptr = coeff->data();
size_t numel = this->numel();
double scale = std::pow(2, N);
......@@ -497,98 +599,306 @@ void FixedPointTensor<T, N>::sigmoid(FixedPointTensor<T, N> *ret) const {
this->polynomial_piecewise(coeff.get(), break_point.get(), ret);
}
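// The three pieces above amount to
// sigmoid(x) ~= 0.0001 for x < -2.5
// sigmoid(x) ~= 0.5 + 0.17 * x for -2.5 <= x < 2.5
// sigmoid(x) ~= 0.9999 for x >= 2.5,
// e.g. at x = 1 this gives 0.67 against the exact ~0.731; sigmoid_enhanced
// and sigmoid_chebyshev trade more pieces/terms for better accuracy.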
template <typename T, size_t N>
void FixedPointTensor<T, N>::softmax(FixedPointTensor<T, N> *ret) const {
// relu_x = relu(this)
auto &shape = this->shape();
auto temp0 = tensor_factory()->template create<T>(this->shape());
auto temp1 = tensor_factory()->template create<T>(this->shape());
std::shared_ptr<FixedPointTensor<T, N>> relu_x =
std::make_shared<FixedPointTensor<T, N>>(temp0.get(), temp1.get());
this->relu(relu_x.get());
template< typename T, size_t N>
void FixedPointTensor<T, N>::sigmoid_enhanced(FixedPointTensor<T, N>* ret) const {
//utilize polynomial_piecewise
// break_point = {-5, -2.5, 2.5, 5}
// coeff[0] = {10^-4, 0}, coeff[1] = {0.145, 0.02776}
// coeff[2] = {0.5, 0.17}, coeff[3] = {0.85498, 0.02776}, coeff[4] = {0.9999, 0}
// break_point.shape = {4, this->shape}, coeff.shape = {5, 2, this->shape}
//construct break_point
auto shape_ = shape();
//construct break_point
auto b_shape = shape_;
b_shape.insert(b_shape.begin(), 4);
// get sum: reduce shape : from this->shape() to
// this->shape()[0],...,shape()[n-2]
std::vector<size_t> shape_sum;
for (int i = 0; i < shape.size() - 1; ++i) {
shape_sum.emplace_back(shape[i]);
auto break_point = tensor_factory()->template create<T>(b_shape);
T* b_ptr = break_point->data();
auto numel = ret->numel();
double scale = std::pow(2, N);
for (size_t i = 0; i < numel; ++i) {
b_ptr[i] = (T) (-5 * scale);
b_ptr[i + numel] = (T) (-2.5 * scale);
b_ptr[i + 2 * numel] = (T) (2.5 * scale);
b_ptr[i + 3 * numel] = (T) (5 * scale);
}
break_point->scaling_factor() = N;
auto temp2 = tensor_factory()->template create<T>(shape_sum);
auto temp3 = tensor_factory()->template create<T>(shape_sum);
std::shared_ptr<FixedPointTensor<T, N>> sum =
std::make_shared<FixedPointTensor<T, N>>(temp2.get(), temp3.get());
//construct coeff
std::vector<size_t> c_shape = {5, 2};
c_shape.insert(c_shape.end(), shape_.begin(), shape_.end());
auto coeff = tensor_factory()->template create<T>(c_shape);
T* c_ptr = coeff->data();
for (size_t i = 0; i < numel; ++i) {
c_ptr[i] = 0.0001 * scale;
c_ptr[i + numel] = 0;
c_ptr[i + 2 * numel] = 0.145 * scale;
c_ptr[i + 3 * numel] = 0.02776 * scale;
c_ptr[i + 4 * numel] = 0.5 * scale;
c_ptr[i + 5 * numel] = 0.17 * scale;
c_ptr[i + 6 * numel] = 0.85498 * scale;
c_ptr[i + 7 * numel] = 0.02776 * scale;
c_ptr[i + 8 * numel] = 0.9999 * scale;
c_ptr[i + 9 * numel] = 0 * scale;
}
coeff->scaling_factor() = N;
// reduce relu_x's last dim
reduce(relu_x.get(), sum.get());
this->polynomial_piecewise(coeff.get(), break_point.get(), ret);
}
// reveal (TODO: security improve)
auto sum_plain = tensor_factory()->template create<T>(sum->shape());
sum->reveal(sum_plain.get());
template< typename T, size_t N>
void FixedPointTensor<T, N>::softmax(FixedPointTensor<T, N>* ret,
bool use_relu, bool use_long_div) const {
// softmax axis = -1
const size_t col = *(shape().end() - 1);
const size_t row = numel() / col;
// extend sum_plain shape to relu_x->shape(), padding with sum_value
auto sum_extend = tensor_factory()->template create<T>(relu_x->shape());
sum_extend->scaling_factor() = N;
T *sum_ext_ptr = sum_extend->data();
T *sum_plain_ptr = sum_plain->data();
std::vector<std::shared_ptr<TensorAdapter<T>>> temp;
// 11 for allocating temp tensor
for (size_t i = 0; i < 11; ++i) {
temp.emplace_back(
tensor_factory()->template create<T>());
}
size_t ite_size = shape[shape.size() - 1];
for (int j = 0; j < sum_plain->numel(); ++j) {
for (int i = 0; i < ite_size; ++i) {
*(sum_ext_ptr + j * ite_size + i) = *(sum_plain_ptr + j);
temp[0]->reshape({row, col});
temp[1]->reshape({row, col});
FixedPointTensor<T, N> x(temp[0].get(), temp[1].get());
if (!use_relu) {
temp[2]->reshape({col, row});
temp[3]->reshape({col, row});
temp[4]->reshape({1, row});
temp[5]->reshape({1, row});
}
FixedPointTensor<T, N> x_t(temp[2].get(), temp[3].get());
FixedPointTensor<T, N> max_x_t(temp[4].get(), temp[5].get());
temp[6]->reshape({row, 1});
temp[7]->reshape({row, 1});
FixedPointTensor<T, N> max_x(temp[6].get(), temp[7].get());
temp[8]->reshape({row, col});
temp[9]->reshape({row, col});
FixedPointTensor<T, N> max_x_broadcast(temp[8].get(), temp[9].get());
temp[10]->reshape({row, col});
auto exp_lower_bound = temp[10].get();
auto transpose = [](const TensorAdapter<T>* in, TensorAdapter<T>* out) {
// suppose input dims = 2
const size_t col = in->shape()[1];
const size_t row = in->shape()[0];
const size_t numel = in->numel();
for (size_t k = 0; k < numel; ++k) {
size_t i = k / row;
size_t j = k % row;
out->data()[k] = in->data()[j * col + i];
}
};
auto broadcast = [](const TensorAdapter<T>* in, TensorAdapter<T>* out) {
// suppose input dims = 2
// in shape = [row, 1]
const size_t col = out->shape()[1];
const size_t row = out->shape()[0];
for (size_t k = 0; k < out->numel(); ++k) {
size_t i = k / col;
out->data()[k] = in->data()[i];
}
};
share(0)->copy(x.mutable_share(0));
share(1)->copy(x.mutable_share(1));
if (use_relu) {
x.relu(&x);
} else { // use exp
transpose(x.share(0), x_t.mutable_share(0));
transpose(x.share(1), x_t.mutable_share(1));
// x = max(input - max(input), exp_lower_bound)
x_t.max_pooling(&max_x_t);
transpose(max_x_t.share(0), max_x.mutable_share(0));
transpose(max_x_t.share(1), max_x.mutable_share(1));
broadcast(max_x.share(0), max_x_broadcast.mutable_share(0));
broadcast(max_x.share(1), max_x_broadcast.mutable_share(1));
x.sub(&max_x_broadcast, &x);
// n = 64, see exp
assign_to_tensor(exp_lower_bound, (T)(-64 * (1 << N)));
exp_lower_bound->scaling_factor() = N;
x.sub(exp_lower_bound, &x);
x.relu(&x);
x.add(exp_lower_bound, &x);
x.exp(&x);
}
// reuse max_x as sum
reduce(&x, &max_x);
if (!use_long_div) { // invert sum by Newton's method
// divisor range = [1/col, 1.0]
// TODO: find better iter num & init val
reciprocal(&max_x, &max_x, 16, 0.5 / col);
}
relu_x->div(sum_extend.get(), ret);
broadcast(max_x.share(0), max_x_broadcast.mutable_share(0));
broadcast(max_x.share(1), max_x_broadcast.mutable_share(1));
if (use_long_div) {
x.long_div(&max_x_broadcast, &x, 1);
} else {
x.mul(&max_x_broadcast, &x);
}
x.share(0)->copy(ret->mutable_share(0));
x.share(1)->copy(ret->mutable_share(1));
}
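// Note on the exp branch above (a sketch of the reasoning, not additional
// protocol steps): it uses the standard stabilization
// softmax(x) = softmax(x - max(x)),
// so every input to exp() is <= 0, and the relu around exp_lower_bound
// implements max(x - max(x), -64), keeping exp() inside the range the
// fixed-point approximation can represent before the row-wise division.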
template<typename T, size_t N>
void FixedPointTensor<T, N>::long_div(const FixedPointTensor<T, N>* rhs,
FixedPointTensor<T, N>* ret,
size_t int_len) const {
std::vector<std::shared_ptr<TensorAdapter<T>>> temp;
for (int i = 0; i < 16; ++i) {
temp.emplace_back(
tensor_factory()->template create<T>(ret->shape()));
}
BooleanTensor<T> sign_lhs(temp[0].get(), temp[1].get());
BooleanTensor<T> sign_rhs(temp[2].get(), temp[3].get());
BooleanTensor<T> sign_ret(temp[4].get(), temp[5].get());
FixedPointTensor<T, N> abs_lhs(temp[6].get(), temp[7].get());
FixedPointTensor<T, N> abs_rhs(temp[8].get(), temp[9].get());
FixedPointTensor<T, N> sub_rhs(temp[10].get(), temp[11].get());
BooleanTensor<T> cmp_res(temp[12].get(), temp[13].get());
BooleanTensor<T> cmp_res_all(temp[14].get(), temp[15].get());
assign_to_tensor(cmp_res_all.share(0), (T)0);
assign_to_tensor(cmp_res_all.share(1), (T)0);
const size_t msb = sizeof(T) * 8 - 1;
sign_lhs.bit_extract(msb, this);
sign_rhs.bit_extract(msb, rhs);
sign_lhs.bitwise_xor(&sign_rhs, &sign_ret);
auto lshift = [] (const FixedPointTensor<T, N>* in,
size_t rhs,
FixedPointTensor<T, N>* out) {
in->share(0)->lshift(rhs, out->mutable_share(0));
in->share(1)->lshift(rhs, out->mutable_share(1));
};
// abs = val - 2 * sign * val
auto abs = [lshift] (const FixedPointTensor<T, N>* in,
const BooleanTensor<T>* sign,
FixedPointTensor<T, N>* out) {
lshift(in, 1, out);
sign->mul(out, out);
in->sub(out, out);
};
auto out0 = tensor_factory()->template create<T>(ret->shape());
abs(this, &sign_lhs, &abs_lhs);
abs(rhs, &sign_rhs, &abs_rhs);
for (ssize_t i = int_len - 1; i >= 0; --i) {
lshift(&abs_rhs, i, &sub_rhs);
abs_lhs.gt(&sub_rhs, &cmp_res);
cmp_res.mul(&sub_rhs, &sub_rhs);
cmp_res.lshift(N + i, &cmp_res);
abs_lhs.sub(&sub_rhs, &abs_lhs);
cmp_res.bitwise_xor(&cmp_res_all, &cmp_res_all);
}
for (size_t i = 1; i <= N; ++i) {
truncate3(&abs_rhs, &sub_rhs, i);
abs_lhs.gt(&sub_rhs, &cmp_res);
cmp_res.mul(&sub_rhs, &sub_rhs);
cmp_res.lshift(N - i, &cmp_res);
abs_lhs.sub(&sub_rhs, &abs_lhs);
cmp_res.bitwise_xor(&cmp_res_all, &cmp_res_all);
}
// use abs_lhs as buffer
cmp_res_all.b2a(&abs_lhs);
abs(&abs_lhs, &sign_ret, ret);
}
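// ---------------------------------------------------------------------------
// Illustration only (not part of this header): long_div is restoring division
// done bit by bit on the absolute values, with the sign re-applied at the end.
// The plain fixed-point sketch below (helper name is an assumption) produces
// the quotient's int_len integer bits and n fractional bits; the shared code
// uses a strict greater-than compare, so results may differ by one ulp.
#include <cstddef>
#include <cstdint>

int64_t fixed_long_div(int64_t lhs, int64_t rhs, size_t n, size_t int_len) {
    bool neg = (lhs < 0) != (rhs < 0);
    uint64_t a = lhs < 0 ? 0ULL - static_cast<uint64_t>(lhs) : static_cast<uint64_t>(lhs);
    uint64_t b = rhs < 0 ? 0ULL - static_cast<uint64_t>(rhs) : static_cast<uint64_t>(rhs);
    uint64_t q = 0;
    // integer quotient bits live at raw positions n + i
    for (int i = static_cast<int>(int_len) - 1; i >= 0; --i) {
        if (a >= (b << i)) { a -= b << i; q |= uint64_t(1) << (n + i); }
    }
    // fractional quotient bits live at raw positions n - i
    for (size_t i = 1; i <= n; ++i) {
        if (a >= (b >> i)) { a -= b >> i; q |= uint64_t(1) << (n - i); }
    }
    int64_t res = static_cast<int64_t>(q);
    return neg ? -res : res;
}
// ---------------------------------------------------------------------------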
// reduce last dim
template <typename T, size_t N>
void FixedPointTensor<T, N>::reduce(FixedPointTensor<T, N> *input,
FixedPointTensor<T, N> *ret) {
// enforce shape: input->shape[0 ... (n-2)] == ret shape
auto &shape = input->shape();
void FixedPointTensor<T, N>::reduce(FixedPointTensor<T, N>* input,
FixedPointTensor<T, N>* ret) {
//enforce shape: input->shape[0 ... (n-2)] == ret shape
auto& shape = input->shape();
size_t ite_size = shape[shape.size() - 1];
T *ret_begin_ptr_0 = ret->_share[0]->data();
T *ret_begin_ptr_1 = ret->_share[1]->data();
T* ret_begin_ptr_0 = ret->_share[0]->data();
T* ret_begin_ptr_1 = ret->_share[1]->data();
T *input_begin_ptr_0 = input->_share[0]->data();
T *input_begin_ptr_1 = input->_share[1]->data();
T* input_begin_ptr_0 = input->_share[0]->data();
T* input_begin_ptr_1 = input->_share[1]->data();
for (int j = 0; j < ret->numel(); ++j) {
*(ret_begin_ptr_0 + j) = *(input_begin_ptr_0 + j * ite_size);
*(ret_begin_ptr_1 + j) = *(input_begin_ptr_1 + j * ite_size);
for (int i = 1; i < ite_size; ++i) {
*(ret_begin_ptr_0 + j) += *(input_begin_ptr_0 + j * ite_size + i);
*(ret_begin_ptr_1 + j) += *(input_begin_ptr_1 + j * ite_size + i);
*(ret_begin_ptr_0 + j) +=
*(input_begin_ptr_0 + j * ite_size + i);
*(ret_begin_ptr_1 + j) +=
*(input_begin_ptr_1 + j * ite_size + i);
}
}
}
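// ---------------------------------------------------------------------------
// Illustration only (not part of this header): reduce sums the last dimension,
// so element j of the output is the sum of input[j * ite_size .. j * ite_size
// + ite_size - 1].  The helper below (name is an assumption) does the same on
// a flat plaintext buffer.
#include <cstddef>
#include <cstdint>
#include <vector>

std::vector<int64_t> reduce_last_dim(const std::vector<int64_t>& in,
                                     size_t ite_size) {
    std::vector<int64_t> out(in.size() / ite_size, 0);
    for (size_t j = 0; j < out.size(); ++j) {
        for (size_t i = 0; i < ite_size; ++i) {
            out[j] += in[j * ite_size + i];
        }
    }
    return out;
}
// ---------------------------------------------------------------------------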
template <typename T, size_t N>
void FixedPointTensor<T, N>::polynomial(const TensorAdapter<T> *coeff,
FixedPointTensor<T, N> *ret) const {
template< typename T, size_t N>
void FixedPointTensor<T, N>::polynomial(const TensorAdapter<T>* coeff,
FixedPointTensor<T, N>* ret) const {
// e.g., x.shape = {2, 3}, coeff.shape = {n, 2, 3} (n: polynomial power)
// TODO: improve performance: [ABY3]
//TODO: improve performance: [ABY3]
std::vector<std::shared_ptr<TensorAdapter<T>>> temp;
for (int i = 0; i < 5; ++i) {
temp.emplace_back(tensor_factory()->template create<T>(this->shape()));
for (int i = 0; i < 7; ++i) {
temp.emplace_back(
tensor_factory()->template create<T>(this->shape()));
}
std::shared_ptr<FixedPointTensor<T, N>> x_pow_i =
std::make_shared<FixedPointTensor<T, N>>(temp[0].get(), temp[1].get());
std::make_shared<FixedPointTensor<T, N>>(
temp[0].get(), temp[1].get());
std::shared_ptr<FixedPointTensor<T, N>> temp_fixed =
std::make_shared<FixedPointTensor<T, N>>(temp[2].get(), temp[3].get());
assign_to_tensor(ret->_share[0], (T)0);
assign_to_tensor(ret->_share[1], (T)0);
// initialize x_pow_i to fixed-point 1 (x^0)
assign_to_tensor(x_pow_i.get()->_share[0], (T)0);
assign_to_tensor(x_pow_i.get()->_share[1], (T)0);
assign_to_tensor(temp[4].get(), (T)1 << N);
std::make_shared<FixedPointTensor<T, N>>(
temp[2].get(), temp[3].get());
std::shared_ptr<FixedPointTensor<T, N>> result =
std::make_shared<FixedPointTensor<T, N>>(
temp[5].get(), temp[6].get());
assign_to_tensor(result->_share[0], (T) 0);
assign_to_tensor(result->_share[1], (T) 0);
//initialize x_pow_i to fixed-point 1 (x^0)
assign_to_tensor(x_pow_i.get()->_share[0], (T) 0);
assign_to_tensor(x_pow_i.get()->_share[1], (T) 0);
assign_to_tensor(temp[4].get(), (T) 1 << N);
temp[4]->scaling_factor() = N;
x_pow_i->add(temp[4].get(), x_pow_i.get());
......@@ -600,31 +910,42 @@ void FixedPointTensor<T, N>::polynomial(const TensorAdapter<T> *coeff,
t_shape.erase(t_shape.begin());
t->reshape(t_shape);
x_pow_i->mul(t.get(), temp_fixed.get());
ret->add(temp_fixed.get(), ret);
result->add(temp_fixed.get(), result.get());
x_pow_i->mul(this, x_pow_i.get());
}
result->share(0)->copy(ret->mutable_share(0));
result->share(1)->copy(ret->mutable_share(1));
}
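// ---------------------------------------------------------------------------
// Illustration only (not part of this header): polynomial accumulates
// sum_i coeff[i] * x^i, building x^i by repeated fixed-point multiplication
// exactly as the loop above does on shares.  Helper names are assumptions;
// __int128 (a GCC/Clang extension) keeps the product exact before truncation.
#include <cstddef>
#include <cstdint>
#include <vector>

inline int64_t fixed_mul(int64_t a, int64_t b, size_t n) {
    return static_cast<int64_t>((static_cast<__int128>(a) * b) >> n);
}

int64_t fixed_polynomial(const std::vector<int64_t>& coeff, int64_t x, size_t n) {
    int64_t result = 0;
    int64_t x_pow_i = int64_t(1) << n;       // fixed-point 1
    for (int64_t c : coeff) {
        result += fixed_mul(c, x_pow_i, n);  // result += coeff[i] * x^i
        x_pow_i = fixed_mul(x_pow_i, x, n);  // x^(i+1)
    }
    return result;
}
// ---------------------------------------------------------------------------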
template <typename T, size_t N>
template< typename T, size_t N>
void FixedPointTensor<T, N>::polynomial_piecewise(
const TensorAdapter<T> *coeff, const TensorAdapter<T> *break_point,
FixedPointTensor<T, N> *ret) const {
const TensorAdapter<T>* coeff,
const TensorAdapter<T>* break_point,
FixedPointTensor<T, N>* ret) const {
// e.g., x.shape = {2, 3},
// break_point.shape = {k, 2, 3} (k: num of break point)
// coeff.shape = {k + 1, n, 2, 3} (n: poly power)
// copy ret
auto ret_cpy_s0 = tensor_factory()->create_int64_t(ret->share(0)->shape());
ret->share(0)->copy(ret_cpy_s0.get());
auto ret_cpy_s1 = tensor_factory()->create_int64_t(ret->share(1)->shape());
ret->share(1)->copy(ret_cpy_s1.get());
std::shared_ptr<FixedPointTensor<T, N>> ret_cpy{new FixedPointTensor<T, N>(ret_cpy_s0.get(), ret_cpy_s1.get())};
std::vector<std::shared_ptr<BooleanTensor<T>>> msb;
int len_break_point = break_point->shape()[0];
int len_coeff = coeff->shape()[0];
// number of temp tensors used
int temp_total =
4 * len_break_point + 2 + 2 * (len_break_point - 1) + 2 + 4 * len_coeff;
//number of temp tensors used
int temp_total = 4 * len_break_point + 2 +
2 * (len_break_point - 1) + 2 + 4 * len_coeff;
std::vector<std::shared_ptr<TensorAdapter<T>>> temp;
for (int i = 0; i < temp_total; ++i) {
temp.emplace_back(tensor_factory()->template create<T>(this->shape()));
temp.emplace_back(tensor_factory()->
template create<T>(this->shape()));
}
int temp_index = 0;
......@@ -641,29 +962,35 @@ void FixedPointTensor<T, N>::polynomial_piecewise(
t_shape.erase(t_shape.begin());
t_break->reshape(t_shape);
temp1.emplace_back(std::make_shared<FixedPointTensor<T, N>>(
temp[temp_index++].get(), temp[temp_index++].get()));
temp1.emplace_back(
std::make_shared<FixedPointTensor<T, N>>(
temp[temp_index++].get(),
temp[temp_index++].get()));
this->sub(t_break.get(), temp1[i].get());
msb.emplace_back(std::make_shared<BooleanTensor<T>>(
temp[temp_index++].get(), temp[temp_index++].get()));
temp[temp_index++].get(),
temp[temp_index++].get()));
msb[i]->bit_extract(sizeof(T) * 8 - 1, temp1[i].get());
}
// b[0] = msb[0], b[i + 1] = ~ msb[i] & msb[i + 1]
std::vector<std::shared_ptr<BooleanTensor<T>>> b;
b.emplace_back(std::make_shared<BooleanTensor<T>>(temp[temp_index++].get(),
b.emplace_back(std::make_shared<BooleanTensor<T>>(
temp[temp_index++].get(),
temp[temp_index++].get()));
b[0] = msb[0];
for (int i = 0; i < len_break_point - 1; ++i) {
b.emplace_back(std::make_shared<BooleanTensor<T>>(
temp[temp_index++].get(), temp[temp_index++].get()));
temp[temp_index++].get(),
temp[temp_index++].get()));
msb[i]->bitwise_not(b[i + 1].get());
b[i + 1]->bitwise_and(msb[i + 1].get(), b[i + 1].get());
}
b.emplace_back(std::make_shared<BooleanTensor<T>>(temp[temp_index++].get(),
b.emplace_back(std::make_shared<BooleanTensor<T>>(
temp[temp_index++].get(),
temp[temp_index++].get()));
msb[len_break_point - 1]->bitwise_not(b[len_break_point].get());
......@@ -671,119 +998,299 @@ void FixedPointTensor<T, N>::polynomial_piecewise(
std::vector<std::shared_ptr<FixedPointTensor<T, N>>> temp_fixed;
std::vector<std::shared_ptr<FixedPointTensor<T, N>>> temp_fixed1;
assign_to_tensor(ret->_share[0], (T)0);
assign_to_tensor(ret->_share[1], (T)0);
assign_to_tensor(ret_cpy->_share[0], (T) 0);
assign_to_tensor(ret_cpy->_share[1], (T) 0);
for (int i = 0; i < len_coeff; ++i) {
temp_fixed.emplace_back(std::make_shared<FixedPointTensor<T, N>>(
temp[temp_index++].get(), temp[temp_index++].get()));
temp_fixed1.emplace_back(std::make_shared<FixedPointTensor<T, N>>(
temp[temp_index++].get(), temp[temp_index++].get()));
temp_fixed.emplace_back(
std::make_shared<FixedPointTensor<T, N>>(
temp[temp_index++].get(),
temp[temp_index++].get()));
temp_fixed1.emplace_back(
std::make_shared<FixedPointTensor<T, N>>(
temp[temp_index++].get(),
temp[temp_index++].get()));
auto t = tensor_factory()->template create<T>();
coeff->slice(i, i + 1, t.get());
auto t_shape = t->shape();
// remove leading 1
t_shape.erase(t_shape.begin());
t->reshape(t_shape);
t->reshape(t_shape);
this->polynomial(t.get(), temp_fixed[i].get());
b[i]->bit_extract(0, b[i].get());
b[i]->mul(temp_fixed[i].get(), temp_fixed1[i].get());
ret->add(temp_fixed1[i].get(), ret);
ret_cpy->add(temp_fixed1[i].get(), ret_cpy.get());
}
ret_cpy->share(0)->copy(ret->mutable_share(0));
ret_cpy->share(1)->copy(ret->mutable_share(1));
}
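// ---------------------------------------------------------------------------
// Illustration only (not part of this header): the indicators built above are
// b[0] = (x < break[0]), b[i] = (break[i-1] <= x < break[i]) and
// b[k] = (x >= break[k-1]); exactly one of them is 1, so the result is the
// indicator-weighted sum of the per-piece polynomials.  Plain-double sketch,
// helper names are assumptions.
#include <cstddef>
#include <vector>

double piecewise_poly(const std::vector<std::vector<double>>& coeff,
                      const std::vector<double>& break_point, double x) {
    auto poly = [](const std::vector<double>& c, double v) {
        double r = 0.0, p = 1.0;
        for (double ci : c) { r += ci * p; p *= v; }
        return r;
    };
    const size_t k = break_point.size();
    double result = 0.0;
    for (size_t i = 0; i <= k; ++i) {
        bool lower_ok = (i == 0) || (x >= break_point[i - 1]);
        bool upper_ok = (i == k) || (x < break_point[i]);
        double b = (lower_ok && upper_ok) ? 1.0 : 0.0;  // one-hot selector
        result += b * poly(coeff[i], x);
    }
    return result;
}
// ---------------------------------------------------------------------------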
template <typename T, size_t N>
template <template <typename U, size_t...> class CTensor, size_t... N1>
void FixedPointTensor<T, N>::lt(const CTensor<T, N1...> *rhs,
BooleanTensor<T> *ret) const {
template<typename T, size_t N>
template<template<typename U, size_t...> class CTensor,
size_t... N1>
void FixedPointTensor<T, N>::lt(const CTensor<T, N1...>* rhs,
BooleanTensor<T>* ret) const {
std::vector<std::shared_ptr<TensorAdapter<T>>> temp;
for (int i = 0; i < 2; ++i) {
temp.emplace_back(tensor_factory()->template create<T>(this->shape()));
temp.emplace_back(
tensor_factory()->template create<T>(this->shape()));
}
std::shared_ptr<FixedPointTensor<T, N>> sub_result =
std::make_shared<FixedPointTensor<T, N>>(temp[0].get(), temp[1].get());
std::make_shared<FixedPointTensor<T, N>>(
temp[0].get(), temp[1].get());
this->sub(rhs, sub_result.get());
ret->bit_extract(sizeof(T) * 8 - 1, sub_result.get());
}
template <typename T, size_t N>
template <template <typename U, size_t...> class CTensor, size_t... N1>
void FixedPointTensor<T, N>::leq(const CTensor<T, N1...> *rhs,
BooleanTensor<T> *ret) const {
template<typename T, size_t N>
template<template<typename U, size_t...> class CTensor,
size_t... N1>
void FixedPointTensor<T, N>::leq(const CTensor<T, N1...>* rhs,
BooleanTensor<T>* ret) const {
this->gt(rhs, ret);
auto tensor_one = tensor_factory()->template create<T>(this->shape());
auto tensor_one = tensor_factory()->
template create<T>(this->shape());
assign_to_tensor(tensor_one.get(), (T)1);
assign_to_tensor(tensor_one.get(), (T) 1);
ret->bitwise_xor(tensor_one.get(), ret);
}
template <typename T, size_t N>
template <template <typename U, size_t...> class CTensor, size_t... N1>
void FixedPointTensor<T, N>::gt(const CTensor<T, N1...> *rhs,
BooleanTensor<T> *ret) const {
template<typename T, size_t N>
template<template<typename U, size_t...> class CTensor,
size_t... N1>
void FixedPointTensor<T, N>::gt(const CTensor<T, N1...>* rhs,
BooleanTensor<T>* ret) const {
std::vector<std::shared_ptr<TensorAdapter<T>>> temp;
for (int i = 0; i < 2; ++i) {
temp.emplace_back(tensor_factory()->template create<T>(this->shape()));
temp.emplace_back(
tensor_factory()->template create<T>(this->shape()));
}
std::shared_ptr<FixedPointTensor<T, N>> sub_result =
std::make_shared<FixedPointTensor<T, N>>(temp[0].get(), temp[1].get());
std::make_shared<FixedPointTensor<T, N>>(
temp[0].get(), temp[1].get());
this->sub(rhs, sub_result.get());
sub_result->negative(sub_result.get());
ret->template bit_extract(sizeof(T) * 8 - 1, sub_result.get());
}
template <typename T, size_t N>
template <template <typename U, size_t...> class CTensor, size_t... N1>
void FixedPointTensor<T, N>::geq(const CTensor<T, N1...> *rhs,
BooleanTensor<T> *ret) const {
template<typename T, size_t N>
template<template<typename U, size_t...> class CTensor,
size_t... N1>
void FixedPointTensor<T, N>::geq(const CTensor<T, N1...>* rhs,
BooleanTensor<T>* ret) const {
this->lt(rhs, ret);
auto tensor_one = tensor_factory()->template create<T>(this->shape());
auto tensor_one = tensor_factory()->
template create<T>(this->shape());
assign_to_tensor(tensor_one.get(), (T)1);
assign_to_tensor(tensor_one.get(), (T) 1);
ret->bitwise_xor(tensor_one.get(), ret);
}
template <typename T, size_t N>
template <template <typename U, size_t...> class CTensor, size_t... N1>
void FixedPointTensor<T, N>::eq(const CTensor<T, N1...> *rhs,
BooleanTensor<T> *ret) const {
template<typename T, size_t N>
template<template<typename U, size_t...> class CTensor,
size_t... N1>
void FixedPointTensor<T, N>::eq(const CTensor<T, N1...>* rhs,
BooleanTensor<T>* ret) const {
this->neq(rhs, ret);
auto tensor_one = tensor_factory()->template create<T>(this->shape());
assign_to_tensor(tensor_one.get(), (T)1);
assign_to_tensor(tensor_one.get(), (T) 1);
ret->bitwise_xor(tensor_one.get(), ret);
}
template <typename T, size_t N>
template <template <typename U, size_t...> class CTensor, size_t... N1>
void FixedPointTensor<T, N>::neq(const CTensor<T, N1...> *rhs,
BooleanTensor<T> *ret) const {
template<typename T, size_t N>
template<template<typename U, size_t...> class CTensor,
size_t... N1>
void FixedPointTensor<T, N>::neq(const CTensor<T, N1...>* rhs,
BooleanTensor<T>* ret) const {
std::vector<std::shared_ptr<TensorAdapter<T>>> temp;
for (int i = 0; i < 4; i++) {
temp.emplace_back(tensor_factory()->template create<T>(this->shape()));
for (int i = 0; i < 4; i++) {
temp.emplace_back(tensor_factory()->
template create<T>(this->shape()));
}
std::shared_ptr<BooleanTensor<T>> lt =
std::make_shared<BooleanTensor<T>>(temp[0].get(), temp[1].get());
std::make_shared<BooleanTensor<T>>(
temp[0].get(), temp[1].get());
std::shared_ptr<BooleanTensor<T>> gt =
std::make_shared<BooleanTensor<T>>(temp[2].get(), temp[3].get());
std::make_shared<BooleanTensor<T>>(
temp[2].get(), temp[3].get());
this->lt(rhs, lt.get());
this->gt(rhs, gt.get());
lt->bitwise_or(gt.get(), ret);
}
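// ---------------------------------------------------------------------------
// Illustration only (not part of this header): every comparison above reduces
// to "take the sign bit of a difference" in two's complement, plus an xor
// with 1 (logical NOT) or an OR to derive the remaining relations.  Wrap-around
// at the ring boundary is ignored in this plaintext sketch; helper names are
// assumptions.
#include <cstdint>

inline uint64_t msb(int64_t v) { return static_cast<uint64_t>(v) >> 63; }

inline uint64_t lt_ (int64_t x, int64_t y) { return msb(x - y); }
inline uint64_t gt_ (int64_t x, int64_t y) { return msb(-(x - y)); }
inline uint64_t leq_(int64_t x, int64_t y) { return gt_(x, y) ^ 1; }
inline uint64_t geq_(int64_t x, int64_t y) { return lt_(x, y) ^ 1; }
inline uint64_t neq_(int64_t x, int64_t y) { return lt_(x, y) | gt_(x, y); }
inline uint64_t eq_ (int64_t x, int64_t y) { return neq_(x, y) ^ 1; }
// ---------------------------------------------------------------------------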
template <typename T>
inline void assign_to_tensor(TensorAdapter<T> *input, T assign_num) {
size_t size_one_dim = input->numel();
T *iter = input->data();
for (int i = 0; i < size_one_dim; ++i) {
*(iter + i) = assign_num;
template<typename T, size_t N>
void FixedPointTensor<T, N>::reciprocal(const FixedPointTensor<T, N>* op, FixedPointTensor<T, N>* ret,
size_t iter, double x0) {
auto temp0 = tensor_factory()->template create<T>(ret->shape());
auto temp1 = tensor_factory()->template create<T>(ret->shape());
auto temp2 = tensor_factory()->template create<T>(ret->shape());
auto temp3 = tensor_factory()->template create<T>(ret->shape());
std::shared_ptr<FixedPointTensor<T, N>> result =
std::make_shared<FixedPointTensor<T, N>>(temp0.get(), temp1.get());
std::shared_ptr<FixedPointTensor<T, N>> x_copy =
std::make_shared<FixedPointTensor<T, N>>(temp2.get(), temp3.get());
assign_to_tensor(result->mutable_share(0), (T) 0);
assign_to_tensor(result->mutable_share(1), (T) 0);
auto tensor_x0 = tensor_factory()->template create<T>(op->shape());
assign_to_tensor(tensor_x0.get(), (T)(x0 * pow(2, N)));
tensor_x0->scaling_factor() = N;
result->add(tensor_x0.get(), result.get());
auto tensor_2 = tensor_factory()->template create<T>(op->shape());
tensor_2->scaling_factor() = N;
assign_to_tensor(tensor_2.get(), (T)(2 << N));
for (int i = 0; i < iter; ++i) {
result->share(0)->copy(x_copy->mutable_share(0));
result->share(1)->copy(x_copy->mutable_share(1));
auto res_ptr = result.get();
op->mul(res_ptr, res_ptr);
result->negative(res_ptr);
result->add(tensor_2.get(), res_ptr);
x_copy->mul(res_ptr, res_ptr);
}
result->share(0)->copy(ret->mutable_share(0));
result->share(1)->copy(ret->mutable_share(1));
}
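// ---------------------------------------------------------------------------
// Illustration only (not part of this header): the loop above is Newton's
// iteration x_{k+1} = x_k * (2 - a * x_k), which converges quadratically to
// 1/a when the initial guess is close enough (the softmax above passes
// x0 = 0.5 / col).  Plain-double sketch; helper name is an assumption.
#include <cstddef>

double newton_reciprocal(double a, double x0, size_t iters) {
    double x = x0;
    for (size_t i = 0; i < iters; ++i) {
        x = x * (2.0 - a * x);  // error shrinks as e_{k+1} = e_k^2
    }
    return x;  // e.g. newton_reciprocal(4.0, 0.1, 16) is approximately 0.25
}
// ---------------------------------------------------------------------------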
template<typename T, size_t N>
void FixedPointTensor<T, N>::inverse_square_root(FixedPointTensor* ret,
size_t iter,
double x0) const {
inverse_square_root(this, ret, iter, x0);
}
// Newton's method; variable naming follows Quake III Arena's Q_rsqrt
// float threehalfs = 1.5F;
// x2 = number * 0.5F;
// y = x0; // since 0x5f3759df does not fit fixed-point arithmetic
// y = y * ( threehalfs - ( x2 * y * y ) ); // iteration of Newton's method
template<typename T, size_t N>
void FixedPointTensor<T, N>::inverse_square_root(const FixedPointTensor* op,
FixedPointTensor* ret,
size_t iter,
double x0) {
std::vector<std::shared_ptr<TensorAdapter<T>>> temp;
for (int i = 0; i < 7; ++i) {
temp.emplace_back(
tensor_factory()->template create<T>(op->shape()));
}
std::shared_ptr<FixedPointTensor<T, N>> y =
std::make_shared<FixedPointTensor<T, N>>(temp[0].get(), temp[1].get());
std::shared_ptr<FixedPointTensor<T, N>> x2 =
std::make_shared<FixedPointTensor<T, N>>(temp[2].get(), temp[3].get());
// x2 = 0.5 * op
truncate3(op, x2.get(), 1);
assign_to_tensor(y->mutable_share(0), (T)(x0 * pow(2, N)));
assign_to_tensor(y->mutable_share(1), (T)(x0 * pow(2, N)));
// threehalfs
temp[4]->scaling_factor() = N;
assign_to_tensor(temp[4].get(), T(1.5 * pow(2, N)));
std::shared_ptr<FixedPointTensor<T, N>> y_copy =
std::make_shared<FixedPointTensor<T, N>>(temp[5].get(), temp[6].get());
for (int i = 0; i < iter; ++i) {
y->share(0)->copy(y_copy->mutable_share(0));
y->share(1)->copy(y_copy->mutable_share(1));
y->mul(y.get(), y.get());
y->mul(x2.get(), y.get());
y->negative(y.get());
y->add(temp[4].get(), y.get());
y_copy->mul(y.get(), y.get());
}
y->share(0)->copy(ret->mutable_share(0));
y->share(1)->copy(ret->mutable_share(1));
}
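// ---------------------------------------------------------------------------
// Illustration only (not part of this header): the loop above is the Newton
// iteration y_{k+1} = y_k * (1.5 - 0.5 * a * y_k^2), which converges to
// 1/sqrt(a); the fixed-point version cannot use Q_rsqrt's 0x5f3759df bit
// trick, so it starts from the caller-supplied x0.  Plain-double sketch;
// helper name is an assumption.
#include <cstddef>

double newton_rsqrt(double a, double x0, size_t iters) {
    const double x2 = 0.5 * a;
    double y = x0;
    for (size_t i = 0; i < iters; ++i) {
        y = y * (1.5 - x2 * y * y);
    }
    return y;  // e.g. newton_rsqrt(4.0, 0.3, 8) is approximately 0.5
}
// ---------------------------------------------------------------------------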
template<typename T, size_t N>
template<template<typename U, size_t...> class CTensor,
size_t... N1>
void FixedPointTensor<T, N>::max(const CTensor<T, N1...>* rhs,
FixedPointTensor* ret,
BooleanTensor<T>* cmp) const {
// max = lhs + (rhs - lhs) if rhs > lhs else lhs
std::vector<std::shared_ptr<TensorAdapter<T>>> temp;
bool output_cmp = cmp != nullptr;
// if cmp is not null, store cmp results in cmp
// else, store them in tmp tensors
for (int i = 0; i < 2 + 2 * (!output_cmp); ++i) {
temp.emplace_back(
tensor_factory()->template create<T>(this->shape()));
}
FixedPointTensor<T, N> delta(temp[0].get(), temp[1].get());
sub(rhs, &delta);
BooleanTensor<T> sign;
if (output_cmp) {
sign = *cmp;
} else {
sign = BooleanTensor<T>(temp[2].get(), temp[3].get());
}
sign.template bit_extract(sizeof(T) * 8 - 1, &delta);
delta.negative(&delta);
sign.mul(&delta, &delta);
add(&delta, ret);
}
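// ---------------------------------------------------------------------------
// Illustration only (not part of this header): the branch-free max above is
// max(a, b) = a + (a < b) * (b - a), with the comparison taken from the sign
// bit of a - b.  Plaintext sketch ignoring wrap-around; helper name is an
// assumption.
#include <cstdint>

inline int64_t branchless_max(int64_t a, int64_t b) {
    int64_t delta = a - b;
    int64_t is_lt = static_cast<int64_t>(static_cast<uint64_t>(delta) >> 63);  // 1 iff a < b
    return a + is_lt * (-delta);  // a + (a < b) * (b - a)
}
// ---------------------------------------------------------------------------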
template<typename T, size_t N>
void FixedPointTensor<T, N>::max_pooling(FixedPointTensor* ret,
BooleanTensor<T>* pos) const {
size_t k = shape()[0];
std::vector<std::shared_ptr<TensorAdapter<T>>> tmp;
for (int i = 0; i < 4; ++i) {
tmp.emplace_back(
tensor_factory()->template create<T>());
}
FixedPointTensor now(tmp[0].get(), tmp[1].get());
BooleanTensor<T> cmp(tmp[2].get(), tmp[3].get());
auto cmp_ptr = pos ? &cmp : nullptr;
share(0)->slice(0, 1, tmp[0].get());
share(1)->slice(0, 1, tmp[1].get());
tmp[0]->copy(ret->mutable_share(0));
tmp[1]->copy(ret->mutable_share(1));
if (pos) {
pos->share(0)->slice(0, 1, tmp[2].get());
pos->share(1)->slice(0, 1, tmp[3].get());
// initialize the one-hot flag for slice 0 to a sharing of 1, since slice 0 starts as the running maximum
if (party() == 0 || party() == 2) {
size_t idx = 2 + (party() == 2);
assign_to_tensor(tmp[idx].get(), T(1));
assign_to_tensor(tmp[5 - idx].get(), T(0));
} else {
assign_to_tensor(tmp[2].get(), T(0));
assign_to_tensor(tmp[3].get(), T(0));
}
}
for (size_t i = 1; i < k; ++i) {
share(0)->slice(i, i + 1, tmp[0].get());
share(1)->slice(i, i + 1, tmp[1].get());
if (pos) {
pos->share(0)->slice(i, i + 1, tmp[2].get());
pos->share(1)->slice(i, i + 1, tmp[3].get());
}
ret->max(&now, ret, cmp_ptr);
}
if (pos) {
pos->onehot_from_cmp();
}
}
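// ---------------------------------------------------------------------------
// Illustration only (not part of this header): max_pooling keeps a running
// maximum over the k slices of dimension 0 and, when pos is given, a one-hot
// record of which slice supplied each maximum.  Plaintext sketch with an
// explicit argmax instead of the one-hot shares; helper name is an assumption.
#include <cstddef>
#include <cstdint>
#include <vector>

void max_pool_dim0(const std::vector<std::vector<int64_t>>& slices,
                   std::vector<int64_t>& max_out,
                   std::vector<size_t>& argmax_out) {
    const size_t cols = slices.front().size();
    max_out = slices[0];
    argmax_out.assign(cols, 0);
    for (size_t i = 1; i < slices.size(); ++i) {
        for (size_t c = 0; c < cols; ++c) {
            if (slices[i][c] > max_out[c]) {  // shared version uses max() with cmp output
                max_out[c] = slices[i][c];
                argmax_out[c] = i;
            }
        }
    }
}
// ---------------------------------------------------------------------------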
} // namespace aby3
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
......@@ -10,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include <string>
#include <cmath>
#include "gtest/gtest.h"
#include "paddle/fluid/framework/tensor.h"
......@@ -82,7 +86,7 @@ std::shared_ptr<TensorAdapter<int64_t>> gen(std::vector<size_t> shape) {
}
template<typename T, size_t N>
PaddleTensor<T> test_fixedt_gen_paddle_tensor(std::vector<float>& input,
PaddleTensor<T> test_fixedt_gen_paddle_tensor(std::vector<double>& input,
std::vector<size_t>& shape,
paddle::platform::CPUDeviceContext& cpu_ctx) {
......@@ -96,13 +100,15 @@ PaddleTensor<T> test_fixedt_gen_paddle_tensor(std::vector<float>& input,
}
template<typename T>
bool test_fixedt_check_tensor_eq(const TensorAdapter<T>* in1,
const TensorAdapter<T>* in2, double precision = 0.0001) {
bool test_fixedt_check_tensor_eq(const TensorAdapter<T>* result,
const TensorAdapter<T>* expected,
double precision = 0.0001,
bool use_relative_error = false) {
// check shape
std::vector<size_t> shape1, shape2;
shape1 = in1->shape();
shape2 = in2->shape();
size_t scale = in1->scaling_factor();
shape1 = result->shape();
shape2 = expected->shape();
size_t scale = result->scaling_factor();
if (shape1.size() != shape2.size()) {
std::cout << "shape size error: shape1.size: "<<shape1.size()<<
"; shape2.size: "<<shape2.size()<<std::endl;
......@@ -116,15 +122,28 @@ bool test_fixedt_check_tensor_eq(const TensorAdapter<T>* in1,
}
// check each element
for (int i = 0; i < in1->numel(); i++) {
if (std::abs(*(in1->data() + i) - *(in2->data() + i)) >
precision * pow(2, scale)) {
std::cout << "result error: inx: "<<i<<
" in1[i] = "<<*(in1->data() + i)<<
" in2[i] = "<<*(in2->data() + i)<<std::endl;
return false;
}
}
bool return_false = false;
for (int i = 0; i < result->numel(); i++) {
// absolute error
if (!use_relative_error && std::abs(*(result->data() + i) - *(expected->data() + i)) >
precision * std::pow(2, scale)) {
std::cout << "result error: index: "<< i <<
" output[i] = "<< *(result->data() + i) / pow(2, 16) <<
" expected[i] = " << *(expected->data() + i) / pow(2, 16) << std::endl;
return_false = true;
}
// relative error
if (use_relative_error
&& std::abs(*(result->data() + i) - *(expected->data() + i))
/ (std::abs(*(expected->data() + i)) + 0.00000001)
> precision) {
std::cout << "result error: index: "<< i <<
" output[i] = " << *(result->data() + i) / pow(2, 16) <<
" expected[i] = " << *(expected->data() + i) / pow(2, 16) << std::endl;
return_false = true;
}
}
if (return_false) return false;
return true;
}
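// ---------------------------------------------------------------------------
// Illustration only (not test code): the checker accepts an element when
// |result - expected| <= precision * 2^scale in absolute mode, or when
// |result - expected| / (|expected| + 1e-8) <= precision in relative mode.
// Helper name is an assumption.
#include <cmath>
#include <cstdint>

inline bool within_tolerance(int64_t result, int64_t expected, size_t scale,
                             double precision, bool use_relative_error) {
    const double diff = std::abs(static_cast<double>(result) - static_cast<double>(expected));
    if (!use_relative_error) {
        return diff <= precision * std::pow(2.0, static_cast<double>(scale));
    }
    return diff / (std::abs(static_cast<double>(expected)) + 1e-8) <= precision;
}
// ---------------------------------------------------------------------------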
......@@ -338,23 +357,23 @@ void test_fixedt_mul_fixed(size_t p,
result->reveal(out);
}
void test_fixedt_mul2_fixed(size_t p,
void test_fixedt_mul_plain(size_t p,
std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in,
TensorAdapter<int64_t>* out) {
std::vector<std::shared_ptr<TensorAdapter<int64_t>>> temp;
for (int i = 0; i < 6; i++) {
for (int i = 0; i < 4; i++) {
temp.emplace_back(gen(out->shape()));
}
test_fixedt_gen_shares(p, in, temp);
test_fixedt_gen_shares(p, in[0], temp);
Fix64N16* lhs = new Fix64N16(temp[0].get(), temp[1].get());
Fix64N16* rhs = new Fix64N16(temp[2].get(), temp[3].get());
Fix64N16* result = new Fix64N16(temp[4].get(), temp[5].get());
lhs->mul2(rhs, result);
Fix64N16* result = new Fix64N16(temp[2].get(), temp[3].get());
lhs->mul(in[1].get(), result);
result->reveal(out);
}
void test_fixedt_mul_plain(size_t p,
void test_fixedt_div_plain(size_t p,
std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in,
TensorAdapter<int64_t>* out) {
std::vector<std::shared_ptr<TensorAdapter<int64_t>>> temp;
......@@ -366,23 +385,23 @@ void test_fixedt_mul_plain(size_t p,
Fix64N16* lhs = new Fix64N16(temp[0].get(), temp[1].get());
Fix64N16* result = new Fix64N16(temp[2].get(), temp[3].get());
lhs->mul(in[1].get(), result);
lhs->div(in[1].get(), result);
result->reveal(out);
}
void test_fixedt_div_plain(size_t p,
void test_fixedt_div_fixed(size_t p,
std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in,
TensorAdapter<int64_t>* out) {
std::vector<std::shared_ptr<TensorAdapter<int64_t>>> temp;
for (int i = 0; i < 4; i++) {
for (int i = 0; i < 6; i++) {
temp.emplace_back(gen(out->shape()));
}
test_fixedt_gen_shares(p, in[0], temp);
test_fixedt_gen_shares(p, in, temp);
Fix64N16* lhs = new Fix64N16(temp[0].get(), temp[1].get());
Fix64N16* result = new Fix64N16(temp[2].get(), temp[3].get());
lhs->div(in[1].get(), result);
Fix64N16* rhs = new Fix64N16(temp[2].get(), temp[3].get());
Fix64N16* result = new Fix64N16(temp[4].get(), temp[5].get());
lhs->div(rhs, result);
result->reveal(out);
}
......@@ -496,6 +515,22 @@ void test_fixedt_relu_fixed(size_t p,
result->reveal(out);
}
void test_fixedt_relu2_fixed(size_t p,
std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in,
TensorAdapter<int64_t>* out) {
std::vector<std::shared_ptr<TensorAdapter<int64_t>>> temp;
for (int i = 0; i < 4; i++) {
temp.emplace_back(gen(out->shape()));
}
test_fixedt_gen_shares(p, in[0], temp);
Fix64N16* lhs = new Fix64N16(temp[0].get(), temp[1].get());
Fix64N16* result = new Fix64N16(temp[2].get(), temp[3].get());
lhs->relu_with_derivative(result, nullptr);
result->reveal(out);
}
void test_fixedt_softmax_fixed(size_t p,
std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in,
TensorAdapter<int64_t>* out) {
......@@ -528,7 +563,7 @@ void test_fixedt_sigmoid_fixed(size_t p,
result->reveal(out);
}
void test_fixedt_exp_fixed(size_t p,
void test_fixedt_sigmoid_enhanced_fixed(size_t p,
std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in,
TensorAdapter<int64_t>* out) {
std::vector<std::shared_ptr<TensorAdapter<int64_t>>> temp;
......@@ -540,23 +575,39 @@ void test_fixedt_exp_fixed(size_t p,
Fix64N16* lhs = new Fix64N16(temp[0].get(), temp[1].get());
Fix64N16* result = new Fix64N16(temp[2].get(), temp[3].get());
lhs->exp(result);
lhs->sigmoid_enhanced(result);
result->reveal(out);
}
void test_fixedt_mat_mul_fixed(size_t p,
void test_fixedt_sigmoid_chebyshev_fixed(size_t p,
std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in,
TensorAdapter<int64_t>* out) {
std::vector<std::shared_ptr<TensorAdapter<int64_t>>> temp;
for (int i = 0; i < 6; i++) {
for (int i = 0; i < 4; i++) {
temp.emplace_back(gen(out->shape()));
}
test_fixedt_gen_shares(p, in, temp);
test_fixedt_gen_shares(p, in[0], temp);
Fix64N16* lhs = new Fix64N16(temp[0].get(), temp[1].get());
Fix64N16* rhs = new Fix64N16(temp[2].get(), temp[3].get());
Fix64N16* result = new Fix64N16(temp[4].get(), temp[5].get());
lhs->mat_mul(rhs, result);
Fix64N16* result = new Fix64N16(temp[2].get(), temp[3].get());
lhs->sigmoid_chebyshev(result);
result->reveal(out);
}
void test_fixedt_exp_fixed(size_t p,
std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in,
TensorAdapter<int64_t>* out) {
std::vector<std::shared_ptr<TensorAdapter<int64_t>>> temp;
for (int i = 0; i < 4; i++) {
temp.emplace_back(gen(out->shape()));
}
test_fixedt_gen_shares(p, in[0], temp);
Fix64N16* lhs = new Fix64N16(temp[0].get(), temp[1].get());
Fix64N16* result = new Fix64N16(temp[2].get(), temp[3].get());
lhs->exp(result);
result->reveal(out);
}
......@@ -829,7 +880,13 @@ void test_fixedt_matmul_fixed(size_t p,
std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in,
TensorAdapter<int64_t>* out) {
std::vector<std::shared_ptr<TensorAdapter<int64_t>>> temp;
for (int i = 0; i < 6; i++) {
for (int i = 0; i < 2; i++) {
temp.emplace_back(gen(in[0]->shape()));
}
for (int i = 2; i < 4; i++) {
temp.emplace_back(gen(in[1]->shape()));
}
for (int i = 4; i < 6; i++) {
temp.emplace_back(gen(out->shape()));
}
......@@ -843,24 +900,26 @@ void test_fixedt_matmul_fixed(size_t p,
TEST_F(FixedTensorTest, matmulfixed) {
std::vector<size_t> shape = {2, 2};
std::vector<float> in0_val = {1.0, 1.0, 1.0, 1.0};
std::vector<float> in1_val = {2.0, 2.0, 2.0, 2.0};
std::vector<float> res_val = {4.0, 4.0, 4.0, 4.0};
std::vector<size_t> shape = {1, 3};
std::vector<size_t> shape1 = {3, 1};
std::vector<size_t> shape_o = {1, 1};
std::vector<double> in0_val = {1, 0, 0};
std::vector<double> in1_val = {1, 2, 3};
std::vector<double> res_val = {1};
std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in =
{gen(shape), gen(shape)};
{gen(shape), gen(shape1)};
test_fixedt_gen_paddle_tensor<int64_t, 16>(in0_val,
shape, _cpu_ctx).copy(in[0].get());
test_fixedt_gen_paddle_tensor<int64_t, 16>(in1_val,
shape, _cpu_ctx).copy(in[1].get());
shape1, _cpu_ctx).copy(in[1].get());
auto out0 = _s_tensor_factory->create<int64_t>(shape);
auto out1 = _s_tensor_factory->create<int64_t>(shape);
auto out2 = _s_tensor_factory->create<int64_t>(shape);
auto out0 = _s_tensor_factory->create<int64_t>(shape_o);
auto out1 = _s_tensor_factory->create<int64_t>(shape_o);
auto out2 = _s_tensor_factory->create<int64_t>(shape_o);
PaddleTensor<int64_t> result =
test_fixedt_gen_paddle_tensor<int64_t, 16>(res_val, shape, _cpu_ctx);
test_fixedt_gen_paddle_tensor<int64_t, 16>(res_val, shape_o, _cpu_ctx);
_t[0] = std::thread([this, in, out0]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[0], [&](){
......@@ -892,7 +951,7 @@ TEST_F(FixedTensorTest, matmulfixed) {
TEST_F(FixedTensorTest, share) {
std::vector<size_t> shape = {2, 2};
std::vector<float> in_val = {1.0, 1.0, 1.0, 1.0};
std::vector<double> in_val = {1.0, 1.0, 1.0, 1.0};
PaddleTensor<int64_t> input =
test_fixedt_gen_paddle_tensor<int64_t, 16>(in_val, shape, _cpu_ctx);
auto output = _s_tensor_factory->create<int64_t>(shape);
......@@ -930,9 +989,9 @@ TEST_F(FixedTensorTest, share) {
TEST_F(FixedTensorTest, addfixed) {
std::vector<size_t> shape = {2, 2};
std::vector<float> in0_val = {1.0, 1.0, 1.0, 1.0};
std::vector<float> in1_val = {2.0, 2.0, 2.0, 2.0};
std::vector<float> res_val = {3.0, 3.0, 3.0, 3.0};
std::vector<double> in0_val = {0x1p47 - 1, 5 + 0x1p-16, 1.0, 1.0};
std::vector<double> in1_val = {1.0, 8 + (1 - 0x1p-16), 2.0, 2.0};
std::vector<double> res_val = {-0x1p47, 14, 3.0, 3.0};
std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in =
{gen(shape), gen(shape)};
......@@ -975,9 +1034,9 @@ TEST_F(FixedTensorTest, addfixed) {
TEST_F(FixedTensorTest, addplain) {
std::vector<size_t> shape = {2, 2};
std::vector<float> in0_val = {1.0, 1.0, 1.0, 1.0};
std::vector<float> in1_val = {2.0, 2.0, 2.0, 2.0};
std::vector<float> res_val = {3.0, 3.0, 3.0, 3.0};
std::vector<double> in0_val = {1.0, 5 + 0x1p-16, 1.0, 1.0};
std::vector<double> in1_val = {0x1p47 - 1, 8 + (1 - 0x1p-16), 2.0, 2.0};
std::vector<double> res_val = {-0x1p47, 14.0, 3.0, 3.0};
std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in =
{gen(shape), gen(shape)};
......@@ -1023,9 +1082,9 @@ TEST_F(FixedTensorTest, addplain) {
TEST_F(FixedTensorTest, subfixed) {
std::vector<size_t> shape = {2, 2};
std::vector<float> in0_val = {3.0, 3.0, 3.0, 3.0};
std::vector<float> in1_val = {2.0, 2.0, 2.0, 2.0};
std::vector<float> res_val = {1.0, 1.0, 1.0, 1.0};
std::vector<double> in0_val = {3.0, 3.0, 3.0, 3.0};
std::vector<double> in1_val = {2.0, 2.0, 2.0, 2.0};
std::vector<double> res_val = {1.0, 1.0, 1.0, 1.0};
std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in =
{gen(shape), gen(shape)};
......@@ -1069,9 +1128,9 @@ TEST_F(FixedTensorTest, subfixed) {
TEST_F(FixedTensorTest, subplain) {
std::vector<size_t> shape = {2, 2};
std::vector<float> in0_val = {3.0, 3.0, 3.0, 3.0};
std::vector<float> in1_val = {2.0, 2.0, 2.0, 2.0};
std::vector<float> res_val = {1.0, 1.0, 1.0, 1.0};
std::vector<double> in0_val = {3.0, 3.0, 3.0, 3.0};
std::vector<double> in1_val = {2.0, 2.0, 2.0, 2.0};
std::vector<double> res_val = {1.0, 1.0, 1.0, 1.0};
std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in =
{gen(shape), gen(shape)};
......@@ -1117,9 +1176,9 @@ TEST_F(FixedTensorTest, subplain) {
TEST_F(FixedTensorTest, negfixed) {
std::vector<size_t> shape = {2, 2};
std::vector<float> in0_val = {1.0, 1.0, 1.0, 1.0};
//std::vector<float> in1_val = {2.0, 2.0, 2.0, 2.0};
std::vector<float> res_val = {-1.0, -1.0, -1.0, -1.0};
std::vector<double> in0_val = {1.0, 1.0, 1.0, 1.0};
//std::vector<double> in1_val = {2.0, 2.0, 2.0, 2.0};
std::vector<double> res_val = {-1.0, -1.0, -1.0, -1.0};
std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in = {gen(shape)};
test_fixedt_gen_paddle_tensor<int64_t, 16>(in0_val,
......@@ -1163,9 +1222,10 @@ TEST_F(FixedTensorTest, negfixed) {
TEST_F(FixedTensorTest, mulfixed) {
std::vector<size_t> shape = {2, 2};
std::vector<float> in0_val = {1.0, 1.0, 1.0, 1.0};
std::vector<float> in1_val = {2.0, 2.0, 2.0, 2.0};
std::vector<float> res_val = {2.0, 2.0, 2.0, 2.0};
std::vector<double> in0_val = {1.0, 1.0, 1.0, 1.0};
std::vector<double> in1_val = {2.0, 2.0, 2.0, 2.0};
std::vector<double> res_val = {2.0, 2.0, 2.0, 2.0};
std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in =
{gen(shape), gen(shape)};
......@@ -1209,12 +1269,24 @@ TEST_F(FixedTensorTest, mulfixed) {
EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), &result));
}
TEST_F(FixedTensorTest, mul2fixed) {
TEST_F(FixedTensorTest, mulfixed_multi_times) {
std::vector<size_t> shape = {2, 2};
std::vector<float> in0_val = {1.0, 1.0, 1.0, 1.0};
std::vector<float> in1_val = {2.0, 2.0, 2.0, 2.0};
std::vector<float> res_val = {2.0, 2.0, 2.0, 2.0};
std::vector<size_t> shape = {100000, 1};
std::vector<double> in0_val(shape[0]), in1_val(shape[0]), res_val(shape[0]);
auto fill_mul_data = [&in0_val, &in1_val, &res_val] () {
unsigned seed = std::chrono::system_clock::now().time_since_epoch().count();
std::default_random_engine generator(seed);
std::uniform_int_distribution<int64_t> input(-0x1p36, 0x1p36);
std::for_each(in0_val.begin(), in0_val.end(),
[] (double& a){ a = 1.0;});
std::for_each(in1_val.begin(), in1_val.end(),
[&input, &generator] (double& a){ a = input(generator) * pow(2, -16);});
std::transform(in0_val.begin(), in0_val.end(), in1_val.begin(), res_val.begin(),
[] (double& a, double& b){ return a * b;});
};
fill_mul_data();
std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in =
{gen(shape), gen(shape)};
......@@ -1232,19 +1304,19 @@ TEST_F(FixedTensorTest, mul2fixed) {
_t[0] = std::thread([this, in, out0]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[0], [&](){
test_fixedt_mul2_fixed(0, in, out0.get());
test_fixedt_mul_fixed(0, in, out0.get());
});
});
_t[1] = std::thread([this, in, out1]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[1], [&](){
test_fixedt_mul2_fixed(1, in, out1.get());
test_fixedt_mul_fixed(1, in, out1.get());
});
});
_t[2] = std::thread([this, in, out2]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[2], [&](){
test_fixedt_mul2_fixed(2, in, out2.get());
test_fixedt_mul_fixed(2, in, out2.get());
});
});
......@@ -1258,12 +1330,15 @@ TEST_F(FixedTensorTest, mul2fixed) {
EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), &result));
}
TEST_F(FixedTensorTest, mulplain) {
TEST_F(FixedTensorTest, mulfixed_overflow) {
std::vector<size_t> shape = {2, 2};
std::vector<float> in0_val = {1.0, 1.0, 1.0, 1.0};
std::vector<float> in1_val = {2.0, 2.0, 2.0, 2.0};
std::vector<float> res_val = {2.0, 2.0, 2.0, 2.0};
std::vector<size_t> shape = {1};
// a result of 2^32 or greater overflows the 64-bit ring
// (see the plaintext illustration after this test)
// note: a multiplier larger than 2^20 may yield a wrong result,
// since truncation requires 2^l << 2^k [as stated in ABY3]
std::vector<double> in0_val = {0x1p16};
std::vector<double> in1_val = {0x1p16};
std::vector<double> res_val = {0};
std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in =
{gen(shape), gen(shape)};
......@@ -1271,33 +1346,29 @@ TEST_F(FixedTensorTest, mulplain) {
shape, _cpu_ctx).copy(in[0].get());
test_fixedt_gen_paddle_tensor<int64_t, 16>(in1_val,
shape, _cpu_ctx).copy(in[1].get());
//scaling factor is not copied by the copy function
dynamic_cast<PaddleTensor<int64_t>*>(in[0].get())->
scaling_factor() = 16;
dynamic_cast<PaddleTensor<int64_t>*>(in[1].get())->
scaling_factor() = 16;
auto out0 = _s_tensor_factory->create<int64_t>(shape);
auto out1 = _s_tensor_factory->create<int64_t>(shape);
auto out2 = _s_tensor_factory->create<int64_t>(shape);
PaddleTensor<int64_t> result =
test_fixedt_gen_paddle_tensor<int64_t, 16>(res_val, shape, _cpu_ctx);
_t[0] = std::thread([this, in, out0]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[0], [&](){
test_fixedt_mul_plain(0, in, out0.get());
test_fixedt_mul_fixed(0, in, out0.get());
});
});
_t[1] = std::thread([this, in, out1]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[1], [&](){
test_fixedt_mul_plain(1, in, out1.get());
test_fixedt_mul_fixed(1, in, out1.get());
});
});
_t[2] = std::thread([this, in, out2]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[2], [&](){
test_fixedt_mul_plain(2, in, out2.get());
test_fixedt_mul_fixed(2, in, out2.get());
});
});
......@@ -1311,12 +1382,15 @@ TEST_F(FixedTensorTest, mulplain) {
EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), &result));
}
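// ---------------------------------------------------------------------------
// Illustration only (not test code): with N = 16 both operands above encode
// as 2^16 * 2^16 = 2^32 raw, so the untruncated product is 2^32 * 2^32 = 2^64,
// which wraps to 0 in the 64-bit ring and decodes to the expected {0}.
// Helper name is an assumption.
#include <cassert>
#include <cstdint>

inline void mulfixed_overflow_demo() {
    const uint64_t raw0 = uint64_t(1) << 32;  // 2^16 at scaling factor 2^16
    const uint64_t raw1 = uint64_t(1) << 32;  // 2^16 at scaling factor 2^16
    const uint64_t product = raw0 * raw1;     // wraps modulo 2^64
    assert(product == 0);
    assert(static_cast<int64_t>(product >> 16) == 0);  // truncated result is 0
}
// ---------------------------------------------------------------------------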
TEST_F(FixedTensorTest, divplain) {
TEST_F(FixedTensorTest, mulfixed_upper_bound) {
std::vector<size_t> shape = {2, 2};
std::vector<float> in0_val = {1.0, 1.0, 1.0, 1.0};
std::vector<float> in1_val = {2.0, 2.0, 2.0, 2.0};
std::vector<float> res_val = {0.5, 0.5, 0.5, 0.5};
std::vector<size_t> shape = {1, 2};
// each input should be less than 2^20;
// larger inputs may yield a wrong result,
// since truncation requires 2^l << 2^k [as stated in ABY3]
std::vector<double> in0_val = {1.0, 1.0};
std::vector<double> in1_val = {0x1p20, -0x1p20};
std::vector<double> res_val = {0x1p20, -0x1p20};
std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in =
{gen(shape), gen(shape)};
......@@ -1324,33 +1398,29 @@ TEST_F(FixedTensorTest, divplain) {
shape, _cpu_ctx).copy(in[0].get());
test_fixedt_gen_paddle_tensor<int64_t, 16>(in1_val,
shape, _cpu_ctx).copy(in[1].get());
//scaling factor is not copied by the copy function
dynamic_cast<PaddleTensor<int64_t>*>(in[0].get())->
scaling_factor() = 16;
dynamic_cast<PaddleTensor<int64_t>*>(in[1].get())->
scaling_factor() = 16;
auto out0 = _s_tensor_factory->create<int64_t>(shape);
auto out1 = _s_tensor_factory->create<int64_t>(shape);
auto out2 = _s_tensor_factory->create<int64_t>(shape);
PaddleTensor<int64_t> result =
test_fixedt_gen_paddle_tensor<int64_t, 16>(res_val, shape, _cpu_ctx);
_t[0] = std::thread([this, in, out0]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[0], [&](){
test_fixedt_div_plain(0, in, out0.get());
test_fixedt_mul_fixed(0, in, out0.get());
});
});
_t[1] = std::thread([this, in, out1]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[1], [&](){
test_fixedt_div_plain(1, in, out1.get());
test_fixedt_mul_fixed(1, in, out1.get());
});
});
_t[2] = std::thread([this, in, out2]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[2], [&](){
test_fixedt_div_plain(2, in, out2.get());
test_fixedt_mul_fixed(2, in, out2.get());
});
});
......@@ -1364,45 +1434,42 @@ TEST_F(FixedTensorTest, divplain) {
EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), &result));
}
TEST_F(FixedTensorTest, sum) {
TEST_F(FixedTensorTest, mulfixed_low_bound) {
std::vector<size_t> shape = {2, 2};
std::vector<float> in0_val = {1.0, 1.0, 1.0, 1.0};
//std::vector<float> in1_val = {2.0, 2.0, 2.0, 2.0};
std::vector<float> res_val = {4.0};
std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in = {gen(shape)};
std::vector<size_t> shape = {1};
std::vector<double> in0_val = {1.0};
std::vector<double> in1_val = {0x1p-16};
std::vector<double> res_val = {0};
std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in =
{gen(shape), gen(shape)};
test_fixedt_gen_paddle_tensor<int64_t, 16>(in0_val,
shape, _cpu_ctx).copy(in[0].get());
//scaling factor is not copied by the copy function
dynamic_cast<PaddleTensor<int64_t>*>(in[0].get())->
scaling_factor() = 16;
std::vector<size_t> ret_shape = {1};
auto out0 = _s_tensor_factory->create<int64_t>(ret_shape);
auto out1 = _s_tensor_factory->create<int64_t>(ret_shape);
auto out2 = _s_tensor_factory->create<int64_t>(ret_shape);
test_fixedt_gen_paddle_tensor<int64_t, 16>(in1_val,
shape, _cpu_ctx).copy(in[1].get());
auto out0 = _s_tensor_factory->create<int64_t>(shape);
auto out1 = _s_tensor_factory->create<int64_t>(shape);
auto out2 = _s_tensor_factory->create<int64_t>(shape);
PaddleTensor<int64_t> result =
test_fixedt_gen_paddle_tensor<int64_t, 16>(res_val, ret_shape, _cpu_ctx);
test_fixedt_gen_paddle_tensor<int64_t, 16>(res_val, shape, _cpu_ctx);
_t[0] = std::thread([this, in, out0]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[0], [&](){
test_fixedt_sum_fixed(0, in, out0.get());
test_fixedt_mul_fixed(0, in, out0.get());
});
});
_t[1] = std::thread([this, in, out1]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[1], [&](){
test_fixedt_sum_fixed(1, in, out1.get());
test_fixedt_mul_fixed(1, in, out1.get());
});
});
_t[2] = std::thread([this, in, out2]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[2], [&](){
test_fixedt_sum_fixed(2, in, out2.get());
test_fixedt_mul_fixed(2, in, out2.get());
});
});
......@@ -1416,12 +1483,12 @@ TEST_F(FixedTensorTest, sum) {
EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), &result));
}
TEST_F(FixedTensorTest, mat_mulfixed) {
TEST_F(FixedTensorTest, mulplain) {
std::vector<size_t> shape = {2, 2};
std::vector<float> in0_val = {1.0, 1.0, 1.0, 1.0};
std::vector<float> in1_val = {2.0, 2.0, 2.0, 2.0};
std::vector<float> res_val = {4.0, 4.0, 4.0, 4.0};
std::vector<double> in0_val = {1.0, 1.0, 1.0, 1.0};
std::vector<double> in1_val = {2.0, 2.0, 2.0, 2.0};
std::vector<double> res_val = {2.0, 2.0, 2.0, 2.0};
std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in =
{gen(shape), gen(shape)};
......@@ -1429,29 +1496,33 @@ TEST_F(FixedTensorTest, mat_mulfixed) {
shape, _cpu_ctx).copy(in[0].get());
test_fixedt_gen_paddle_tensor<int64_t, 16>(in1_val,
shape, _cpu_ctx).copy(in[1].get());
//scaling factor is not copied by the copy function
dynamic_cast<PaddleTensor<int64_t>*>(in[0].get())->
scaling_factor() = 16;
dynamic_cast<PaddleTensor<int64_t>*>(in[1].get())->
scaling_factor() = 16;
auto out0 = _s_tensor_factory->create<int64_t>(shape);
auto out1 = _s_tensor_factory->create<int64_t>(shape);
auto out2 = _s_tensor_factory->create<int64_t>(shape);
PaddleTensor<int64_t> result =
test_fixedt_gen_paddle_tensor<int64_t, 16>(res_val, shape, _cpu_ctx);
_t[0] = std::thread([this, in, out0]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[0], [&](){
test_fixedt_mat_mul_fixed(0, in, out0.get());
test_fixedt_mul_plain(0, in, out0.get());
});
});
_t[1] = std::thread([this, in, out1]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[1], [&](){
test_fixedt_mat_mul_fixed(1, in, out1.get());
test_fixedt_mul_plain(1, in, out1.get());
});
});
_t[2] = std::thread([this, in, out2]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[2], [&](){
test_fixedt_mat_mul_fixed(2, in, out2.get());
test_fixedt_mul_plain(2, in, out2.get());
});
});
......@@ -1465,12 +1536,12 @@ TEST_F(FixedTensorTest, mat_mulfixed) {
EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), &result));
}
TEST_F(FixedTensorTest, mat_mulplain) {
TEST_F(FixedTensorTest, divplain) {
std::vector<size_t> shape = {2, 2};
std::vector<float> in0_val = {1.0, 1.0, 1.0, 1.0};
std::vector<float> in1_val = {2.0, 2.0, 2.0, 2.0};
std::vector<float> res_val = {4.0, 4.0, 4.0, 4.0};
std::vector<double> in0_val = {1.0, 1.0, 1.0, 1.0};
std::vector<double> in1_val = {2.0, 2.0, 2.0, 2.0};
std::vector<double> res_val = {0.5, 0.5, 0.5, 0.5};
std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in =
{gen(shape), gen(shape)};
......@@ -1492,19 +1563,19 @@ TEST_F(FixedTensorTest, mat_mulplain) {
_t[0] = std::thread([this, in, out0]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[0], [&](){
test_fixedt_mat_mul_plain(0, in, out0.get());
test_fixedt_div_plain(0, in, out0.get());
});
});
_t[1] = std::thread([this, in, out1]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[1], [&](){
test_fixedt_mat_mul_plain(1, in, out1.get());
test_fixedt_div_plain(1, in, out1.get());
});
});
_t[2] = std::thread([this, in, out2]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[2], [&](){
test_fixedt_mat_mul_plain(2, in, out2.get());
test_fixedt_div_plain(2, in, out2.get());
});
});
......@@ -1518,12 +1589,12 @@ TEST_F(FixedTensorTest, mat_mulplain) {
EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), &result));
}
TEST_F(FixedTensorTest, dot_mul_fixed) {
TEST_F(FixedTensorTest, divfixed) {
std::vector<size_t> shape = {2, 2};
std::vector<float> in0_val = {1.0, 1.0, 1.0, 1.0};
std::vector<float> in1_val = {2.0, 2.0, 2.0, 2.0};
std::vector<float> res_val = {8.0};
std::vector<double> in0_val = {1.0, 1.0, 1.0, 1.0};
std::vector<double> in1_val = {1.0, 10.0, 1000.0, 700.0};
std::vector<double> res_val = {1.0, 0.1, 0.001, 1.0 / 700};
std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in =
{gen(shape), gen(shape)};
......@@ -1531,35 +1602,29 @@ TEST_F(FixedTensorTest, dot_mul_fixed) {
shape, _cpu_ctx).copy(in[0].get());
test_fixedt_gen_paddle_tensor<int64_t, 16>(in1_val,
shape, _cpu_ctx).copy(in[1].get());
//scaling factor is not copied by the copy function
dynamic_cast<PaddleTensor<int64_t>*>(in[0].get())->
scaling_factor() = 16;
std::vector<size_t> ret_shape = {1};
auto out0 = _s_tensor_factory->create<int64_t>(ret_shape);
auto out1 = _s_tensor_factory->create<int64_t>(ret_shape);
auto out2 = _s_tensor_factory->create<int64_t>(ret_shape);
auto out0 = _s_tensor_factory->create<int64_t>(shape);
auto out1 = _s_tensor_factory->create<int64_t>(shape);
auto out2 = _s_tensor_factory->create<int64_t>(shape);
PaddleTensor<int64_t> result =
test_fixedt_gen_paddle_tensor<int64_t, 16>(res_val, ret_shape, _cpu_ctx);
test_fixedt_gen_paddle_tensor<int64_t, 16>(res_val, shape, _cpu_ctx);
_t[0] = std::thread([this, in, out0]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[0], [&](){
test_fixedt_dot_mul_fixed(0, in, out0.get());
test_fixedt_div_fixed(0, in, out0.get());
});
});
_t[1] = std::thread([this, in, out1]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[1], [&](){
test_fixedt_dot_mul_fixed(1, in, out1.get());
test_fixedt_div_fixed(1, in, out1.get());
});
});
_t[2] = std::thread([this, in, out2]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[2], [&](){
test_fixedt_dot_mul_fixed(2, in, out2.get());
test_fixedt_div_fixed(2, in, out2.get());
});
});
......@@ -1570,15 +1635,16 @@ TEST_F(FixedTensorTest, dot_mul_fixed) {
EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), out1.get()));
EXPECT_TRUE(test_fixedt_check_tensor_eq(out1.get(), out2.get()));
EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), &result));
EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), &result, 0.2, true));
}
TEST_F(FixedTensorTest, dot_mul_plain) {
TEST_F(FixedTensorTest, divfixed_low_bound) {
std::vector<size_t> shape = {2, 2};
std::vector<float> in0_val = {1.0, 1.0, 1.0, 1.0};
std::vector<float> in1_val = {2.0, 2.0, 2.0, 2.0};
std::vector<float> res_val = {8.0};
std::vector<size_t> shape = {1};
std::vector<double> in0_val = {1.0};
// divisor > 1/x0, default x0 = 2^-15
std::vector<double> in1_val = {0x1p15};
std::vector<double> res_val = {0x1p-15};
std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in =
{gen(shape), gen(shape)};
......@@ -1586,37 +1652,29 @@ TEST_F(FixedTensorTest, dot_mul_plain) {
shape, _cpu_ctx).copy(in[0].get());
test_fixedt_gen_paddle_tensor<int64_t, 16>(in1_val,
shape, _cpu_ctx).copy(in[1].get());
//scaling factor is not copied by the copy function
dynamic_cast<PaddleTensor<int64_t>*>(in[0].get())->
scaling_factor() = 16;
dynamic_cast<PaddleTensor<int64_t>*>(in[1].get())->
scaling_factor() = 16;
std::vector<size_t> ret_shape = {1};
auto out0 = _s_tensor_factory->create<int64_t>(ret_shape);
auto out1 = _s_tensor_factory->create<int64_t>(ret_shape);
auto out2 = _s_tensor_factory->create<int64_t>(ret_shape);
auto out0 = _s_tensor_factory->create<int64_t>(shape);
auto out1 = _s_tensor_factory->create<int64_t>(shape);
auto out2 = _s_tensor_factory->create<int64_t>(shape);
PaddleTensor<int64_t> result =
test_fixedt_gen_paddle_tensor<int64_t, 16>(res_val, ret_shape, _cpu_ctx);
test_fixedt_gen_paddle_tensor<int64_t, 16>(res_val, shape, _cpu_ctx);
_t[0] = std::thread([this, in, out0]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[0], [&](){
test_fixedt_dot_mul_plain(0, in, out0.get());
test_fixedt_div_fixed(0, in, out0.get());
});
});
_t[1] = std::thread([this, in, out1]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[1], [&](){
test_fixedt_dot_mul_plain(1, in, out1.get());
test_fixedt_div_fixed(1, in, out1.get());
});
});
_t[2] = std::thread([this, in, out2]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[2], [&](){
test_fixedt_dot_mul_plain(2, in, out2.get());
test_fixedt_div_fixed(2, in, out2.get());
});
});
......@@ -1627,49 +1685,48 @@ TEST_F(FixedTensorTest, dot_mul_plain) {
EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), out1.get()));
EXPECT_TRUE(test_fixedt_check_tensor_eq(out1.get(), out2.get()));
EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), &result));
EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), &result, 0.0001, true));
}
TEST_F(FixedTensorTest, gt_plain) {
TEST_F(FixedTensorTest, sum) {
std::vector<size_t> shape = {2, 2};
std::vector<float> in0_val = {3.0, 3.0, 3.0, 3.0};
std::vector<float> in1_val = {2.0, 2.0, 2.0, 2.0};
std::vector<float> res_val = {1 / pow(2, 16), 1 / pow(2, 16), 1 / pow(2, 16), 1 / pow(2, 16)};
std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in =
{gen(shape), gen(shape)};
std::vector<double> in0_val = {1.0, 1.0, 1.0, 1.0};
//std::vector<double> in1_val = {2.0, 2.0, 2.0, 2.0};
std::vector<double> res_val = {4.0};
std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in = {gen(shape)};
test_fixedt_gen_paddle_tensor<int64_t, 16>(in0_val,
shape, _cpu_ctx).copy(in[0].get());
test_fixedt_gen_paddle_tensor<int64_t, 16>(in1_val,
shape, _cpu_ctx).copy(in[1].get());
//scaling factor is not copied by the copy function
dynamic_cast<PaddleTensor<int64_t>*>(in[0].get())->
scaling_factor() = 16;
dynamic_cast<PaddleTensor<int64_t>*>(in[1].get())->
scaling_factor() = 16;
auto out0 = _s_tensor_factory->create<int64_t>(shape);
auto out1 = _s_tensor_factory->create<int64_t>(shape);
auto out2 = _s_tensor_factory->create<int64_t>(shape);
std::vector<size_t> ret_shape = {1};
auto out0 = _s_tensor_factory->create<int64_t>(ret_shape);
auto out1 = _s_tensor_factory->create<int64_t>(ret_shape);
auto out2 = _s_tensor_factory->create<int64_t>(ret_shape);
PaddleTensor<int64_t> result =
test_fixedt_gen_paddle_tensor<int64_t, 16>(res_val, shape, _cpu_ctx);
test_fixedt_gen_paddle_tensor<int64_t, 16>(res_val, ret_shape, _cpu_ctx);
_t[0] = std::thread([this, in, out0]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[0], [&](){
test_fixedt_gt_plain(0, in, out0.get());
test_fixedt_sum_fixed(0, in, out0.get());
});
});
_t[1] = std::thread([this, in, out1]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[1], [&](){
test_fixedt_gt_plain(1, in, out1.get());
test_fixedt_sum_fixed(1, in, out1.get());
});
});
_t[2] = std::thread([this, in, out2]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[2], [&](){
test_fixedt_gt_plain(2, in, out2.get());
test_fixedt_sum_fixed(2, in, out2.get());
});
});
......@@ -1683,12 +1740,12 @@ TEST_F(FixedTensorTest, gt_plain) {
EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), &result));
}
TEST_F(FixedTensorTest, gt_fixed) {
TEST_F(FixedTensorTest, mat_mulplain) {
std::vector<size_t> shape = {2, 2};
std::vector<float> in0_val = {3.0, 3.0, 3.0, 3.0};
std::vector<float> in1_val = {2.0, 2.0, 2.0, 2.0};
std::vector<float> res_val = {1 / pow(2, 16), 1 / pow(2, 16), 1 / pow(2, 16), 1 / pow(2, 16)};
std::vector<double> in0_val = {1.0, 1.0, 1.0, 1.0};
std::vector<double> in1_val = {2.0, 2.0, 2.0, 2.0};
std::vector<double> res_val = {4.0, 4.0, 4.0, 4.0};
std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in =
{gen(shape), gen(shape)};
......@@ -1710,19 +1767,19 @@ TEST_F(FixedTensorTest, gt_fixed) {
_t[0] = std::thread([this, in, out0]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[0], [&](){
test_fixedt_gt_fixed(0, in, out0.get());
test_fixedt_mat_mul_plain(0, in, out0.get());
});
});
_t[1] = std::thread([this, in, out1]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[1], [&](){
test_fixedt_gt_fixed(1, in, out1.get());
test_fixedt_mat_mul_plain(1, in, out1.get());
});
});
_t[2] = std::thread([this, in, out2]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[2], [&](){
test_fixedt_gt_fixed(2, in, out2.get());
test_fixedt_mat_mul_plain(2, in, out2.get());
});
});
......@@ -1736,12 +1793,12 @@ TEST_F(FixedTensorTest, gt_fixed) {
EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), &result));
}
TEST_F(FixedTensorTest, lt_plain) {
TEST_F(FixedTensorTest, dot_mul_fixed) {
std::vector<size_t> shape = {2, 2};
std::vector<float> in0_val = {2.0, 2.0, 3.0, 3.0};
std::vector<float> in1_val = {3.0, 3.0, 2.0, 2.0};
std::vector<float> res_val = {1 / pow(2, 16), 1 / pow(2, 16), 0, 0};
std::vector<double> in0_val = {1.0, 1.0, 1.0, 1.0};
std::vector<double> in1_val = {2.0, 2.0, 2.0, 2.0};
std::vector<double> res_val = {8.0};
std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in =
{gen(shape), gen(shape)};
......@@ -1752,30 +1809,32 @@ TEST_F(FixedTensorTest, lt_plain) {
//scaling factor is not copied by the copy function
dynamic_cast<PaddleTensor<int64_t>*>(in[0].get())->
scaling_factor() = 16;
dynamic_cast<PaddleTensor<int64_t>*>(in[1].get())->
scaling_factor() = 16;
auto out0 = _s_tensor_factory->create<int64_t>(shape);
auto out1 = _s_tensor_factory->create<int64_t>(shape);
auto out2 = _s_tensor_factory->create<int64_t>(shape);
PaddleTensor<int64_t> result =
test_fixedt_gen_paddle_tensor<int64_t, 16>(res_val, shape, _cpu_ctx);
std::vector<size_t> ret_shape = {1};
auto out0 = _s_tensor_factory->create<int64_t>(ret_shape);
auto out1 = _s_tensor_factory->create<int64_t>(ret_shape);
auto out2 = _s_tensor_factory->create<int64_t>(ret_shape);
_t[0] = std::thread([this, in, out0]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[0], [&](){
test_fixedt_lt_plain(0, in, out0.get());
PaddleTensor<int64_t> result =
test_fixedt_gen_paddle_tensor<int64_t, 16>(res_val, ret_shape, _cpu_ctx);
_t[0] = std::thread([this, in, out0]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[0], [&](){
test_fixedt_dot_mul_fixed(0, in, out0.get());
});
});
_t[1] = std::thread([this, in, out1]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[1], [&](){
test_fixedt_lt_plain(1, in, out1.get());
test_fixedt_dot_mul_fixed(1, in, out1.get());
});
});
_t[2] = std::thread([this, in, out2]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[2], [&](){
test_fixedt_lt_plain(2, in, out2.get());
test_fixedt_dot_mul_fixed(2, in, out2.get());
});
});
......@@ -1789,12 +1848,69 @@ TEST_F(FixedTensorTest, lt_plain) {
EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), &result));
}
TEST_F(FixedTensorTest, lt_fixed) {
TEST_F(FixedTensorTest, dot_mul_plain) {
std::vector<size_t> shape = {2, 2};
std::vector<double> in0_val = {1.0, 1.0, 1.0, 1.0};
std::vector<double> in1_val = {2.0, 2.0, 2.0, 2.0};
std::vector<double> res_val = {8.0};
std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in =
{gen(shape), gen(shape)};
test_fixedt_gen_paddle_tensor<int64_t, 16>(in0_val,
shape, _cpu_ctx).copy(in[0].get());
test_fixedt_gen_paddle_tensor<int64_t, 16>(in1_val,
shape, _cpu_ctx).copy(in[1].get());
//scaling factor is not copied by the copy function
dynamic_cast<PaddleTensor<int64_t>*>(in[0].get())->
scaling_factor() = 16;
dynamic_cast<PaddleTensor<int64_t>*>(in[1].get())->
scaling_factor() = 16;
std::vector<size_t> ret_shape = {1};
auto out0 = _s_tensor_factory->create<int64_t>(ret_shape);
auto out1 = _s_tensor_factory->create<int64_t>(ret_shape);
auto out2 = _s_tensor_factory->create<int64_t>(ret_shape);
PaddleTensor<int64_t> result =
test_fixedt_gen_paddle_tensor<int64_t, 16>(res_val, ret_shape, _cpu_ctx);
_t[0] = std::thread([this, in, out0]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[0], [&](){
test_fixedt_dot_mul_plain(0, in, out0.get());
});
});
_t[1] = std::thread([this, in, out1]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[1], [&](){
test_fixedt_dot_mul_plain(1, in, out1.get());
});
});
_t[2] = std::thread([this, in, out2]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[2], [&](){
test_fixedt_dot_mul_plain(2, in, out2.get());
});
});
_t[0].join();
_t[1].join();
_t[2].join();
EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), out1.get()));
EXPECT_TRUE(test_fixedt_check_tensor_eq(out1.get(), out2.get()));
EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), &result));
}
TEST_F(FixedTensorTest, gt_plain) {
std::vector<size_t> shape = {2, 2};
std::vector<float> in0_val = {2.0, 2.0, 3.0, 3.0};
std::vector<float> in1_val = {3.0, 3.0, 2.0, 2.0};
std::vector<float> res_val = {1 / pow(2, 16), 1 / pow(2, 16), 0, 0};
std::vector<double> in0_val = {3.0, 3.0, 3.0, 3.0};
std::vector<double> in1_val = {2.0, 2.0, 2.0, 2.0};
std::vector<double> res_val = {1 / pow(2, 16), 1 / pow(2, 16), 1 / pow(2, 16), 1 / pow(2, 16)};
std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in =
{gen(shape), gen(shape)};
......@@ -1816,19 +1932,19 @@ TEST_F(FixedTensorTest, lt_fixed) {
_t[0] = std::thread([this, in, out0]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[0], [&](){
test_fixedt_lt_fixed(0, in, out0.get());
test_fixedt_gt_plain(0, in, out0.get());
});
});
_t[1] = std::thread([this, in, out1]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[1], [&](){
test_fixedt_lt_fixed(1, in, out1.get());
test_fixedt_gt_plain(1, in, out1.get());
});
});
_t[2] = std::thread([this, in, out2]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[2], [&](){
test_fixedt_lt_fixed(2, in, out2.get());
test_fixedt_gt_plain(2, in, out2.get());
});
});
......@@ -1842,12 +1958,12 @@ TEST_F(FixedTensorTest, lt_fixed) {
EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), &result));
}
TEST_F(FixedTensorTest, leq_plain) {
TEST_F(FixedTensorTest, gt_fixed) {
std::vector<size_t> shape = {2, 2};
std::vector<float> in0_val = {2.0, 3.0, 3.0, 3.0};
std::vector<float> in1_val = {3.0, 3.0, 2.0, 2.0};
std::vector<float> res_val = {1 / pow(2, 16), 1 / pow(2, 16), 0, 0};
std::vector<double> in0_val = {3.0, 3.0, 3.0, 3.0};
std::vector<double> in1_val = {2.0, 2.0, 2.0, 2.0};
std::vector<double> res_val = {1 / pow(2, 16), 1 / pow(2, 16), 1 / pow(2, 16), 1 / pow(2, 16)};
std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in =
{gen(shape), gen(shape)};
......@@ -1869,19 +1985,19 @@ TEST_F(FixedTensorTest, leq_plain) {
_t[0] = std::thread([this, in, out0]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[0], [&](){
test_fixedt_leq_plain(0, in, out0.get());
test_fixedt_gt_fixed(0, in, out0.get());
});
});
_t[1] = std::thread([this, in, out1]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[1], [&](){
test_fixedt_leq_plain(1, in, out1.get());
test_fixedt_gt_fixed(1, in, out1.get());
});
});
_t[2] = std::thread([this, in, out2]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[2], [&](){
test_fixedt_leq_plain(2, in, out2.get());
test_fixedt_gt_fixed(2, in, out2.get());
});
});
......@@ -1895,12 +2011,12 @@ TEST_F(FixedTensorTest, leq_plain) {
EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), &result));
}
TEST_F(FixedTensorTest, leq_fixed) {
TEST_F(FixedTensorTest, lt_plain) {
std::vector<size_t> shape = {2, 2};
std::vector<float> in0_val = {2.0, 3.0, 3.0, 3.0};
std::vector<float> in1_val = {3.0, 3.0, 2.0, 2.0};
std::vector<float> res_val = {1 / pow(2, 16), 1 / pow(2, 16), 0, 0};
std::vector<double> in0_val = {2.0, 2.0, 3.0, 3.0};
std::vector<double> in1_val = {3.0, 3.0, 2.0, 2.0};
std::vector<double> res_val = {1 / pow(2, 16), 1 / pow(2, 16), 0, 0};
std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in =
{gen(shape), gen(shape)};
......@@ -1922,19 +2038,19 @@ TEST_F(FixedTensorTest, leq_fixed) {
_t[0] = std::thread([this, in, out0]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[0], [&](){
test_fixedt_leq_fixed(0, in, out0.get());
test_fixedt_lt_plain(0, in, out0.get());
});
});
_t[1] = std::thread([this, in, out1]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[1], [&](){
test_fixedt_leq_fixed(1, in, out1.get());
test_fixedt_lt_plain(1, in, out1.get());
});
});
_t[2] = std::thread([this, in, out2]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[2], [&](){
test_fixedt_leq_fixed(2, in, out2.get());
test_fixedt_lt_plain(2, in, out2.get());
});
});
......@@ -1948,12 +2064,12 @@ TEST_F(FixedTensorTest, leq_fixed) {
EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), &result));
}
TEST_F(FixedTensorTest, geq_plain) {
TEST_F(FixedTensorTest, lt_fixed) {
std::vector<size_t> shape = {2, 2};
std::vector<float> in0_val = {3.0, 3.0, 2.0, 2.0};
std::vector<float> in1_val = {2.0, 3.0, 3.0, 3.0};
std::vector<float> res_val = {1 / pow(2, 16), 1 / pow(2, 16), 0, 0};
std::vector<double> in0_val = {2.0, 2.0, 3.0, 3.0};
std::vector<double> in1_val = {3.0, 3.0, 2.0, 2.0};
std::vector<double> res_val = {1 / pow(2, 16), 1 / pow(2, 16), 0, 0};
std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in =
{gen(shape), gen(shape)};
......@@ -1975,19 +2091,19 @@ TEST_F(FixedTensorTest, geq_plain) {
_t[0] = std::thread([this, in, out0]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[0], [&](){
test_fixedt_geq_plain(0, in, out0.get());
test_fixedt_lt_fixed(0, in, out0.get());
});
});
_t[1] = std::thread([this, in, out1]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[1], [&](){
test_fixedt_geq_plain(1, in, out1.get());
test_fixedt_lt_fixed(1, in, out1.get());
});
});
_t[2] = std::thread([this, in, out2]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[2], [&](){
test_fixedt_geq_plain(2, in, out2.get());
test_fixedt_lt_fixed(2, in, out2.get());
});
});
......@@ -2001,12 +2117,12 @@ TEST_F(FixedTensorTest, geq_plain) {
EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), &result));
}
TEST_F(FixedTensorTest, geq_fixed) {
TEST_F(FixedTensorTest, leq_plain) {
std::vector<size_t> shape = {2, 2};
std::vector<float> in0_val = {3.0, 3.0, 2.0, 2.0};
std::vector<float> in1_val = {2.0, 3.0, 3.0, 3.0};
std::vector<float> res_val = {1 / pow(2, 16), 1 / pow(2, 16), 0, 0};
std::vector<double> in0_val = {2.0, 3.0, 3.0, 3.0};
std::vector<double> in1_val = {3.0, 3.0, 2.0, 2.0};
std::vector<double> res_val = {1 / pow(2, 16), 1 / pow(2, 16), 0, 0};
std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in =
{gen(shape), gen(shape)};
......@@ -2028,19 +2144,19 @@ TEST_F(FixedTensorTest, geq_fixed) {
_t[0] = std::thread([this, in, out0]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[0], [&](){
test_fixedt_geq_fixed(0, in, out0.get());
test_fixedt_leq_plain(0, in, out0.get());
});
});
_t[1] = std::thread([this, in, out1]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[1], [&](){
test_fixedt_geq_fixed(1, in, out1.get());
test_fixedt_leq_plain(1, in, out1.get());
});
});
_t[2] = std::thread([this, in, out2]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[2], [&](){
test_fixedt_geq_fixed(2, in, out2.get());
test_fixedt_leq_plain(2, in, out2.get());
});
});
......@@ -2054,12 +2170,12 @@ TEST_F(FixedTensorTest, geq_fixed) {
EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), &result));
}
TEST_F(FixedTensorTest, eq_plain) {
TEST_F(FixedTensorTest, leq_fixed) {
std::vector<size_t> shape = {2, 2};
std::vector<float> in0_val = {3.0, 3.0, 2.0, 3.0};
std::vector<float> in1_val = {3.0, 3.0, 3.0, 2.0};
std::vector<float> res_val = {1 / pow(2, 16), 1 / pow(2, 16), 0, 0};
std::vector<double> in0_val = {2.0, 3.0, 3.0, 3.0};
std::vector<double> in1_val = {3.0, 3.0, 2.0, 2.0};
std::vector<double> res_val = {1 / pow(2, 16), 1 / pow(2, 16), 0, 0};
std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in =
{gen(shape), gen(shape)};
......@@ -2081,19 +2197,19 @@ TEST_F(FixedTensorTest, eq_plain) {
_t[0] = std::thread([this, in, out0]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[0], [&](){
test_fixedt_eq_plain(0, in, out0.get());
test_fixedt_leq_fixed(0, in, out0.get());
});
});
_t[1] = std::thread([this, in, out1]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[1], [&](){
test_fixedt_eq_plain(1, in, out1.get());
test_fixedt_leq_fixed(1, in, out1.get());
});
});
_t[2] = std::thread([this, in, out2]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[2], [&](){
test_fixedt_eq_plain(2, in, out2.get());
test_fixedt_leq_fixed(2, in, out2.get());
});
});
......@@ -2107,12 +2223,12 @@ TEST_F(FixedTensorTest, eq_plain) {
EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), &result));
}
TEST_F(FixedTensorTest, eq_fixed) {
TEST_F(FixedTensorTest, geq_plain) {
std::vector<size_t> shape = {2, 2};
std::vector<float> in0_val = {3.0, 3.0, 2.0, 3.0};
std::vector<float> in1_val = {3.0, 3.0, 3.0, 2.0};
std::vector<float> res_val = {1 / pow(2, 16), 1 / pow(2, 16), 0, 0};
std::vector<double> in0_val = {3.0, 3.0, 2.0, 2.0};
std::vector<double> in1_val = {2.0, 3.0, 3.0, 3.0};
std::vector<double> res_val = {1 / pow(2, 16), 1 / pow(2, 16), 0, 0};
std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in =
{gen(shape), gen(shape)};
......@@ -2134,19 +2250,19 @@ TEST_F(FixedTensorTest, eq_fixed) {
_t[0] = std::thread([this, in, out0]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[0], [&](){
test_fixedt_eq_fixed(0, in, out0.get());
test_fixedt_geq_plain(0, in, out0.get());
});
});
_t[1] = std::thread([this, in, out1]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[1], [&](){
test_fixedt_eq_fixed(1, in, out1.get());
test_fixedt_geq_plain(1, in, out1.get());
});
});
_t[2] = std::thread([this, in, out2]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[2], [&](){
test_fixedt_eq_fixed(2, in, out2.get());
test_fixedt_geq_plain(2, in, out2.get());
});
});
......@@ -2160,12 +2276,12 @@ TEST_F(FixedTensorTest, eq_fixed) {
EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), &result));
}
TEST_F(FixedTensorTest, neq_plain) {
TEST_F(FixedTensorTest, geq_fixed) {
std::vector<size_t> shape = {2, 2};
std::vector<float> in0_val = {2.0, 3.0, 3.0, 3.0};
std::vector<float> in1_val = {3.0, 2.0, 3.0, 3.0};
std::vector<float> res_val = {1 / pow(2, 16), 1 / pow(2, 16), 0, 0};
std::vector<double> in0_val = {3.0, 3.0, 2.0, 2.0};
std::vector<double> in1_val = {2.0, 3.0, 3.0, 3.0};
std::vector<double> res_val = {1 / pow(2, 16), 1 / pow(2, 16), 0, 0};
std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in =
{gen(shape), gen(shape)};
......@@ -2187,19 +2303,19 @@ TEST_F(FixedTensorTest, neq_plain) {
_t[0] = std::thread([this, in, out0]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[0], [&](){
test_fixedt_neq_plain(0, in, out0.get());
test_fixedt_geq_fixed(0, in, out0.get());
});
});
_t[1] = std::thread([this, in, out1]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[1], [&](){
test_fixedt_neq_plain(1, in, out1.get());
test_fixedt_geq_fixed(1, in, out1.get());
});
});
_t[2] = std::thread([this, in, out2]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[2], [&](){
test_fixedt_neq_plain(2, in, out2.get());
test_fixedt_geq_fixed(2, in, out2.get());
});
});
......@@ -2213,12 +2329,12 @@ TEST_F(FixedTensorTest, neq_plain) {
EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), &result));
}
TEST_F(FixedTensorTest, neq_fixed) {
TEST_F(FixedTensorTest, eq_plain) {
std::vector<size_t> shape = {2, 2};
std::vector<float> in0_val = {3.0, 2.0, 3.0, 3.0};
std::vector<float> in1_val = {2.0, 3.0, 3.0, 3.0};
std::vector<float> res_val = {1 / pow(2, 16), 1 / pow(2, 16), 0, 0};
std::vector<double> in0_val = {3.0, 3.0, 2.0, 3.0};
std::vector<double> in1_val = {3.0, 3.0, 3.0, 2.0};
std::vector<double> res_val = {1 / pow(2, 16), 1 / pow(2, 16), 0, 0};
std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in =
{gen(shape), gen(shape)};
......@@ -2240,19 +2356,19 @@ TEST_F(FixedTensorTest, neq_fixed) {
_t[0] = std::thread([this, in, out0]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[0], [&](){
test_fixedt_neq_fixed(0, in, out0.get());
test_fixedt_eq_plain(0, in, out0.get());
});
});
_t[1] = std::thread([this, in, out1]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[1], [&](){
test_fixedt_neq_fixed(1, in, out1.get());
test_fixedt_eq_plain(1, in, out1.get());
});
});
_t[2] = std::thread([this, in, out2]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[2], [&](){
test_fixedt_neq_fixed(2, in, out2.get());
test_fixedt_eq_plain(2, in, out2.get());
});
});
......@@ -2266,42 +2382,46 @@ TEST_F(FixedTensorTest, neq_fixed) {
EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), &result));
}
TEST_F(FixedTensorTest, exp_fixed) {
TEST_F(FixedTensorTest, eq_fixed) {
std::vector<size_t> shape = {2, 2};
std::vector<float> in0_val = {0.0, 0.0, 1.0, 1.0};
//std::vector<float> in1_val = {2.0, 2.0, 2.0, 2.0};
std::vector<float> res_val = {1.0, 1.0, 2.7183, 2.7183};
std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in = {gen(shape)};
std::vector<double> in0_val = {3.0, 3.0, 2.0, 3.0};
std::vector<double> in1_val = {3.0, 3.0, 3.0, 2.0};
std::vector<double> res_val = {1 / pow(2, 16), 1 / pow(2, 16), 0, 0};
std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in =
{gen(shape), gen(shape)};
test_fixedt_gen_paddle_tensor<int64_t, 16>(in0_val,
shape, _cpu_ctx).copy(in[0].get());
test_fixedt_gen_paddle_tensor<int64_t, 16>(in1_val,
shape, _cpu_ctx).copy(in[1].get());
//scaling factor is not copied by the copy function
dynamic_cast<PaddleTensor<int64_t>*>(in[0].get())->
scaling_factor() = 16;
dynamic_cast<PaddleTensor<int64_t>*>(in[1].get())->
scaling_factor() = 16;
auto out0 = _s_tensor_factory->create<int64_t>(shape);
auto out1 = _s_tensor_factory->create<int64_t>(shape);
auto out2 = _s_tensor_factory->create<int64_t>(shape);
PaddleTensor<int64_t> result =
test_fixedt_gen_paddle_tensor<int64_t, 16>(res_val, shape, _cpu_ctx);
_t[0] = std::thread([this, in, out0]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[0], [&](){
test_fixedt_exp_fixed(0, in, out0.get());
test_fixedt_eq_fixed(0, in, out0.get());
});
});
_t[1] = std::thread([this, in, out1]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[1], [&](){
test_fixedt_exp_fixed(1, in, out1.get());
test_fixedt_eq_fixed(1, in, out1.get());
});
});
_t[2] = std::thread([this, in, out2]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[2], [&](){
test_fixedt_exp_fixed(2, in, out2.get());
test_fixedt_eq_fixed(2, in, out2.get());
});
});
......@@ -2310,47 +2430,51 @@ TEST_F(FixedTensorTest, exp_fixed) {
_t[1].join();
_t[2].join();
EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), out1.get(), 0.1));
EXPECT_TRUE(test_fixedt_check_tensor_eq(out1.get(), out2.get(), 0.1));
EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), &result, 0.1));
EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), out1.get()));
EXPECT_TRUE(test_fixedt_check_tensor_eq(out1.get(), out2.get()));
EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), &result));
}
TEST_F(FixedTensorTest, polynomial) {
// y = 1 + x
TEST_F(FixedTensorTest, neq_plain) {
std::vector<size_t> shape = {2, 2};
std::vector<float> in0_val = {-1.0, 2.0, 2.0, 2.0};
//std::vector<float> in1_val = {2.0, 2.0, 2.0, 2.0};
std::vector<float> res_val = {0.0, 3.0, 3.0, 3.0};
std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in = {gen(shape)};
std::vector<double> in0_val = {2.0, 3.0, 3.0, 3.0};
std::vector<double> in1_val = {3.0, 2.0, 3.0, 3.0};
std::vector<double> res_val = {1 / pow(2, 16), 1 / pow(2, 16), 0, 0};
std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in =
{gen(shape), gen(shape)};
test_fixedt_gen_paddle_tensor<int64_t, 16>(in0_val,
shape, _cpu_ctx).copy(in[0].get());
test_fixedt_gen_paddle_tensor<int64_t, 16>(in1_val,
shape, _cpu_ctx).copy(in[1].get());
//scaling factor is not copied by the copy function
dynamic_cast<PaddleTensor<int64_t>*>(in[0].get())->
scaling_factor() = 16;
dynamic_cast<PaddleTensor<int64_t>*>(in[1].get())->
scaling_factor() = 16;
auto out0 = _s_tensor_factory->create<int64_t>(shape);
auto out1 = _s_tensor_factory->create<int64_t>(shape);
auto out2 = _s_tensor_factory->create<int64_t>(shape);
PaddleTensor<int64_t> result =
test_fixedt_gen_paddle_tensor<int64_t, 16>(res_val, shape, _cpu_ctx);
_t[0] = std::thread([this, in, out0]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[0], [&](){
test_fixedt_poly_fixed(0, in, out0.get());
test_fixedt_neq_plain(0, in, out0.get());
});
});
_t[1] = std::thread([this, in, out1]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[1], [&](){
test_fixedt_poly_fixed(1, in, out1.get());
test_fixedt_neq_plain(1, in, out1.get());
});
});
_t[2] = std::thread([this, in, out2]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[2], [&](){
test_fixedt_poly_fixed(2, in, out2.get());
test_fixedt_neq_plain(2, in, out2.get());
});
});
......@@ -2364,43 +2488,46 @@ TEST_F(FixedTensorTest, polynomial) {
EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), &result));
}
TEST_F(FixedTensorTest, polynomial_wise) {
// y = x + 1 (x >= 0)
// y = 1 (x < 0)
TEST_F(FixedTensorTest, neq_fixed) {
std::vector<size_t> shape = {2, 2};
std::vector<float> in0_val = {-1.0, 1.0, 2.0, 2.0};
//std::vector<float> in1_val = {2.0, 2.0, 2.0, 2.0};
std::vector<float> res_val = {1.0, 2.0, 3.0, 3.0};
std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in = {gen(shape)};
std::vector<double> in0_val = {3.0, 2.0, 3.0, 3.0};
std::vector<double> in1_val = {2.0, 3.0, 3.0, 3.0};
std::vector<double> res_val = {1 / pow(2, 16), 1 / pow(2, 16), 0, 0};
std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in =
{gen(shape), gen(shape)};
test_fixedt_gen_paddle_tensor<int64_t, 16>(in0_val,
shape, _cpu_ctx).copy(in[0].get());
test_fixedt_gen_paddle_tensor<int64_t, 16>(in1_val,
shape, _cpu_ctx).copy(in[1].get());
//scaling factor is not copied by the copy function
dynamic_cast<PaddleTensor<int64_t>*>(in[0].get())->
scaling_factor() = 16;
dynamic_cast<PaddleTensor<int64_t>*>(in[1].get())->
scaling_factor() = 16;
auto out0 = _s_tensor_factory->create<int64_t>(shape);
auto out1 = _s_tensor_factory->create<int64_t>(shape);
auto out2 = _s_tensor_factory->create<int64_t>(shape);
PaddleTensor<int64_t> result =
test_fixedt_gen_paddle_tensor<int64_t, 16>(res_val, shape, _cpu_ctx);
_t[0] = std::thread([this, in, out0]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[0], [&](){
test_fixedt_poly_wise_fixed(0, in, out0.get());
test_fixedt_neq_fixed(0, in, out0.get());
});
});
_t[1] = std::thread([this, in, out1]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[1], [&](){
test_fixedt_poly_wise_fixed(1, in, out1.get());
test_fixedt_neq_fixed(1, in, out1.get());
});
});
_t[2] = std::thread([this, in, out2]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[2], [&](){
test_fixedt_poly_wise_fixed(2, in, out2.get());
test_fixedt_neq_fixed(2, in, out2.get());
});
});
......@@ -2414,12 +2541,11 @@ TEST_F(FixedTensorTest, polynomial_wise) {
EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), &result));
}
TEST_F(FixedTensorTest, relu) {
TEST_F(FixedTensorTest, exp_fixed) {
std::vector<size_t> shape = {2, 2};
std::vector<float> in0_val = {1.0, -1.0, -2, 2};
//std::vector<float> in1_val = {2.0, 2.0, 2.0, 2.0};
std::vector<float> res_val = {1.0, 0.0, 0.0, 2};
std::vector<double> in0_val = {0.0, 0.0, 1.0, 1.0};
std::vector<double> res_val = {1.0, 1.0, 2.71828, 2.71828};
std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in = {gen(shape)};
test_fixedt_gen_paddle_tensor<int64_t, 16>(in0_val,
......@@ -2437,19 +2563,19 @@ TEST_F(FixedTensorTest, relu) {
_t[0] = std::thread([this, in, out0]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[0], [&](){
test_fixedt_relu_fixed(0, in, out0.get());
test_fixedt_exp_fixed(0, in, out0.get());
});
});
_t[1] = std::thread([this, in, out1]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[1], [&](){
test_fixedt_relu_fixed(1, in, out1.get());
test_fixedt_exp_fixed(1, in, out1.get());
});
});
_t[2] = std::thread([this, in, out2]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[2], [&](){
test_fixedt_relu_fixed(2, in, out2.get());
test_fixedt_exp_fixed(2, in, out2.get());
});
});
......@@ -2458,17 +2584,19 @@ TEST_F(FixedTensorTest, relu) {
_t[1].join();
_t[2].join();
EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), out1.get()));
EXPECT_TRUE(test_fixedt_check_tensor_eq(out1.get(), out2.get()));
EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), &result));
EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), out1.get(), 0.01, true));
EXPECT_TRUE(test_fixedt_check_tensor_eq(out1.get(), out2.get(), 0.01, true));
EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), &result, 0.01, true));
}
TEST_F(FixedTensorTest, softmax) {
TEST_F(FixedTensorTest, exp_fixed_low_bound) {
std::vector<size_t> shape = {2, 2};
std::vector<float> in0_val = {1.0, 1.0, 1, 1};
//std::vector<float> in1_val = {2.0, 2.0, 2.0, 2.0};
std::vector<float> res_val = {0.5, 0.5, 0.5, 0.5};
std::vector<size_t> shape = {1, 3};
// exp is approximated by exp(x) ~= (1 + x/n)^n with n = 2^ite = 256;
// for x below -256 the approximation mirrors ((1 + (-512+y)/256)^256 = (1 - y/256)^256 ~= exp(-y)),
// so exp(-512) ~= exp(0), exp(-511) ~= exp(-1), and exp(-256) = 0
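// plain-double reference of this approximation (illustrative only, not part of the test):
//   double approx_exp(double x) {
//       double t = 1.0 + x / 256.0;
//       for (int i = 0; i < 8; ++i) t *= t; // (1 + x/256)^256 via repeated squaring
//       return t;
//   }
// approx_exp(-512) == 1.0, approx_exp(-511) ~= 0.3676, approx_exp(-256) == 0.0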
std::vector<double> in0_val = {-512, -511, -256};
std::vector<double> res_val = {1, 0.367879, 0};
std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in = {gen(shape)};
test_fixedt_gen_paddle_tensor<int64_t, 16>(in0_val,
......@@ -2486,19 +2614,19 @@ TEST_F(FixedTensorTest, softmax) {
_t[0] = std::thread([this, in, out0]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[0], [&](){
test_fixedt_softmax_fixed(0, in, out0.get());
test_fixedt_exp_fixed(0, in, out0.get());
});
});
_t[1] = std::thread([this, in, out1]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[1], [&](){
test_fixedt_softmax_fixed(1, in, out1.get());
test_fixedt_exp_fixed(1, in, out1.get());
});
});
_t[2] = std::thread([this, in, out2]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[2], [&](){
test_fixedt_softmax_fixed(2, in, out2.get());
test_fixedt_exp_fixed(2, in, out2.get());
});
});
......@@ -2507,17 +2635,16 @@ TEST_F(FixedTensorTest, softmax) {
_t[1].join();
_t[2].join();
EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), out1.get()));
EXPECT_TRUE(test_fixedt_check_tensor_eq(out1.get(), out2.get()));
EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), &result));
EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), out1.get(), 0.01, true));
EXPECT_TRUE(test_fixedt_check_tensor_eq(out1.get(), out2.get(), 0.01, true));
EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), &result, 0.01, true));
}
TEST_F(FixedTensorTest, sigmoid) {
std::vector<size_t> shape = {2, 2};
std::vector<float> in0_val = {0.0, 0.0, -0.5, 0.5};
//std::vector<float> in1_val = {2.0, 2.0, 2.0, 2.0};
std::vector<float> res_val = {0.5, 0.5, 0.3775, 0.6225};
TEST_F(FixedTensorTest, exp_fixed_upper_bound) {
std::vector<size_t> shape = {1};
// inputs larger than 15 may produce wrong results because of fixed-point multiplication error
std::vector<double> in0_val = {15};
std::vector<double> res_val = {3269017.37};
std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in = {gen(shape)};
test_fixedt_gen_paddle_tensor<int64_t, 16>(in0_val,
......@@ -2535,19 +2662,19 @@ TEST_F(FixedTensorTest, sigmoid) {
_t[0] = std::thread([this, in, out0]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[0], [&](){
test_fixedt_sigmoid_fixed(0, in, out0.get());
test_fixedt_exp_fixed(0, in, out0.get());
});
});
_t[1] = std::thread([this, in, out1]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[1], [&](){
test_fixedt_sigmoid_fixed(1, in, out1.get());
test_fixedt_exp_fixed(1, in, out1.get());
});
});
_t[2] = std::thread([this, in, out2]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[2], [&](){
test_fixedt_sigmoid_fixed(2, in, out2.get());
test_fixedt_exp_fixed(2, in, out2.get());
});
});
......@@ -2556,9 +2683,758 @@ TEST_F(FixedTensorTest, sigmoid) {
_t[1].join();
_t[2].join();
EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), out1.get(), 0.1));
EXPECT_TRUE(test_fixedt_check_tensor_eq(out1.get(), out2.get(), 0.1));
EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), &result, 0.1));
EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), out1.get(), 0.4, true));
EXPECT_TRUE(test_fixedt_check_tensor_eq(out1.get(), out2.get(), 0.4, true));
EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), &result, 0.4, true));
}
TEST_F(FixedTensorTest, polynomial) {
// y = 1 + x
std::vector<size_t> shape = {2, 2};
std::vector<double> in0_val = {-1.0, 2.0, 2.0, 2.0};
//std::vector<double> in1_val = {2.0, 2.0, 2.0, 2.0};
std::vector<double> res_val = {0.0, 3.0, 3.0, 3.0};
std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in = {gen(shape)};
test_fixedt_gen_paddle_tensor<int64_t, 16>(in0_val,
shape, _cpu_ctx).copy(in[0].get());
//scaling factor is not copied by the copy function
dynamic_cast<PaddleTensor<int64_t>*>(in[0].get())->
scaling_factor() = 16;
auto out0 = _s_tensor_factory->create<int64_t>(shape);
auto out1 = _s_tensor_factory->create<int64_t>(shape);
auto out2 = _s_tensor_factory->create<int64_t>(shape);
PaddleTensor<int64_t> result =
test_fixedt_gen_paddle_tensor<int64_t, 16>(res_val, shape, _cpu_ctx);
_t[0] = std::thread([this, in, out0]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[0], [&](){
test_fixedt_poly_fixed(0, in, out0.get());
});
});
_t[1] = std::thread([this, in, out1]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[1], [&](){
test_fixedt_poly_fixed(1, in, out1.get());
});
});
_t[2] = std::thread([this, in, out2]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[2], [&](){
test_fixedt_poly_fixed(2, in, out2.get());
});
});
_t[0].join();
_t[1].join();
_t[2].join();
EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), out1.get()));
EXPECT_TRUE(test_fixedt_check_tensor_eq(out1.get(), out2.get()));
EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), &result));
}
TEST_F(FixedTensorTest, polynomial_wise) {
// y = x + 1 (x >= 0)
// y = 1 (x < 0)
std::vector<size_t> shape = {2, 2};
std::vector<double> in0_val = {-1.0, 1.0, 2.0, 2.0};
//std::vector<double> in1_val = {2.0, 2.0, 2.0, 2.0};
std::vector<double> res_val = {1.0, 2.0, 3.0, 3.0};
std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in = {gen(shape)};
test_fixedt_gen_paddle_tensor<int64_t, 16>(in0_val,
shape, _cpu_ctx).copy(in[0].get());
//scaling factor is not copied by the copy function
dynamic_cast<PaddleTensor<int64_t>*>(in[0].get())->
scaling_factor() = 16;
auto out0 = _s_tensor_factory->create<int64_t>(shape);
auto out1 = _s_tensor_factory->create<int64_t>(shape);
auto out2 = _s_tensor_factory->create<int64_t>(shape);
PaddleTensor<int64_t> result =
test_fixedt_gen_paddle_tensor<int64_t, 16>(res_val, shape, _cpu_ctx);
_t[0] = std::thread([this, in, out0]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[0], [&](){
test_fixedt_poly_wise_fixed(0, in, out0.get());
});
});
_t[1] = std::thread([this, in, out1]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[1], [&](){
test_fixedt_poly_wise_fixed(1, in, out1.get());
});
});
_t[2] = std::thread([this, in, out2]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[2], [&](){
test_fixedt_poly_wise_fixed(2, in, out2.get());
});
});
_t[0].join();
_t[1].join();
_t[2].join();
EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), out1.get()));
EXPECT_TRUE(test_fixedt_check_tensor_eq(out1.get(), out2.get()));
EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), &result));
}
TEST_F(FixedTensorTest, relu) {
std::vector<size_t> shape = {2, 2};
std::vector<double> in0_val = {1.0, -1.0, -2, 2};
//std::vector<double> in1_val = {2.0, 2.0, 2.0, 2.0};
std::vector<double> res_val = {1.0, 0.0, 0.0, 2};
std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in = {gen(shape)};
test_fixedt_gen_paddle_tensor<int64_t, 16>(in0_val,
shape, _cpu_ctx).copy(in[0].get());
//scaling factor is not copied by the copy function
dynamic_cast<PaddleTensor<int64_t>*>(in[0].get())->
scaling_factor() = 16;
auto out0 = _s_tensor_factory->create<int64_t>(shape);
auto out1 = _s_tensor_factory->create<int64_t>(shape);
auto out2 = _s_tensor_factory->create<int64_t>(shape);
PaddleTensor<int64_t> result =
test_fixedt_gen_paddle_tensor<int64_t, 16>(res_val, shape, _cpu_ctx);
_t[0] = std::thread([this, in, out0]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[0], [&](){
test_fixedt_relu_fixed(0, in, out0.get());
});
});
_t[1] = std::thread([this, in, out1]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[1], [&](){
test_fixedt_relu_fixed(1, in, out1.get());
});
});
_t[2] = std::thread([this, in, out2]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[2], [&](){
test_fixedt_relu_fixed(2, in, out2.get());
});
});
_t[0].join();
_t[1].join();
_t[2].join();
EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), out1.get()));
EXPECT_TRUE(test_fixedt_check_tensor_eq(out1.get(), out2.get()));
EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), &result));
}
TEST_F(FixedTensorTest, relu_low_bound) {
std::vector<size_t> shape = {1};
std::vector<double> in0_val = {-0x1p-20};
std::vector<double> res_val = {0.0};
std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in = {gen(shape)};
test_fixedt_gen_paddle_tensor<int64_t, 16>(in0_val,
shape, _cpu_ctx).copy(in[0].get());
//scaling factor is not copied by the copy function
dynamic_cast<PaddleTensor<int64_t>*>(in[0].get())->
scaling_factor() = 16;
auto out0 = _s_tensor_factory->create<int64_t>(shape);
auto out1 = _s_tensor_factory->create<int64_t>(shape);
auto out2 = _s_tensor_factory->create<int64_t>(shape);
PaddleTensor<int64_t> result =
test_fixedt_gen_paddle_tensor<int64_t, 16>(res_val, shape, _cpu_ctx);
_t[0] = std::thread([this, in, out0]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[0], [&](){
test_fixedt_relu_fixed(0, in, out0.get());
});
});
_t[1] = std::thread([this, in, out1]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[1], [&](){
test_fixedt_relu_fixed(1, in, out1.get());
});
});
_t[2] = std::thread([this, in, out2]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[2], [&](){
test_fixedt_relu_fixed(2, in, out2.get());
});
});
_t[0].join();
_t[1].join();
_t[2].join();
EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), out1.get()));
EXPECT_TRUE(test_fixedt_check_tensor_eq(out1.get(), out2.get()));
EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), &result));
}
TEST_F(FixedTensorTest, relu_upper_bound) {
std::vector<size_t> shape = {1};
std::vector<double> in0_val = {0x1p20};
std::vector<double> res_val = {0x1p20};
std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in = {gen(shape)};
test_fixedt_gen_paddle_tensor<int64_t, 16>(in0_val,
shape, _cpu_ctx).copy(in[0].get());
//scaling factor is not copied by the copy function
dynamic_cast<PaddleTensor<int64_t>*>(in[0].get())->
scaling_factor() = 16;
auto out0 = _s_tensor_factory->create<int64_t>(shape);
auto out1 = _s_tensor_factory->create<int64_t>(shape);
auto out2 = _s_tensor_factory->create<int64_t>(shape);
PaddleTensor<int64_t> result =
test_fixedt_gen_paddle_tensor<int64_t, 16>(res_val, shape, _cpu_ctx);
_t[0] = std::thread([this, in, out0]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[0], [&](){
test_fixedt_relu_fixed(0, in, out0.get());
});
});
_t[1] = std::thread([this, in, out1]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[1], [&](){
test_fixedt_relu_fixed(1, in, out1.get());
});
});
_t[2] = std::thread([this, in, out2]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[2], [&](){
test_fixedt_relu_fixed(2, in, out2.get());
});
});
_t[0].join();
_t[1].join();
_t[2].join();
EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), out1.get()));
EXPECT_TRUE(test_fixedt_check_tensor_eq(out1.get(), out2.get()));
EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), &result));
}
TEST_F(FixedTensorTest, relu2) {
std::vector<size_t> shape = {2, 2};
std::vector<double> in0_val = {1.0, -1.0, -2, 2};
//std::vector<double> in1_val = {2.0, 2.0, 2.0, 2.0};
std::vector<double> res_val = {1.0, 0.0, 0.0, 2};
std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in = {gen(shape)};
test_fixedt_gen_paddle_tensor<int64_t, 16>(in0_val,
shape, _cpu_ctx).copy(in[0].get());
//scaling factor is not copied by the copy function
dynamic_cast<PaddleTensor<int64_t>*>(in[0].get())->
scaling_factor() = 16;
auto out0 = _s_tensor_factory->create<int64_t>(shape);
auto out1 = _s_tensor_factory->create<int64_t>(shape);
auto out2 = _s_tensor_factory->create<int64_t>(shape);
PaddleTensor<int64_t> result =
test_fixedt_gen_paddle_tensor<int64_t, 16>(res_val, shape, _cpu_ctx);
_t[0] = std::thread([this, in, out0]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[0], [&](){
test_fixedt_relu2_fixed(0, in, out0.get());
});
});
_t[1] = std::thread([this, in, out1]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[1], [&](){
test_fixedt_relu2_fixed(1, in, out1.get());
});
});
_t[2] = std::thread([this, in, out2]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[2], [&](){
test_fixedt_relu2_fixed(2, in, out2.get());
});
});
_t[0].join();
_t[1].join();
_t[2].join();
EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), out1.get()));
EXPECT_TRUE(test_fixedt_check_tensor_eq(out1.get(), out2.get()));
EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), &result));
}
TEST_F(FixedTensorTest, softmax) {
std::vector<size_t> shape = {2, 2};
std::vector<double> in0_val = {1.0, 1.0, 1, 1};
//std::vector<double> in1_val = {2.0, 2.0, 2.0, 2.0};
std::vector<double> res_val = {0.5, 0.5, 0.5, 0.5};
std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in = {gen(shape)};
test_fixedt_gen_paddle_tensor<int64_t, 16>(in0_val,
shape, _cpu_ctx).copy(in[0].get());
//scaling factor is not copied by the copy function
dynamic_cast<PaddleTensor<int64_t>*>(in[0].get())->
scaling_factor() = 16;
auto out0 = _s_tensor_factory->create<int64_t>(shape);
auto out1 = _s_tensor_factory->create<int64_t>(shape);
auto out2 = _s_tensor_factory->create<int64_t>(shape);
PaddleTensor<int64_t> result =
test_fixedt_gen_paddle_tensor<int64_t, 16>(res_val, shape, _cpu_ctx);
_t[0] = std::thread([this, in, out0]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[0], [&](){
test_fixedt_softmax_fixed(0, in, out0.get());
});
});
_t[1] = std::thread([this, in, out1]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[1], [&](){
test_fixedt_softmax_fixed(1, in, out1.get());
});
});
_t[2] = std::thread([this, in, out2]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[2], [&](){
test_fixedt_softmax_fixed(2, in, out2.get());
});
});
_t[0].join();
_t[1].join();
_t[2].join();
EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), out1.get(), 0.1));
EXPECT_TRUE(test_fixedt_check_tensor_eq(out1.get(), out2.get(), 0.1));
EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), &result, 0.1));
}
TEST_F(FixedTensorTest, sigmoid_chebyshev) {
std::vector<size_t> shape = {2, 2};
// approximation error grows when input < -3 or > 4
std::vector<double> in0_val = {1.0, 2.0, -3.0, 4.0};
std::vector<double> res_val = {0.73105, 0.88079, 0.0474, 0.9820};
std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in = {gen(shape)};
test_fixedt_gen_paddle_tensor<int64_t, 16>(in0_val,
shape, _cpu_ctx).copy(in[0].get());
//scaling factor is not copied by the copy function
dynamic_cast<PaddleTensor<int64_t>*>(in[0].get())->
scaling_factor() = 16;
auto out0 = _s_tensor_factory->create<int64_t>(shape);
auto out1 = _s_tensor_factory->create<int64_t>(shape);
auto out2 = _s_tensor_factory->create<int64_t>(shape);
PaddleTensor<int64_t> result =
test_fixedt_gen_paddle_tensor<int64_t, 16>(res_val, shape, _cpu_ctx);
_t[0] = std::thread([this, in, out0]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[0], [&](){
test_fixedt_sigmoid_chebyshev_fixed(0, in, out0.get());
});
});
_t[1] = std::thread([this, in, out1]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[1], [&](){
test_fixedt_sigmoid_chebyshev_fixed(1, in, out1.get());
});
});
_t[2] = std::thread([this, in, out2]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[2], [&](){
test_fixedt_sigmoid_chebyshev_fixed(2, in, out2.get());
});
});
_t[0].join();
_t[1].join();
_t[2].join();
EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), out1.get(), 0.03));
EXPECT_TRUE(test_fixedt_check_tensor_eq(out1.get(), out2.get(), 0.03));
EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), &result, 0.03));
}
TEST_F(FixedTensorTest, sigmoid) {
std::vector<size_t> shape = {2, 2};
std::vector<double> in0_val = {0.0, 3, 7, 0.5};
std::vector<double> res_val = {0.5, 0.9525, 0.999, 0.6225};
std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in = {gen(shape)};
test_fixedt_gen_paddle_tensor<int64_t, 16>(in0_val,
shape, _cpu_ctx).copy(in[0].get());
//scaling factor is not copied by the copy function
dynamic_cast<PaddleTensor<int64_t>*>(in[0].get())->
scaling_factor() = 16;
auto out0 = _s_tensor_factory->create<int64_t>(shape);
auto out1 = _s_tensor_factory->create<int64_t>(shape);
auto out2 = _s_tensor_factory->create<int64_t>(shape);
PaddleTensor<int64_t> result =
test_fixedt_gen_paddle_tensor<int64_t, 16>(res_val, shape, _cpu_ctx);
_t[0] = std::thread([this, in, out0]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[0], [&](){
test_fixedt_sigmoid_fixed(0, in, out0.get());
});
});
_t[1] = std::thread([this, in, out1]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[1], [&](){
test_fixedt_sigmoid_fixed(1, in, out1.get());
});
});
_t[2] = std::thread([this, in, out2]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[2], [&](){
test_fixedt_sigmoid_fixed(2, in, out2.get());
});
});
_t[0].join();
_t[1].join();
_t[2].join();
EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), out1.get(), 0.08));
EXPECT_TRUE(test_fixedt_check_tensor_eq(out1.get(), out2.get(), 0.08));
EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), &result, 0.08));
}
TEST_F(FixedTensorTest, sigmoid_enhanced) {
std::vector<size_t> shape = {2, 2};
std::vector<double> in0_val = {0.0, 3, 7, 0.5};
std::vector<double> res_val = {0.5, 0.9525, 0.999, 0.6225};
std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in = {gen(shape)};
test_fixedt_gen_paddle_tensor<int64_t, 16>(in0_val,
shape, _cpu_ctx).copy(in[0].get());
//scaling factor is not copied by the copy function
dynamic_cast<PaddleTensor<int64_t>*>(in[0].get())->
scaling_factor() = 16;
auto out0 = _s_tensor_factory->create<int64_t>(shape);
auto out1 = _s_tensor_factory->create<int64_t>(shape);
auto out2 = _s_tensor_factory->create<int64_t>(shape);
PaddleTensor<int64_t> result =
test_fixedt_gen_paddle_tensor<int64_t, 16>(res_val, shape, _cpu_ctx);
_t[0] = std::thread([this, in, out0]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[0], [&](){
test_fixedt_sigmoid_enhanced_fixed(0, in, out0.get());
});
});
_t[1] = std::thread([this, in, out1]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[1], [&](){
test_fixedt_sigmoid_enhanced_fixed(1, in, out1.get());
});
});
_t[2] = std::thread([this, in, out2]() mutable {
g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[2], [&](){
test_fixedt_sigmoid_enhanced_fixed(2, in, out2.get());
});
});
_t[0].join();
_t[1].join();
_t[2].join();
EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), out1.get(), 0.08));
EXPECT_TRUE(test_fixedt_check_tensor_eq(out1.get(), out2.get(), 0.08));
EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), &result, 0.08));
}
TEST_F(FixedTensorTest, max_test) {
std::vector<size_t> shape = { 1 };
std::shared_ptr<TensorAdapter<int64_t>> sl[3] = { gen(shape), gen(shape), gen(shape) };
std::shared_ptr<TensorAdapter<int64_t>> sr[3] = { gen(shape), gen(shape), gen(shape) };
std::shared_ptr<TensorAdapter<int64_t>> sout[6] = { gen(shape), gen(shape), gen(shape),
gen(shape), gen(shape), gen(shape)};
std::shared_ptr<TensorAdapter<int64_t>> sbout[6] = {
gen(shape), gen(shape), gen(shape), gen(shape), gen(shape), gen(shape)};
// lhs = 6 = 1 + 2 + 3
sl[0]->data()[0] = 1;
sl[1]->data()[0] = 2;
sl[2]->data()[0] = 3;
// rhs = 15 = 4 + 5 + 6
sr[0]->data()[0] = 4;
sr[1]->data()[0] = 5;
sr[2]->data()[0] = 6;
Fix64N16 fl0(sl[0].get(), sl[1].get());
Fix64N16 fl1(sl[1].get(), sl[2].get());
Fix64N16 fl2(sl[2].get(), sl[0].get());
Fix64N16 fr0(sr[0].get(), sr[1].get());
Fix64N16 fr1(sr[1].get(), sr[2].get());
Fix64N16 fr2(sr[2].get(), sr[0].get());
Fix64N16 fout0(sout[0].get(), sout[1].get());
Fix64N16 fout1(sout[2].get(), sout[3].get());
Fix64N16 fout2(sout[4].get(), sout[5].get());
BooleanTensor<int64_t> bout0(sbout[0].get(), sbout[1].get());
BooleanTensor<int64_t> bout1(sbout[2].get(), sbout[3].get());
BooleanTensor<int64_t> bout2(sbout[4].get(), sbout[5].get());
auto p = gen(shape);
auto pb = gen(shape);
_t[0] = std::thread(
[&] () {
g_ctx_holder::template run_with_context(
_exec_ctx.get(), _mpc_ctx[0], [&](){
fl0.max(&fr0, &fout0, &bout0);
fout0.reveal_to_one(0, p.get());
bout0.reveal_to_one(0, pb.get());
});
}
);
_t[1] = std::thread(
[&] () {
g_ctx_holder::template run_with_context(
_exec_ctx.get(), _mpc_ctx[1], [&](){
fl1.max(&fr1, &fout1, &bout1);
fout1.reveal_to_one(0, nullptr);
bout1.reveal_to_one(0, nullptr);
});
}
);
_t[2] = std::thread(
[&] () {
g_ctx_holder::template run_with_context(
_exec_ctx.get(), _mpc_ctx[2], [&](){
fl2.max(&fr2, &fout2, &bout2);
fout2.reveal_to_one(0, nullptr);
bout2.reveal_to_one(0, nullptr);
});
}
);
for (auto &t: _t) {
t.join();
}
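// lhs = 6, rhs = 15: the revealed max is 15 and the revealed comparison bit is 1,
// i.e. rhs > lhs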
EXPECT_EQ(std::max(6, 15), p->data()[0]);
EXPECT_EQ(1, pb->data()[0]);
}
TEST_F(FixedTensorTest, max_test2) {
std::vector<size_t> shape = { 1 };
std::shared_ptr<TensorAdapter<int64_t>> sl[3] = { gen(shape), gen(shape), gen(shape) };
std::shared_ptr<TensorAdapter<int64_t>> sout[6] = { gen(shape), gen(shape), gen(shape),
gen(shape), gen(shape), gen(shape)};
// lhs = 6 = 1 + 2 + 3
sl[0]->data()[0] = 1 << 16;
sl[1]->data()[0] = 2 << 16;
sl[2]->data()[0] = 3 << 16;
auto pr = gen(shape);
// rhs = 15
pr->data()[0] = 15 << 16;
pr->scaling_factor() = 16;
Fix64N16 fl0(sl[0].get(), sl[1].get());
Fix64N16 fl1(sl[1].get(), sl[2].get());
Fix64N16 fl2(sl[2].get(), sl[0].get());
Fix64N16 fout0(sout[0].get(), sout[1].get());
Fix64N16 fout1(sout[2].get(), sout[3].get());
Fix64N16 fout2(sout[4].get(), sout[5].get());
auto p = gen(shape);
_t[0] = std::thread(
[&] () {
g_ctx_holder::template run_with_context(
_exec_ctx.get(), _mpc_ctx[0], [&](){
fl0.max(pr.get(), &fout0);
fout0.reveal_to_one(0, p.get());
});
}
);
_t[1] = std::thread(
[&] () {
g_ctx_holder::template run_with_context(
_exec_ctx.get(), _mpc_ctx[1], [&](){
fl1.max(pr.get(), &fout1);
fout1.reveal_to_one(0, nullptr);
});
}
);
_t[2] = std::thread(
[&] () {
g_ctx_holder::template run_with_context(
_exec_ctx.get(), _mpc_ctx[2], [&](){
fl2.max(pr.get(), &fout2);
fout2.reveal_to_one(0, nullptr);
});
}
);
for (auto &t: _t) {
t.join();
}
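// lhs shares sum to 6 << 16 (6.0 at scale 2^16) and the public rhs is 15;
// the revealed raw result is shifted back down by 16 bits before comparison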
EXPECT_EQ(std::max(6, 15), p->data()[0] >> 16);
}
TEST_F(FixedTensorTest, max_pooling_test) {
std::vector<size_t> shape = { 4, 1 };
std::vector<size_t> shape_ = { 1, 1 };
std::shared_ptr<TensorAdapter<int64_t>> sl[3] = { gen(shape), gen(shape), gen(shape) };
std::shared_ptr<TensorAdapter<int64_t>> sfout[6] = {
gen(shape_), gen(shape_), gen(shape_), gen(shape_), gen(shape_), gen(shape_)};
std::shared_ptr<TensorAdapter<int64_t>> sbout[6] = {
gen(shape), gen(shape), gen(shape), gen(shape), gen(shape), gen(shape)};
assign_to_tensor(sl[1].get(), 0l);
assign_to_tensor(sl[2].get(), 0l);
sl[0]->data()[0] = 2;
sl[0]->data()[1] = 1;
sl[0]->data()[2] = 4;
sl[0]->data()[3] = 3;
// input [2 1 4 3]
auto pmax = gen(shape_);
auto ppos = gen(shape);
Fix64N16 fl0(sl[0].get(), sl[1].get());
Fix64N16 fl1(sl[1].get(), sl[2].get());
Fix64N16 fl2(sl[2].get(), sl[0].get());
Fix64N16 fout0(sfout[0].get(), sfout[1].get());
Fix64N16 fout1(sfout[2].get(), sfout[3].get());
Fix64N16 fout2(sfout[4].get(), sfout[5].get());
BooleanTensor<int64_t> bout0(sbout[0].get(), sbout[1].get());
BooleanTensor<int64_t> bout1(sbout[2].get(), sbout[3].get());
BooleanTensor<int64_t> bout2(sbout[4].get(), sbout[5].get());
_t[0] = std::thread(
[&] () {
g_ctx_holder::template run_with_context(
_exec_ctx.get(), _mpc_ctx[0], [&](){
fl0.max_pooling(&fout0, &bout0);
fout0.reveal_to_one(0, pmax.get());
bout0.reveal_to_one(0, ppos.get());
});
}
);
_t[1] = std::thread(
[&] () {
g_ctx_holder::template run_with_context(
_exec_ctx.get(), _mpc_ctx[1], [&](){
fl1.max_pooling(&fout1, &bout1);
fout1.reveal_to_one(0, nullptr);
bout1.reveal_to_one(0, nullptr);
});
}
);
_t[2] = std::thread(
[&] () {
g_ctx_holder::template run_with_context(
_exec_ctx.get(), _mpc_ctx[2], [&](){
fl2.max_pooling(&fout2, &bout2);
fout2.reveal_to_one(0, nullptr);
bout2.reveal_to_one(0, nullptr);
});
}
);
for (auto &t: _t) {
t.join();
}
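// the plaintext input is [2 1 4 3]; the revealed max is 4 and ppos is a
// one-hot indicator marking index 2 as the argmax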
EXPECT_EQ(4, pmax->data()[0]);
EXPECT_EQ(0, ppos->data()[0]);
EXPECT_EQ(0, ppos->data()[1]);
EXPECT_EQ(1, ppos->data()[2]);
EXPECT_EQ(0, ppos->data()[3]);
}
TEST_F(FixedTensorTest, inv_sqrt_test) {
std::vector<size_t> shape = { 1 };
std::shared_ptr<TensorAdapter<int64_t>> sl[3] = { gen(shape), gen(shape), gen(shape) };
std::shared_ptr<TensorAdapter<int64_t>> sfout[6] = {
gen(shape), gen(shape), gen(shape), gen(shape), gen(shape), gen(shape)};
sl[0]->data()[0] = 0x4p16;
sl[1]->data()[0] = 0;
sl[2]->data()[0] = 0;
// input [4]
auto p = gen(shape);
Fix64N16 fl0(sl[0].get(), sl[1].get());
Fix64N16 fl1(sl[1].get(), sl[2].get());
Fix64N16 fl2(sl[2].get(), sl[0].get());
Fix64N16 fout0(sfout[0].get(), sfout[1].get());
Fix64N16 fout1(sfout[2].get(), sfout[3].get());
Fix64N16 fout2(sfout[4].get(), sfout[5].get());
_t[0] = std::thread(
[&] () {
g_ctx_holder::template run_with_context(
_exec_ctx.get(), _mpc_ctx[0], [&](){
fl0.inverse_square_root(&fout0);
fout0.reveal_to_one(0, p.get());
});
}
);
_t[1] = std::thread(
[&] () {
g_ctx_holder::template run_with_context(
_exec_ctx.get(), _mpc_ctx[1], [&](){
fl1.inverse_square_root(&fout1);
fout1.reveal_to_one(0, nullptr);
});
}
);
_t[2] = std::thread(
[&] () {
g_ctx_holder::template run_with_context(
_exec_ctx.get(), _mpc_ctx[2], [&](){
fl2.inverse_square_root(&fout2);
fout2.reveal_to_one(0, nullptr);
});
}
);
for (auto &t: _t) {
t.join();
}
// inv_sqrt(4) = 1/2
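// the revealed raw value is rescaled by 2^-16 and compared with a tolerance of
// two fixed-point units (2 / 2^16)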
EXPECT_NEAR(0.5, p->data()[0] / 0x1p16f, 2 / 0x1p16f);
}
} // namespace aby3
......@@ -20,29 +20,19 @@
#include "prng_utils.h"
namespace aby3 {
template <typename T, size_t N> class FixedPointUtil {
template<typename T, size_t N>
class FixedPointUtil {
public:
static double reveal(T *shares[3]) {
// reveal
static double reveal(T* shares[3]) {
//reveal
T sum = *shares[0] + *shares[1] + *shares[2];
// to double
int neg = sum < 0 ? -1 : 1;
sum = sum * neg;
T high = sum >> N;
T low = sum & (((T)1 << N) - 1);
double ret = high + low / pow(2, N);
return neg * ret;
return sum / pow(2, N);
}
static void share(double input, T *ret[3]) {
// to int
int neg = input < 0 ? -1 : 1;
double val = input * neg;
T high = val;
double low = val - high;
T ll_in = ((T)high << N) + (T)(low * pow(2, N));
ll_in *= neg;
// share
static void share(double input, T* ret[3]) {
T ll_in = (T) (input * pow(2, N));
//share
*ret[0] = _s_prng.get<T>();
*ret[1] = _s_prng.get<T>();
*ret[2] = ll_in - *ret[0] - *ret[1];
......@@ -51,8 +41,7 @@ public:
static PseudorandomNumberGenerator _s_prng;
};
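// usage sketch (illustrative only): round-trip a double through three additive shares
//   int64_t s0, s1, s2;
//   int64_t* shares[3] = {&s0, &s1, &s2};
//   FixedPointUtil<int64_t, 16>::share(1.5, shares);
//   double x = FixedPointUtil<int64_t, 16>::reveal(shares); // x == 1.5 up to 2^-16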
template <typename T, size_t N>
PseudorandomNumberGenerator
FixedPointUtil<T, N>::_s_prng(block_from_dev_urandom());
template<typename T, size_t N>
PseudorandomNumberGenerator FixedPointUtil<T, N>::_s_prng(block_from_dev_urandom());
} // namespace aby3
} //namespace aby3
......@@ -14,7 +14,7 @@
#include "fixedpoint_util.h"
#include "gtest/gtest.h"
// test
namespace aby3 {
TEST(FixedPointUtil, int64_test) {
......
......@@ -19,76 +19,81 @@
namespace aby3 {
template <typename T> class TensorAdapter {
template <typename T>
class TensorAdapter {
public:
TensorAdapter() = default;
virtual ~TensorAdapter() = default;
virtual T *data() = 0;
virtual T* data() = 0;
virtual const T *data() const = 0;
virtual const T* data() const = 0;
virtual std::vector<size_t> shape() const = 0;
virtual void reshape(const std::vector<size_t> &shape) = 0;
virtual void reshape(const std::vector<size_t>& shape) = 0;
virtual size_t numel() const = 0;
virtual void copy(TensorAdapter *ret) const {
virtual void copy(TensorAdapter* ret) const {
// TODO: check that the shapes are equal
std::copy(data(), data() + numel(), ret->data());
}
// element-wise op; operands must have the same dimensions
virtual void add(const TensorAdapter *rhs, TensorAdapter *ret) const = 0;
virtual void add(const TensorAdapter* rhs, TensorAdapter* ret) const = 0;
// element-wise op; operands must have the same dimensions
virtual void sub(const TensorAdapter *rhs, TensorAdapter *ret) const = 0;
virtual void sub(const TensorAdapter* rhs, TensorAdapter* ret) const = 0;
virtual void negative(TensorAdapter *ret) const = 0;
virtual void negative(TensorAdapter* ret) const = 0;
// element-wise op; operands must have the same dimensions
virtual void mul(const TensorAdapter *rhs, TensorAdapter *ret) const = 0;
virtual void mul(const TensorAdapter* rhs, TensorAdapter* ret) const = 0;
// element-wise op; operands must have the same dimensions
virtual void div(const TensorAdapter *rhs, TensorAdapter *ret) const = 0;
virtual void div(const TensorAdapter* rhs, TensorAdapter* ret) const = 0;
// 2-D matrix multiply; both operands must have rank 2
virtual void mat_mul(const TensorAdapter *rhs, TensorAdapter *ret) const = 0;
virtual void mat_mul(const TensorAdapter* rhs, TensorAdapter* ret) const = 0;
// element-wise op; operands must have the same dimensions
virtual void bitwise_xor(const TensorAdapter *rhs,
TensorAdapter *ret) const = 0;
virtual void bitwise_xor(const TensorAdapter* rhs, TensorAdapter* ret) const = 0;
// element-wise op; operands must have the same dimensions
virtual void bitwise_and(const TensorAdapter *rhs,
TensorAdapter *ret) const = 0;
virtual void bitwise_and(const TensorAdapter* rhs, TensorAdapter* ret) const = 0;
// element-wise op; operands must have the same dimensions
virtual void bitwise_or(const TensorAdapter *rhs,
TensorAdapter *ret) const = 0;
virtual void bitwise_or(const TensorAdapter* rhs, TensorAdapter* ret) const = 0;
// element-wise op; operands must have the same dimensions
virtual void bitwise_not(TensorAdapter *ret) const = 0;
virtual void bitwise_not(TensorAdapter* ret) const = 0;
virtual void lshift(size_t rhs, TensorAdapter *ret) const = 0;
virtual void lshift(size_t rhs, TensorAdapter* ret) const = 0;
virtual void rshift(size_t rhs, TensorAdapter *ret) const = 0;
virtual void rshift(size_t rhs, TensorAdapter* ret) const = 0;
virtual void logical_rshift(size_t rhs, TensorAdapter *ret) const = 0;
virtual void logical_rshift(size_t rhs, TensorAdapter* ret) const = 0;
// when using an integer type T as fixed-point number
// value of T val is interpreted as val / 2 ^ scaling_factor()
virtual size_t scaling_factor() const = 0;
virtual size_t &scaling_factor() = 0;
virtual size_t& scaling_factor() = 0;
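// e.g. with scaling_factor() == 16, the raw int64_t value (1 << 16)
// represents 1.0 and (3 << 15) represents 1.5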
// slice by shape[0]
// e.g. x.shape = [ 2, 3, 4]
// x.slice(1, 2, y)
// y.shape = [ 1, 3, 4]
virtual void slice(size_t begin_idx, size_t end_idx,
TensorAdapter *out) const = 0;
virtual void slice(size_t begin_idx, size_t end_idx, TensorAdapter* out) const = 0;
};
template<typename T>
inline void assign_to_tensor(TensorAdapter<T>* input, T assign_num) {
std::transform(input->data(), input->data() + input->numel(),
input->data(), [assign_num](T) { return assign_num; });
}
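// e.g. assign_to_tensor(t.get(), (int64_t)0) zero-fills a tensor, as the
// fixed-point tests above do to zero two of the three shares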
} // namespace aby3
add_compile_options(-msse4.2 -maes)
set(PSI_SRCS
"./aes.cc"
"./cuckoo_hash.cc"
......@@ -16,7 +14,11 @@ add_dependencies(psi_o crypto)
add_library(psi SHARED $<TARGET_OBJECTS:psi_o>)
target_link_libraries(psi crypto)
if (USE_OPENMP)
target_link_libraries(psi OpenMP::OpenMP_CXX OpenMP::OpenMP_C crypto)
else()
target_link_libraries(psi crypto)
endif (USE_OPENMP)
cc_test(aes_test SRCS aes_test.cc DEPS psi)
cc_test(ot_test SRCS ot_test.cc DEPS psi)
......
......@@ -14,10 +14,13 @@
#include "aes.h"
#ifdef USE_AES_NI
#include <wmmintrin.h>
#endif
namespace psi {
#ifdef USE_AES_NI
static block aes128_key_expansion(block key, block key_rcon) {
key_rcon = _mm_shuffle_epi32(key_rcon, _MM_SHUFFLE(3, 3, 3, 3));
key = _mm_xor_si128(key, _mm_slli_si128(key, 4));
......@@ -26,9 +29,7 @@ static block aes128_key_expansion(block key, block key_rcon) {
return _mm_xor_si128(key, key_rcon);
}
AES::AES(const block &user_key) { set_key(user_key); }
void AES::set_key(const block &user_key) {
void AES::set_key(const block& user_key) {
_round_key[0] = user_key;
_round_key[1] = aes128_key_expansion(
_round_key[0], _mm_aeskeygenassist_si128(_round_key[0], 0x01));
......@@ -52,7 +53,7 @@ void AES::set_key(const block &user_key) {
_round_key[9], _mm_aeskeygenassist_si128(_round_key[9], 0x36));
}
void AES::ecb_enc_block(const block &plaintext, block &cyphertext) const {
void AES::ecb_enc_block(const block& plaintext, block& cyphertext) const {
cyphertext = _mm_xor_si128(plaintext, _round_key[0]);
cyphertext = _mm_aesenc_si128(cyphertext, _round_key[1]);
cyphertext = _mm_aesenc_si128(cyphertext, _round_key[2]);
......@@ -66,57 +67,37 @@ void AES::ecb_enc_block(const block &plaintext, block &cyphertext) const {
cyphertext = _mm_aesenclast_si128(cyphertext, _round_key[10]);
}
block AES::ecb_enc_block(const block &plaintext) const {
block ret;
ecb_enc_block(plaintext, ret);
return ret;
#else
// openssl aes
void AES::set_key(const block& user_key) {
// sizeof block = 128 bit
AES_set_encrypt_key(reinterpret_cast<const unsigned char*>(&user_key),
128, &_aes_key);
}
#define REPEATED_FUNC(func, idx, out, in, k) \
do { \
out[idx + 0] = func(in[idx + 0], k); \
out[idx + 1] = func(in[idx + 1], k); \
out[idx + 2] = func(in[idx + 2], k); \
out[idx + 3] = func(in[idx + 3], k); \
out[idx + 4] = func(in[idx + 4], k); \
out[idx + 5] = func(in[idx + 5], k); \
out[idx + 6] = func(in[idx + 6], k); \
out[idx + 7] = func(in[idx + 7], k); \
} while (0)
void AES::ecb_enc_block(const block& plaintext, block& cyphertext) const {
AES_encrypt(reinterpret_cast<const unsigned char*>(&plaintext),
reinterpret_cast<unsigned char*>(&cyphertext),
&_aes_key);
}
#endif
void AES::ecb_enc_blocks(const block *plaintexts, size_t block_num,
block *cyphertext) const {
const size_t step = 8;
size_t idx = 0;
size_t length = block_num - block_num % step;
void AES::ecb_enc_blocks(const block* plaintexts, size_t block_num,
block* cyphertext) const {
for (; idx < length; idx += step) {
REPEATED_FUNC(_mm_xor_si128, idx, cyphertext, plaintexts, _round_key[0]);
REPEATED_FUNC(_mm_aesenc_si128, idx, cyphertext, cyphertext, _round_key[1]);
REPEATED_FUNC(_mm_aesenc_si128, idx, cyphertext, cyphertext, _round_key[2]);
REPEATED_FUNC(_mm_aesenc_si128, idx, cyphertext, cyphertext, _round_key[3]);
REPEATED_FUNC(_mm_aesenc_si128, idx, cyphertext, cyphertext, _round_key[4]);
REPEATED_FUNC(_mm_aesenc_si128, idx, cyphertext, cyphertext, _round_key[5]);
REPEATED_FUNC(_mm_aesenc_si128, idx, cyphertext, cyphertext, _round_key[6]);
REPEATED_FUNC(_mm_aesenc_si128, idx, cyphertext, cyphertext, _round_key[7]);
REPEATED_FUNC(_mm_aesenc_si128, idx, cyphertext, cyphertext, _round_key[8]);
REPEATED_FUNC(_mm_aesenc_si128, idx, cyphertext, cyphertext, _round_key[9]);
REPEATED_FUNC(_mm_aesenclast_si128, idx, cyphertext, cyphertext,
_round_key[10]);
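// ecb_enc_blocks now simply dispatches to ecb_enc_block (AES-NI or the OpenSSL
// fallback, depending on USE_AES_NI). When built with -fopenmp (USE_OPENMP) the
// loop below is split across 4 threads; otherwise the pragmas are ignored and
// the loop runs serially.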
#pragma omp parallel num_threads(4)
#pragma omp for
for (size_t i = 0; i < block_num; ++i) {
ecb_enc_block(plaintexts[i], cyphertext[i]);
}
}
for (; idx < block_num; ++idx) {
cyphertext[idx] = _mm_xor_si128(plaintexts[idx], _round_key[0]);
cyphertext[idx] = _mm_aesenc_si128(cyphertext[idx], _round_key[1]);
cyphertext[idx] = _mm_aesenc_si128(cyphertext[idx], _round_key[2]);
cyphertext[idx] = _mm_aesenc_si128(cyphertext[idx], _round_key[3]);
cyphertext[idx] = _mm_aesenc_si128(cyphertext[idx], _round_key[4]);
cyphertext[idx] = _mm_aesenc_si128(cyphertext[idx], _round_key[5]);
cyphertext[idx] = _mm_aesenc_si128(cyphertext[idx], _round_key[6]);
cyphertext[idx] = _mm_aesenc_si128(cyphertext[idx], _round_key[7]);
cyphertext[idx] = _mm_aesenc_si128(cyphertext[idx], _round_key[8]);
cyphertext[idx] = _mm_aesenc_si128(cyphertext[idx], _round_key[9]);
cyphertext[idx] = _mm_aesenclast_si128(cyphertext[idx], _round_key[10]);
}
AES::AES(const block& user_key) { set_key(user_key); }
block AES::ecb_enc_block(const block& plaintext) const {
block ret;
ecb_enc_block(plaintext, ret);
return ret;
}
} // namespace psi
......@@ -16,6 +16,10 @@
#include <emmintrin.h>
#ifndef USE_AES_NI
#include <openssl/aes.h>
#endif
namespace psi {
using block = __m128i;
......@@ -24,23 +28,27 @@ class AES {
public:
AES() {}
AES(const block &user_key);
AES(const AES &other) = delete;
AES(const block& user_key);
AES &operator=(const AES &other) = delete;
AES(const AES& other) = delete;
void set_key(const block &user_key);
AES& operator=(const AES& other) = delete;
void ecb_enc_block(const block &plaintext, block &cyphertext) const;
void set_key(const block& user_key);
block ecb_enc_block(const block &plaintext) const;
void ecb_enc_block(const block& plaintext, block& cyphertext) const;
void ecb_enc_blocks(const block *plaintexts, size_t block_num,
block *ciphertext) const;
block ecb_enc_block(const block& plaintext) const;
void ecb_enc_blocks(const block* plaintexts, size_t block_num,
block* ciphertext) const;
private:
#ifdef USE_AES_NI
block _round_key[11];
#else
AES_KEY _aes_key;
#endif
};
} // namespace psi
......@@ -14,7 +14,9 @@
#include "aes.h"
#include <chrono>
#include <cstring>
#include <iostream>
#include "gtest/gtest.h"
......@@ -24,16 +26,13 @@ namespace psi {
TEST(aes, base_test) {
std::string plain("\x00\x11\x22\x33\x44\x55\x66\x77"
"\x88\x99\xaa\xbb\xcc\xdd\xee\xff",
16);
"\x88\x99\xaa\xbb\xcc\xdd\xee\xff", 16);
std::string key("\x00\x01\x02\x03\x04\x05\x06\x07"
"\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
16);
"\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", 16);
std::string cipher("\x69\xc4\xe0\xd8\x6a\x7b\x04\x30"
"\xd8\xcd\xb7\x80\x70\xb4\xc5\x5a",
16);
"\xd8\xcd\xb7\x80\x70\xb4\xc5\x5a", 16);
block p;
......@@ -58,4 +57,31 @@ TEST(aes, base_test) {
EXPECT_TRUE(equals(c, c_));
}
const size_t bench_size = 0x10000;
block p[bench_size];
block c[bench_size];
TEST(aes, bench) {
std::string key("\x00\x01\x02\x03\x04\x05\x06\x07"
"\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", 16);
block k;
std::memcpy(&k, key.data(), key.size());
AES aes(k);
const size_t rep = 0x100;
auto t0 = std::chrono::high_resolution_clock::now();
for (size_t i = 0; i < rep; ++i) {
aes.ecb_enc_blocks(p, bench_size, c);
}
auto t1 = std::chrono::high_resolution_clock::now();
auto d = std::chrono::duration_cast<std::chrono::nanoseconds>(t1 - t0);
std::cerr << d.count() * 1.0 / (rep * bench_size) << " ns per op\n";
}
} // namespace psi
......@@ -138,6 +138,9 @@ public:
if (ret < 0) {
throw std::runtime_error("socket error: recv, errno: " +
std::to_string(errno));
} else if (ret == 0) {
throw std::runtime_error("socket error: 0 byte recved, "
"socket shutdown by peer");
}
recved += ret;
}
......
......@@ -23,38 +23,47 @@ namespace psi {
class PseudorandomNumberGenerator {
public:
PseudorandomNumberGenerator() = default;
PseudorandomNumberGenerator(const block &seed);
PseudorandomNumberGenerator(const PseudorandomNumberGenerator &other) =
delete;
PseudorandomNumberGenerator(
const PseudorandomNumberGenerator &other) = delete;
PseudorandomNumberGenerator &
operator=(const PseudorandomNumberGenerator &other) = delete;
PseudorandomNumberGenerator &operator=(
const PseudorandomNumberGenerator &other) = delete;
void set_seed(const block &b);
template <typename T> T get() {
template <typename T>
T get() {
T data;
get_array(&data, sizeof(T));
return data;
}
void get_array(void *res, size_t len);
void get_array(void* res, size_t len);
// for std::shuffle
typedef uint64_t result_type;
constexpr static uint64_t min() { return 0; }
constexpr static uint64_t min() {
return 0;
}
constexpr static uint64_t max() { return -1ull; }
constexpr static uint64_t max() {
return -1ull;
}
uint64_t operator()() { return get<uint64_t>(); }
uint64_t operator()() {
return get<uint64_t>();
}
private:
// buffer num for aes cipher
static const size_t _s_buffer_size = 0x100;
static const size_t _s_buffer_size = 0x100000;
static const size_t _s_byte_capacity = _s_buffer_size * sizeof(block);
......@@ -71,3 +80,4 @@ private:
void refill_buffer();
};
} // namespace psi
......@@ -298,24 +298,6 @@ const size_t PsiApi::_s_recv_step_len = 0x1000000;
// default sync sock, no timeout
int PsiApi::_s_timeout_s = 0;
int get_err_code(const char *err) {
std::string s(err);
if (s.find("socket error: recv timeout") != std::string::npos) {
return SOCKET_TIMEOUT;
} else if (s.find("socket error") != std::string::npos) {
return SOCKET_ERROR;
} else if (s.find("openssl error") != std::string::npos) {
return OPENSSL_ERROR;
} else if (s.find("np ot error") != std::string::npos) {
return INTERNAL_ERROR;
} else if (s.find("ot ext error") != std::string::npos) {
return INTERNAL_ERROR;
} else if (s.find("psi error") != std::string::npos) {
return INTERNAL_ERROR;
}
return UNKNOWN_ERROR;
}
int psi_send(int port, const std::set<std::string> &in,
std::atomic<int> *psi_progress) {
try {
......@@ -331,14 +313,9 @@ int psi_send(int port, const std::set<std::string> &in,
if (psi_progress) {
*psi_progress = -1;
}
auto err = get_err_code(e.what());
if (err != UNKNOWN_ERROR) {
return err;
} else {
throw;
}
}
return PSI_OK;
return 0;
}
int psi_recv(const std::string &remote_ip, int port,
......@@ -357,14 +334,9 @@ int psi_recv(const std::string &remote_ip, int port,
if (psi_progress) {
*psi_progress = -1;
}
auto err = get_err_code(e.what());
if (err != UNKNOWN_ERROR) {
return err;
} else {
throw;
}
}
return PSI_OK;
return 0;
}
void set_psi_timeout(int timeout_s) { PsiApi::set_psi_timeout(timeout_s); }
......
......@@ -21,15 +21,6 @@
namespace psi {
enum PsiReturnCode {
PSI_OK = 0,
INTERNAL_ERROR = -1,
OPENSSL_ERROR = -2,
SOCKET_ERROR = -3,
SOCKET_TIMEOUT = -4,
UNKNOWN_ERROR = -5
};
int psi_send(int port, const std::set<std::string> &in,
std::atomic<int> *psi_progress = nullptr);
......
......@@ -27,7 +27,6 @@ public:
int _port;
static const int _s_test_size = 1e3;
public:
PsiAPITest() {
for (int i = 0; i < _s_test_size; ++i) {
......@@ -42,8 +41,18 @@ public:
TEST_F(PsiAPITest, full_test) {
auto test_send = [this]() {
// find valid port
for (int ret = SOCKET_ERROR; ret == SOCKET_ERROR; ++_port) {
ret = psi_send(_port, _input, nullptr);
for (;; ++_port) {
try {
psi_send(_port, _input, nullptr);
break;
} catch (const std::exception& e){
std::string s(e.what());
if (s.find("socket error") != std::string::npos) {
continue;
} else {
throw;
}
}
}
};
auto t_send = std::thread(test_send);
......@@ -56,7 +65,7 @@ TEST_F(PsiAPITest, full_test) {
t_send.join();
std::set<std::string> out_set;
for (auto &x : output) {
for (auto& x: output) {
out_set.emplace(x);
}
ASSERT_EQ(out_set, _input);
......
......@@ -16,15 +16,13 @@
#include <array>
#include <wmmintrin.h>
namespace psi {
void sse_load_sub_square(std::array<block, 2> &out, std::array<block, 128> &in,
void sse_load_sub_square(std::array<block, 2>& out, std::array<block, 128>& in,
size_t x, size_t y) {
std::array<std::array<uint8_t, 16>, 2> &out_byte_view =
std::array<std::array<uint8_t, 16>, 2>& out_byte_view =
*reinterpret_cast<std::array<std::array<uint8_t, 16>, 2> *>(&out);
std::array<std::array<uint8_t, 16>, 128> &in_byte_view =
std::array<std::array<uint8_t, 16>, 128>& in_byte_view =
*reinterpret_cast<std::array<std::array<uint8_t, 16>, 128> *>(&in);
for (size_t l = 0; l < 16; l++) {
......@@ -33,9 +31,9 @@ void sse_load_sub_square(std::array<block, 2> &out, std::array<block, 128> &in,
}
}
void sse_transpose_sub_square(std::array<block, 128> &out,
std::array<block, 2> &in, size_t x, size_t y) {
std::array<std::array<uint16_t, 8>, 128> &out_u16_view =
void sse_transpose_sub_square(std::array<block, 128>& out,
std::array<block, 2>& in, size_t x, size_t y) {
std::array<std::array<uint16_t, 8>, 128>& out_u16_view =
*reinterpret_cast<std::array<std::array<uint16_t, 8>, 128> *>(&out);
for (size_t j = 0; j < 8; j++) {
......@@ -47,7 +45,7 @@ void sse_transpose_sub_square(std::array<block, 128> &out,
}
}
void sse_transpose128(std::array<block, 128> &in_out) {
void sse_transpose128(std::array<block, 128>& in_out) {
std::array<block, 2> a, b;
for (size_t j = 0; j < 8; j++) {
......
docs/source/_static/FL-framework.png (binary image changed: 84.5 KB → 84.1 KB)
......@@ -213,4 +213,4 @@ while not trainer.stop():
To show the effectiveness of DPSGD-based federated learning with PaddleFL, a simulated experiment is conducted on an open source dataset MNIST. From the figure given below, model evaluation results are similar between DPSGD-based federated learning and traditional parameter server training when the overall privacy budget *epsilon* is 1.3 or 0.13.
<img src="fl_dpsgd_benchmark.png" height=400 width=600 hspace='10'/> <br />
<img src="_static/fl_dpsgd_benchmark.png" height=400 width=600 hspace='10'/> <br />
......@@ -109,4 +109,4 @@ wget https://paddle-zwh.bj.bcebos.com/gru4rec_paddlefl_benchmark/gru4rec_benchma
| 1/4 of the whole dataset | private training | - | 0.269 |
| 1/4 of the whole dataset | private training | - | 0.282 |
<img src="fl_benchmark.png" height=300 width=500 hspace='10'/> <br />
<img src="_static/fl_benchmark.png" height=300 width=500 hspace='10'/> <br />
## Instructions for PaddleFL-MPC UCI Housing Demo
([Simplified Chinese](./README_CN.md)|English)
This document introduces how to run the UCI Housing demo based on Paddle-MPC, which can be run in two ways, i.e., on a single machine or on multiple machines.
### 1. Running on Single Machine
......
......@@ -8,7 +8,8 @@ Data is becoming more and more expensive nowadays, and sharing of raw data is ve
## Overview of PaddleFL
<img src='../../../images/FL-framework.png' width = "1000" height = "320" align="middle"/>
<img src='_static/FL-framework.png' width = "1000" height = "320" align="middle"/>
In PaddleFL, horizontal and vertical federated learning strategies will be implemented according to the categorization given in [4]. Application demonstrations in natural language processing, computer vision and recommendation will be provided in PaddleFL.
......@@ -36,7 +37,7 @@ Besides, PFM is implemented based on secure multi-party computation (MPC) to ena
### Data Parallel
<img src='images/FL-training.png' width = "1000" height = "400" align="middle"/>
<img src='_static/FL-training.png' width = "1000" height = "400" align="middle"/>
In Data Parallel, components for defining a federated learning task and training a federated learning job are as follows:
......@@ -60,7 +61,7 @@ In Data Parallel, components for defining a federated learning task and training
### Federated Learning with MPC
<img src='../../../images/PFM-overview.png' width = "1000" height = "446" align="middle"/>
<img src='_static/PFM-overview.png' width = "1000" height = "446" align="middle"/>
Paddle FL MPC implements secure training and inference tasks based on an underlying MPC protocol such as ABY3[11], which is a highly efficient three-party computing model.
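As a minimal illustrative sketch (the role id, Redis address and port below are placeholders; the calls mirror the demos elsewhere in this change), a PFM program first initializes the ABY3 protocol and then declares encrypted inputs holding `int64` shares:

```python
import paddle_fl.mpc as pfl_mpc

# placeholder values for illustration; each of the three parties runs with its own role id (0, 1 or 2)
role, redis_server, redis_port = 0, "127.0.0.1", 9937
pfl_mpc.init("aby3", role, "localhost", redis_server, redis_port)

# encrypted variables are int64 ABY3 shares
x = pfl_mpc.data(name='x', shape=[2, 1], dtype='int64')
```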
......
......@@ -67,5 +67,7 @@ from . import data_utils
from .io import *
from .version import version
from .layers import mpc_math_op_patch
from . import input
from . import initializer
mpc_math_op_patch.monkey_patch_mpc_variable()
......@@ -30,6 +30,7 @@ from paddle.fluid import log_helper
import paddle.fluid
import paddle.fluid.backward as backward
from .framework import is_mpc_parameter
import mpc_data_utils as mdu
_logger = log_helper.get_logger(
__name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s')
......@@ -40,7 +41,7 @@ def _create_loss_op_desc_(loss):
"fill_constant", {},
{"Out": [backward._append_grad_suffix_(loss.name)]}, {
"shape": [2, 1],
"value": 21845,
"value": mdu.mpc_one_share,
"dtype": loss.dtype,
"force_cpu": False,
core.op_proto_and_checker_maker.kOpRoleAttrName():
......
......@@ -21,6 +21,7 @@ import six
import paddle
import paddle.fluid as fluid
import mpc_data_utils as mdu
from ..layers import __all__ as all_ops
__all__ = [
'encrypt',
......@@ -35,12 +36,22 @@ __all__ = [
'decrypt_model',
]
# operators that should be skipped when encrypt and decrypt
op_to_skip = ['feed', 'fetch', 'scale', 'mpc_init']
# operators that are supported currently for model encryption and decryption
supported_mpc_ops = all_ops + ['fill_constant', 'sgd'] + op_to_skip
# variables that used as plain variables and need no encryption
plain_vars = ['learning_rate_0']
SHARE_NUM = 3
ABY3_SHARE_DIM = 2
ABY3_MODEL_NAME = "__model__.aby3"
MODEL_NAME = "__model__"
MODEL_SHARE_DIR = "model_share"
MPC_OP_PREFIX = "mpc_"
# the MPC value of plain value 1, which is used for
# default value of fill_constant OP
MPC_ONE_SHARE = mdu.mpc_one_share
def encrypt(number):
......@@ -252,57 +263,144 @@ def batch(reader, batch_size, drop_last=False):
return reshaped_batch_reader
def encrypt_model(plain_model, mpc_model_dir, model_filename=None):
def transpile(program=None):
"""
Encrypts model, and save to files for mpc inference.
Transpile Paddle program into MPC program.
Args:
plain_model: The directory of paddle model.
mpc_model_dir: The directory that save mpc model shares.
model_filename: The name of model file.
program: The plain Paddle model program, defaults to
default_main_program.
Returns: The MPC program.
"""
if program is None:
program = fluid.default_main_program()
place = fluid.CPUPlace()
exe = fluid.Executor(place)
[main_prog, _, _] = fluid.io.load_inference_model(
dirname=plain_model, executor=exe, model_filename=model_filename)
# TODO(xukun): support more blocks. Tips: may be just adding "for loop" for all blocks.
if main_prog.num_blocks > 1:
if program.num_blocks > 1:
raise NotImplementedError(
"The number of blocks in current main program"
"is {}, which is not supported in this version."
.format(main_prog.num_blocks()))
global_block = main_prog.global_block()
.format(program.num_blocks))
global_block = program.global_block()
g_scope = fluid.global_scope()
for op in global_block.ops:
if op.type != "feed" and op.type != "fetch":
# TODO: needs to check if the mpc op exists
op.desc.set_type(MPC_OP_PREFIX + op.type)
for input_arg_name in op.input_arg_names:
var = global_block.var(input_arg_name)
mpc_vars_names = _transpile_type_and_shape(block=global_block)
# encrypt tensor values for each variable in mpc_var_names
for mpc_var_name in mpc_vars_names:
if g_scope.find_var(mpc_var_name) is not None:
param = g_scope.find_var(mpc_var_name)
param_tensor = np.array(param.get_tensor())
mpc_var = global_block.var(mpc_var_name)
if mpc_var_name not in plain_vars:
param.get_tensor()._set_dims(mpc_var.shape)
# process initialized params that should be 0
set_tensor_value = np.array([param_tensor, param_tensor]).astype(np.int64)
param.get_tensor().set(set_tensor_value, place)
else:
param.get_tensor().set(np.array(param.get_tensor()).astype('float64'), place)
# trigger sync to replace old ops.
op_num = global_block.desc.op_size()
_ = global_block.desc.append_op()
global_block.desc._remove_op(op_num, op_num + 1)
return program
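
# Illustrative usage sketch; the helper below is hypothetical and not part of the public API.
def _transpile_usage_sketch():
    """A minimal sketch of a typical transpile() call: build a plain Paddle
    program first, then transpile it. Afterwards, supported OPs carry the
    "mpc_" prefix and variables hold int64 ABY3 shares shaped [2, ...].
    """
    return transpile(fluid.default_main_program())
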
def _transpile_type_and_shape(block):
"""
Transpile dtype and shape of plain variables into MPC dtype and shape.
And transpile op type into MPC type.
Args:
block: The block in Paddle program.
Returns: A set of variable names to encrypt.
"""
mpc_vars_names = set()
# store variable name in mpc_vars_names, and encrypt dtype and shape
for var_name in block.vars:
var = block.var(var_name)
if var.name != "feed" and var.name != "fetch":
mpc_vars_names.add(var.name)
if var_name in plain_vars:
var.desc.set_dtype(fluid.framework.convert_np_dtype_to_dtype_(np.float64))
continue
# set mpc param shape = [2, old_shape]
encrypted_var_shape = (ABY3_SHARE_DIM, ) + var.shape
encrypted_var_shape = (ABY3_SHARE_DIM,) + var.shape
var.desc.set_dtype(fluid.framework.convert_np_dtype_to_dtype_(np.int64))
var.desc.set_shape(encrypted_var_shape)
if g_scope.find_var(input_arg_name) is not None:
param = g_scope.find_var(input_arg_name)
param_tensor_shares = make_shares(
np.array(param.get_tensor()))
# encrypt op type, or other attrs if needed
for op in block.ops:
if _is_supported_op(op.type):
if op.type == 'fill_constant':
op._set_attr(name='shape', val=(2L, 1L))
# set default MPC value for fill_constant OP
op._set_attr(name='value', val=MPC_ONE_SHARE)
op._set_attr(name='dtype', val=3)
elif op.type in op_to_skip:
pass
else:
op.desc.set_type(MPC_OP_PREFIX + op.type)
else:
raise NotImplementedError('Operator {} is unsupported.'
.format(op.type))
return mpc_vars_names
def encrypt_model(program, mpc_model_dir=None, model_filename=None):
"""
Encrypt model, and save encrypted model (i.e., MPC model shares) into
files for MPC training, updating, or inference.
Args:
program: The loaded program of paddle model.
mpc_model_dir: The directory that save MPC model shares.
model_filename: The name of MPC model file, default is __model__.aby3.
"""
place = fluid.CPUPlace()
exe = fluid.Executor(place)
# TODO(xukun): support more blocks. Tips: may just adding "for loop" for all blocks.
if program.num_blocks > 1:
raise NotImplementedError(
"The number of blocks in current main program"
"is {}, which is not supported in this version."
.format(program.num_blocks))
global_block = program.global_block()
g_scope = fluid.global_scope()
mpc_vars_names = _transpile_type_and_shape(global_block)
# encrypt tensor values for each variable in mpc_var_names
for mpc_var_name in mpc_vars_names:
if g_scope.find_var(mpc_var_name) is not None:
param = g_scope.find_var(mpc_var_name)
param_tensor = np.array(param.get_tensor())
param_tensor_shares = make_shares(param_tensor)
mpc_var = global_block.var(mpc_var_name)
for idx in six.moves.range(SHARE_NUM):
param.get_tensor()._set_dims(encrypted_var_shape)
param.get_tensor().set(
get_aby3_shares(param_tensor_shares, idx), place)
if mpc_var_name not in plain_vars:
param.get_tensor()._set_dims(mpc_var.shape)
set_tensor_value = get_aby3_shares(param_tensor_shares, idx)
param.get_tensor().set(set_tensor_value, place)
else:
param.get_tensor().set(np.array(param.get_tensor()).astype('float64'), place)
param_share_dir = os.path.join(
mpc_model_dir, MODEL_SHARE_DIR + "_" + str(idx))
fluid.io.save_vars(
executor=exe,
dirname=param_share_dir,
vars=[var],
filename=input_arg_name)
# trigger sync to replace old ops
vars=[mpc_var],
filename=mpc_var_name)
# trigger sync to replace old ops.
op_num = global_block.desc.op_size()
_ = global_block.desc.append_op()
global_block.desc._remove_op(op_num, op_num + 1)
......@@ -317,17 +415,19 @@ def encrypt_model(plain_model, mpc_model_dir, model_filename=None):
os.makedirs(model_share_dir)
model_name = os.path.join(model_share_dir, model_basename)
with open(model_name, "wb") as f:
f.write(main_prog.desc.serialize_to_string())
f.write(program.desc.serialize_to_string())
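
# Illustrative usage sketch; the helper name and directory below are hypothetical placeholders.
def _encrypt_model_usage_sketch():
    """A minimal sketch of encrypt_model(): it writes three ABY3 shares of the
    program and its parameters under mpc_model_dir/model_share_0..2.
    """
    encrypt_model(program=fluid.default_main_program(),
                  mpc_model_dir="/tmp/mpc_model")
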
def decrypt_model(mpc_model_dir, plain_model_path, model_filename=None):
def decrypt_model(mpc_model_dir, plain_model_path, mpc_model_filename=None, plain_model_filename=None):
"""
Reveal a paddle model.
Reveal a paddle model. Load encrypted model (i.e., MPC model shares) from files and decrypt it
into paddle model.
Args:
mpc_model_dir: The directory of all model shares.
plain_model_path: The directory to save revealed paddle model.
model_filename: The name of model file.
mpc_model_filename: The name of encrypted model file.
plain_model_filename: The name of decrypted model file.
"""
share_dirs = []
for sub_dir in os.listdir(mpc_model_dir):
......@@ -337,7 +437,7 @@ def decrypt_model(mpc_model_dir, plain_model_path, model_filename=None):
place = fluid.CPUPlace()
exe = fluid.Executor(place=place)
mpc_model_basename = os.path.basename(
model_filename) if model_filename is not None else ABY3_MODEL_NAME
mpc_model_filename) if mpc_model_filename is not None else ABY3_MODEL_NAME
[main_prog, _, _] = fluid.io.load_inference_model(
dirname=share_dirs[0], executor=exe, model_filename=mpc_model_basename)
......@@ -349,38 +449,65 @@ def decrypt_model(mpc_model_dir, plain_model_path, model_filename=None):
global_block = main_prog.global_block()
g_scope = fluid.global_scope()
for op in global_block.ops:
# rename ops
if str(op.type).startswith(MPC_OP_PREFIX):
new_type = str(op.type)[len(MPC_OP_PREFIX):]
op.desc.set_type(new_type)
for input_arg_name in op.input_arg_names:
var = global_block.var(input_arg_name)
if var.name != "feed" and var.name != "fetch":
if var.shape[0] != ABY3_SHARE_DIM:
# a set storing unique variables to decrypt
vars_set = set()
# store variable name in vars_set, and decrypt dtype and shape
for mpc_var_name in global_block.vars:
mpc_var = global_block.var(mpc_var_name)
if mpc_var.name != "feed" and mpc_var.name != "fetch":
vars_set.add(mpc_var.name)
if mpc_var_name in plain_vars:
# var.desc.set_dtype(fluid.framework.convert_np_dtype_to_dtype_(np.float64))
continue
elif mpc_var.shape[0] != ABY3_SHARE_DIM:
raise ValueError(
"Variable:{} shape: {} in saved model should start with 2."
.format(var.name, var.shape))
plain_var_shape = var.shape[1:]
old_var_shape = var.shape
var.desc.set_shape(plain_var_shape)
if g_scope.find_var(input_arg_name) is not None:
param = g_scope.find_var(input_arg_name)
.format(mpc_var.name, mpc_var.shape))
else:
plain_var_shape = mpc_var.shape[1:]
mpc_var.desc.set_shape(plain_var_shape)
mpc_var.desc.set_dtype(fluid.framework.convert_np_dtype_to_dtype_(np.float32))
# remove init op
first_mpc_op = global_block.ops[0]
if first_mpc_op.type == 'mpc_init':
global_block._remove_op(0)
# decrypt op type, or other attrs if needed
for mpc_op in global_block.ops:
# rename ops
if str(mpc_op.type).startswith(MPC_OP_PREFIX):
new_type = str(mpc_op.type)[len(MPC_OP_PREFIX):]
mpc_op.desc.set_type(new_type)
elif mpc_op.type == 'fill_constant':
mpc_op._set_attr(name='shape', val=(1L, ))
mpc_op._set_attr(name='value', val=1.0)
mpc_op._set_attr(name='dtype', val=5)
# decrypt tensor values for each variable in vars_set
for var_name in vars_set:
var = global_block.var(var_name)
if g_scope.find_var(var_name) is not None:
param = g_scope.find_var(var_name)
if var_name in plain_vars:
pass
else:
# reconstruct plaintext
param_tensor_shares = _get_param_all_shares(
input_arg_name, share_dirs, mpc_model_basename)
var_name, share_dirs, mpc_model_basename)
param_tensor = reconstruct(
param_tensor_shares, type=np.float32)
param.get_tensor()._set_dims(plain_var_shape)
param.get_tensor()._set_dims(var.shape)
param.get_tensor().set(param_tensor, place)
fluid.io.save_vars(
executor=exe,
dirname=plain_model_path,
vars=[var],
filename=input_arg_name)
filename=var_name)
# trigger sync to replace old ops
op_num = global_block.desc.op_size()
_ = global_block.desc.append_op()
......@@ -388,7 +515,7 @@ def decrypt_model(mpc_model_dir, plain_model_path, model_filename=None):
# save plaintext model file.
model_basename = os.path.basename(
model_filename) if model_filename is not None else MODEL_NAME
plain_model_filename) if plain_model_filename is not None else MODEL_NAME
if not os.path.exists(plain_model_path):
os.makedirs(plain_model_path)
model_name = os.path.join(plain_model_path, model_basename)
......@@ -404,7 +531,9 @@ def _get_param_all_shares(param_name, share_dirs, model_file):
param_name: The name of parameter.
share_dirs: The directories which storing model shares.
model_file: The name of model file.
:return:
Returns:
ndarray. The loaded shares.
"""
exe = fluid.Executor(place=fluid.CPUPlace())
param_shares = []
......@@ -416,3 +545,152 @@ def _get_param_all_shares(param_name, share_dirs, model_file):
param_tensor = np.array(param.get_tensor())
param_shares.append(param_tensor)
return np.array(param_shares, dtype=np.int64)
def _is_supported_op(op_name):
"""
Check if op is supported for encryption and decryption.
Args:
op_name: The name of op.
Returns:
True if supported.
"""
if op_name not in supported_mpc_ops:
if str(op_name).endswith('_grad'):
return _is_supported_op(str(op_name)[:-5])
else:
return False
return True
def load_mpc_model(exe, mpc_model_dir, mpc_model_filename, inference=False):
"""
Load MPC model from files. An init OP is inserted into the loaded program,
which is then switched to default_main_program for the further MPC procedure.
Args:
exe: The executor used for loading.
mpc_model_dir: The directory of MPC model.
mpc_model_filename: The filename of MPC model.
inference: Whether the model to load is used for inference. If true, the
model to load should be an inference model, and feed_name, fetch_targets
would be returned with the loaded program after inserting init OP. Otherwise,
after inserting init OP, the loaded program would be switched to
default_main_program and returned. Default value is False.
Returns:
default_main_program if inference is False. Otherwise, default_main_program,
feed_name, and fetch_targets would be returned.
"""
mpc_program, feed_names, fetch_targets = fluid.io.load_inference_model(executor=exe,
dirname=mpc_model_dir,
model_filename=mpc_model_filename)
# find init OP
global_block = fluid.default_main_program().global_block()
init_op_idx = _find_init_op_idx(global_block)
if init_op_idx < 0:
raise RuntimeError('No mpc_init op in global block, '
'maybe you should use paddle_fl.mpc.init() first.')
init_op = global_block.ops[init_op_idx]
# find the last feed OP for inserting init OP
last_feed_op_idx = _find_last_feed_op_idx(mpc_program.global_block())
# insert init OP as the first OP of MPC program if no feed OP,
# otherwise, insert it after the last feed OP.
insert_idx = 0 if last_feed_op_idx < 0 else last_feed_op_idx + 1
loaded_mpc_program = _insert_init_op(main_prog=mpc_program,
init_op=init_op,
index=insert_idx)
if inference:
return loaded_mpc_program, feed_names, fetch_targets
else:
# switch loaded_mpc_program to default_main_program
fluid.framework.switch_main_program(loaded_mpc_program)
return fluid.default_main_program()
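
# Illustrative usage sketch; the helper name, directory and file name below are placeholders.
def _load_mpc_model_usage_sketch():
    """A minimal sketch of load_mpc_model(). pfl_mpc.init() must have been
    called first so that an mpc_init OP already exists in
    default_main_program(); otherwise a RuntimeError is raised.
    """
    exe = fluid.Executor(fluid.CPUPlace())
    return load_mpc_model(exe=exe,
                          mpc_model_dir="/tmp/mpc_model/model_share_0",
                          mpc_model_filename="__model__.aby3",
                          inference=False)
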
def _find_init_op_idx(block):
"""
Find the index of mpc_init op.
Args:
block: The block of program.
Returns:
The index of mpc_init op.
"""
for idx, op in enumerate(block.ops):
if op.type == 'mpc_init':
return idx
return -1
def _find_last_feed_op_idx(block):
"""
Find the index of the last feed OP.
Args:
block: The block of program.
Returns:
The index of the last feed OP.
"""
feed_idx = -1
for idx, op in enumerate(block.ops):
if op.type == 'feed':
feed_idx = idx
return feed_idx
def save_trainable_model(exe, model_dir, model_filename=None, program=None):
"""
Save trainable model, which includes saving program and
persistable parameters into files. The saved model can be
loaded by fluid.io.load_inference_model for further training
or updating.
Args:
exe: The executor used for saving.
model_dir: The directory of model to save.
model_filename: The filename of model to save.
program: The program to save, default to default_main_program.
TODO: can move this to paddle_mpc/python/paddle_fl/mpc/io.py
"""
if not os.path.exists(model_dir):
os.makedirs(model_dir)
model_basename = os.path.basename(
model_filename) if model_filename is not None else ABY3_MODEL_NAME
# save program
model_name = os.path.join(model_dir, model_basename)
if program is None:
program = fluid.default_main_program()
with open(model_name, "wb") as f:
f.write(program.desc.serialize_to_string())
# save parameters
fluid.io.save_persistables(executor=exe,
dirname=model_dir,
main_program=program)
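
# Illustrative usage sketch; the helper name and directory below are placeholders.
def _save_trainable_model_usage_sketch():
    """A minimal sketch of save_trainable_model(): the saved program and
    persistable parameters can later be reloaded (e.g. with load_mpc_model
    above) for further training or updating.
    """
    exe = fluid.Executor(fluid.CPUPlace())
    save_trainable_model(exe=exe, model_dir="/tmp/mpc_model_to_update")
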
def _insert_init_op(main_prog, init_op, index):
"""
Insert init OP into main_prog according to the index.
Args:
main_prog: The program to insert init OP.
init_op: The init OP for MPC running.
index: The place that the init_op to insert.
Returns:
The program after inserting init OP.
"""
main_prog.global_block()._sync_with_cpp()
op_desc = main_prog.global_block().desc._insert_op(index)
mpc_init_op = fluid.framework.Operator(block=main_prog.global_block(),
desc=op_desc,
type=init_op.type,
attrs=init_op.all_attrs())
main_prog.global_block().ops.insert(index, mpc_init_op)
return main_prog
......@@ -15,10 +15,11 @@
This module provides data alignment tools, implemented with an OT (Oblivious Transfer)-based
PSI (Private Set Intersection) algorithm.
"""
from multiprocessing.connection import Client, Listener
import os
import sys
import mpc_data_utils as mdu
from multiprocessing.connection import Client, Listener
__all__ = ['align', ]
......
......@@ -20,6 +20,15 @@ Encrypted data files of feature and label would be generated and saved in `/tmp`
#### (2). Launch Demo with A Shell Script
You should set the environment variables as follows:
```
export PYTHON=/your/python
export PATH_TO_REDIS_BIN=/path/to/redis_bin
export LOCALHOST=/your/localhost
export REDIS_PORT=/your/redis/port
```
Launch demo with the `run_standalone.sh` script. The concrete command is:
```bash
......@@ -32,11 +41,14 @@ Besides, predictions would be made in this demo once training is finished. The p
#### (3). Decrypt Data
Decrypt the saved prediction data and save the decrypted prediction results into a specified file using `decrypt_data_to_file()` in `process_data.py` script. For example, users can write the following code into a python script named `decrypt_save.py`, and then run the script with command `python decrypt_save.py`. The decrypted prediction results would be saved into `mpc_label`.
Decrypt the saved prediction data and save the decrypted prediction results into a specified file using `decrypt_data_to_file()` in `process_data.py` script. For example, users can write the following code into a python script named `decrypt_save.py`, and then run the script with command `python decrypt_save.py decrypt_file`. The decrypted prediction results would be saved into `decrypt_file`.
```python
import sys
decrypt_file=sys.argv[1]
import process_data
process_data.decrypt_data_to_file("/tmp/mnist_output_prediction", (BATCH_SIZE,), "mpc_label")
process_data.decrypt_data_to_file("/tmp/mnist_output_prediction", (BATCH_SIZE,), decrypt_file)
```
**Note**: remember to delete the prediction files generated in the `/tmp` directory during the last run, so that they do not affect the decrypted results of the current run. To simplify this, we provide the following commands in `run_standalone.sh`, which delete the files mentioned above when the script runs.
......@@ -91,10 +103,13 @@ Similarly, predictions with cypher text format would be saved in `/tmp` director
#### (5). Decrypt Prediction Data
Each computation party sends `mnist_output_prediction.part` file in `/tmp` directory to the `/tmp` directory of data owner. Data owner decrypts the prediction data and saves the decrypted prediction results into a specified file using `decrypt_data_to_file()` in `process_data.py` script. For example, users can write the following code into a python script named `decrypt_save.py`, and then run the script with command `python decrypt_save.py`. The decrypted prediction results would be saved into file `mpc_label`.
Each computation party sends `mnist_output_prediction.part` file in `/tmp` directory to the `/tmp` directory of data owner. Data owner decrypts the prediction data and saves the decrypted prediction results into a specified file using `decrypt_data_to_file()` in `process_data.py` script. For example, users can write the following code into a python script named `decrypt_save.py`, and then run the script with command `python decrypt_save.py decrypt_file`. The decrypted prediction results would be saved into file `decrypt_file`.
```python
import sys
decrypt_file=sys.argv[1]
import process_data
process_data.decrypt_data_to_file("/tmp/mnist_output_prediction", (BATCH_SIZE,), "mpc_label")
process_data.decrypt_data_to_file("/tmp/mnist_output_prediction", (BATCH_SIZE,), decrypt_file)
```
......@@ -20,7 +20,16 @@ process_data.generate_encrypted_test_data()
#### 2. Launch Demo with A Shell Script
Launch and run the demo with the `run_standalone.sh` script. The command is:
Before running the demo, set the following environment variables:
```
export PYTHON=/your/python
export PATH_TO_REDIS_BIN=/path/to/redis_bin
export LOCALHOST=/your/localhost
export REDIS_PORT=/your/redis/port
```
Then launch and run the demo with the `run_standalone.sh` script. The command is:
```bash
bash run_standalone.sh mnist_demo.py
......@@ -32,11 +41,14 @@ bash run_standalone.sh mnist_demo.py
#### 3. Decrypt Data
Use `decrypt_data_to_file()` in the `process_data.py` script to decrypt the saved ciphertext prediction results and save the plaintext predictions into a specified file. For example, write the following content into a `decrypt_save.py` script, then run `python decrypt_save.py`; the plaintext prediction results will be saved in the `mpc_label` file.
Use `decrypt_data_to_file()` in the `process_data.py` script to decrypt the saved ciphertext prediction results and save the plaintext predictions into a specified file. For example, write the following content into a `decrypt_save.py` script, then run `python decrypt_save.py decrypt_file`; the plaintext prediction results will be saved in the `decrypt_file` file.
```python
import sys
decrypt_file=sys.argv[1]
import process_data
process_data.decrypt_data_to_file("/tmp/mnist_output_prediction", (BATCH_SIZE,), "mpc_label")
process_data.decrypt_data_to_file("/tmp/mnist_output_prediction", (BATCH_SIZE,), decrypt_file)
```
**Note**: before launching the demo again, delete the ciphertext prediction files saved in `/tmp` during the last run, so that they do not affect the reconstruction of this run's ciphertext data. To simplify this, we added the following content to the `run_standalone.sh` script, which deletes the old data when the script is executed.
......@@ -93,10 +105,13 @@ $PYTHON_EXECUTABLE mnist_demo.py $PARTY_ID $SERVER $PORT
#### 5. Decrypt Prediction Data
Each computation party sends the `mnist_output_prediction.part` file in its `/tmp` directory to the /tmp directory of the data owner. The data owner uses `decrypt_data_to_file()` in the `process_data.py` script to decrypt the ciphertext predictions and save the plaintext results into a specified file. For example, write the following content into a `decrypt_save.py` script, then run `python decrypt_save.py`; the plaintext prediction results will be saved in the `mpc_label` file.
Each computation party sends the `mnist_output_prediction.part` file in its `/tmp` directory to the /tmp directory of the data owner. The data owner uses `decrypt_data_to_file()` in the `process_data.py` script to decrypt the ciphertext predictions and save the plaintext results into a specified file. For example, write the following content into a `decrypt_save.py` script, then run `python decrypt_save.py decrypt_file`; the plaintext prediction results will be saved in the `decrypt_file` file.
```python
import sys
decrypt_file=sys.argv[1]
import process_data
process_data.decrypt_data_to_file("/tmp/mnist_output_prediction", (BATCH_SIZE,), "mpc_label")
process_data.decrypt_data_to_file("/tmp/mnist_output_prediction", (BATCH_SIZE,), decrypt_file)
```
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Decrypt Prediction Data.
"""
import sys
import process_data
decrypt_file=sys.argv[1]
BATCH_SIZE=128
process_data.decrypt_data_to_file("/tmp/mnist_output_prediction", (BATCH_SIZE,), decrypt_file)
......@@ -16,6 +16,7 @@ MNIST Demo
"""
import sys
import os
import numpy as np
import time
......@@ -78,18 +79,20 @@ test_loader.set_batch_generator(test_batch_sample, places=place)
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
start_time = time.time()
step = 0
for epoch_id in range(epoch_num):
start_time = time.time()
step = 0
# feed data via loader
for sample in loader():
batch_start = time.time()
exe.run(feed=sample, fetch_list=[cost.name])
batch_end = time.time()
if step % 50 == 0:
print('Epoch={}, Step={}'.format(epoch_id, step))
print('Epoch={}, Step={}, batch_cost={:.4f} s'.format(epoch_id, step, (batch_end - batch_start)))
step += 1
end_time = time.time()
print('Mpc Training of Epoch={} Batch_size={}, cost time in seconds:{}'
end_time = time.time()
print('Mpc Training of Epoch={} Batch_size={}, epoch_cost={:.4f} s'
.format(epoch_num, BATCH_SIZE, (end_time - start_time)))
# prediction
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Prepare data for MNIST.
"""
import process_data
process_data.generate_encrypted_data()
process_data.generate_encrypted_test_data()
......@@ -17,6 +17,7 @@ Process data for MNIST.
import numpy as np
import paddle
import six
import os
from paddle_fl.mpc.data_utils import aby3
sample_reader = paddle.dataset.mnist.train()
......@@ -77,10 +78,12 @@ def load_decrypt_data(filepath, shape):
p = aby3.reconstruct(np.array(instance))
print(p)
def decrypt_data_to_file(filepath, shape, decrypted_filepath):
def decrypt_data_to_file(filepath, shape, decrypted_file):
"""
load the encrypted data and reconstruct to a file
"""
if os.path.exists(decrypted_file):
os.remove(decrypted_file)
part_readers = []
for id in six.moves.range(3):
part_readers.append(aby3.load_aby3_shares(filepath, id=id, shape=shape))
......@@ -88,6 +91,6 @@ def decrypt_data_to_file(filepath, shape, decrypted_filepath):
for instance in aby3_share_reader():
p = aby3.reconstruct(np.array(instance))
with open(decrypted_filepath, 'a+') as f:
with open(decrypted_file, 'a+') as f:
for i in p:
f.write(str(i) + '\n')
......@@ -31,12 +31,13 @@
# bash run_standalone.sh TEST_SCRIPT_NAME
#
# modify the following vars according to your environment
PYTHON="python"
REDIS_HOME="path_to_redis_bin"
SERVER="localhost"
PORT=9937
# please set the following environment vars according to your environment
PYTHON=${PYTHON}
REDIS_HOME=${PATH_TO_REDIS_BIN}
SERVER=${LOCALHOST}
PORT=${REDIS_PORT}
echo "redis home in ${REDIS_HOME}, server is ${SERVER}, port is ${PORT}"
function usage() {
echo 'run_standalone.sh SCRIPT_NAME [ARG...]'
exit 0
......@@ -63,10 +64,21 @@ $REDIS_BIN -h $SERVER -p $PORT flushall
# remove temp data generated in last time
PRED_FILE="/tmp/mnist_output_prediction.*"
if [ "$PRED_FILE" ]; then
ls ${PRED_FILE}
if [ $? -eq 0 ]; then
rm -rf $PRED_FILE
fi
TRAINING_FILE="/tmp/mnist2_feature.part*"
ls ${TRAINING_FILE}
if [ $? -ne 0 ]; then
echo "There is no data in /tmp, please prepare data with "python prepare.py" firstly"
exit 1
else
echo "There are data for mnist:"
echo "`ls ${TRAINING_FILE}`"
fi
# kick off script with roles of 1 and 2, and redirect output to /dev/null
for role in {1..2}; do
......
## Data Alignment Tool
This is an example of using the `alignment` function to build a command line tool of PSI (Private Set Intersection).
### Usage
```bash
python align.py --party_id=$PARTY_ID --endpoints=$END_POINTS --data_file=$FILE_NAME [--is_receiver]
```
### Example
Take data alignment between two parties, e.g., Alice (whose party_id is 0, IP address is 'A.A.A.A', port is 11111) and Bob (whose party_id is 1, IP address is 'B.B.B.B', port is 22222), as an example. Alice and Bob would like to find the intersection of alice_data.txt and bob_data.txt respectively, and Bob is intended to receive the final result.
On each party:
* **Alice**
```bash
python align.py --party_id=0 --endpoints=0:A.A.A.A:11111,1:B.B.B.B:22222 --data_file=alice_data.txt
```
* **Bob**
```bash
python align.py --party_id=1 --endpoints=0:A.A.A.A:11111,1:B.B.B.B:22222 --data_file=bob_data.txt --is_receiver
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Data alignment.
"""
import argparse
import paddle_fl.mpc.data_utils.alignment as alignment
def parse_args():
"""
Parse arguments.
"""
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument("--party_id", type=int, help="the id of this party")
parser.add_argument("--endpoints", type=str,
default='0:127.0.0.1:11111,1:127.0.0.1:22222',
help="id:ip:port info")
parser.add_argument("--data_file", type=str, help="data file")
parser.add_argument("--is_receiver", action='store_true', help="whether is receiver")
args = parser.parse_args()
return args
def do_align(args):
"""
Do alignment.
"""
# read data from file
input_set = set()
for line in open(args.data_file, 'r'):
input_set.add(line.strip())
# do alignment
result = alignment.align(input_set=input_set,
party_id=args.party_id,
endpoints=args.endpoints,
is_receiver=args.is_receiver)
return result
if __name__ == '__main__':
# use signal for interrupt from keyboard
import signal
signal.signal(signal.SIGINT, signal.SIG_DFL)
args = parse_args()
print('ARGUMENTS: party_id={}, endpoints={}, is_receiver={}, data_file={}'
.format(args.party_id, args.endpoints, args.is_receiver, args.data_file))
align_rst = do_align(args)
print("Alignment result is: {}".format(align_rst))
......@@ -19,18 +19,44 @@ Encrypted data files of feature and label would be generated and saved in `/tmp`
#### (2). Launch Demo with A Shell Script
You should set the environment variables as follows:
```
export PYTHON=/your/python
export PATH_TO_REDIS_BIN=/path/to/redis_bin
export LOCALHOST=/your/localhost
export REDIS_PORT=/your/redis/port
```
Launch demo with the `run_standalone.sh` script. The concrete command is:
```bash
bash run_standalone.sh uci_housing_demo.py
bash run_standalone.sh uci_demo.py
```
The loss with cypher text format will be displayed on screen while training. At the same time, the loss data is also saved in the `/tmp` directory, with a file name format similar to what is described in Step 1.
Besides, predictions would be made in this demo once training is finished. The predictions with cypher text format would also be saved in the `/tmp` directory.
#### (3). Decrypt Data
Finally, using `load_decrypt_data()` in `process_data.py` script, this demo would decrypt and print the loss and predictions, which can be compared with related results of Paddle plain text model.
For example, users can write the following code into a python script named `decrypt_save.py`, and then run the script with command `python decrypt_save.py decrypt_loss_file decrypt_prediction_file`. The decrypted loss and prediction results would be saved into two files correspondingly.
```python
import sys
import process_data
decrypt_loss_file=sys.argv[1]
decrypt_prediction_file=sys.argv[2]
BATCH_SIZE=10
process_data.load_decrypt_data("/tmp/uci_loss", (1, ), decrypt_loss_file)
process_data.load_decrypt_data("/tmp/uci_prediction", (BATCH_SIZE, ), decrypt_prediction_file)
```
**Note**: remember to delete the loss and prediction files generated in the `/tmp` directory during the last run, so that they do not affect the decrypted results of the current run. To simplify this, we provide the following commands in `run_standalone.sh`, which delete the files mentioned above when the script runs.
```bash
......@@ -58,9 +84,9 @@ Data owner encrypts data. Concrete operations are consistent with “Prepare Dat
According to the suffix of file name, distribute encrypted data files to `/tmp ` directories of all 3 computation parties. For example, send `house_feature.part0` and `house_label.part0` to `/tmp` of party 0 with `scp` command.
#### (3). Modify uci_housing_demo.py
#### (3). Modify uci_demo.py
Each computation party makes the following modifications on `uci_housing_demo.py` according to the environment of machine.
Each computation party makes the following modifications to `uci_demo.py` according to its machine environment.
* Modify IP Information
......@@ -70,18 +96,6 @@ Each computation party makes the following modifications on `uci_housing_demo.py
pfl_mpc.init("aby3", int(role), "localhost", server, int(port))
```
* Comment Out Codes for Single Machine Running
Comment out the following codes which are used when running on single machine.
```python
import process_data
print("uci_loss:")
process_data.load_decrypt_data("/tmp/uci_loss", (1,))
print("prediction:")
process_data.load_decrypt_data("/tmp/uci_prediction", (BATCH_SIZE,))
```
#### (4). Launch Demo on Each Party
**Note** that a Redis service is required for running the demo. Remember to clear the Redis server's cache before launching the demo on each computation party, to avoid any negative influence from cached records. The following command can be used to clear Redis, where REDIS_BIN is the redis-cli executable, and SERVER and PORT are the IP and port of the Redis server respectively.
......@@ -93,7 +107,7 @@ $REDIS_BIN -h $SERVER -p $PORT flushall
Launch demo on each computation party with the following command,
```
$PYTHON_EXECUTABLE uci_housing_demo.py $PARTY_ID $SERVER $PORT
$PYTHON_EXECUTABLE uci_demo.py $PARTY_ID $SERVER $PORT
```
where PYTHON_EXECUTABLE is the python in which PaddleFL is installed, PARTY_ID is the ID of the computation party (0, 1, or 2), and SERVER and PORT are the IP and port of the Redis server respectively.
......@@ -106,20 +120,19 @@ Similarly, training loss with cypher text format would be printed on the screen
Each computation party sends the `uci_loss.part` and `uci_prediction.part` files in its `/tmp` directory to the `/tmp` directory of the data owner. The data owner decrypts and gets the plaintext loss and predictions with `load_decrypt_data()` in `process_data.py`.
For example, the following code can be written into a python script to decrypt and print training loss.
For example, the following code can be written into a python script to decrypt and print training loss and predictions.
```python
import sys
import process_data
print("uci_loss:")
process_data.load_decrypt_data("/tmp/uci_loss", (1,))
```
And the following code can be written into a python script to decrypt and print predictions.
```python
import process_data
print("prediction:")
process_data.load_decrypt_data("/tmp/uci_prediction", (BATCH_SIZE,))
decrypt_loss_file=sys.argv[1]
decrypt_prediction_file=sys.argv[2]
BATCH_SIZE=10
process_data.load_decrypt_data("/tmp/uci_loss", (1, ), decrypt_loss_file)
process_data.load_decrypt_data("/tmp/uci_prediction", (BATCH_SIZE, ), decrypt_prediction_file)
```
### 3. Convergence of paddle_fl.mpc vs paddle
......
......@@ -19,17 +19,42 @@ process_data.generate_encrypted_data()
#### 2. Launch Demo with A Shell Script
Launch and run the demo with the `run_standalone.sh` script. The command is:
Before running the demo, set the following environment variables:
```
export PYTHON=/your/python
export PATH_TO_REDIS_BIN=/path/to/redis_bin
export LOCALHOST=/your/localhost
export REDIS_PORT=/your/redis/port
```
Then launch and run the demo with the `run_standalone.sh` script. The command is:
```bash
bash run_standalone.sh uci_housing_demo.py
bash run_standalone.sh uci_demo.py
```
After running, the ciphertext loss data from training will be printed on screen; at the same time, the corresponding ciphertext loss data will be saved to files in the /tmp directory, with a file name format similar to what is described in Step 1.
In addition, once training is finished, the demo continues with prediction and also saves the ciphertext prediction results to files in the /tmp directory.
#### 3. Decrypt Data
Finally, the demo uses `load_decrypt_data()` in the `process_data.py` script to reconstruct and print the plaintext loss data and prediction results, which can be compared with the results of the plaintext Paddle model.
For example, write the following content into a decrypt_save.py script, then run python decrypt_save.py decrypt_loss_file decrypt_prediction_file; the plaintext loss data and prediction results will be saved into the two files respectively.
```python
import sys
import process_data
decrypt_loss_file=sys.argv[1]
decrypt_prediction_file=sys.argv[2]
BATCH_SIZE=10
process_data.load_decrypt_data("/tmp/uci_loss", (1, ), decrypt_loss_file)
process_data.load_decrypt_data("/tmp/uci_prediction", (BATCH_SIZE, ), decrypt_prediction_file)
```
**Note**: before launching the demo again, delete the loss and prediction files saved in `/tmp` during the last run, so that they do not affect the reconstruction of this run's ciphertext data. To simplify this, we added the following content to the `run_standalone.sh` script, which deletes the old data when the script is executed.
......@@ -60,9 +85,9 @@ fi
`house_feature.part0` and `house_label.part0` are sent to the /tmp directory of party 0.
#### 3. Each computation party modifies the uci_housing_demo.py script
#### 3. Each computation party modifies the uci_demo.py script
Each computation party makes the following changes to uci_housing_demo.py according to its own machine environment:
Each computation party makes the following changes to uci_demo.py according to its own machine environment:
* Modify IP information
......@@ -72,17 +97,6 @@ fi
pfl_mpc.init("aby3", int(role), "localhost", server, int(port))
```
* Comment out the code needed only for single-machine running
Comment out the following code in the script; it is only used in the single-machine case.
```python
import process_data
print("uci_loss:")
process_data.load_decrypt_data("/tmp/uci_loss", (1,))
print("prediction:")
process_data.load_decrypt_data("/tmp/uci_prediction", (BATCH_SIZE,))
```
#### 4. Launch the demo on each computation party
......@@ -95,7 +109,7 @@ $REDIS_BIN -h $SERVER -p $PORT flushall
Run the following command on each computation party to launch the demo:
```
$PYTHON_EXECUTABLE uci_housing_demo.py $PARTY_ID $SERVER $PORT
$PYTHON_EXECUTABLE uci_demo.py $PARTY_ID $SERVER $PORT
```
Here, PYTHON_EXECUTABLE is the python in which PaddleFL is installed, PARTY_ID is the ID of the computation party (0, 1, or 2), and SERVER and PORT are the IP address and port of the redis server respectively.
......@@ -108,20 +122,19 @@ $PYTHON_EXECUTABLE uci_housing_demo.py $PARTY_ID $SERVER $PORT
Each computation party sends the `uci_loss.part` and `uci_prediction.part` files in its `/tmp` directory to the /tmp directory of the data owner. The data owner uses load_decrypt_data() in the process_data.py script to decrypt and reconstruct the loss and prediction data.
For example, use a python script with the following content to print the decrypted loss data:
For example, write the following content into a decrypt_save.py script, then run python decrypt_save.py decrypt_loss_file decrypt_prediction_file; the plaintext loss data and prediction results will be saved into the two files respectively.
```python
import sys
import process_data
print("uci_loss:")
process_data.load_decrypt_data("/tmp/uci_loss", (1,))
```
Use a python script with the following content to print the decrypted prediction data:
```python
import process_data
print("prediction:")
process_data.load_decrypt_data("/tmp/uci_prediction", (BATCH_SIZE,))
decrypt_loss_file=sys.argv[1]
decrypt_prediction_file=sys.argv[2]
BATCH_SIZE=10
process_data.load_decrypt_data("/tmp/uci_loss", (1, ), decrypt_loss_file)
process_data.load_decrypt_data("/tmp/uci_prediction", (BATCH_SIZE, ), decrypt_prediction_file)
```
### 3. Single-Machine Accuracy Test
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Decrypt Prediction Data.
"""
import sys
import process_data
decrypt_loss_file=sys.argv[1]
decrypt_prediction_file=sys.argv[2]
BATCH_SIZE=10
process_data.load_decrypt_data("/tmp/uci_loss", (1, ), decrypt_loss_file)
print("uci_loss done")
process_data.load_decrypt_data("/tmp/uci_prediction", (BATCH_SIZE, ), decrypt_prediction_file)
print("prediction done")
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Prepare data for UCI Housing.
"""
import process_data
process_data.generate_encrypted_data()
......@@ -17,6 +17,7 @@ Process data for UCI Housing.
import numpy as np
import paddle
import six
import os
from paddle_fl.mpc.data_utils import aby3
sample_reader = paddle.dataset.uci_housing.train()
......@@ -45,10 +46,12 @@ def generate_encrypted_data():
aby3.save_aby3_shares(encrypted_housing_labels, "/tmp/house_label")
def load_decrypt_data(filepath, shape):
def load_decrypt_data(filepath, shape, decrypted_file):
"""
load the encrypted data and reconstruct
"""
if os.path.exists(decrypted_file):
os.remove(decrypted_file)
part_readers = []
for id in six.moves.range(3):
part_readers.append(
......@@ -59,4 +62,6 @@ def load_decrypt_data(filepath, shape):
for instance in aby3_share_reader():
p = aby3.reconstruct(np.array(instance))
print(p)
with open(decrypted_file, 'a+') as f:
for i in p:
f.write(str(i) + '\n')
......@@ -32,12 +32,13 @@
#
# modify the following vars according to your environment
PYTHON="python"
REDIS_HOME="path_to_redis_bin"
SERVER="localhost"
PORT=9937
PYTHON=${PYTHON}
REDIS_HOME=${PATH_TO_REDIS_BIN}
SERVER=${LOCALHOST}
PORT=${REDIS_PORT}
echo "redis home in ${REDIS_HOME}, server is ${SERVER}, port is ${PORT}"
function usage() {
function usage(){
echo 'run_standalone.sh SCRIPT_NAME [ARG...]'
exit 0
}
......@@ -64,14 +65,25 @@ $REDIS_BIN -h $SERVER -p $PORT flushall
# remove temp data generated in last time
LOSS_FILE="/tmp/uci_loss.*"
PRED_FILE="/tmp/uci_prediction.*"
if [ "$LOSS_FILE" ]; then
ls ${LOSS_FILE}
if [ $? -eq 0 ]; then
rm -rf $LOSS_FILE
fi
if [ "$PRED_FILE" ]; then
ls ${PRED_FILE}
if [ $? -eq 0 ]; then
rm -rf $PRED_FILE
fi
TRAINING_FILE="/tmp/house_feature.part*"
ls ${TRAINING_FILE}
if [ $? -ne 0 ]; then
echo "There is no data in /tmp, please prepare data with "python prepare.py" firstly"
exit 1
else
echo "There are data for uci:"
echo "`ls ${TRAINING_FILE}`"
fi
# kick off script with roles of 1 and 2, and redirect output to /dev/null
for role in {1..2}; do
......
......@@ -61,8 +61,8 @@ exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
epoch_num = 20
start_time = time.time()
for epoch_id in range(epoch_num):
start_time = time.time()
step = 0
# Method 1: feed data directly
......@@ -71,17 +71,18 @@ for epoch_id in range(epoch_num):
# Method 2: feed data via loader
for sample in loader():
step_start = time.time()
mpc_loss = exe.run(feed=sample, fetch_list=[avg_loss])
step_end = time.time()
if step % 50 == 0:
print('Epoch={}, Step={}, Loss={}'.format(epoch_id, step,
mpc_loss))
print('Epoch={}, Step={}, batch_cost={:.4f} s, Loss={},'.format(epoch_id, step,
(step_end - step_start), mpc_loss))
with open(loss_file, 'ab') as f:
f.write(np.array(mpc_loss).tostring())
step += 1
end_time = time.time()
print('Mpc Training of Epoch={} Batch_size={}, cost time in seconds:{}'
end_time = time.time()
print('Mpc Training of Epoch={} Batch_size={}, epoch_cost={:.4f} s'
.format(epoch_num, BATCH_SIZE, (end_time - start_time)))
prediction_file = "/tmp/uci_prediction.part{}".format(role)
......@@ -92,9 +93,3 @@ for sample in loader():
with open(prediction_file, 'ab') as f:
f.write(np.array(prediction).tostring())
break
import process_data
print("uci_loss:")
process_data.load_decrypt_data("/tmp/uci_loss", (1, ))
print("prediction:")
process_data.load_decrypt_data("/tmp/uci_prediction", (BATCH_SIZE, ))
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
MPC('int64') Initializer
"""
import numpy as np
import mpc_data_utils as mdu
from paddle.fluid.initializer import Initializer
import paddle.fluid.framework as framework
from paddle.fluid.core import VarDesc
from paddle.fluid import unique_name
from paddle.fluid.data_feeder import check_variable_and_dtype, check_type, check_dtype
class NumpyArrayInitializer(Initializer):
"""Init a mpc parameter with an numpy array (astype('int64'))
This op initialize the variable by numpy array.
Args:
value (numpy): numpy array to initialize the variable
Returns:
A Tensor variable initialized by numpy.
Examples:
.. code-block:: python
import paddle.fluid as fluid
import paddle_fl.mpc as pfl_mpc
import numpy
weight_share = numpy.array([1,2]).astype('int64')
w_param_attrs = fluid.ParamAttr(name='emb_weight',
learning_rate=0.5,
initializer=pfl_mpc.initializer.NumpyArrayInitializer(weight_share),
trainable=True)
"""
def __init__(self, value):
import numpy
assert isinstance(value, numpy.ndarray)
super(NumpyArrayInitializer, self).__init__()
self._value = value
def __call__(self, var, block):
"""Add constant initialization ops for a variable
Args:
var: Variable that needs to be initialized
block: The block in which initialization ops
should be added
Returns:
the initialization op
"""
assert isinstance(var, framework.Variable)
assert isinstance(block, framework.Block)
out_var = var
out_dtype = var.dtype
np_value = self._value
value_name = "int64_values"
if (out_dtype != VarDesc.VarType.INT64):
raise ValueError("Only 'int64' dtype is supported in paddlefl's initializer, "
"Use paddle.fluid.initializer for other dtype.")
values = [int(v) for v in np_value.flat]
if self._value.size > 1024 * 1024 * 1024:
raise ValueError("The size of input is too big. Please consider "
"saving it to file and 'load_op' to load it")
op = block._prepend_op(
type='assign_value',
outputs={'Out': out_var},
attrs={
'dtype': out_dtype,
'shape': list(self._value.shape),
value_name: values
},
stop_gradient=True)
if not framework.in_dygraph_mode():
var.op = op
return op
class XavierInitializer(Initializer):
"""
This class implements the Xavier weight initializer from the paper
`Understanding the difficulty of training deep feedforward neural
networks <http://proceedings.mlr.press/v9/glorot10a/glorot10a.pdf>`_
by Xavier Glorot and Yoshua Bengio.
This initializer is designed to keep the scale of the gradients
approximately same in all the layers. In case of Uniform distribution,
the range is [-x, x], where
.. math::
x = \sqrt{\\frac{6.0}{fan\_in + fan\_out}}
In case of Normal distribution, the mean is 0 and the standard deviation
is
.. math::
\sqrt{\\frac{2.0}{fan\_in + fan\_out}}
Args:
uniform (bool,default True): whether to use uniform ,if False use normal distribution
fan_in (float,default None): fan_in for Xavier initialization. If None, it is
inferred from the variable.
fan_out (float,default None): fan_out for Xavier initialization. If None, it is
inferred from the variable.
seed (int): random seed
Note:
It is recommended to set fan_in and fan_out to None for most cases.
A share of the distribution will be returned.
The seeds of the three parties should be the same.
Examples:
.. code-block:: python
import paddle_fl.mpc as pfl_mpc
queries = pfl_mpc.data(name='x', shape=[2,1], dtype='int64')
fc = pfl_mpc.layers.fc(
input=queries, size=10,
param_attr=pfl_mpc.initializer.Xavier(uniform=False))
"""
def __init__(self, uniform=True, fan_in=None, fan_out=None, seed=0):
assert uniform is not None
assert seed is not None
super(XavierInitializer, self).__init__()
self._uniform = uniform
self._fan_in = fan_in
self._fan_out = fan_out
self._seed = seed
def _compute_fans(self, var):
"""Compute the fan_in and the fan_out for layers
This method computes the fan_in and the fan_out
for neural network layers, if not specified. It is
not possible to perfectly estimate fan_in and fan_out.
This method will estimate it correctly for matrix multiply and
convolutions.
Args:
var: variable for which fan_in and fan_out have to be computed
Returns:
tuple of two integers (fan_in, fan_out)
"""
shape = var.shape
if not shape or len(shape) == 0:
raise ValueError("Shape should be larger than 0 in paddlefl's initializer.")
elif len(shape) == 1:
fan_in = fan_out = 1
elif len(shape) == 2:
fan_in = fan_out = shape[1]
elif len(shape) == 3:
# This is the case for simple matrix multiply
fan_in = shape[1]
fan_out = shape[2]
else:
# Assume this to be a convolutional kernel
# In PaddlePaddle, the shape of the kernel is like:
# [num_filters, num_filter_channels, ...] where the remaining
# dimensions are the filter_size
receptive_field_size = np.prod(shape[3:])
fan_in = shape[2] * receptive_field_size
fan_out = shape[1] * receptive_field_size
return (fan_in, fan_out)
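    # Illustrative note (not in the original source): since the leading dimension of an
    # mpc parameter is the share dimension, _compute_fans maps shapes as, for example:
    #   (2, 128)         -> fan_in = fan_out = 128            (matrix-multiply weight)
    #   (2, 64, 128)     -> fan_in = 64,  fan_out = 128
    #   (2, 32, 3, 3, 3) -> fan_in = 3 * 9 = 27, fan_out = 32 * 9 = 288  (conv kernel)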
def __call__(self, var, block):
"""Add xavier initialization ops for a variable
Args:
var: Variable that needs to be initialized
block: The block in which initialization ops
should be added
Returns:
the initialization op
"""
assert isinstance(block, framework.Block)
check_variable_and_dtype(var, "Out", ["int64"], "xavier_init")
if (var.dtype != VarDesc.VarType.INT64):
raise ValueError("Only 'int64' dtype is supported in paddlefl's initializer.")
f_in, f_out = self._compute_fans(var)
# If fan_in and fan_out are passed, use them
fan_in = f_in if self._fan_in is None else self._fan_in
fan_out = f_out if self._fan_out is None else self._fan_out
if self._seed == 0:
self._seed = block.program.random_seed
# create tmp var:
# out_var for random number, shape = (1, ...)
# out_expand_var for encrypted random number, shape = (2, ...), is same with var's shape
out_dtype = VarDesc.VarType.FP32
shape_ = list(var.shape)
shape_[0]=1
out_var = block.create_var(
name=unique_name.generate(".".join(
['gaussian_random', var.name, 'tmp'])),
shape=shape_,
dtype=out_dtype,
type=VarDesc.VarType.LOD_TENSOR,
persistable=False)
out_expand_var = block.create_var(
name=unique_name.generate(".".join(
['gaussian_random_expand', var.name, 'tmp'])),
shape=out_var.shape,
dtype=out_dtype,
type=VarDesc.VarType.LOD_TENSOR,
persistable=False)
if self._uniform:
limit = np.sqrt(6.0 / float(fan_in + fan_out))
op = block._prepend_op(
type="uniform_random",
inputs={},
outputs={"Out": out_var},
attrs={
"shape": out_var.shape,
"dtype": out_dtype,
"min": -limit,
"max": limit,
"seed": self._seed
},
stop_gradient=True)
else:
std = np.sqrt(2.0 / float(fan_in + fan_out))
op = block._prepend_op(
type="gaussian_random",
outputs={"Out": out_var},
attrs={
"shape": out_var.shape,
"dtype": out_dtype,
"mean": 0.0,
"std": std,
"seed": self._seed
},
stop_gradient=True)
# convert plaintext into cyphertext
block.append_op(
type="scale",
inputs={"X": out_var},
outputs={"Out": out_var},
attrs={"scale": float(mdu.mpc_one_share)})
# extend one share to two share
block.append_op(
type="concat",
inputs={"X": [out_var, out_var]},
outputs={"Out": [out_expand_var]},
attrs={"axis": 0})
# cast float into int64
block.append_op(
type="cast",
inputs={"X": out_expand_var},
outputs={"Out": var},
attrs={"in_dtype": out_expand_var.dtype,
"out_dtype": var.dtype})
if not framework.in_dygraph_mode():
var.op = op
return op
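# ---------------------------------------------------------------------------
# Minimal illustrative sketch (not part of the original module): it mirrors the
# uniform branch of XavierInitializer above in plain numpy. The `one_share`
# constant is a made-up stand-in for mdu.mpc_one_share, and the function name is
# hypothetical; this is a sketch of the scale/concat/cast pipeline, not the
# library's implementation.
def _xavier_uniform_share_sketch(shape, fan_in, fan_out, one_share=2 ** 16, seed=0):
    """Sample one plaintext Xavier tensor, scale it into a share, and duplicate
    it along a new leading axis of size 2, as the initializer's concat op does."""
    rng = np.random.RandomState(seed)
    limit = np.sqrt(6.0 / float(fan_in + fan_out))
    plain = rng.uniform(-limit, limit, size=shape).astype('float32')
    share = plain * float(one_share)                                   # "scale" op
    expanded = np.concatenate([share[np.newaxis], share[np.newaxis]], axis=0)  # "concat" op
    return expanded.astype('int64')                                    # "cast" op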
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This module provides embedding operation for paddle_mpc.
"""
from __future__ import print_function
import six
import numpy as np
from paddle import fluid
from paddle.fluid.data_feeder import check_variable_and_dtype, check_dtype
import warnings
from .framework import MpcVariable
from .mpc_layer_helper import MpcLayerHelper
from .data_utils import aby3
__all__ = ['embedding']
def embedding(input,
size,
is_sparse=False,
is_distributed=False,
padding_idx=None,
param_attr=None,
dtype='int64'):
"""
The operator is used to lookup embeddings vector of ids provided by :attr:`input` .
It automatically constructs a 2D embedding matrix based on the
input :attr:`size` (vocab_size, emb_size) and :attr:`dtype` .
    The `input` is the mpc one-hot tensor of indexes; its last dimension is equal to `emb_size`,
    and its shape must have 3 dimensions, i.e., (2, x, emb_size).
The shape of output Tensor is generated by replacing an emb_size dimension to the
last dimension of the input Tensor shape.
    **Note:** The id in :attr:`input` must satisfy :math:`0 <= id < size[0]` ,
    otherwise the program will throw an exception and exit.
    **Note:** Params of `is_sparse`, `is_distributed`, `padding_idx` have not been implemented.
.. code-block:: text
Case 1:
input is a Tensor.
input.data = aby3.make_share([[1, 0, 0], [0, 1, 0]])
input.shape = [2, 2, 3]
w.data = aby3.make_share([[1, 2], [2, 3], [3, 4]])
Given size = [2, 3, 2]
output is a Tensor:
out.shape = [2, 2, 2]
out.data.reconstruct = [[1, 2], [2, 3]]
Args:
input(MpcVariable): A Tensor or LoDTensor with type int64, which contains the id information.
The value of the input id should satisfy :math:`0<= id < size[0]` .
size(tuple|list): The shape of lookup table parameter. It should have two elements which
indicates the size of the dictionary of embeddings and the size of each embedding vector respectively.
is_sparse(bool, not implemented): The flag indicating whether to use sparse update. This parameter only
affects the performance of the backwards gradient update. It is recommended to set
            True because sparse update is faster. But some optimizers do not support sparse update,
such as :ref:`api_fluid_optimizer_AdadeltaOptimizer` , :ref:`api_fluid_optimizer_AdamaxOptimizer` ,
:ref:`api_fluid_optimizer_DecayedAdagradOptimizer` , :ref:`api_fluid_optimizer_FtrlOptimizer` ,
:ref:`api_fluid_optimizer_LambOptimizer` and :ref:`api_fluid_optimizer_LarsMomentumOptimizer` .
            In these cases, is_sparse must be False. Default: False.
is_distributed(bool, not implemented): Whether to store the embedding matrix in a distributed manner. Only used
in multi-machine distributed CPU training. Default: False.
padding_idx(int|long|None, not implemented): padding_idx needs to be in the interval [-vocab_size, vocab_size).
If :math:`padding\_idx < 0`, the :math:`padding\_idx` will automatically be converted
to :math:`vocab\_size + padding\_idx` . It will output all-zero padding data whenever lookup
encounters :math:`padding\_idx` in id. And the padding data will not be updated while training.
If set None, it makes no effect to output. Default: None.
param_attr(ParamAttr): To specify the weight parameter property. Default: None, which means the
default weight parameter property is used. See usage for details in :ref:`api_fluid_ParamAttr` . In addition,
user-defined or pre-trained word vectors can be loaded with the :attr:`param_attr` parameter.
The local word vector needs to be transformed into numpy format, and the shape of local word
            vector should be consistent with :attr:`size` . Then :ref:`api_fluid_initializer_NumpyArrayInitializer`
is used to load custom or pre-trained word vectors.
dtype(str|core.VarDesc.VarType.INT64): It refers to the data type of output Tensor.
It must be int64.
Returns:
Variable: Embedding Tensor or LoDTensor mapped by input. The data type is the same as :attr:`dtype` .
Examples:
.. code-block:: python
import paddle.fluid as fluid
import paddle_fl.mpc as pfl
import numpy as np
# data should be mpc one hot tensor
data = pfl.data(name='x', shape=[4, 3], dtype='int64')
            # example 1
emb_1 = fluid.embedding(input=data, size=[3, 4])
# example 2: load custom or pre-trained word vectors
weight_data = np.random.random(size=(2, 3, 4)) # mpc word vectors with numpy format
w_param_attrs = fluid.ParamAttr(
name="emb_weight",
learning_rate=0.5,
initializer=fluid.initializer.NumpyArrayInitializer(weight_data),
trainable=True)
emb_2 = fluid.embedding(input=data, size=(3, 4), param_attr=w_param_attrs, dtype='int64')
"""
    if is_sparse:
        warnings.warn("the process on sparse data is the same as on dense data; "
                      "that is, 'is_sparse' is always set to 'False' in paddle_encrypted.")
    if is_distributed:
        warnings.warn("distributed deployment of paddle_encrypted has not been implemented; "
                      "that is, 'is_distributed' is always set to 'False' in paddle_encrypted.")
    if padding_idx:
        warnings.warn("padding_idx is not supported in paddle_encrypted; "
                      "that is, 'padding_idx' is always set to 'None' in paddle_encrypted.")
helper = MpcLayerHelper('embedding', **locals())
check_variable_and_dtype(input, 'input', ['int64'], 'paddle_encrypted.embedding')
check_dtype(dtype, 'dtype', ['int64'],
'paddle_encrypted.embedding')
w = helper.create_mpc_parameter(
attr=helper.param_attr, shape=size, dtype='int64', is_bias=False)
tmp = helper.create_mpc_variable_for_type_inference(dtype)
helper.append_op(
type='mpc_lookup_table_v2',
inputs={'Ids': input,
'W': w},
outputs={'Out': tmp},
attrs={
'is_sparse': False,
'is_distributed': False,
'remote_prefetch': False,
'padding_idx': None
})
return tmp
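# ---------------------------------------------------------------------------
# Minimal plaintext sketch (not part of the original module): with one-hot ids,
# a lookup table is just a matmul, which is, conceptually, what the
# mpc_lookup_table_v2 op computes on secret shares (an assumption drawn from the
# one-hot input described in the docstring above). Shapes here drop the leading
# share dimension; the function name is hypothetical.
def _one_hot_lookup_sketch():
    one_hot_ids = np.array([[0, 1, 0],
                            [1, 0, 0]], dtype='float64')      # 2 ids, vocab = 3
    table = np.array([[1., 2.],
                      [2., 3.],
                      [3., 4.]])                              # vocab = 3, emb = 2
    return one_hot_ids.dot(table)                             # [[2., 3.], [1., 2.]]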
......@@ -30,6 +30,10 @@ from . import ml
from .ml import *
from . import compare
from .compare import *
from . import conv
from .conv import conv2d
from . import rnn
from .rnn import *
__all__ = []
__all__ += basic.__all__
......@@ -37,3 +41,4 @@ __all__ += math.__all__
__all__ += matrix.__all__
__all__ += ml.__all__
__all__ += compare.__all__
__all__ += conv.__all__
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
All layers just related to the neural network.
"""
from __future__ import print_function
import os
import inspect
import warnings
import itertools
import numpy as np
import six
import paddle
from ..mpc_layer_helper import MpcLayerHelper
from ..framework import MpcVariable, check_mpc_variable_and_dtype
from functools import reduce
__all__ = [
'conv2d',
]
def _convert_to_list(value, n, name, dtype):
"""
Converts a single numerical type or iterable of numerical
types into an numerical type list.
Arguments:
        value: The value to validate and convert. Could be an int, or any iterable
of ints.
n: The size of the list to be returned.
name: The name of the argument being validated, e.g. "stride" or
"filter_size". This is only used to format error messages.
dtype: the numerical type of the element of the list to be returned.
Returns:
A list of n dtypes.
Raises:
        ValueError: If something other than an int/long or an iterable thereof was
passed.
"""
if isinstance(value, dtype):
return [value, ] * n
else:
try:
value_list = list(value)
except TypeError:
raise ValueError("The " + name +
"'s type must be list or tuple. Received: " + str(
value))
if len(value_list) != n:
raise ValueError("The " + name + "'s length must be " + str(n) +
". Received: " + str(value))
for single_value in value_list:
try:
dtype(single_value)
except (ValueError, TypeError):
raise ValueError(
"The " + name + "'s type must be a list or tuple of " + str(
n) + " " + str(dtype) + " . Received: " + str(
value) + " "
"including element " + str(single_value) + " of type" + " "
+ str(type(single_value)))
return value_list
def _is_symmetric_padding(padding, data_dim):
"""
Check whether padding is symmetrical.
"""
assert len(padding) == data_dim * 2 or len(padding) == data_dim
is_sys = True
if len(padding) == data_dim * 2:
for i in range(data_dim):
if padding[i * 2] != padding[i * 2 + 1]:
is_sys = False
return is_sys
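# Illustrative examples (not in the original source) for the two helpers above:
#   _convert_to_list(3, 2, 'stride', int)            -> [3, 3]
#   _convert_to_list((1, 2), 2, 'stride', int)       -> [1, 2]
#   _is_symmetric_padding([1, 1, 2, 2], data_dim=2)  -> True   ([top, bottom, left, right])
#   _is_symmetric_padding([1, 0, 2, 2], data_dim=2)  -> False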
def conv2d(input,
num_filters,
filter_size,
stride=1,
padding=0,
dilation=1,
groups=None,
param_attr=None,
bias_attr=None,
act=None,
name=None,
data_format="NCHW"):
"""
The convolution2D layer calculates the output based on the input, filter
and strides, paddings, dilations, groups parameters. Input and
Output are in NCHW or NHWC format, where N is batch size, C is the number of
channels, H is the height of the feature, and W is the width of the feature.
Filter is in MCHW format, where M is the number of output image channels,
C is the number of input image channels, H is the height of the filter,
and W is the width of the filter. If the groups is greater than 1,
C will equal the number of input image channels divided by the groups.
Please refer to UFLDL's `convolution
<http://ufldl.stanford.edu/tutorial/supervised/FeatureExtractionUsingConvolution/>`_
for more details.
If bias attribution and activation type are provided, bias is added to the
output of the convolution, and the corresponding activation function is
applied to the final result.
For each input :math:`X`, the equation is:
.. math::
Out = \sigma (W \\ast X + b)
Where:
* :math:`X`: Input value, a tensor with NCHW or NHWC format.
* :math:`W`: Filter value, a tensor with MCHW format.
* :math:`\\ast`: Convolution operation.
* :math:`b`: Bias value, a 2-D tensor with shape [M, 1].
* :math:`\\sigma`: Activation function.
* :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different.
Example:
- Input:
Input shape: :math:`(N, C_{in}, H_{in}, W_{in})`
Filter shape: :math:`(C_{out}, C_{in}, H_f, W_f)`
- Output:
Output shape: :math:`(N, C_{out}, H_{out}, W_{out})`
Where
.. math::
H_{out}&= \\frac{(H_{in} + 2 * paddings[0] - (dilations[0] * (H_f - 1) + 1))}{strides[0]} + 1 \\\\
W_{out}&= \\frac{(W_{in} + 2 * paddings[1] - (dilations[1] * (W_f - 1) + 1))}{strides[1]} + 1
Args:
        input (Variable): The input is an mpc Tensor with the share dimension first,
            e.g. [2, N, C, H, W] for NCHW, and its data type must be int64.
        num_filters(int): The number of filters. It is the same as the number of
            output image channels.
filter_size (int|tuple): The filter size. If filter_size
is a tuple, it must contain two integers, (filter_size_height,
filter_size_width). Otherwise, filter_size_height = filter_size_width =\
filter_size.
stride (int|tuple): The stride size. It means the stride in convolution.
If stride is a tuple, it must contain two integers, (stride_height, stride_width).
Otherwise, stride_height = stride_width = stride. Default: stride = 1.
padding (string|int|list|tuple): The padding size. It means the number of zero-paddings
on both sides for each dimension.If `padding` is a string, either 'VALID' or
'SAME' which is the padding algorithm. If padding size is a tuple or list,
it could be in three forms: `[pad_height, pad_width]` or
`[pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`, and when
`data_format` is `"NCHW"`, `padding` can be in the form `[[0,0], [0,0],
[pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`.
when `data_format` is `"NHWC"`, `pool_padding` can be in the form
`[[0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`.
Default: padding = 0.
dilation (int|tuple): The dilation size. It means the spacing between the kernel
points. If dilation is a tuple, it must contain two integers, (dilation_height,
dilation_width). Otherwise, dilation_height = dilation_width = dilation.
Default: dilation = 1.
groups (int): The groups number of the Conv2d Layer. According to grouped
convolution in Alex Krizhevsky's Deep CNN paper: when group=2,
the first half of the filters is only connected to the first half
of the input channels, while the second half of the filters is only
connected to the second half of the input channels. Default: groups=1.
param_attr (ParamAttr|None): The parameter attribute for learnable parameters/weights
of conv2d. If it is set to None or one attribute of ParamAttr, conv2d
will create ParamAttr as param_attr. If the Initializer of the param_attr
is not set, the parameter is initialized with :math:`Normal(0.0, std)`,
and the :math:`std` is :math:`(\\frac{2.0 }{filter\_elem\_num})^{0.5}`. Default: None.
bias_attr (ParamAttr|bool|None): The parameter attribute for the bias of conv2d.
If it is set to False, no bias will be added to the output units.
If it is set to None or one attribute of ParamAttr, conv2d
will create ParamAttr as bias_attr. If the Initializer of the bias_attr
is not set, the bias is initialized zero. Default: None.
act (str): Activation type, if it is set to None, activation is not appended.
Default: None
name(str|None): For detailed information, please refer
to :ref:`api_guide_Name`. Usually name is no need to set and
None by default.
data_format (str, optional): Specify the data format of the input, and the data format of the output
will be consistent with that of the input. An optional string from: `"NCHW"`, `"NHWC"`.
The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of:
`[batch_size, input_channels, input_height, input_width]`.
Returns:
A Variable holding Tensor representing the conv2d, whose data type is the
same with input. If act is None, the tensor variable storing the convolution
result, and if act is not None, the tensor variable storing convolution
and non-linearity activation result.
Raises:
ValueError: If using "depthwise_conv2d" (which is not supported yet).
ValueError: If `data_format` is not "NCHW" or "NHWC".
        ValueError: If the channel dimension of the input is less than or equal to zero.
ValueError: If `padding` is a string, but not "SAME" or "VALID".
ValueError: If `padding` is a tuple, but the element corresponding to the input's batch size is not 0
or the element corresponding to the input's channel is not 0.
ShapeError: If the input is not 4-D Tensor.
ShapeError: If the input's dimension size and filter's dimension size not equal.
ShapeError: If the dimension size of input minus the size of `stride` is not 2.
ShapeError: If the number of input channels is not equal to filter's channels * groups.
        ShapeError: If the number of output channels is not divisible by groups.
Examples:
.. code-block:: python
import paddle.fluid as fluid
data = fluid.data(name='data', shape=[None, 3, 32, 32], dtype='float32')
conv2d = fluid.layers.conv2d(input=data, num_filters=2, filter_size=3, act="relu")
"""
check_mpc_variable_and_dtype(input, 'input', ['int64'],
'conv2d')
num_channels = input.shape[1 + 1]
use_cudnn = False
if not isinstance(use_cudnn, bool):
raise ValueError("Attr(use_cudnn) should be True or False. Received "
"Attr(use_cudnn): %s. " % str(use_cudnn))
if data_format not in ["NCHW", "NHWC"]:
raise ValueError(
"Attr(data_format) should be 'NCHW' or 'NHWC'. Received "
"Attr(data_format): %s." % str(data_format))
channel_last = (data_format == "NHWC")
num_channels = input.shape[3 + 1] if channel_last else input.shape[1 + 1]
if num_channels < 0:
raise ValueError(
"The channel dimmention of the input(%s) should be defined. "
"Received: %s." % (str(input.shape), str(num_channels)))
assert param_attr is not False, "param_attr should not be False here."
l_type = 'conv2d'
if (num_channels == groups and num_filters % num_channels == 0 and
not use_cudnn):
l_type = 'depthwise_conv2d'
raise ValueError("l_type"
"%s is not implemented yet. " % (str(l_type)))
helper = MpcLayerHelper(l_type, **locals())
dtype = helper.input_dtype()
if groups is None:
num_filter_channels = num_channels
else:
if num_channels % groups != 0:
raise ValueError(
"the channel of input must be divisible by groups,"
"received: the channel of input is {}, the shape of input is {}"
", the groups is {}".format(num_channels, input.shape, groups))
num_filter_channels = num_channels // groups
filter_size = _convert_to_list(filter_size, 2, 'filter_size', np.int)
stride = _convert_to_list(stride, 2, 'stride', np.int)
dilation = _convert_to_list(dilation, 2, 'dilation', np.int)
# padding
def _update_padding(padding, data_format):
""" update padding accroding to data_format
raise ValueError if padding is not supported
"""
def is_list_or_tuple(ele):
""" return True if ele is a list or tuple
"""
if isinstance(ele, list) or isinstance(ele, tuple):
return True
return False
if is_list_or_tuple(padding) and len(padding) == 4:
if is_list_or_tuple(padding[0]) and (data_format == "NCHW"):
if not (padding[0] == [0, 0] and padding[1] == [0, 0]):
raise ValueError(
"Non-zero padding(%s) in the batch or channel dimensions "
"is not supported." % str(padding))
padding = padding[2:4]
padding = list(itertools.chain(*padding))
elif is_list_or_tuple(padding[0]) and (data_format == "NHWC"):
if not (padding[0] == [0, 0] and padding[3] == [0, 0]):
raise ValueError(
"Non-zero padding(%s) in the batch or channel dimensions "
"is not supported." % str(padding))
padding = padding[1:3]
padding = list(itertools.chain(*padding))
padding = _convert_to_list(padding, 4, 'padding', np.int)
if _is_symmetric_padding(padding, 2):
padding = [padding[0], padding[2]]
else:
padding = _convert_to_list(padding, 2, 'padding', np.int)
return padding
padding_algorithm = "EXPLICIT"
if isinstance(padding, str):
padding = padding.upper()
if padding not in ["SAME", "VALID"]:
raise ValueError(
"Unknown padding: '%s'. It can only be 'SAME' or 'VALID'." %
str(padding))
if padding == "VALID":
padding_algorithm = "VALID"
padding = [0, 0]
elif padding == "SAME":
padding_algorithm = "SAME"
padding = [0, 0]
padding = _update_padding(padding, data_format)
filter_shape = [num_filters, int(num_filter_channels)] + filter_size
filter_param = helper.create_mpc_parameter(
attr=helper.param_attr,
shape=filter_shape,
dtype=dtype)
pre_bias = helper.create_mpc_variable_for_type_inference(dtype)
helper.append_op(
type= 'mpc_' + l_type,
inputs={
'Input': input,
'Filter': filter_param,
},
outputs={"Output": pre_bias},
attrs={
'strides': stride,
'paddings': padding,
'dilations': dilation,
'groups': groups,
'use_cudnn': use_cudnn,
'use_mkldnn': False,
'fuse_relu_before_depthwise_conv': False,
"padding_algorithm": padding_algorithm,
"data_format": data_format,
})
if data_format == 'NCHW':
pre_act = helper.append_mpc_bias_op(pre_bias, dim_start=1, dim_end=2 + 1)
else:
pre_act = helper.append_mpc_bias_op(pre_bias, dim_start=3, dim_end=4 + 1)
return helper.append_mpc_activation(pre_act)
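# ---------------------------------------------------------------------------
# Minimal usage sketch (not part of the original module). The access path
# pfl_mpc.layers.conv2d follows the layers package above; the data shape is
# illustrative only, since the exact mpc share layout expected by pfl_mpc.data
# is an assumption here.
#
#   import paddle_fl.mpc as pfl_mpc
#
#   pfl_mpc.init("aby3", role, "localhost", redis_server, redis_port)
#   images = pfl_mpc.data(name='images', shape=[None, 1, 28, 28], dtype='int64')
#   conv = pfl_mpc.layers.conv2d(input=images, num_filters=8, filter_size=3,
#                                act='relu')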
......@@ -14,10 +14,15 @@
"""
mpc ml op layers.
"""
import os
import numpy
from functools import reduce
import mpc_data_utils as mdu
from paddle.fluid.data_feeder import check_type, check_dtype
import numpy
import paddle.fluid.layers.utils as utils
from paddle.fluid.initializer import Constant
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.framework import Variable
from ..framework import MpcVariable
from ..framework import check_mpc_variable_and_dtype
from ..mpc_layer_helper import MpcLayerHelper
......@@ -27,6 +32,9 @@ __all__ = [
'relu',
'softmax',
'sigmoid_cross_entropy_with_logits',
'softmax_with_cross_entropy',
'pool2d',
'batch_norm',
]
......@@ -221,10 +229,14 @@ def relu(input, name=None):
helper = MpcLayerHelper('relu', **locals())
dtype = helper.input_dtype(input_param_name='input')
out = helper.create_mpc_variable_for_type_inference(dtype)
derivative = helper.create_mpc_variable_for_type_inference(dtype)
helper.append_op(
type="mpc_relu",
inputs={"X": input},
outputs={"Y": out})
outputs={
"Out": out,
"Derivative": derivative}
)
return out
......@@ -259,3 +271,282 @@ def sigmoid_cross_entropy_with_logits(x,
"Label": label},
outputs={"Out": out})
return out
def softmax_with_cross_entropy(logits,
label,
soft_label=False,
return_softmax=False,
axis=-1,
use_relu=False,
use_long_div=True):
"""
forward: out = softmax(x). todo: add cross_entropy
backward: dx = dout.expand * (softmax(x) - label)
use_relu: False(default): output = exp(x_i) / sum(exp(x_i))
True: output = relu(x_i) / sum(relu(x_i))
use_long_div: True(default): long division implemented by boolean circuit.
slow with high precision.
range of quotient: [0, 2^20).
False: find inverse of divisor by Newton's method.
fast with low precision.
range of divisor: (0, 2^15).
"""
attrs = {
'soft_label': soft_label,
'axis': axis,
'use_relu': use_relu,
'use_long_div': use_long_div
}
helper = MpcLayerHelper('softmax_with_cross_entropy', **locals())
softmax = helper.create_variable_for_type_inference(dtype=logits.dtype)
loss = helper.create_variable_for_type_inference(dtype=logits.dtype)
helper.append_op(
type='mpc_softmax_with_cross_entropy',
inputs={'Logits': logits,
'Label': label},
outputs={'Softmax': softmax,
'Loss': loss},
attrs=attrs)
if return_softmax:
return loss, softmax
else:
raise NotImplementedError("'return_softmax' should be true. Loss is NULL, only for backward.")
def pool2d(input,
pool_size=-1,
pool_type="max",
pool_stride=1,
pool_padding=0,
global_pooling=False,
ceil_mode=False,
name=None,
exclusive=True,
data_format="NCHW"):
"""
pool2d
"""
if pool_type not in ["max"]:
raise ValueError(
"Unknown Attr(pool_type): '%s'. It can only be 'max'.",
str(pool_type))
if global_pooling is False and pool_size == -1:
raise ValueError(
"When Attr(global_pooling) is False, Attr(pool_size) must be passed "
"and be a valid value. Received pool_size: %s." % str(pool_size))
if data_format not in ["NCHW"]:
raise ValueError(
"Attr(data_format) should be 'NCHW'. Received "
"Attr(data_format): %s." % str(data_format))
pool_size = utils.convert_to_list(pool_size, 2, 'pool_size')
pool_stride = utils.convert_to_list(pool_stride, 2, 'pool_stride')
def update_padding(padding, data_format):
"""
update_padding: convert to 2-dimension padding
"""
def is_list_or_tuple(ele):
"""
return true if ele is list or tuple.
"""
if isinstance(ele, list) or isinstance(ele, tuple):
return True
return False
        # convert padding size to 2 (H, W)
if is_list_or_tuple(padding) and len(padding) == 4:
if is_list_or_tuple(padding[0]):
if not (padding[0] == [0, 0] and padding[1] == [0, 0]):
raise ValueError(
"Non-zero pool_padding(%s) in the batch or channel dimensions "
"is not supported." % str(padding))
padding = padding[2:4] # data_format == "NCHW":
padding = [ele for a_list in padding for ele in a_list]
padding = utils.convert_to_list(padding, 4, 'padding')
if utils._is_symmetric_padding(padding, 2):
padding = [padding[0], padding[2]]
else:
padding = utils.convert_to_list(padding, 2, 'padding')
return padding
padding_algorithm = "EXPLICIT"
if isinstance(pool_padding, str):
pool_padding = pool_padding.upper()
if pool_padding not in ["SAME", "VALID"]:
raise ValueError(
"Unknown Attr(pool_padding): '%s'. It can only be 'SAME' or 'VALID'."
% str(pool_padding))
if pool_padding == "VALID":
padding_algorithm = "VALID"
pool_padding = [0, 0]
if ceil_mode != False:
raise ValueError(
"When Attr(pool_padding) is \"VALID\", Attr(ceil_mode) must be False. "
"Received ceil_mode: True.")
elif pool_padding == "SAME":
padding_algorithm = "SAME"
pool_padding = [0, 0]
pool_padding = update_padding(pool_padding, data_format) # [h, w]
op_type = 'pool2d'
helper = MpcLayerHelper(op_type, **locals())
dtype = helper.input_dtype()
pool_out = helper.create_mpc_variable_for_type_inference(dtype)
one_hot_tensor = helper.create_variable_for_type_inference(dtype=input.dtype)
helper.append_op(
type='mpc_' + op_type,
inputs={"X": input},
outputs={"Out": pool_out,
"One_hot_tensor": one_hot_tensor},
attrs={
"pooling_type": pool_type,
"ksize": pool_size,
"global_pooling": global_pooling,
"strides": pool_stride,
"paddings": pool_padding,
"padding_algorithm": padding_algorithm,
"ceil_mode": ceil_mode,
"exclusive": exclusive,
"data_format": data_format,
})
return pool_out
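# Note (not in the original source): besides the pooled output, mpc_pool2d also
# emits "One_hot_tensor"; presumably it marks, per pooling window, which input
# position held the maximum so the backward pass can route gradients there.
# This is an inference from the output name, not a documented contract.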
def batch_norm(input,
act=None,
is_test=False,
momentum=0.9,
epsilon=1e-05,
param_attr=None,
bias_attr=None,
data_layout='NCHW',
in_place=False,
name=None,
moving_mean_name=None,
moving_variance_name=None,
do_model_average_for_mean_and_var=True,
use_global_stats=False):
"""
**Batch Normalization Layer**
"""
assert bias_attr is not False, "bias_attr should not be False in batch_norm."
helper = MpcLayerHelper('batch_norm', **locals())
check_mpc_variable_and_dtype(input, 'input', ['int64'], 'batch_norm')
dtype = helper.input_dtype()
has_reserve_space = False
if data_layout == 'NHWC':
flag = os.environ.get('FLAGS_cudnn_batchnorm_spatial_persistent')
if flag is not None and flag.lower() in ['true', '1']:
has_reserve_space = True
# plaintext_dtype = core.VarDesc.VarType.FP32
input_shape = input.shape
if data_layout == 'NCHW':
channel_num = input_shape[2]
else:
if data_layout == 'NHWC':
channel_num = input_shape[-1]
else:
raise ValueError("unsupported data layout:" + data_layout)
param_shape = [channel_num]
mpc_param_shape = [2, channel_num]
# create parameter
scale = helper.create_mpc_parameter(
attr=helper.param_attr,
shape=param_shape,
dtype=dtype,
default_initializer=Constant(mdu.mpc_one_share))
bias = helper.create_mpc_parameter(
attr=helper.bias_attr, shape=param_shape, dtype=dtype, is_bias=True)
mean = helper.create_mpc_parameter(
attr=ParamAttr(
name=moving_mean_name,
initializer=Constant(0),
trainable=False,
do_model_average=do_model_average_for_mean_and_var),
shape=param_shape,
dtype=dtype)
mean.stop_gradient = True
variance = helper.create_mpc_parameter(
attr=ParamAttr(
name=moving_variance_name,
initializer=Constant(mdu.mpc_one_share), # plaintext: 1
trainable=False,
do_model_average=do_model_average_for_mean_and_var),
shape=param_shape,
dtype=dtype)
variance.stop_gradient = True
# create output
# mean and mean_out share the same memory
mean_out = mean
# variance and variance out share the same memory
variance_out = variance
saved_mean = helper.create_mpc_variable_for_type_inference(
dtype=dtype, stop_gradient=True)
saved_variance = helper.create_mpc_variable_for_type_inference(
dtype=dtype, stop_gradient=True)
#reserve_space = None
#if has_reserve_space:
# reserve_space = helper.create_variable_for_type_inference(
# dtype=core.VarDesc.VarType.FP16, stop_gradient=True)
batch_norm_out = input if in_place else \
helper.create_mpc_variable_for_type_inference(dtype)
inputs = {
"X": input,
"Scale": scale,
"Bias": bias,
"Mean": mean,
"Variance": variance
}
attrs = {
"epsilon": epsilon,
"is_test": is_test,
"data_layout": data_layout,
"use_mkldnn": False,
"fuse_with_relu": False,
"use_global_stats": use_global_stats
}
if isinstance(momentum, Variable):
inputs['MomemtumTensor'] = momentum
else:
attrs['momentum'] = momentum
outputs = {
"Y": batch_norm_out,
"MeanOut": mean_out,
"VarianceOut": variance_out,
"SavedMean": saved_mean,
"SavedVariance": saved_variance
}
#if reserve_space is not None:
# outputs["ReserveSpace"] = reserve_space
helper.append_op(
type="mpc_batch_norm", inputs=inputs, outputs=outputs, attrs=attrs)
return helper.append_activation(batch_norm_out)
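# ---------------------------------------------------------------------------
# Minimal usage sketch (not part of the original module); layer access paths
# follow the __all__ additions above, while variable names and shapes are
# illustrative assumptions.
#
#   conv = pfl_mpc.layers.conv2d(input=images, num_filters=8, filter_size=3)
#   bn = pfl_mpc.layers.batch_norm(input=conv, act='relu')
#   pooled = pfl_mpc.layers.pool2d(input=bn, pool_size=2, pool_stride=2)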
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
mpc rnn op layers.
"""
from paddle.fluid.framework import in_dygraph_mode
from ..mpc_layer_helper import MpcLayerHelper
from paddle.fluid.data_feeder import check_variable_and_dtype, check_dtype
def dynamic_gru(input,
size,
param_attr=None,
bias_attr=None,
is_reverse=False,
gate_activation='sigmoid',
candidate_activation='relu',
h_0=None,
origin_mode=False):
"""
**Note: The input type of this must be LoDTensor. If the input type to be
processed is Tensor, use** :ref:`api_fluid_layers_StaticRNN` .
This operator is used to perform the calculations for a single layer of
Gated Recurrent Unit (GRU) on full sequences step by step. The calculations
in one time step support these two modes:
If ``origin_mode`` is True, then the formula used is from paper
`Learning Phrase Representations using RNN Encoder Decoder for Statistical
Machine Translation <https://arxiv.org/pdf/1406.1078.pdf>`_ .
.. math::
u_t & = act_g(W_{ux}x_{t} + W_{uh}h_{t-1} + b_u)
r_t & = act_g(W_{rx}x_{t} + W_{rh}h_{t-1} + b_r)
\\tilde{h_t} & = act_c(W_{cx}x_{t} + W_{ch}(r_t \odot h_{t-1}) + b_c)
h_t & = u_t \odot h_{t-1} + (1-u_t) \odot \\tilde{h_t}
if ``origin_mode`` is False, then the formula used is from paper
`Empirical Evaluation of Gated Recurrent Neural Networks on Sequence
Modeling <https://arxiv.org/pdf/1412.3555.pdf>`_
.. math::
u_t & = act_g(W_{ux}x_{t} + W_{uh}h_{t-1} + b_u)
r_t & = act_g(W_{rx}x_{t} + W_{rh}h_{t-1} + b_r)
\\tilde{h_t} & = act_c(W_{cx}x_{t} + W_{ch}(r_t \odot h_{t-1}) + b_c)
h_t & = (1-u_t) \odot h_{t-1} + u_t \odot \\tilde{h_t}
:math:`x_t` is the input of current time step, but it is not from ``input`` .
This operator does not include the calculations :math:`W_{ux}x_{t}, W_{rx}x_{t}, W_{cx}x_{t}` ,
    **Note** thus a fully-connected layer whose size is 3 times ``size`` should
be used before this operator, and the output should be used as ``input`` here.
:math:`h_{t-1}` is the hidden state from previous time step.
:math:`u_t` , :math:`r_t` , :math:`\\tilde{h_t}` and :math:`h_t` stand for
update gate, reset gate, candidate hidden and hidden output separately.
:math:`W_{uh}, b_u` , :math:`W_{rh}, b_r` and :math:`W_{ch}, b_c` stand for
the weight matrix and bias used in update gate, reset gate, candidate hidden
calculations. For implementation, the three weight matrix are merged into a
tensor shaped :math:`[D, D \\times 3]` , the three bias are concatenated as
a tensor shaped :math:`[1, D \\times 3]` , where :math:`D` stands for the
hidden size; The data layout of weight tensor is: :math:`W_{uh}` and :math:`W_{rh}`
are concatenated with shape :math:`[D, D \\times 2]` lying on the first part,
and :math:`W_{ch}` lying on the latter part with shape :math:`[D, D]` .
Args:
input(Variable): A LoDTensor whose lod level is 1, representing the input
after linear projection. Its shape should be :math:`[T, 2, D \\times 3]` ,
            which is the mpc input transposed with axes {1, 0, 2} and with lod set on the mpc shares,
where :math:`T` stands for the total sequence lengths in this mini-batch,
:math:`D` for the hidden size. The data type should be int64.
size(int): Indicate the hidden size.
param_attr(ParamAttr, optional): To specify the weight parameter property.
Default: None, which means the default weight parameter property is used.
See usage for details in :ref:`api_fluid_ParamAttr` .
bias_attr (ParamAttr, optional): To specify the bias parameter property.
Default: None, which means the default bias parameter property is used.
See usage for details in :ref:`api_fluid_ParamAttr` .
is_reverse(bool, optional): Whether to compute in the reversed order of
input sequences. Default False.
gate_activation(str, optional): The activation function corresponding to
:math:`act_g` in the formula. Only 'sigmoid' is supported now.
candidate_activation(str, optional): The activation function corresponding to
:math:`act_c` in the formula. Only "relu" is supported now.
h_0 (Variable, optional): A Tensor representing the initial hidden state.
It not provided, the default initial hidden state is 0. The shape is
:math:`[2, N, D]` , where :math:`N` is the number of sequences in the
mini-batch, :math:`D` for the hidden size. The data type should be
same as ``input`` . Default None.
Returns:
Variable: A LoDTensor whose lod level is 1 and shape is :math:`[2, T, D]` , \
where :math:`T` stands for the total sequence lengths in this mini-batch \
:math:`D` for the hidden size. It represents GRU transformed sequence output, \
and has the same lod and data type with ``input`` .
Examples:
.. code-block:: python
import paddle.fluid as fluid
dict_dim, emb_dim = 128, 64
data = fluid.data(name='sequence',
shape=[None],
dtype='int64',
lod_level=1)
emb = fluid.embedding(input=data, size=[dict_dim, emb_dim])
hidden_dim = 512
x = fluid.layers.fc(input=emb, size=hidden_dim * 3)
hidden = fluid.layers.dynamic_gru(input=x, size=hidden_dim)
"""
assert in_dygraph_mode(
) is not True, "please use gru instead of dynamic_gru in dygraph mode!"
helper = MpcLayerHelper('mpc_gru', **locals())
dtype = helper.input_dtype()
check_variable_and_dtype(input, 'input', ['int64'], 'mpc_gru')
check_dtype(dtype, 'dtype', ['int64'], 'mpc_gru')
weight = helper.create_mpc_parameter(
attr=helper.param_attr, shape=[size, 3 * size], dtype=dtype)
bias = helper.create_mpc_parameter(
attr=helper.bias_attr, shape=[1, 3 * size], dtype=dtype, is_bias=True)
batch_size = input.shape[0]
inputs = {'Input': input, 'Weight': weight, 'Bias': bias}
if h_0:
assert h_0.shape == (
2, batch_size, size
        ), 'The shape of h0 should be (2, batch_size, %d)' % size
inputs['H0'] = h_0
hidden = helper.create_mpc_variable_for_type_inference(dtype)
batch_gate = helper.create_mpc_variable_for_type_inference(dtype)
batch_reset_hidden_prev = helper.create_mpc_variable_for_type_inference(dtype)
batch_hidden = helper.create_mpc_variable_for_type_inference(dtype)
helper.append_op(
type='mpc_gru',
inputs=inputs,
outputs={
'Hidden': hidden,
'BatchGate': batch_gate,
'BatchResetHiddenPrev': batch_reset_hidden_prev,
'BatchHidden': batch_hidden
},
attrs={
'is_reverse': is_reverse,
'gate_activation': gate_activation,
'activation': candidate_activation,
'origin_mode': origin_mode
})
return hidden
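# ---------------------------------------------------------------------------
# Minimal usage sketch (not part of the original module), adapted from the
# fluid example in the docstring above. `emb` stands for an upstream mpc
# LoDTensor; the fc output feeding dynamic_gru must have size 3 * hidden_dim,
# and all tensors are int64 mpc shares.
#
#   hidden_dim = 512
#   x = pfl_mpc.layers.fc(input=emb, size=hidden_dim * 3)
#   hidden = dynamic_gru(input=x, size=hidden_dim)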
......@@ -29,7 +29,7 @@ from paddle.fluid.initializer import ConstantInitializer
# mpc_paddle module
from .framework import MpcVariable, MpcParameter, create_mpc_parameter, create_mpc_var
from .initializer import XavierInitializer
class MpcLayerHelper(LayerHelper):
"""
......@@ -100,7 +100,7 @@ class MpcLayerHelper(LayerHelper):
if is_bias:
attr._set_default_bias_initializer()
else:
attr._set_default_initializer(ConstantInitializer(0))
attr._set_default_initializer(XavierInitializer(seed=65536))
else:
attr._set_default_initializer(default_initializer)
......@@ -215,11 +215,14 @@ class MpcLayerHelper(LayerHelper):
tmp = self.create_mpc_variable_for_type_inference(
dtype=input_var.dtype)
derivative = self.create_mpc_variable_for_type_inference(
dtype=input_var.dtype)
# add "mpc_" as prefix of mpc activation
self.append_op(
type="mpc_" + act_type,
inputs={"X": [input_var]},
outputs={"Out": [tmp]},
outputs={"Out": [tmp],
"Derivative": [derivative]},
attrs=act)
return tmp
......
......@@ -22,6 +22,8 @@ from paddle.fluid.framework import Variable
from paddle.fluid.clip import error_clip_callback
from paddle.fluid import unique_name
from paddle.fluid.initializer import Constant
from paddle import fluid
from paddle.fluid import core
from .backward import append_backward
from .mpc_layer_helper import MpcLayerHelper
......@@ -135,7 +137,7 @@ class MPCSGDOptimizer(Optimizer):
name=unique_name.generate("learning_rate"),
shape=[1],
value=float(self._learning_rate),
dtype='double',
dtype='float32',
persistable=True)
def _create_param_lr(self, param_and_grad):
......@@ -166,6 +168,240 @@ class MPCSGDOptimizer(Optimizer):
return self._learning_rate_map.get(program, None)
class MPCAdamOptimizer(Optimizer):
"""
The Adam optimizer uses an optimization described at the end
of section 2 of `Adam paper <https://arxiv.org/abs/1412.6980>`_ ,
it can dynamically adjusts the learning rate of each parameter using
the 1st moment estimates and the 2nd moment estimates of the gradient.
The parameter ``param_out`` update rule with gradient ``grad``:
.. math::
t & = t + 1
moment\_1\_out & = {\\beta}_1 * moment\_1 + (1 - {\\beta}_1) * grad
moment\_2\_out & = {\\beta}_2 * moment\_2 + (1 - {\\beta}_2) * grad * grad
learning\_rate & = learning\_rate * \\
\\frac{\sqrt{1 - {\\beta}_2^t}}{1 - {\\beta}_1^t}
param\_out & = param - learning\_rate * \\frac{moment\_1}{\sqrt{moment\_2} + \epsilon}
Related paper: `Adam: A Method for Stochastic Optimization <https://arxiv.org/abs/1412.6980>`_
Args:
learning_rate (float|Variable, optional): The learning rate used to update ``Parameter``.
It can be a float value or a ``Variable`` with a float type. The default value is 0.001.
beta1 (float|Variable, optional): The exponential decay rate for the 1st moment estimates.
It should be a float number or a Variable with shape [1] and data type as float32.
The default value is 0.9.
beta2 (float|Variable, optional): The exponential decay rate for the 2nd moment estimates.
It should be a float number or a Variable with shape [1] and data type as float32.
The default value is 0.999.
epsilon (float, optional): A small float value for numerical stability.
The default value is 1e-08.
name (str, optional): Normally there is no need for user to set this property.
For more information, please refer to :ref:`api_guide_Name`.
The default value is None.
"""
_moment1_acc_str = "moment1"
_moment2_acc_str = "moment2"
_beta1_pow_acc_str = "beta1_pow_acc"
_beta2_pow_acc_str = "beta2_pow_acc"
def __init__(self,
learning_rate=0.001,
beta1=0.9,
beta2=0.999,
epsilon=1e-4,
name=None):
assert learning_rate is not None
assert beta1 is not None
assert beta2 is not None
assert epsilon is not None
super(MPCAdamOptimizer, self).__init__(
learning_rate=learning_rate,
name=name)
self.type = "adam"
self._beta1 = beta1
self._beta2 = beta2
self._epsilon = epsilon
self.type = "mpc_adam"
def _create_accumulators(self, block, parameters):
assert isinstance(block, framework.Block)
# Create accumulator tensors for first and second moments
for p in parameters:
self._add_accumulator(self._moment1_acc_str, p)
self._add_accumulator(self._moment2_acc_str, p)
self._add_accumulator(
name=self._beta1_pow_acc_str,
param=p,
fill_value=0.9 if isinstance(self._beta1, Variable) \
else self._beta1,
shape=[1],
type=core.VarDesc.VarType.LOD_TENSOR, device='cpu', dtype='float32')
self._add_accumulator(
name=self._beta2_pow_acc_str,
param=p,
fill_value=0.999 if isinstance(self._beta2, Variable) \
else self._beta2,
shape=[1],
type=core.VarDesc.VarType.LOD_TENSOR, device='cpu', dtype='float32')
def _append_optimize_op(self, block, param_and_grad):
assert isinstance(block, framework.Block)
moment1 = self._get_accumulator(self._moment1_acc_str,
param_and_grad[0])
moment2 = self._get_accumulator(self._moment2_acc_str,
param_and_grad[0])
beta1_pow_acc = self._get_accumulator(self._beta1_pow_acc_str,
param_and_grad[0])
beta2_pow_acc = self._get_accumulator(self._beta2_pow_acc_str,
param_and_grad[0])
lr = self._create_param_lr(param_and_grad)
# create the adam optimize op
inputs = {
"Param": [param_and_grad[0]],
"Grad": [param_and_grad[1]],
"LearningRate": [lr],
"Moment1": [moment1],
"Moment2": [moment2],
"Beta1Pow": [beta1_pow_acc],
"Beta2Pow": [beta2_pow_acc]
}
outputs = {
"ParamOut": [param_and_grad[0]],
"Moment1Out": [moment1],
"Moment2Out": [moment2],
"Beta1PowOut": [beta1_pow_acc],
"Beta2PowOut": [beta2_pow_acc],
}
attrs = {
"epsilon": self._epsilon,
}
if isinstance(self._beta1, Variable):
inputs['Beta1Tensor'] = self._beta1
else:
attrs['beta1'] = self._beta1
if isinstance(self._beta2, Variable):
inputs['Beta2Tensor'] = self._beta2
else:
attrs['beta2'] = self._beta2
adam_op = block.append_op(
type=self.type,
inputs=inputs,
outputs=outputs,
attrs=attrs,
stop_gradient=True)
return adam_op
def _create_global_learning_rate(self):
lr = self._global_learning_rate()
if isinstance(lr, framework.Variable):
return
else:
if not isinstance(self._learning_rate, float):
raise TypeError(
"learning rate variable is create outside optimizer,"
"can not create new learning rate variable for new program")
# create learning rate in the current main program
self._learning_rate_map[framework.default_main_program(
)] = create_global_var(
name=unique_name.generate("learning_rate"),
shape=[1],
value=float(self._learning_rate),
dtype='float32',
persistable=True)
def _create_param_lr(self, param_and_grad):
"""
create learning rate parameter
"""
# create learning rate variable for every parameter
param = param_and_grad[0]
param_lr = param.optimize_attr['learning_rate']
if type(param_lr) == Variable:
return param_lr
else:
if param_lr == 1.0:
return self._global_learning_rate()
else:
with fluid.default_main_program()._lr_schedule_guard(
is_with_opt=True), framework.name_scope(
'scale_with_param_lr'):
return self._global_learning_rate() * param_lr
def _global_learning_rate(self, program=None):
"""
get global decayed learning rate
:return:
"""
if program is None:
program = framework.default_main_program()
return self._learning_rate_map.get(program, None)
def backward(self,
loss,
startup_program=None,
parameter_list=None,
no_grad_set=None,
callbacks=None):
"""
The first part of ``minimize``, do auto-diff to append backward operations for
the current program.
Args:
loss (Variable): ``loss`` variable to run optimizations.
startup_program (Program, optional): :ref:`api_fluid_Program` for
initializing parameters in ``parameter_list``. The default value
is None, at this time :ref:`api_fluid_default_startup_program` will be used.
parameter_list (list, optional): List of ``Variable`` names to update
to minimize ``loss``. The default value is None, at this time all parameters
will be updated.
no_grad_set (set, optional): Set of ``Variable`` objects that don't need
to be updated. The default value is None.
callbacks (list, optional): list of callable objects to run when appending backward
operator for one parameter. The default value is None.
Return:
list: list of (param, grad) variable pairs, param is ``Parameter``,
grad is the gradient value corresponding to the parameter.
Examples:
See examples in ``apply_gradients``.
"""
no_grad_set = self._get_no_grad_set(loss, no_grad_set)
self._dtype = loss.dtype
if callbacks is None:
callbacks = [error_clip_callback]
else:
assert (isinstance(callbacks, list))
program = loss.block.program
assert len(loss.shape) == 2 and loss.shape[0] == 2 and loss.shape[1] == 1, \
"The loss.shape should be (2L,), but the current loss.shape is {}. " \
"Maybe that you should call fluid.layers.mean to process the current loss.".format(
loss.shape)
with program_guard(program, startup_program):
params_grads = append_backward(loss, parameter_list, no_grad_set,
callbacks)
# Note: since we can't use all_reduce_op now,
# dgc_op should be the last op of one grad.
self._append_dgc_ops(params_grads)
return params_grads
def create_global_var(shape,
value,
dtype,
......@@ -209,3 +445,4 @@ def create_global_var(shape,
SGD = MPCSGDOptimizer
Adam = MPCAdamOptimizer
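# ---------------------------------------------------------------------------
# Minimal usage sketch (not part of the original module). The pfl_mpc.optimizer
# access path and the mean layer name are assumptions; as the backward()
# assertion in MPCAdamOptimizer above requires, the loss passed to minimize()
# must be an mpc share tensor of shape (2, 1).
#
#   cost = pfl_mpc.layers.sigmoid_cross_entropy_with_logits(y_pre, y)
#   avg_loss = pfl_mpc.layers.mean(cost)   # hypothetical name; any op yielding a (2, 1) share works
#   pfl_mpc.optimizer.SGD(learning_rate=0.001).minimize(avg_loss)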
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import os
import unittest
import warnings
import numpy as np
import random
import six
import time
import itertools
import collections
from collections import defaultdict
from multiprocessing import Pipe, Process, Manager
import traceback
import redis
import paddle_fl.mpc as pfl_mpc
import paddle_fl.mpc.data_utils.aby3 as aby3
import paddle.fluid as fluid
import paddle.fluid.core as core
from paddle_fl.mpc.backward import append_backward
from paddle.fluid.op import Operator
from paddle.fluid.executor import Executor
from paddle.fluid.framework import Program, OpProtoHolder, Variable
from testsuite import create_op, set_input, append_input_output, append_loss_ops
from paddle.fluid import unique_name
def _set_use_system_allocator(value=None):
USE_SYSTEM_ALLOCATOR_FLAG = "FLAGS_use_system_allocator"
old_value = core.globals()[USE_SYSTEM_ALLOCATOR_FLAG]
value = old_value if value is None else value
core.globals()[USE_SYSTEM_ALLOCATOR_FLAG] = value
return old_value
def randomize_probability(batch_size, class_num, dtype='float32'):
prob = np.random.uniform(
0.1, 1.0, size=(batch_size, class_num)).astype(dtype)
prob_sum = prob.sum(axis=1)
for i in six.moves.xrange(len(prob)):
prob[i] /= prob_sum[i]
return prob
def skip_check_grad_ci(reason=None):
"""Decorator to skip check_grad CI.
Check_grad is required for Op test cases. However, there are some special
cases that do not need to do check_grad. This decorator is used to skip the
check_grad of the above cases.
Note: the execution of unit test will not be skipped. It just avoids check_grad
checking in tearDownClass method by setting a `no_need_check_grad` flag.
Example:
@skip_check_grad_ci(reason="For inference, check_grad is not required.")
class TestInference(OpTest):
"""
if not isinstance(reason, str):
raise AssertionError("The reason for skipping check_grad is required.")
def wrapper(cls):
cls.no_need_check_grad = True
return cls
return wrapper
class Aby3Process(Process):
"""
    Extends Process to evaluate a computation party in aby3.
"""
def __init__(self, *args, **kwargs):
Process.__init__(self, *args, **kwargs)
self._pconn, self._cconn = Pipe()
self._exception = None
def run(self):
"""
Override. Send any exceptions raised in
subprocess to main process.
"""
try:
Process.run(self)
self._cconn.send(None)
except Exception as e:
tb = traceback.format_exc()
self._cconn.send((e, tb))
@property
def exception(self):
"""
Get exception.
"""
if self._pconn.poll():
self._exception = self._pconn.recv()
return self._exception
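# ---------------------------------------------------------------------------
# Minimal sketch (not part of the original file) of how Aby3Process is meant to
# be driven, mirroring OpTest.multi_party_run below: one process per party, and
# the parent re-raises any exception shipped back over the pipe. The helper
# name is hypothetical.
#
#   def _run_parties(target, **kwargs):
#       parties = []
#       for role in range(3):
#           kwargs.update({'role': role})
#           parties.append(Aby3Process(target=target, kwargs=kwargs))
#           parties[-1].start()
#       for party in parties:
#           party.join()
#           if party.exception:
#               raise RuntimeError(party.exception[1])   # traceback string
#       return True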
class OpTest(unittest.TestCase):
def __init__(self, methodName='runTest'):
super(OpTest, self).__init__(methodName)
# set redis server and port
self.server = os.environ['TEST_REDIS_IP']
self.port = os.environ['TEST_REDIS_PORT']
self.party_num = 3
def setUp(self):
"""
Connect redis and delete all keys in all databases on the current host.
:return:
"""
r = redis.Redis(host=self.server, port=int(self.port))
r.flushall()
def multi_party_run(self, **kwargs):
"""
Run 3 parties with target function or other additional arguments.
:param kwargs:
:return:
"""
r = redis.Redis(host=self.server, port=int(self.port))
r.flushall()
target = kwargs['target']
partys = []
for role in range(self.party_num):
kwargs.update({'role': role})
partys.append(Aby3Process(target=target, kwargs=kwargs))
partys[-1].start()
for party in partys:
party.join()
if party.exception:
return party.exception
return (True,)
@classmethod
def setUpClass(cls):
'''Fix random seeds to remove randomness from tests'''
cls._np_rand_state = np.random.get_state()
cls._py_rand_state = random.getstate()
cls.call_once = False
cls.dtype = None
cls.outputs = {}
cls.input_shape_is_large = True
np.random.seed(123)
random.seed(124)
cls._use_system_allocator = _set_use_system_allocator(True)
@classmethod
def tearDownClass(cls):
"""Restore random seeds"""
np.random.set_state(cls._np_rand_state)
random.setstate(cls._py_rand_state)
_set_use_system_allocator(cls._use_system_allocator)
def is_empty_grad_op(op_type):
all_op_kernels = core._get_all_register_op_kernels()
grad_op = op_type + '_grad'
if grad_op in all_op_kernels.keys():
return False
return True
if not hasattr(cls, "op_type"):
raise AssertionError(
"This test do not have op_type in class attrs, "
"please set self.__class__.op_type=the_real_op_type manually.")
if not hasattr(cls, "no_need_check_grad") \
and not is_empty_grad_op(cls.op_type):
if not cls.input_shape_is_large and not hasattr(cls, "exist_check_grad"):
raise AssertionError(
"Input's shape should be large than or equal to 100 for " +
cls.op_type + " Op.")
def try_call_once(self, data_type):
if not self.call_once:
self.call_once = True
self.dtype = data_type
def infer_dtype_from_inputs_outputs(self, inputs, outputs):
def is_np_data(input):
return isinstance(input, (np.ndarray, np.generic))
def infer_dtype(numpy_dict, dtype_set):
assert isinstance(
numpy_dict,
dict), "self.inputs, self.outputs must be numpy_dict"
# the inputs are as follows:
# case 1: inputs = {'X': x}
# case 2: inputs = {'X': (x, x_lod)}
# case 3: inputs = {"X": [("x0", x0), ("x1", x1), ("x2", x2)]}
# case 4: inputs = {'X': [("x1", (x1, [x1_lod1])), ("x2", (x2, [x2_.lod2]))]}
# TODO(juncaipeng) infer dtype from inputs maybe obtain wrong type.
for _, var_value in six.iteritems(numpy_dict):
if is_np_data(var_value): # case 1
dtype_set.add(var_value.dtype)
elif isinstance(var_value, (list, tuple)): # case 2, 3, 4
for sub_val_value in var_value:
if is_np_data(sub_val_value): # case 2
dtype_set.add(sub_val_value.dtype)
elif len(sub_val_value) > 1 and is_np_data(
sub_val_value[1]): # case 3
dtype_set.add(sub_val_value[1].dtype)
elif len(sub_val_value) > 1 and isinstance(sub_val_value[1], (list, tuple)) \
and is_np_data(sub_val_value[1][0]): # case 4
dtype_set.add(sub_val_value[1][0].dtype)
# infer dtype from inputs, and dtype means the precision of the test
# collect dtype of all inputs
dtype_set = set()
infer_dtype(inputs, dtype_set)
dtype_list = [
np.dtype(np.float64), np.dtype(np.float32), np.dtype(np.float16),
np.dtype(np.int64), np.dtype(np.int32), np.dtype(np.int16),
np.dtype(np.int8), np.dtype(np.uint8), np.dtype(np.bool)
]
# check the dtype in dtype_list in order, select the first dtype that in dtype_set
for dtype in dtype_list:
if dtype in dtype_set:
self.dtype = dtype
break
# save dtype in class attr
self.__class__.dtype = self.dtype
def feed_var(self, input_vars, place):
feed_map = {}
for var_name in input_vars:
if isinstance(input_vars[var_name], list):
for name, np_value in self.inputs[var_name]:
tensor = core.LoDTensor()
if isinstance(np_value, tuple):
tensor.set(np_value[0], place)
tensor.set_recursive_sequence_lengths(np_value[1])
else:
tensor.set(np_value, place)
feed_map[name] = tensor
else:
tensor = core.LoDTensor()
if isinstance(self.inputs[var_name], tuple):
tensor.set(self.inputs[var_name][0], place)
tensor.set_recursive_sequence_lengths(self.inputs[var_name][
1])
else:
tensor.set(self.inputs[var_name], place)
feed_map[var_name] = tensor
return feed_map
def _append_ops(self, block):
self.__class__.op_type = self.op_type # for ci check, please not delete it for now
op_proto = OpProtoHolder.instance().get_op_proto(self.op_type)
"infer datatype from inputs and outputs for this test case"
self.infer_dtype_from_inputs_outputs(self.inputs, self.outputs)
inputs = append_input_output(block, op_proto, self.inputs, True,
self.dtype)
outputs = append_input_output(block, op_proto, self.outputs, False,
self.dtype)
op = block.append_op(
type=self.op_type,
inputs=inputs,
outputs=outputs,
attrs=self.attrs if hasattr(self, "attrs") else dict())
# infer variable type and infer shape in compile-time
op.desc.infer_var_type(block.desc)
op.desc.infer_shape(block.desc)
return op
def _get_io_vars(self, block, numpy_inputs):
inputs = {}
for name, value in six.iteritems(numpy_inputs):
if isinstance(value, list):
var_list = [
block.var(sub_name) for sub_name, sub_value in value
]
inputs[name] = var_list
else:
inputs[name] = block.var(name)
return inputs
def _get_inputs(self, block):
return self._get_io_vars(block, self.inputs)
def _get_outputs(self, block):
return self._get_io_vars(block, self.outputs)
def calc_output(self, place):
outs, _ = self._calc_output(place)
return outs
def _calc_output(self,
place,
parallel=False,
no_check_set=None,
loss=None,
enable_inplace=None,
for_inplace_test=False):
program = Program()
block = program.global_block()
op = self._append_ops(block)
inputs = self._get_inputs(block)
outputs = self._get_outputs(block)
feed_map = self.feed_var(inputs, place)
if for_inplace_test:
# Some variables' tensors hold no buffer (tensor's _holder is NULL), like XShape in reshape2 op,
# and the shapes of those variables contain 0 (eg. Xshape.shape = [0, 2, 5]).
            # Set persistable for those variables in order to get them from global_scope for the inplace grad test directly rather than feeding them,
# since feed op calls check_memory_size() which fails when tensor's holder_ is NULL.
for name in op.output_arg_names:
var = block.var(name)
var.persistable = True
original_program = program
#if parallel:
# use_cuda = False
# if isinstance(place, fluid.CUDAPlace):
# use_cuda = True
# compiled_prog = fluid.CompiledProgram(program).with_data_parallel(
# loss_name=loss.name if loss else None, places=place)
# program = compiled_prog
fetch_list = getattr(self, "fetch_list", [])
# if the fetch_list is customized by user, we use it directly.
# if not, fill the fetch_list by the user configured outputs in test.
if len(fetch_list) == 0:
for var_name, var in six.iteritems(outputs):
if no_check_set is not None and var_name in no_check_set:
continue
if isinstance(var, list):
for v in var:
fetch_list.append(v.name)
else:
fetch_list.append(var.name)
        # if the fetch_list is still empty, fill it with the operator outputs.
if len(fetch_list) == 0:
for out_name, out_dup in Operator.get_op_outputs(self.op_type):
fetch_list.append(str(out_name))
if enable_inplace is not None:
build_strategy = fluid.BuildStrategy()
build_strategy.enable_inplace = enable_inplace
compiled_prog = fluid.CompiledProgram(program).with_data_parallel(
build_strategy=build_strategy, places=place)
program = compiled_prog
return_results = [Manager().list() for _ in range(len(fetch_list))]
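        # each ABY3 party executes the same program in its own process (see
        # multi_party_run); per-party outputs are gathered via Manager lists and
        # reconstructed into plaintext with aby3.reconstruct once all parties finish.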
def closure(**kwargs):
role = kwargs['role']
pfl_mpc.init("aby3", role, "localhost", self.server, int(self.port))
#init_op = fluid.default_main_program().global_block().ops[0]
#_insert_init_op(program, init_op)
executor = Executor(place)
executor.run()
outs = executor.run(program,
feed=feed_map,
fetch_list=fetch_list)
for idx in range(len(fetch_list)):
return_results[idx].append(outs[idx])
ret = self.multi_party_run(target=closure)
self.assertEqual(ret[0], True)
outs = []
for idx in range(len(fetch_list)):
outs.append(aby3.reconstruct(np.array(return_results[idx])))
self.op = op
self.program = original_program
if for_inplace_test:
return outs, fetch_list, feed_map, original_program, op.desc
else:
return outs, fetch_list
def _get_need_run_ops(self, op_desc, fwd_op_desc=None):
"""Postorder traversal of the 'grad' tree to get all ops that need to run during inplace test.
        An op needs to run during the inplace check if,
        (1) it has infer_inplace,
        (2) it has infer_inplace in its grad descendants. (since we need its outputs to construct its grad's inputs)
Args:
op_desc (OpDesc): The op_desc of current op.
fwd_op_desc (OpDesc): The op_desc of current op's forward op, None if current op has no forward op.
Eg. relu's fwd_op is None, relu_grad's fwd_op is relu, relu_grad_grad's fwd_op is relu_grad, etc.
Returns:
need_run_ops (list[(op_desc, fwd_op_desc)]): The ops that need to run during inplace test.
"""
need_run_ops = []
visited_ops = []
def _dfs_grad_op(op_desc, fwd_op_desc=None):
visited_ops.append(op_desc.type())
has_infer_inplace = fluid.core.has_infer_inplace(op_desc.type())
has_grad_op_maker = fluid.core.has_grad_op_maker(op_desc.type())
has_infer_inplace_in_grad_descendants = False
if not has_grad_op_maker:
                has_infer_inplace_in_grad_descendants = False
else:
# get grad_op_desc
grad_op_desc_list, op_grad_to_var = core.get_grad_op_desc(
op_desc, set(), [])
if not grad_op_desc_list:
has_infer_inplace_in_grad_descendants = False
else:
for i, grad_op_desc in enumerate(grad_op_desc_list):
if grad_op_desc.type(
) not in visited_ops and _dfs_grad_op(
grad_op_desc, fwd_op_desc=op_desc):
has_infer_inplace_in_grad_descendants = True
if has_infer_inplace or has_infer_inplace_in_grad_descendants:
need_run_ops.append((op_desc, fwd_op_desc))
return True
else:
return False
_dfs_grad_op(op_desc, fwd_op_desc=fwd_op_desc)
return need_run_ops
def check_inplace_output_with_place(self,
place,
no_check_set=None,
inplace_atol=None):
"""Chech the inplace correctness of given op, its grad op, its grad_grad op, etc.
(1) Get all ops need to run. (see conditions in _get_need_run_ops())
(2) Run op in need_run_ops, and do inplace check if it has infer_inplace.
Args:
place (CPUPlace | CUDAPlace): The place where the op runs.
no_check_set (list): The names of outputs that needn't check, like XShape of reshape op.
inplace_atol (float): The tolerable error, only set when op doesn't ensure computational consistency, like group_norm op.
Returns:
None
"""
has_infer_inplace = fluid.core.has_infer_inplace(self.op_type)
has_grad_op_maker = fluid.core.has_grad_op_maker(self.op_type)
fwd_res = self._calc_output(
place, no_check_set=no_check_set, for_inplace_test=True)
op_desc = fwd_res[4]
need_run_ops = self._get_need_run_ops(op_desc)
res = {}
for op_desc, father_op_desc in reversed(need_run_ops):
# The first one is the forward op
has_infer_inplace = fluid.core.has_infer_inplace(op_desc.type())
if op_desc.type() == self.op_type:
if has_infer_inplace:
res[op_desc] = self._check_forward_inplace(
place,
no_check_set=no_check_set,
inplace_atol=inplace_atol)
else:
res[op_desc] = self._calc_output(
place, no_check_set=no_check_set, for_inplace_test=True)
else:
if has_infer_inplace:
fwd_res = res[father_op_desc]
res[op_desc] = self._check_grad_inplace(
place, fwd_res, op_desc, inplace_atol=inplace_atol)
else:
res[op_desc] = self._calc_grad_output(place, fwd_res,
op_desc)
def check_output_with_place(self,
place,
atol=0,
no_check_set=None,
equal_nan=False,
check_dygraph=True,
inplace_atol=None):
self.infer_dtype_from_inputs_outputs(self.inputs, self.outputs)
outs, fetch_list = self._calc_output(place, no_check_set=no_check_set)
for out_name, out_dup in Operator.get_op_outputs(self.op_type):
if out_name not in self.outputs:
continue
if no_check_set is not None and out_name in no_check_set:
continue
def find_imperative_actual(target_name, dygraph_outs, place):
with fluid.dygraph.base.guard(place=place):
for name in dygraph_outs:
if name == target_name:
return dygraph_outs[name][0]
var_list = dygraph_outs[name]
for i, var in enumerate(var_list):
if var.name == target_name:
return dygraph_outs[name][i]
self.assertTrue(False, "Found failed {} {}".format(
dygraph_outs.keys(), target_name))
def find_actual(target_name, fetch_list):
found = [
i for i, var_name in enumerate(fetch_list)
if var_name == target_name
]
self.assertTrue(
len(found) == 1, "Found {} {}".format(
len(found), target_name))
return found[0]
if out_dup:
sub_out = self.outputs[out_name]
if not isinstance(sub_out, list):
raise AssertionError("sub_out type %s is not list",
type(sub_out))
for item in sub_out:
sub_out_name, expect = item[0], item[1]
idx = find_actual(sub_out_name, fetch_list)
actual = outs[idx]
actual_t = np.array(actual)
expect_t = expect[0] \
if isinstance(expect, tuple) else expect
self.assertTrue(
np.allclose(
actual_t, expect_t, atol=atol, equal_nan=equal_nan),
"Output (" + sub_out_name + ") has diff at " +
str(place))
if isinstance(expect, tuple):
self.assertListEqual(
actual.recursive_sequence_lengths(), expect[1],
"Output (" + sub_out_name +
") has different lod at " + str(place))
else:
idx = find_actual(out_name, fetch_list)
actual = outs[idx]
actual_t = np.array(actual)
expect = self.outputs[out_name]
expect_t = expect[0] if isinstance(expect, tuple) else expect
self.assertTrue(
np.allclose(
actual_t, expect_t, atol=atol, equal_nan=equal_nan),
"Output (" + out_name + ") has diff at " + str(place) +
"\nExpect " + str(expect_t) + "\n" + "But Got" +
str(actual_t) + " in class " + self.__class__.__name__)
if isinstance(expect, tuple):
self.assertListEqual(actual.recursive_sequence_lengths(),
expect[1], "Output (" + out_name +
") has different lod at " + str(place))
# Note(zhiqiu): inplace_atol should be only set when op doesn't ensure
# computational consistency.
# For example, group_norm uses AtomicAdd on CUDAPlace, which do not ensure
# computation order when multiple threads write the same address. So the
# result of group_norm is non-deterministic when datatype is float.
# When inplace_atol is not None, the inplace check uses numpy.allclose
# to check inplace result instead of numpy.array_equal.
if inplace_atol is not None:
warnings.warn(
"inplace_atol should only be set when op doesn't ensure computational consistency, please check it!"
)
# Check inplace for given op, its grad op, its grad_grad op, etc.
# No effect on original OpTest
self.check_inplace_output_with_place(
place, no_check_set=no_check_set, inplace_atol=inplace_atol)
return outs, fetch_list
def _assert_is_close(self, numeric_grads, analytic_grads, names,
max_relative_error, msg_prefix):
for a, b, name in six.moves.zip(numeric_grads, analytic_grads, names):
            # The check asserts np.abs(a - b) / np.abs(a) < max_relative_error.
            # To avoid an overly strict relative check for near-zero gradients,
            # np.abs(a) is clamped to 1 for elements whose magnitude is below 1e-3,
            # which turns the comparison into an absolute-error check for those elements.
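            # e.g. with max_relative_error = 5e-3: a = 2e-4, b = 3e-4 gives
            # |a - b| / 1 = 1e-4, which passes, whereas the raw relative error
            # of 0.5 would spuriously fail for such near-zero gradients.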
abs_a = np.abs(a)
abs_a[abs_a < 1e-3] = 1
diff_mat = np.abs(a - b) / abs_a
max_diff = np.max(diff_mat)
def err_msg():
offset = np.argmax(diff_mat > max_relative_error)
return ("%s error, %s variable %s max gradient diff %f over limit %f, "
"the first error element is %d, expected %f, but got %f.") \
% (self.op_type, msg_prefix, name, max_diff, max_relative_error,
offset, a.flatten()[offset], b.flatten()[offset])
self.assertLessEqual(max_diff, max_relative_error, err_msg())
def _check_grad_helper(self):
self.infer_dtype_from_inputs_outputs(self.inputs, self.outputs)
self.__class__.op_type = self.op_type
self.__class__.exist_check_grad = True
def check_grad_with_place(self,
place,
inputs_to_check,
output_names,
no_grad_set=None,
numeric_grad_delta=0.005,
in_place=False,
max_relative_error=0.005,
user_defined_grads=None,
check_dygraph=True):
self.scope = core.Scope()
op_inputs = self.inputs if hasattr(self, "inputs") else dict()
op_outputs = self.outputs if hasattr(self, "outputs") else dict()
op_attrs = self.attrs if hasattr(self, "attrs") else dict()
self._check_grad_helper()
cache_list = None
if hasattr(self, "cache_name_list"):
cache_list = self.cache_name_list
self.op = create_op(
self.scope,
self.op_type,
op_inputs,
op_outputs,
op_attrs,
cache_list=cache_list)
if no_grad_set is None:
no_grad_set = set()
for input_to_check in inputs_to_check:
set_input(self.scope, self.op, self.inputs, place)
tensor_to_check = self.scope.find_var(input_to_check).get_tensor()
tensor_size = six.moves.reduce(lambda a, b: a * b,
tensor_to_check.shape(), 1)
if tensor_size < 100:
self.__class__.input_shape_is_large = False
        if not isinstance(output_names, list):
output_names = [output_names]
numeric_grads = user_defined_grads or [
self.get_numeric_gradient(
place,
self.scope,
self.op,
self.inputs,
input_to_check,
output_names,
delta=numeric_grad_delta,
in_place=in_place) for input_to_check in inputs_to_check
]
analytic_grads = self._get_gradient(inputs_to_check, place,
output_names, no_grad_set)
self._assert_is_close(numeric_grads, analytic_grads, inputs_to_check,
max_relative_error,
"Gradient Check On %s" % str(place))
@staticmethod
def _numpy_to_lod_tensor(np_value, lod, place):
tensor = core.LoDTensor()
tensor.set(np_value, place)
if lod is not None:
tensor.set_recursive_sequence_lengths(lod)
return tensor
@staticmethod
def np_dtype_to_fluid_dtype(input):
return input
@staticmethod
def fluid_dtype_to_np_dtype(self, dtype):
return dtype
@staticmethod
def np_value_to_fluid_value(input):
return input
def _get_gradient(self,
input_to_check,
place,
output_names,
no_grad_set,
parallel=False):
prog = Program()
block = prog.global_block()
self._append_ops(block)
loss = append_loss_ops(block, output_names)
param_grad_list = append_backward(
loss=loss, parameter_list=input_to_check, no_grad_set=no_grad_set)
inputs = self._get_inputs(block)
feed_dict = self.feed_var(inputs, place)
fetch_list = [g for p, g in param_grad_list]
return_results = [Manager().list() for _ in range(len(fetch_list))]
def closure(**kwargs):
role = kwargs['role']
pfl_mpc.init("aby3", role, "localhost", self.server, int(self.port))
#init_op = fluid.default_main_program().global_block().ops[0]
#_insert_init_op(program, init_op)
executor = Executor(place)
executor.run()
outs = executor.run(prog,
feed=feed_dict,
fetch_list=fetch_list)
for idx in range(len(fetch_list)):
return_results[idx].append(outs[idx])
ret = self.multi_party_run(target=closure)
self.assertEqual(ret[0], True)
outs = []
for idx in range(len(fetch_list)):
outs.append(aby3.reconstruct(np.array(return_results[idx])))
return outs
def get_numeric_gradient(self,
place,
scope,
op,
inputs,
input_to_check,
output_names,
delta=0.005,
in_place=False):
# FIXME: change this method by compile time concepts
set_input(scope, op, inputs, place)
def product(dim):
return six.moves.reduce(lambda a, b: a * b, dim, 1)
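        # the mpc tensor to perturb carries a leading dimension of 2 local shares in
        # 2^16 fixed point; reveal() rescales it to float and keeps the first share so
        # that the element-wise perturbation below operates on a plain LoDTensor.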
reveal = lambda x: (2**-16 * np.array(x))[0].astype('float32')
tensor_to_check = scope.find_var(input_to_check).get_tensor()
tensor_to_check = reveal(tensor_to_check)
tensor_to_check_ = fluid.LoDTensor()
tensor_to_check_.set(tensor_to_check, fluid.CPUPlace())
tensor_to_check = tensor_to_check_
tensor_size = product(tensor_to_check.shape())
tensor_to_check_dtype = tensor_to_check._dtype()
if tensor_to_check_dtype == core.VarDesc.VarType.FP32:
tensor_to_check_dtype = np.float32
elif tensor_to_check_dtype == core.VarDesc.VarType.FP64:
tensor_to_check_dtype = np.float64
elif tensor_to_check_dtype == core.VarDesc.VarType.FP16:
tensor_to_check_dtype = np.float16
            # cast delta to np.float16; it is automatically promoted to float32/float64 in later arithmetic
delta = np.array(delta).astype(np.float16)
else:
raise ValueError("Not supported data type " + str(
tensor_to_check_dtype))
def get_output():
sum = []
return_results = dict()
for name in (output_names):
return_results[name] = Manager().list()
def closure(**kwargs):
role = kwargs['role']
pfl_mpc.init("aby3", role, "localhost", self.server, int(self.port))
executor = Executor(place)
executor.run()
op.run(scope, place)
for name in output_names:
out = np.array(scope.find_var(name).get_tensor())
return_results[name].append(out[0])
ret = self.multi_party_run(target=closure)
self.assertEqual(ret[0], True)
for output_name in output_names:
plain = aby3.reconstruct(np.array(return_results[output_name]))
sum.append(plain.mean())
return tensor_to_check_dtype(np.array(sum).sum() / len(output_names))
gradient_flat = np.zeros(shape=(tensor_size, ), dtype=tensor_to_check_dtype)
def __get_elem__(tensor, i):
if tensor_to_check_dtype == np.float16:
numpy_tensor = np.array(tensor).astype(np.float16)
numpy_tensor = numpy_tensor.flatten()
return numpy_tensor[i]
elif tensor_to_check_dtype == np.float32:
return tensor._get_float_element(i)
else:
return tensor._get_double_element(i)
def __set_elem__(tensor, i, e):
if tensor_to_check_dtype == np.float16:
numpy_tensor = np.array(tensor).astype(np.float16)
shape = numpy_tensor.shape
numpy_tensor = numpy_tensor.flatten()
numpy_tensor[i] = e
numpy_tensor = numpy_tensor.reshape(shape)
tensor.set(numpy_tensor, place)
elif tensor_to_check_dtype == np.float32:
tensor._set_float_element(i, e)
else:
tensor._set_double_element(i, e)
# we only compute gradient of one element each time.
# we use a for loop to compute the gradient of every element.
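        # central difference: grad_i ~ (f(x_i + delta) - f(x_i - delta)) / (2 * delta),
        # where f() is the scalar produced by get_output() (the mean of the reconstructed outputs).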
for i in six.moves.xrange(tensor_size):
if in_place:
set_input(scope, op, inputs, place)
            # get one input element through its index i.
origin = __get_elem__(tensor_to_check, i)
# add delta to it, run op and then get the sum of the result tensor.
x_pos = origin + delta
__set_elem__(tensor_to_check, i, x_pos)
y_pos = get_output()
if in_place:
set_input(scope, op, inputs, place)
x_neg = origin - delta
__set_elem__(tensor_to_check, i, x_neg)
y_neg = get_output()
__set_elem__(tensor_to_check, i, origin)
gradient_flat[i] = (y_pos - y_neg) / delta / 2
return gradient_flat.reshape(tensor_to_check.shape())
#!/bin/bash
# set redis server ip and port for test
export TEST_REDIS_IP="test_redis_server_ip"
export TEST_REDIS_PORT="test_redis_port"
export TEST_REDIS_IP=${LOCALHOST}
export TEST_REDIS_PORT=${REDIS_PORT}
# unittest command
PYTHON_TEST="python -m unittest"
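# each module listed in TEST_MODULES below is presumably run as:
#   ${PYTHON_TEST} <module_name>
# inside the (truncated) loop at the end of this script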
# add the modules to test
TEST_MODULES=("test_datautils_aby3"
"test_model_encryption"
"test_datautils_align"
"test_op_add"
"test_op_sub"
......@@ -21,6 +20,11 @@ TEST_MODULES=("test_datautils_aby3"
"test_op_fc"
"test_op_relu"
"test_op_compare"
"test_input_embedding"
"test_op_softmax_with_cross_entropy"
"test_op_batch_norm"
"test_op_conv"
"test_op_pool"
)
# run unittest
......
......@@ -53,10 +53,10 @@ class TestDataUtilsAlign(unittest.TestCase):
party_1 = Process(target=self.run_align, args=(set_1, 1, endpoints, False))
party_2 = Process(target=self.run_align, args=(set_2, 2, endpoints, False))
party_0.start()
party_1.start()
party_2.start()
party_2.join()
party_0.start()
party_0.join()
if __name__ == '__main__':
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This module tests embedding op.
"""
import unittest
from multiprocessing import Manager
import numpy as np
import paddle.fluid as fluid
import paddle_fl.mpc as pfl_mpc
import paddle_fl.mpc.data_utils.aby3 as aby3
import test_op_base
class TestInput(test_op_base.TestOpBase):
def gen_one_hot(self, input, depth):
"""
example for generate mpc one hot tensor
"""
data_var = fluid.data(name='input_data', shape=input.shape, dtype='int64')
ret1 = fluid.input.one_hot(input=data_var, depth=3)
exe =fluid.Executor(place=fluid.CPUPlace())
exe.run(fluid.default_startup_program())
data = exe.run(program=fluid.default_main_program(),feed={'input_data': input}, fetch_list=[ret1])
return data[0]
def embedding_op(self, **kwargs):
role = kwargs['role']
#data = kwargs['data']
data_normal = kwargs['data_normal']
data_share = kwargs['data_share'][role]
w_data = kwargs['w_data']
w_data_share = kwargs['w_data_share'][role]
return_results = kwargs['return_results']
expected_result = kwargs['expect_results']
pfl_mpc.init("aby3", role, "localhost", self.server, int(self.port))
w_param_attrs = fluid.ParamAttr(name='emb_weight',
learning_rate=0.5,
initializer=pfl_mpc.initializer.NumpyArrayInitializer(w_data_share),
trainable=True)
w_param_attrs1 = fluid.ParamAttr(name='emb_weight1',
learning_rate=0.5,
initializer=fluid.initializer.NumpyArrayInitializer(w_data),
trainable=True)
input_shape = np.delete(data_share.shape, 0, 0)
data1 = pfl_mpc.data(name='input', shape=input_shape, dtype='int64')
data2 = fluid.data(name='input1', shape=data_normal.shape, dtype='int64')
math_embedding = fluid.input.embedding(input=data2, size=w_data.shape, param_attr=w_param_attrs1, dtype='float32')
op_embedding = pfl_mpc.input.embedding(input=data1, size=(input_shape[1],input_shape[0]), param_attr=w_param_attrs, dtype='int64')
exe = fluid.Executor(place=fluid.CPUPlace())
exe.run(fluid.default_startup_program())
results = exe.run(feed={'input': data_share, 'input1': data_normal}, fetch_list=[op_embedding, math_embedding])
return_results.append(results[0])
expected_result.append(results[1])
def test_embedding_op(self):
data = np.array([[1, 0, 0], [0, 1, 0]])
data_normal = np.array([0, 1]).astype('int64')
w_data = np.array([[1, 2], [2, 3], [3, 4]])
# data = self.gen_one_hot(data_normal, w_data.shape[0]).astype('int64')
data_share = aby3.make_shares(np.array(data))
data_all3shares = np.array([aby3.get_aby3_shares(data_share, i) for i in range(3)])
w_data_share = aby3.make_shares(w_data)
w_data_all3shares = np.array([aby3.get_aby3_shares(w_data_share, i) for i in range(3)])
return_results = Manager().list()
expect_results = Manager().list()
ret = self.multi_party_run(target=self.embedding_op,
data=data,
data_normal=data_normal,
w_data=w_data,
data_share=data_all3shares,
w_data_share=w_data_all3shares,
return_results=return_results,
expect_results=expect_results)
self.assertEqual(ret[0], True)
revealed = aby3.reconstruct(np.array(return_results))
# print("reveal: ", revealed)
self.assertTrue(np.allclose(revealed, expect_results[0], atol=1e-4))
def test_mpc_one_hot(self):
data = np.array([0, 1]).astype('int64')
ret = self.gen_one_hot(data, 3)
mpc_one_hot = aby3.make_shares(ret)
if __name__ == '__main__':
unittest.main()
......@@ -91,6 +91,28 @@ class TestOpAdd(test_op_base.TestOpBase):
self.assertEqual(results[0].shape, (2, 3, 4))
return_results.append(results[0])
def diff_dim_add_mid(self, **kwargs):
"""
Add with different dimensions.
:param kwargs:
:return:
"""
role = kwargs['role']
d_1 = kwargs['data_1'][role]
d_2 = kwargs['data_2'][role]
return_results = kwargs['return_results']
pfl_mpc.init("aby3", role, "localhost", self.server, int(self.port))
x = pfl_mpc.data(name='x', shape=[3, 4, 2], dtype='int64')
y = pfl_mpc.data(name='y', shape=[4], dtype='int64')
# math_add = x + y
math_add = pfl_mpc.layers.elementwise_add(x, y, axis=1)
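        # y (shape [4]) is broadcast along axis 1 of x (shape [3, 4, 2]); the fetched
        # result keeps the leading ABY3 share dimension, hence shape (2, 3, 4, 2).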
exe = fluid.Executor(place=fluid.CPUPlace())
results = exe.run(feed={'x': d_1, 'y': d_2}, fetch_list=[math_add])
self.assertEqual(results[0].shape, (2, 3, 4, 2))
return_results.append(results[0])
def test_elementwise_add(self):
data_1 = [np.array([[0, 1, 2, 3],
[0, 1, 2, 3]]).astype('int64')] * self.party_num
......@@ -117,6 +139,7 @@ class TestOpAdd(test_op_base.TestOpBase):
expect_results=expect_results)
self.assertEqual(ret[0], True)
def test_diff_dim_add(self):
data_1 = np.full((3, 4), fill_value=2)
data_2 = np.ones((4,))
......@@ -135,6 +158,29 @@ class TestOpAdd(test_op_base.TestOpBase):
expected_out = np.array([[3, 3, 3, 3], [3, 3, 3, 3], [3, 3, 3, 3]])
self.assertTrue(np.allclose(revealed, expected_out, atol=1e-4))
def test_diff_dim_add_mid(self):
data_1 = np.full((3, 4, 2), fill_value=2)
data_2 = np.ones((4,))
# print(data_1)
# print(data_2)
data_1_shares = aby3.make_shares(data_1)
data_2_shares = aby3.make_shares(data_2)
data_1_all3shares = np.array([aby3.get_aby3_shares(data_1_shares, i) for i in range(3)])
data_2_all3shares = np.array([aby3.get_aby3_shares(data_2_shares, i) for i in range(3)])
return_results = Manager().list()
ret = self.multi_party_run(target=self.diff_dim_add_mid,
data_1=data_1_all3shares,
data_2=data_2_all3shares,
return_results=return_results)
self.assertEqual(ret[0], True)
revealed = aby3.reconstruct(np.array(return_results))
# print(revealed)
expected_out = np.array([[[3, 3], [3, 3], [3, 3], [3, 3]],
[[3, 3], [3, 3], [3, 3], [3, 3]],
[[3, 3], [3, 3], [3, 3], [3, 3]]])
self.assertTrue(np.allclose(revealed, expected_out, atol=1e-4))
def test_elementwise_add_dim_error(self):
data_1 = [np.array([0, 1, 2, 3]).astype('int64')] * self.party_num
data_2 = [np.array([4, 3, 2, 1]).astype('int64')] * self.party_num
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This module tests batch_norm op.
"""
import unittest
from multiprocessing import Manager
import numpy as np
import paddle.fluid as fluid
import paddle_fl.mpc as pfl_mpc
import paddle_fl.mpc.data_utils.aby3 as aby3
import test_op_base
class TestOpBatchNorm(test_op_base.TestOpBase):
def batch_norm(self, **kwargs):
"""
        Run batch_norm on a shared input.
:param kwargs:
:return:
"""
role = kwargs['role']
d_1 = kwargs['data_1'][role]
return_results = kwargs['return_results']
pfl_mpc.init("aby3", role, "localhost", self.server, int(self.port))
x = pfl_mpc.data(name='x', shape=[2, 3], dtype='int64')
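        # assumption: 21845 ~= 2^16 / 3, i.e. each additive share of a plain scale
        # value of 1.0 in the 2^16 fixed-point encoding; the bias shares are zero.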
param_attr = fluid.ParamAttr(name='batch_norm_w', initializer=fluid.initializer.ConstantInitializer(value=21845))
bias_attr = fluid.ParamAttr(name='batch_norm_b', initializer=fluid.initializer.ConstantInitializer(value=0))
bn_out = pfl_mpc.layers.batch_norm(input=x, param_attr = param_attr, bias_attr = bias_attr)
exe = fluid.Executor(place=fluid.CPUPlace())
exe.run(fluid.default_startup_program())
results = exe.run(feed={'x': d_1}, fetch_list=[bn_out])
self.assertEqual(results[0].shape, (2, 2, 3))
return_results.append(results[0])
def test_batch_norm(self):
data_1 = np.array(
[[10, 10, 10], [50, 50, 50]]).astype('float32')
expected_out = np.array(
[[-1, -1, -1], [1, 1, 1]]).astype('float32')
# print("input data_1: {} \n".format(data_1))
data_1_shares = aby3.make_shares(data_1)
data_1_all3shares = np.array([aby3.get_aby3_shares(data_1_shares, i) for i in range(3)])
return_results = Manager().list()
ret = self.multi_party_run(target=self.batch_norm,
data_1=data_1_all3shares,
return_results=return_results)
self.assertEqual(ret[0], True)
revealed = aby3.reconstruct(np.array(return_results))
# print("revealed: {} \n".format(revealed))
# print("expected: {} \n".format(expected_out))
self.assertTrue(np.allclose(revealed, expected_out, atol=1e-2))
if __name__ == '__main__':
unittest.main()
......@@ -39,7 +39,7 @@ class TestOpCompare(test_op_base.TestOpBase):
pfl_mpc.init("aby3", role, "localhost", self.server, int(self.port))
x = pfl_mpc.data(name='x', shape=[3], dtype='int64')
y = fluid.data(name='y', shape=[1, 3], dtype='float32')
y = fluid.data(name='y', shape=[3], dtype='float32')
# todo: reshape y to [3]
op_gt = pfl_mpc.layers.greater_than(x=x, y=y)
math_gt = x > y
......@@ -47,7 +47,7 @@ class TestOpCompare(test_op_base.TestOpBase):
results = exe.run(feed={'x': d_1, 'y': d_2}, fetch_list=[op_gt, math_gt])
self.assertTrue(np.allclose(results[0], results[1]))
self.assertEqual(results[0].shape, (1, 3))
self.assertEqual(results[0].shape, (3, ))
self.assertTrue(np.allclose(results[0], expected_out))
def ge(self, **kwargs):
......@@ -63,14 +63,14 @@ class TestOpCompare(test_op_base.TestOpBase):
pfl_mpc.init("aby3", role, "localhost", self.server, int(self.port))
x = pfl_mpc.data(name='x', shape=[3], dtype='int64')
y = fluid.data(name='y', shape=[1, 3], dtype='float32')
y = fluid.data(name='y', shape=[3], dtype='float32')
op_ge = pfl_mpc.layers.greater_equal(x=x, y=y)
math_ge = x >= y
exe = fluid.Executor(place=fluid.CPUPlace())
results = exe.run(feed={'x': d_1, 'y': d_2}, fetch_list=[op_ge, math_ge])
self.assertTrue(np.allclose(results[0], results[1]))
self.assertEqual(results[0].shape, (1, 3))
self.assertEqual(results[0].shape, (3, ))
self.assertTrue(np.allclose(results[0], expected_out))
def lt(self, **kwargs):
......@@ -86,14 +86,14 @@ class TestOpCompare(test_op_base.TestOpBase):
pfl_mpc.init("aby3", role, "localhost", self.server, int(self.port))
x = pfl_mpc.data(name='x', shape=[3], dtype='int64')
y = fluid.data(name='y', shape=[1, 3], dtype='float32')
y = fluid.data(name='y', shape=[3], dtype='float32')
op_lt = pfl_mpc.layers.less_than(x=x, y=y)
math_lt = x < y
exe = fluid.Executor(place=fluid.CPUPlace())
results = exe.run(feed={'x': d_1, 'y': d_2}, fetch_list=[op_lt, math_lt])
self.assertTrue(np.allclose(results[0], results[1]))
self.assertEqual(results[0].shape, (1, 3))
self.assertEqual(results[0].shape, (3, ))
self.assertTrue(np.allclose(results[0], expected_out))
def le(self, **kwargs):
......@@ -109,14 +109,14 @@ class TestOpCompare(test_op_base.TestOpBase):
pfl_mpc.init("aby3", role, "localhost", self.server, int(self.port))
x = pfl_mpc.data(name='x', shape=[3], dtype='int64')
y = fluid.data(name='y', shape=[1, 3], dtype='float32')
y = fluid.data(name='y', shape=[3], dtype='float32')
op_le = pfl_mpc.layers.less_equal(x=x, y=y)
math_le = x <= y
exe = fluid.Executor(place=fluid.CPUPlace())
results = exe.run(feed={'x': d_1, 'y': d_2}, fetch_list=[op_le, math_le])
self.assertTrue(np.allclose(results[0], results[1]))
self.assertEqual(results[0].shape, (1, 3))
self.assertEqual(results[0].shape, (3, ))
self.assertTrue(np.allclose(results[0], expected_out))
def equal(self, **kwargs):
......@@ -132,14 +132,14 @@ class TestOpCompare(test_op_base.TestOpBase):
pfl_mpc.init("aby3", role, "localhost", self.server, int(self.port))
x = pfl_mpc.data(name='x', shape=[3], dtype='int64')
y = fluid.data(name='y', shape=[1, 3], dtype='float32')
y = fluid.data(name='y', shape=[3], dtype='float32')
op_eq = pfl_mpc.layers.equal(x=x, y=y)
math_eq = x == y
exe = fluid.Executor(place=fluid.CPUPlace())
results = exe.run(feed={'x': d_1, 'y': d_2}, fetch_list=[op_eq, math_eq])
self.assertTrue(np.allclose(results[0], results[1]))
self.assertEqual(results[0].shape, (1, 3))
self.assertEqual(results[0].shape, (3, ))
self.assertTrue(np.allclose(results[0], expected_out))
def not_equal(self, **kwargs):
......@@ -155,21 +155,21 @@ class TestOpCompare(test_op_base.TestOpBase):
pfl_mpc.init("aby3", role, "localhost", self.server, int(self.port))
x = pfl_mpc.data(name='x', shape=[3], dtype='int64')
y = fluid.data(name='y', shape=[1, 3], dtype='float32')
y = fluid.data(name='y', shape=[3], dtype='float32')
op_ne = pfl_mpc.layers.not_equal(x=x, y=y)
math_ne = x != y
exe = fluid.Executor(place=fluid.CPUPlace())
results = exe.run(feed={'x': d_1, 'y': d_2}, fetch_list=[op_ne, math_ne])
self.assertTrue(np.allclose(results[0], results[1]))
self.assertEqual(results[0].shape, (1, 3))
self.assertEqual(results[0].shape, (3, ))
self.assertTrue(np.allclose(results[0], expected_out))
def test_gt(self):
data_1 = [np.array([[65536, 65536, 65536],
[65536, 65536, 65536]]).astype('int64')] * self.party_num
data_2 = [np.array([[5, 3, 2]]).astype('float32')] * self.party_num
expect_results = [np.array([[0, 0, 1]])] * self.party_num
data_2 = [np.array([5, 3, 2]).astype('float32')] * self.party_num
expect_results = [np.array([0, 0, 1])] * self.party_num
ret = self.multi_party_run(target=self.gt,
data_1=data_1,
data_2=data_2,
......@@ -179,8 +179,8 @@ class TestOpCompare(test_op_base.TestOpBase):
def test_ge(self):
data_1 = [np.array([[65536, 65536, 65536],
[65536, 65536, 65536]]).astype('int64')] * self.party_num
data_2 = [np.array([[5, 3, 2]]).astype('float32')] * self.party_num
expect_results = [np.array([[0, 1, 1]])] * self.party_num
data_2 = [np.array([5, 3, 2]).astype('float32')] * self.party_num
expect_results = [np.array([0, 1, 1])] * self.party_num
ret = self.multi_party_run(target=self.ge,
data_1=data_1,
data_2=data_2,
......@@ -190,8 +190,8 @@ class TestOpCompare(test_op_base.TestOpBase):
def test_lt(self):
data_1 = [np.array([[65536, 65536, 65536],
[65536, 65536, 65536]]).astype('int64')] * self.party_num
data_2 = [np.array([[5, 3, 2]]).astype('float32')] * self.party_num
expect_results = [np.array([[1, 0, 0]])] * self.party_num
data_2 = [np.array([5, 3, 2]).astype('float32')] * self.party_num
expect_results = [np.array([1, 0, 0])] * self.party_num
ret = self.multi_party_run(target=self.lt,
data_1=data_1,
data_2=data_2,
......@@ -201,8 +201,8 @@ class TestOpCompare(test_op_base.TestOpBase):
def test_le(self):
data_1 = [np.array([[65536, 65536, 65536],
[65536, 65536, 65536]]).astype('int64')] * self.party_num
data_2 = [np.array([[5, 3, 2]]).astype('float32')] * self.party_num
expect_results = [np.array([[1, 1, 0]])] * self.party_num
data_2 = [np.array([5, 3, 2]).astype('float32')] * self.party_num
expect_results = [np.array([1, 1, 0])] * self.party_num
ret = self.multi_party_run(target=self.le,
data_1=data_1,
data_2=data_2,
......@@ -212,8 +212,8 @@ class TestOpCompare(test_op_base.TestOpBase):
def test_equal(self):
data_1 = [np.array([[65536, 65536, 65536],
[65536, 65536, 65536]]).astype('int64')] * self.party_num
data_2 = [np.array([[5, 3, 2]]).astype('float32')] * self.party_num
expect_results = [np.array([[0, 1, 0]])] * self.party_num
data_2 = [np.array([5, 3, 2]).astype('float32')] * self.party_num
expect_results = [np.array([0, 1, 0])] * self.party_num
ret = self.multi_party_run(target=self.equal,
data_1=data_1,
data_2=data_2,
......@@ -223,8 +223,8 @@ class TestOpCompare(test_op_base.TestOpBase):
def test_not_equal(self):
data_1 = [np.array([[65536, 65536, 65536],
[65536, 65536, 65536]]).astype('int64')] * self.party_num
data_2 = [np.array([[5, 3, 2]]).astype('float32')] * self.party_num
expect_results = [np.array([[1, 0, 1]])] * self.party_num
data_2 = [np.array([5, 3, 2]).astype('float32')] * self.party_num
expect_results = [np.array([1, 0, 1])] * self.party_num
ret = self.multi_party_run(target=self.not_equal,
data_1=data_1,
data_2=data_2,
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This module tests conv2d op.
"""
import unittest
from multiprocessing import Manager
import numpy as np
import test_op_base
from op_test import OpTest
import paddle_fl.mpc.data_utils.aby3 as aby3
import paddle.fluid as fluid
import paddle.fluid.core as core
def conv2d_forward_naive(input,
filter,
group,
conv_param,
padding_algorithm='EXPLICIT',
data_format='NCHW'):
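    # Plaintext reference convolution used to generate expected outputs for the MPC op:
    # pad the input, dilate the filter, then for every output location and group take
    # the sum of element-wise products between the input window and the filter.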
if padding_algorithm not in ["SAME", "VALID", "EXPLICIT"]:
raise ValueError("Unknown Attr(padding_algorithm): '%s'. "
"It can only be 'SAME' or 'VALID'." %
str(padding_algorithm))
if data_format not in ["NCHW", "NHWC"]:
raise ValueError("Unknown Attr(data_format): '%s' ."
"It can only be 'NCHW' or 'NHWC'." % str(data_format))
channel_last = (data_format == "NHWC")
if channel_last:
input = np.transpose(input, [0, 3, 1, 2])
in_n, in_c, in_h, in_w = input.shape
f_n, f_c, f_h, f_w = filter.shape
out_n = in_n
out_c = f_n
assert f_c * group == in_c
assert np.mod(out_c, group) == 0
sub_out_c = out_c // group
sub_f_n = f_n // group
stride, pad, dilation = conv_param['stride'], conv_param['pad'], conv_param[
'dilation']
# update pad and dilation
def _get_padding_with_SAME(input_shape, pool_size, pool_stride):
padding = []
for input_size, filter_size, stride_size in zip(input_shape, pool_size,
pool_stride):
out_size = int((input_size + stride_size - 1) / stride_size)
pad_sum = np.max((
(out_size - 1) * stride_size + filter_size - input_size, 0))
pad_0 = int(pad_sum / 2)
pad_1 = int(pad_sum - pad_0)
padding.append(pad_0)
padding.append(pad_1)
return padding
ksize = filter.shape[2:4]
if padding_algorithm == "VALID":
pad = [0, 0, 0, 0]
elif padding_algorithm == "SAME":
dilation = [1, 1]
input_data_shape = input.shape[2:4]
pad = _get_padding_with_SAME(input_data_shape, ksize, stride)
pad_h_0, pad_h_1 = pad[0], pad[0]
pad_w_0, pad_w_1 = pad[1], pad[1]
if len(pad) == 4:
pad_h_0, pad_h_1 = pad[0], pad[1]
pad_w_0, pad_w_1 = pad[2], pad[3]
out_h = 1 + (in_h + pad_h_0 + pad_h_1 - (dilation[0] *
(f_h - 1) + 1)) // stride[0]
out_w = 1 + (in_w + pad_w_0 + pad_w_1 - (dilation[1] *
(f_w - 1) + 1)) // stride[1]
out = np.zeros((out_n, out_c, out_h, out_w))
d_bolck_h = (dilation[0] * (f_h - 1) + 1)
d_bolck_w = (dilation[1] * (f_w - 1) + 1)
input_pad = np.pad(input, ((0, 0), (0, 0), (pad_h_0, pad_h_1),
(pad_w_0, pad_w_1)),
mode='constant',
constant_values=0)
filter_dilation = np.zeros((f_n, f_c, d_bolck_h, d_bolck_w))
filter_dilation[:, :, 0:d_bolck_h:dilation[0], 0:d_bolck_w:dilation[
1]] = filter
for i in range(out_h):
for j in range(out_w):
for g in range(group):
input_pad_masked = \
input_pad[:, g * f_c:(g + 1) * f_c,
i * stride[0]:i * stride[0] + d_bolck_h,
j * stride[1]:j * stride[1] + d_bolck_w]
f_sub = filter_dilation[g * sub_f_n:(g + 1) * sub_f_n, :, :, :]
# sub_f_n == sub_out_c
for k in range(sub_out_c):
                    # multiply corresponding elements, then sum over the window
out[:, g * sub_out_c + k, i, j] = \
np.sum(input_pad_masked * f_sub[k, :, :, :],
axis=(1, 2, 3))
if channel_last:
out = np.transpose(out, [0, 2, 3, 1])
return out, in_n, out_h, out_w, out_c
def create_test_channel_last_class(parent):
class TestChannelLastCase(parent):
def init_data_format(self):
self.data_format = "NHWC"
def init_test_case_2(self):
N, C, H, W = self.input_size
self.input_size = [N, H, W, C]
cls_name = "{0}_{1}".format(parent.__name__, "ChannelLast")
TestChannelLastCase.__name__ = cls_name
globals()[cls_name] = TestChannelLastCase
def create_test_padding_SAME_class(parent):
    class TestPaddingSAMECase(parent):
def init_paddings(self):
self.pad = [0, 0]
self.padding_algorithm = "SAME"
cls_name = "{0}_{1}".format(parent.__name__, "PaddingSAMEOp")
    TestPaddingSAMECase.__name__ = cls_name
    globals()[cls_name] = TestPaddingSAMECase
def create_test_padding_VALID_class(parent):
class TestPaddingVALIDCase(parent):
def init_paddings(self):
self.pad = [1, 1]
self.padding_algorithm = "VALID"
def test_check_grad(self):
error = 0.09
if parent.__name__ in ["TestConv2dOp_AsyPadding",
"TestWithStride_AsyPadding"]:
error = 0.14
elif parent.__name__ in ["TestWithInput1x1Filter1x1_AsyPadding"]:
error = 0.66
place = core.CPUPlace()
self.check_grad_with_place(
place, {'Input', 'Filter'},
'Output',
max_relative_error=error)
cls_name = "{0}_{1}".format(parent.__name__, "PaddingVALIDOp")
TestPaddingVALIDCase.__name__ = cls_name
globals()[cls_name] = TestPaddingVALIDCase
class TestConv2dOp(OpTest):
def setUp(self):
OpTest.setUp(self)
self.op_type = "mpc_conv2d"
self.data_format = "AnyLayout"
self.dtype = np.int64
self.init_kernel_type()
self.init_group()
self.init_dilation()
self.init_test_case()
conv2d_param = {
'stride': self.stride,
'pad': self.pad,
'dilation': self.dilations
}
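        # test-only sharing helper (assumption): encode x in 2^16 fixed point and split
        # it into three equal additive shares; each party keeps two of them, which is
        # why the shared tensors carry a leading dimension of 2.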
share = lambda x: np.array([x * 65536/3] * 2).astype('int64')
input = np.random.random(self.input_size)
filter = np.random.uniform(-1, 1, self.filter_size)
output, _, _, _, _ = conv2d_forward_naive(input, filter, self.groups,
conv2d_param)
input = share(input)
filter = share(filter)
self.inputs = {
'Input': OpTest.np_dtype_to_fluid_dtype(input),
'Filter': OpTest.np_dtype_to_fluid_dtype(filter)
}
self.attrs = {
'strides': self.stride,
'paddings': self.pad,
'groups': self.groups,
'dilations': self.dilations,
'data_format': self.data_format,
}
self.outputs = {'Output': output}
def test_check_output(self):
place = core.CPUPlace()
self.check_output_with_place(
place, atol=1e-3)
def test_check_grad(self):
place = core.CPUPlace()
self.check_grad_with_place(
place, {'Input', 'Filter'},
'Output',
max_relative_error=0.07)
def test_check_grad_no_filter(self):
place = core.CPUPlace()
self.check_grad_with_place(
place, ['Input'],
'Output',
max_relative_error=0.07,
no_grad_set=set(['Filter']))
def test_check_grad_no_input(self):
place = core.CPUPlace()
self.check_grad_with_place(
place, ['Filter'],
'Output',
max_relative_error=0.06,
no_grad_set=set(['Input']))
def init_test_case(self):
self.pad = [0, 0]
self.stride = [1, 1]
self.input_size = [2, 3, 5, 5] # NCHW
assert np.mod(self.input_size[1], self.groups) == 0
f_c = self.input_size[1] // self.groups
self.filter_size = [6, f_c, 3, 3]
def init_test_case_2(self):
pass
def init_dilation(self):
self.dilations = [1, 1]
def init_group(self):
self.groups = 1
def init_kernel_type(self):
pass
class TestWithPad(TestConv2dOp):
def init_test_case(self):
self.pad = [1, 1]
self.stride = [1, 1]
self.input_size = [2, 3, 5, 5] # NCHW
assert np.mod(self.input_size[1], self.groups) == 0
f_c = self.input_size[1] // self.groups
self.filter_size = [6, f_c, 3, 3]
class TestWithStride(TestConv2dOp):
def init_test_case(self):
self.pad = [1, 1]
self.stride = [2, 2]
self.input_size = [2, 3, 6, 6] # NCHW
assert np.mod(self.input_size[1], self.groups) == 0
f_c = self.input_size[1] // self.groups
self.filter_size = [6, f_c, 3, 3]
class TestWithGroup(TestConv2dOp):
def init_test_case(self):
self.pad = [0, 0]
self.stride = [1, 1]
self.input_size = [2, 3, 5, 5] # NCHW
self.group = 3
assert np.mod(self.input_size[1], self.groups) == 0
f_c = self.input_size[1] // self.groups
self.filter_size = [18, f_c, 3, 3]
class TestWith1x1(TestConv2dOp):
def init_test_case(self):
self.pad = [0, 0]
self.stride = [1, 1]
self.input_size = [2, 3, 5, 5] # NCHW
assert np.mod(self.input_size[1], self.groups) == 0
f_c = self.input_size[1] // self.groups
self.filter_size = [120, f_c, 1, 1]
def init_group(self):
self.groups = 3
def test_check_grad(self):
place = core.CPUPlace()
self.check_grad_with_place(
place, {'Input', 'Filter'},
'Output',
max_relative_error=0.6)
def test_check_grad_no_filter(self):
place = core.CPUPlace()
self.check_grad_with_place(
place, ['Input'],
'Output',
max_relative_error=0.9,
no_grad_set=set(['Filter']))
class TestWithDilation(TestConv2dOp):
def init_test_case(self):
self.pad = [0, 0]
self.stride = [1, 1]
self.input_size = [2, 3, 10, 10] # NCHW
assert np.mod(self.input_size[1], self.groups) == 0
f_c = self.input_size[1] // self.groups
self.filter_size = [12, f_c, 3, 3]
def init_dilation(self):
self.dilations = [2, 2]
def init_group(self):
self.groups = 3
class TestWithInput1x1Filter1x1(TestConv2dOp):
def init_test_case(self):
self.pad = [0, 0]
self.stride = [1, 1]
self.input_size = [100, 3, 1, 1] # NCHW
assert np.mod(self.input_size[1], self.groups) == 0
f_c = self.input_size[1] // self.groups
self.filter_size = [120, f_c, 1, 1]
def init_group(self):
self.groups = 3
def test_check_grad(self):
place = core.CPUPlace()
self.check_grad_with_place(
place, {'Input', 'Filter'},
'Output',
max_relative_error=0.75)
class TestConv2dOp_v2(OpTest):
def setUp(self):
self.op_type = "mpc_conv2d"
self.dtype = np.int64
self.init_kernel_type()
self.init_group()
self.init_dilation()
self.init_data_format()
self.init_test_case()
self.init_paddings()
self.init_test_case_2()
conv2d_param = {
'stride': self.stride,
'pad': self.pad,
'dilation': self.dilations
}
share = lambda x: np.array([x * 65536/3] * 2).astype('int64')
input = np.random.random(self.input_size)
filter = np.random.uniform(-1, 1, self.filter_size)
output, _, _, _, _ = conv2d_forward_naive(input, filter, self.groups,
conv2d_param, self.padding_algorithm, self.data_format)
input = share(input)
filter = share(filter)
self.inputs = {
'Input': OpTest.np_dtype_to_fluid_dtype(input),
'Filter': OpTest.np_dtype_to_fluid_dtype(filter)
}
self.attrs = {
'strides': self.stride,
'paddings': self.pad,
'padding_algorithm': self.padding_algorithm,
'groups': self.groups,
'dilations': self.dilations,
'data_format': self.data_format
}
self.outputs = {'Output': output}
def test_check_output(self):
place = core.CPUPlace()
self.check_output_with_place(
place, atol=1e-3)
def test_check_grad(self):
place = core.CPUPlace()
self.check_grad_with_place(
place, {'Input', 'Filter'},
'Output',
max_relative_error=0.14)
def test_check_grad_no_filter(self):
place = core.CPUPlace()
self.check_grad_with_place(
place, ['Input'],
'Output',
max_relative_error=0.13,
no_grad_set=set(['Filter']))
def test_check_grad_no_input(self):
place = core.CPUPlace()
self.check_grad_with_place(
place, ['Filter'],
'Output',
max_relative_error=0.7,
no_grad_set=set(['Input']))
def init_test_case(self):
self.pad = [0, 0]
self.stride = [1, 2]
self.input_size = [2, 3, 5, 5] # NCHW
assert np.mod(self.input_size[1], self.groups) == 0
f_c = self.input_size[1] // self.groups
self.filter_size = [6, f_c, 4, 3]
def init_dilation(self):
self.dilations = [1, 1]
def init_group(self):
self.groups = 1
def init_kernel_type(self):
pass
def init_paddings(self):
self.pad = [0, 0]
self.padding_algorithm = "EXPLICIT"
def init_data_format(self):
self.data_format = "NCHW"
def init_test_case_2(self):
pass
class TestConv2dOp_AsyPadding(TestConv2dOp_v2):
def init_paddings(self):
self.pad = [0, 0, 1, 2]
self.padding_algorithm = "EXPLICIT"
def test_check_grad(self):
place = core.CPUPlace()
self.check_grad_with_place(
place, {'Input', 'Filter'},
'Output',
max_relative_error=0.09)
class TestWithPad_AsyPadding(TestConv2dOp_v2):
def init_test_case(self):
self.stride = [1, 1]
self.input_size = [2, 3, 5, 5] # NCHW
assert np.mod(self.input_size[1], self.groups) == 0
f_c = self.input_size[1] // self.groups
self.filter_size = [6, f_c, 3, 3]
def init_paddings(self):
self.pad = [2, 1, 3, 2]
self.padding_algorithm = "EXPLICIT"
class TestWithStride_AsyPadding(TestConv2dOp_v2):
def init_test_case(self):
self.stride = [2, 2]
self.input_size = [2, 3, 6, 6] # NCHW
assert np.mod(self.input_size[1], self.groups) == 0
f_c = self.input_size[1] // self.groups
self.filter_size = [6, f_c, 3, 3]
def init_paddings(self):
self.pad = [2, 1, 3, 2]
self.padding_algorithm = "EXPLICIT"
class TestWithGroup_AsyPadding(TestConv2dOp_v2):
def init_test_case(self):
self.pad = [0, 0]
self.stride = [1, 2]
self.input_size = [2, 3, 5, 5] # NCHW
self.group = 3
assert np.mod(self.input_size[1], self.groups) == 0
f_c = self.input_size[1] // self.groups
self.filter_size = [24, f_c, 4, 3]
class TestWith1x1_AsyPadding(TestConv2dOp_v2):
def init_test_case(self):
self.stride = [1, 1]
self.input_size = [2, 3, 5, 5] # NCHW
assert np.mod(self.input_size[1], self.groups) == 0
f_c = self.input_size[1] // self.groups
self.filter_size = [120, f_c, 1, 1]
def init_group(self):
self.groups = 3
def init_paddings(self):
self.pad = [2, 2, 4, 0]
self.padding_algorithm = "EXPLICIT"
class TestWithDepthWise3x3_AsyPadding(TestConv2dOp_v2):
def init_test_case(self):
self.stride = [1, 1]
self.input_size = [3, 4, 10, 10] # NCHW
assert np.mod(self.input_size[1], self.groups) == 0
f_c = self.input_size[1] // self.groups
self.filter_size = [16, f_c, 3, 3]
def init_dilation(self):
self.dilations = [2, 2]
def init_group(self):
self.groups = 4
def init_paddings(self):
self.pad = [1, 3, 2, 1]
self.padding_algorithm = "EXPLICIT"
class TestWithDepthWise5x5_AsyPadding(TestConv2dOp_v2):
def init_test_case(self):
self.stride = [1, 1]
self.input_size = [2, 4, 10, 10] # NCHW
assert np.mod(self.input_size[1], self.groups) == 0
f_c = self.input_size[1] // self.groups
self.filter_size = [8, f_c, 5, 5]
def init_group(self):
self.groups = 4
def init_paddings(self):
self.pad = [0, 1, 1, 0]
self.padding_algorithm = "EXPLICIT"
class TestWithDepthWise7x7_AsyPadding(TestConv2dOp_v2):
def init_test_case(self):
self.stride = [2, 2]
self.input_size = [2, 8, 10, 10] # NCHW
assert np.mod(self.input_size[1], self.groups) == 0
f_c = self.input_size[1] // self.groups
self.filter_size = [16, f_c, 7, 7]
def init_group(self):
self.groups = 8
def init_paddings(self):
self.pad = [1, 3, 4, 1]
self.padding_algorithm = "EXPLICIT"
class TestWithDilation_AsyPadding(TestConv2dOp_v2):
def init_test_case(self):
self.stride = [1, 1]
self.input_size = [2, 3, 10, 10] # NCHW
assert np.mod(self.input_size[1], self.groups) == 0
f_c = self.input_size[1] // self.groups
self.filter_size = [24, f_c, 3, 3]
def init_dilation(self):
self.dilations = [2, 2]
def init_group(self):
self.groups = 3
def init_paddings(self):
self.pad = [0, 1, 3, 0]
self.padding_algorithm = "EXPLICIT"
class TestWithInput1x1Filter1x1_AsyPadding(TestConv2dOp_v2):
def init_test_case(self):
self.stride = [1, 1]
self.input_size = [40, 3, 1, 1] # NCHW
assert np.mod(self.input_size[1], self.groups) == 0
f_c = self.input_size[1] // self.groups
self.filter_size = [120, f_c, 1, 1]
def init_group(self):
self.groups = 3
def init_paddings(self):
self.pad = [0, 3, 4, 0]
self.padding_algorithm = "EXPLICIT"
def test_check_grad(self):
place = core.CPUPlace()
self.check_grad_with_place(
place, {'Input', 'Filter'},
'Output',
max_relative_error=0.7)
def test_check_grad_no_filter(self):
place = core.CPUPlace()
self.check_grad_with_place(
place, ['Input'],
'Output',
max_relative_error=0.7,
no_grad_set=set(['Filter']))
#---------- test SAME VALID -----------
create_test_padding_SAME_class(TestConv2dOp_AsyPadding)
create_test_padding_SAME_class(TestWithPad_AsyPadding)
create_test_padding_SAME_class(TestWithStride_AsyPadding)
create_test_padding_SAME_class(TestWithGroup_AsyPadding)
create_test_padding_SAME_class(TestWithInput1x1Filter1x1_AsyPadding)
create_test_padding_VALID_class(TestConv2dOp_AsyPadding)
create_test_padding_VALID_class(TestWithPad_AsyPadding)
create_test_padding_VALID_class(TestWithStride_AsyPadding)
create_test_padding_VALID_class(TestWithGroup_AsyPadding)
create_test_padding_VALID_class(TestWithInput1x1Filter1x1_AsyPadding)
# ------------ test channel last ---------
create_test_channel_last_class(TestConv2dOp_AsyPadding)
create_test_channel_last_class(TestWithPad_AsyPadding)
create_test_channel_last_class(TestWithGroup_AsyPadding)
create_test_channel_last_class(TestWith1x1_AsyPadding)
create_test_channel_last_class(TestWithInput1x1Filter1x1_AsyPadding)
if __name__ == '__main__':
unittest.main()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This module tests dynamic_gru op.
"""
import unittest
from multiprocessing import Manager
import numpy as np
import paddle.fluid as fluid
import paddle_fl.mpc as pfl_mpc
import paddle_fl.mpc.data_utils.aby3 as aby3
import test_op_base
class TestInput(test_op_base.TestOpBase):
def dyanmic_gru_op(self, **kwargs):
role = kwargs['role']
data = kwargs['data']
data_share = kwargs['data_share'][role]
weight = kwargs['weight']
weight_share = kwargs['weight_share'][role]
return_results = kwargs['return_results']
return_results_cheb = kwargs['return_results_cheb']
expected_result = kwargs['expect_results']
pfl_mpc.init("aby3", role, "localhost", self.server, int(self.port))
hidden_dim = 1
data_paddle = fluid.data(name='input_paddle', shape=[3, 3], dtype='float32', lod_level=1)
ldata_paddle = fluid.create_lod_tensor(data, [[3]], fluid.CPUPlace())
w_param_attrs = fluid.ParamAttr(name='gru_weight',
learning_rate=0.5,
initializer=fluid.initializer.NumpyArrayInitializer(weight),
trainable=True)
hidden_paddle = fluid.layers.dynamic_gru(input=data_paddle, size=hidden_dim, param_attr=w_param_attrs,
gate_activation='sigmoid', candidate_activation='relu')
data_mpc = fluid.data(name='input_mpc', shape=[3, 2, 3], dtype='int64', lod_level=1)
        # move the batch/sequence dimension to shape[0] (ahead of the share dimension)
data_share_trans = np.transpose(data_share, [1, 0, 2])
ldata_mpc = fluid.create_lod_tensor(data_share_trans, [[3]], fluid.CPUPlace())
w_param_attrs1 = fluid.ParamAttr(name='mpc_gru_weight',
learning_rate=0.5,
initializer=pfl_mpc.initializer.NumpyArrayInitializer(weight_share),
trainable=True)
w_param_attrs2 = fluid.ParamAttr(name='mpc_gru_weight_cheb',
learning_rate=0.5,
initializer=pfl_mpc.initializer.NumpyArrayInitializer(weight_share),
trainable=True)
hidden_mpc = pfl_mpc.layers.dynamic_gru(input=data_mpc, size=hidden_dim,
param_attr=w_param_attrs1)
hidden_mpc_cheb = pfl_mpc.layers.dynamic_gru(input=data_mpc, size=hidden_dim,
param_attr=w_param_attrs2, gate_activation='sigmoid_chebyshev')
exe = fluid.Executor(place=fluid.CPUPlace())
exe.run(fluid.default_startup_program())
results = exe.run(feed={'input_paddle': ldata_paddle, 'input_mpc': ldata_mpc},
fetch_list=[hidden_paddle, hidden_mpc, hidden_mpc_cheb], return_numpy=False)
return_results.append(np.array(results[1]))
return_results_cheb.append(np.array(results[2]))
expected_result.append(np.array(results[0]))
def test_dyanmic_gru_op(self):
data = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [-1.0, -2.0, -3.0]]).astype('float32')
data_share = aby3.make_shares(data)
data_all3shares = np.array([aby3.get_aby3_shares(data_share, i) for i in range(3)])
weight = np.array([[0.0, 0.0, 0.0]]).astype('float32')
weight_share = aby3.make_shares(weight)
weight_all3shares = np.array([aby3.get_aby3_shares(weight_share, i) for i in range(3)])
return_results = Manager().list()
return_results_cheb = Manager().list()
expect_results = Manager().list()
ret = self.multi_party_run(target=self.dyanmic_gru_op,
data=data,
data_share = data_all3shares,
weight=weight,
weight_share=weight_all3shares,
return_results=return_results,
return_results_cheb=return_results_cheb,
expect_results=expect_results)
self.assertEqual(ret[0], True)
revealed = aby3.reconstruct(np.array(return_results))
revealed_cheb = aby3.reconstruct(np.array(return_results_cheb))
print("expected:", expect_results[0])
print("reveal: ", revealed)
print("reveal_cheb: ", revealed_cheb)
self.assertTrue(np.allclose(revealed, expect_results[0], atol=1e-1*5))
self.assertTrue(np.allclose(revealed_cheb, expect_results[0], atol=1e-1*5))
if __name__ == '__main__':
unittest.main()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This module tests pool2d op.
"""
import unittest
from multiprocessing import Manager
import numpy as np
import paddle.fluid as fluid
import paddle_fl.mpc as pfl_mpc
import paddle_fl.mpc.data_utils.aby3 as aby3
import test_op_base
class TestOpPool2d(test_op_base.TestOpBase):
def pool2d(self, **kwargs):
"""
        Run pool2d on a shared input.
:param kwargs:
:return:
"""
role = kwargs['role']
d_1 = kwargs['data_1'][role]
return_results = kwargs['return_results']
pfl_mpc.init("aby3", role, "localhost", self.server, int(self.port))
x = pfl_mpc.data(name='x', shape=[1, 1, 4, 6], dtype='int64')
pool_out = pfl_mpc.layers.pool2d(input=x, pool_size=2, pool_stride=2)
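        # a 4x6 input with 2x2 pooling windows and stride 2 yields a 2x3 output map;
        # the leading 2 in the fetched shape below is the ABY3 share dimension.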
exe = fluid.Executor(place=fluid.CPUPlace())
exe.run(fluid.default_startup_program())
results = exe.run(feed={'x': d_1}, fetch_list=[pool_out])
self.assertEqual(results[0].shape, (2, 1, 1, 2, 3))
return_results.append(results[0])
def test_pool2d(self):
data_1 = np.array(
[[[[1, 2, 3, 4, 0, 100],
[5, 6, 7, 8, 0, 100],
[9, 10, 11, 12, 0, 200],
[13, 14, 15, 16, 0, 200]]]]).astype('float32')
expected_out = np.array(
[[[[6, 8, 100],
[14, 16, 200]]]]).astype('float32')
print("input data_1: {} \n".format(data_1))
data_1_shares = aby3.make_shares(data_1)
data_1_all3shares = np.array([aby3.get_aby3_shares(data_1_shares, i) for i in range(3)])
return_results = Manager().list()
ret = self.multi_party_run(target=self.pool2d,
data_1=data_1_all3shares,
return_results=return_results)
self.assertEqual(ret[0], True)
revealed = aby3.reconstruct(np.array(return_results))
#print("revealed: {} \n".format(revealed))
#print("expected: {} \n".format(expected_out))
self.assertTrue(np.allclose(revealed, expected_out, atol=1e-2))
if __name__ == '__main__':
unittest.main()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This module tests softmax_with_cross_entropy op.
"""
import unittest
from multiprocessing import Manager
import numpy as np
import paddle.fluid as fluid
import paddle_fl.mpc as pfl_mpc
import paddle_fl.mpc.data_utils.aby3 as aby3
import test_op_base
class TestOpSoftmaxWithCrossEntropy(test_op_base.TestOpBase):
def softmax_with_cross_entropy(self, **kwargs):
"""
        Run softmax_with_cross_entropy on shared inputs.
:param kwargs:
:return:
"""
role = kwargs['role']
d_1 = kwargs['data_1'][role]
d_2 = kwargs['data_2'][role]
return_results = kwargs['return_results']
pfl_mpc.init("aby3", role, "localhost", self.server, int(self.port))
x = pfl_mpc.data(name='x', shape=[2], dtype='int64')
y = pfl_mpc.data(name='y', shape=[2], dtype='int64')
cost, softmax = pfl_mpc.layers.softmax_with_cross_entropy(x, y, soft_label=True, return_softmax=True)
exe = fluid.Executor(place=fluid.CPUPlace())
results = exe.run(feed={'x': d_1, 'y': d_2}, fetch_list=[softmax])
self.assertEqual(results[0].shape, (2, 2))
return_results.append(results[0])
def test_softmax_with_cross_entropy(self):
data_1 = np.array(
[1, 1]).astype('float32')
data_2 = np.array(
[1, 0]).astype('float32')
expected_out = np.array(
[0.5, 0.5]).astype('float32')
#print("input data_1: {} \n".format(data_1))
data_1_shares = aby3.make_shares(data_1)
data_2_shares = aby3.make_shares(data_2)
data_1_all3shares = np.array([aby3.get_aby3_shares(data_1_shares, i) for i in range(3)])
data_2_all3shares = np.array([aby3.get_aby3_shares(data_2_shares, i) for i in range(3)])
return_results = Manager().list()
ret = self.multi_party_run(target=self.softmax_with_cross_entropy,
data_1=data_1_all3shares,
data_2=data_2_all3shares,
return_results=return_results)
self.assertEqual(ret[0], True)
revealed = aby3.reconstruct(np.array(return_results))
#print("revealed: {} \n".format(revealed))
#print("expected: {} \n".format(expected_out))
self.assertTrue(np.allclose(revealed, expected_out, atol=1e-4))
if __name__ == '__main__':
unittest.main()
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import numpy as np
import paddle.fluid.core as core
from paddle.fluid.op import Operator
def create_op(scope, op_type, inputs, outputs, attrs, cache_list=None):
kwargs = dict()
op_maker = core.op_proto_and_checker_maker
op_role_attr_name = op_maker.kOpRoleAttrName()
if op_role_attr_name not in attrs:
attrs[op_role_attr_name] = int(op_maker.OpRole.Forward)
def __create_var__(name, var_name):
scope.var(var_name).get_tensor()
kwargs[name].append(var_name)
for in_name, in_dup in Operator.get_op_inputs(op_type):
if in_name in inputs:
kwargs[in_name] = []
if in_dup:
sub_in = inputs[in_name]
for item in sub_in:
sub_in_name, _ = item[0], item[1]
__create_var__(in_name, sub_in_name)
else:
__create_var__(in_name, in_name)
    if cache_list is not None and isinstance(cache_list, list):
for name in cache_list:
kwargs[name] = []
scope.var(name)
kwargs[name].append(name)
for out_name, out_dup in Operator.get_op_outputs(op_type):
if out_name in outputs:
kwargs[out_name] = []
if out_dup:
sub_out = outputs[out_name]
for item in sub_out:
sub_out_name, _ = item[0], item[1]
__create_var__(out_name, sub_out_name)
else:
__create_var__(out_name, out_name)
for attr_name in Operator.get_op_attr_names(op_type):
if attr_name in attrs:
kwargs[attr_name] = attrs[attr_name]
return Operator(op_type, **kwargs)
def set_input(scope, op, inputs, place):
def __set_input__(var_name, var):
if isinstance(var, tuple) or isinstance(var, np.ndarray):
tensor = scope.find_var(var_name).get_tensor()
if isinstance(var, tuple):
tensor.set_recursive_sequence_lengths(var[1])
var = var[0]
tensor._set_dims(var.shape)
tensor.set(var, place)
elif isinstance(var, float):
scope.find_var(var_name).set_float(var)
elif isinstance(var, int):
scope.find_var(var_name).set_int(var)
for in_name, in_dup in Operator.get_op_inputs(op.type()):
if in_name in inputs:
if in_dup:
sub_in = inputs[in_name]
for item in sub_in:
sub_in_name, sub_in_val = item[0], item[1]
__set_input__(sub_in_name, sub_in_val)
else:
__set_input__(in_name, inputs[in_name])
def append_input_output(block, op_proto, np_list, is_input, dtype):
'''Insert VarDesc and generate Python variable instance'''
proto_list = op_proto.inputs if is_input else op_proto.outputs
def create_var(block, name, np_list, var_proto):
dtype = None
shape = None
lod_level = None
if name not in np_list:
assert var_proto.intermediate, "{} not found".format(name)
else:
            # infer the dtype from the numpy value.
np_value = np_list[name]
if isinstance(np_value, tuple):
dtype = np_value[0].dtype
                # the output shape and lod should be inferred from the input.
if is_input:
shape = list(np_value[0].shape)
lod_level = len(np_value[1])
else:
dtype = np_value.dtype
if is_input:
shape = list(np_value.shape)
lod_level = 0
return block.create_var(
dtype=dtype, shape=shape, lod_level=lod_level, name=name)
var_dict = {}
for var_proto in proto_list:
var_name = str(var_proto.name)
if (var_name not in np_list) and var_proto.dispensable:
continue
if is_input:
assert (var_name in np_list) or (var_proto.dispensable), \
"Missing {} as input".format(var_name)
if var_proto.duplicable:
assert isinstance(np_list[var_name], list), \
"Duplicable {} should be set as list".format(var_name)
var_list = []
for (name, np_value) in np_list[var_name]:
var_list.append(
create_var(block, name, {name: np_value}, var_proto))
var_dict[var_name] = var_list
else:
var_dict[var_name] = create_var(block, var_name, np_list, var_proto)
return var_dict
def append_loss_ops(block, output_names):
mean_inputs = list(map(block.var, output_names))
if len(mean_inputs) == 1:
loss = block.create_var(dtype=mean_inputs[0].dtype, shape=[2, 1])
op = block.append_op(
inputs={"X": mean_inputs}, outputs={"Out": loss}, type='mpc_mean')
op.desc.infer_var_type(block.desc)
op.desc.infer_shape(block.desc)
else:
avg_sum = []
for cur_loss in mean_inputs:
cur_avg_loss = block.create_var(dtype=cur_loss.dtype, shape=[1])
op = block.append_op(
inputs={"X": [cur_loss]},
outputs={"Out": [cur_avg_loss]},
type="mean")
op.desc.infer_var_type(block.desc)
op.desc.infer_shape(block.desc)
avg_sum.append(cur_avg_loss)
loss_sum = block.create_var(dtype=avg_sum[0].dtype, shape=[1])
op_sum = block.append_op(
inputs={"X": avg_sum}, outputs={"Out": loss_sum}, type='sum')
op_sum.desc.infer_var_type(block.desc)
op_sum.desc.infer_shape(block.desc)
loss = block.create_var(dtype=loss_sum.dtype, shape=[1])
op_loss = block.append_op(
inputs={"X": loss_sum},
outputs={"Out": loss},
type='scale',
attrs={'scale': 1.0 / float(len(avg_sum))})
op_loss.desc.infer_var_type(block.desc)
op_loss.desc.infer_shape(block.desc)
return loss
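These helpers mirror Paddle's classic low-level operator test utilities: `create_op` creates the scope variables and builds the `Operator`, `set_input` copies numpy values into the scope tensors, and `append_input_output`/`append_loss_ops` do the block-level equivalent. A minimal usage sketch follows; the import path `op_test` and the plain `elementwise_add` op are illustrative assumptions, not taken from this file.

```python
import numpy as np
import paddle.fluid.core as core

# assumed import path for the helpers defined above (adjust to the real file name)
from op_test import create_op, set_input

scope = core.Scope()
place = core.CPUPlace()

x = np.random.random((2, 3)).astype('float32')
y = np.random.random((2, 3)).astype('float32')
inputs = {'X': x, 'Y': y}
outputs = {'Out': np.zeros((2, 3), dtype='float32')}  # only the names are used to create scope vars

# build the operator (scope variables for X, Y and Out are created here) ...
op = create_op(scope, 'elementwise_add', inputs, outputs, attrs={})
# ... then copy the numpy inputs into the scope tensors
set_input(scope, op, inputs, place)

# run the op directly against the scope and read the result back
op.run(scope, place)
out = np.array(scope.find_var('Out').get_tensor())
assert np.allclose(out, x + y)
```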
......@@ -44,11 +44,13 @@ trainer = FLTrainerFactory().create_fl_trainer(job)
trainer._current_ep = "127.0.0.1:{}".format(9000 + trainer_id)
place = fluid.CPUPlace()
trainer.start(place)
print(trainer._scheduler_ep, trainer._current_ep)
print("scheduler_ep is {}, current_ep is {}".format(trainer._scheduler_ep, trainer._current_ep))
output_folder = "fl_model"
epoch_id = 0
while not trainer.stop():
print("batch %d start train" % (epoch_id))
if epoch_id > 15:
break
print("{} Epoch {} start train".format(time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())), epoch_id))
train_step = 0
for data in reader():
trainer.run(feed=data, fetch=[])
......
#!/bin/bash
unset http_proxy
unset https_proxy
python fl_master.py
ps -ef | grep -E fl_ | grep -v grep | awk '{print $2}' | xargs kill -9
log_dir=${1:-"logs"}
mkdir -p ${log_dir}
python fl_master.py > ${log_dir}/master.log 2>&1 &
sleep 2
python -u fl_scheduler.py > scheduler.log &
python -u fl_scheduler.py > ${log_dir}/scheduler.log 2>&1 &
sleep 5
python -u fl_server.py >server0.log &
python -u fl_server.py > ${log_dir}/server0.log 2>&1 &
sleep 2
for ((i=0;i<2;i++))
do
python -u fl_trainer.py $i >trainer$i.log &
python -u fl_trainer.py $i > ${log_dir}/trainer$i.log 2>&1 &
sleep 2
done
......@@ -20,6 +20,7 @@ import paddle
import paddle.fluid as fluid
import logging
import math
import time
logging.basicConfig(
filename="test.log",
......@@ -72,9 +73,9 @@ epoch_id = 0
step = 0
while not trainer.stop():
epoch_id += 1
if epoch_id > 40:
if epoch_id > 10:
break
print("epoch %d start train" % (epoch_id))
print("{} Epoch {} start train".format(time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())), epoch_id))
for step_id, data in enumerate(train_reader()):
acc = trainer.run(feeder.feed(data), fetch=["accuracy_0.tmp_0"])
step += 1
......
#!/bin/bash
unset http_proxy
unset https_proxy
python fl_master.py
ps -ef | grep -E fl_ | grep -v grep | awk '{print $2}' | xargs kill -9
log_dir=${1:-"logs"}
mkdir -p ${log_dir}
python fl_master.py > ${log_dir}/master.log 2>&1 &
sleep 2
python -u fl_scheduler.py >scheduler.log &
python -u fl_scheduler.py > ${log_dir}/scheduler.log 2>&1 &
sleep 5
python -u fl_server.py > ${log_dir}/server0.log 2>&1 &
sleep 2
python -u fl_server.py >server0.log &
sleep 2
python -u fl_trainer.py 0 >trainer0.log &
sleep 2
python -u fl_trainer.py 1 >trainer1.log &
sleep 2
python -u fl_trainer.py 2 >trainer2.log &
sleep 2
python -u fl_trainer.py 3 >trainer3.log &
for ((i=0;i<4;i++))
do
python -u fl_trainer.py $i > ${log_dir}/trainer$i.log 2>&1 &
sleep 2
done
......@@ -21,6 +21,7 @@ import paddle
import paddle.fluid as fluid
import logging
import math
import time
logging.basicConfig(
filename="test.log",
......@@ -60,7 +61,7 @@ def train_test(train_test_program, train_test_feed, train_test_reader):
epoch_id = 0
step = 0
epoch = 3000
epoch = 10
count_by_step = False
if count_by_step:
output_folder = "model_node%d" % trainer_id
......@@ -72,7 +73,7 @@ while not trainer.stop():
epoch_id += 1
if epoch_id > epoch:
break
print("epoch %d start train" % (epoch_id))
print("{} Epoch {} start train".format(time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())), epoch_id))
#train_data,test_data= data_generater(trainer_id,inner_step=trainer._step,batch_size=64,count_by_step=count_by_step)
train_reader = paddle.batch(
paddle.reader.shuffle(
......@@ -97,7 +98,6 @@ while not trainer.stop():
acc = trainer.run(feeder.feed(data), fetch=["accuracy_0.tmp_0"])
step += 1
count += 1
print(count)
if count % trainer._step == 0:
break
# print("acc:%.3f" % (acc[0]))
......
#!/bin/bash
unset http_proxy
unset https_proxy
#killall python
python fl_master.py
ps -ef | grep -E fl_ | grep -v grep | awk '{print $2}' | xargs kill -9
log_dir=${1:-"logs"}
mkdir -p ${log_dir}
python fl_master.py > ${log_dir}/master.log 2>&1 &
sleep 2
python -u fl_scheduler.py >scheduler.log &
sleep 2
python -u fl_server.py >server0.log &
python -u fl_scheduler.py > ${log_dir}/scheduler.log 2>&1 &
sleep 5
python -u fl_server.py > ${log_dir}/server0.log 2>&1 &
sleep 2
for ((i=0;i<4;i++))
do
python -u fl_trainer.py $i >trainer$i.log &
python -u fl_trainer.py $i > ${log_dir}/trainer$i.log 2>&1 &
sleep 2
done
......@@ -17,6 +17,10 @@ pip install paddle_fl
#### How to save a program
```sh
python program_saver.py
```
In program_saver.py, you can define a model and save the program into 'load_file'.
```python
......
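# --- Illustrative sketch only: the real program_saver.py body is elided above.
# --- Assuming the standard Paddle 1.8 fluid API, a model can be defined and its
# --- inference program saved into the 'load_file' directory like this:
import paddle.fluid as fluid

img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
prediction = fluid.layers.fc(input=img, size=10, act='softmax')
cost = fluid.layers.cross_entropy(input=prediction, label=label)
avg_cost = fluid.layers.mean(cost)

exe = fluid.Executor(fluid.CPUPlace())
exe.run(fluid.default_startup_program())

# persist the program so fl_master.py can later load it from 'load_file'
fluid.io.save_inference_model(dirname='load_file',
                              feeded_var_names=['img'],
                              target_vars=[prediction],
                              executor=exe)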
......@@ -20,6 +20,7 @@ import paddle
import paddle.fluid as fluid
import logging
import math
import time
logging.basicConfig(
filename="test.log",
......@@ -67,9 +68,9 @@ epoch_id = 0
step = 0
while not trainer.stop():
epoch_id += 1
if epoch_id > 40:
if epoch_id > 10:
break
print("epoch %d start train" % (epoch_id))
print("{} Epoch {} start train".format(time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())), epoch_id))
for step_id, data in enumerate(train_reader()):
acc = trainer.run(feeder.feed(data), fetch=["accuracy_0.tmp_0"])
step += 1
......
#!/bin/bash
unset http_proxy
unset https_proxy
python program_saver.py
ps -ef | grep -E fl_ | grep -v grep | awk '{print $2}' | xargs kill -9
if [ ! -d load_file ]; then
python program_saver.py
fi
python fl_master.py
sleep 2
python -u fl_scheduler.py >scheduler.log &
sleep 2
python -u fl_server.py >server0.log &
sleep 2
python -u fl_trainer.py 0 >trainer0.log &
log_dir=${1:-"logs"}
mkdir -p ${log_dir}
python fl_master.py > ${log_dir}/master.log 2>&1 &
sleep 2
python -u fl_trainer.py 1 > trainer1.log &
python -u fl_scheduler.py > ${log_dir}/scheduler.log 2>&1 &
sleep 5
python -u fl_server.py > ${log_dir}/server0.log 2>&1 &
sleep 2
for ((i=0;i<2;i++))
do
python -u fl_trainer.py $i > ${log_dir}/trainer$i.log 2>&1 &
sleep 2
done
......@@ -20,6 +20,8 @@ import numpy as np
import sys
import os
import logging
import time
logging.basicConfig(
filename="test.log",
filemode="w",
......@@ -43,10 +45,9 @@ r = Gru4rec_Reader()
train_reader = r.reader(train_file_dir, place, batch_size=125)
output_folder = "model_node4"
step_i = 0
epoch_i = 0
while not trainer.stop():
step_i += 1
print("batch %d start train" % (step_i))
epoch_i += 1
train_step = 0
for data in train_reader():
#print(np.array(data['src_wordseq']))
......@@ -56,10 +57,10 @@ while not trainer.stop():
break
avg_ppl = np.exp(ret_avg_cost[0])
newest_ppl = np.mean(avg_ppl)
print("ppl:%.3f" % (newest_ppl))
save_dir = (output_folder + "/epoch_%d") % step_i
print("{} Epoch {} start train, train_step {}, ppl {}".format (time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())), epoch_i, train_step, newest_ppl))
save_dir = (output_folder + "/epoch_%d") % epoch_i
if trainer_id == 0:
print("start save")
trainer.save_inference_program(save_dir)
if step_i >= 40:
if epoch_i >= 5:
break
#!/bin/bash
unset http_proxy
unset https_proxy
python fl_master.py
ps -ef | grep -E fl_ | grep -v grep | awk '{print $2}' | xargs kill -9
if [ ! -d mid_data ];then
sh download.sh
fi
log_dir=${1:-"logs"}
mkdir -p ${log_dir}
python fl_master.py > ${log_dir}/master.log 2>&1 &
sleep 2
python -u fl_scheduler.py >scheduler.log &
python -u fl_server.py >server0.log &
python -u fl_scheduler.py > ${log_dir}/scheduler.log 2>&1 &
sleep 5
python -u fl_server.py > ${log_dir}/server0.log 2>&1 &
sleep 2
python -u fl_trainer.py 0 >trainer0.log &
sleep 2
python -u fl_trainer.py 1 >trainer1.log &
sleep 2
python -u fl_trainer.py 2 >trainer2.log &
sleep 2
python -u fl_trainer.py 3 >trainer3.log &
for ((i=0;i<4;i++))
do
python -u fl_trainer.py $i > ${log_dir}/trainer$i.log 2>&1 &
sleep 2
done
......@@ -84,21 +84,16 @@ def train_test(train_test_program, train_test_feed, train_test_reader):
# for test
while not trainer.stop():
epoch_id += 1
print("epoch %d start train" % (epoch_id))
for data in train_reader():
step_i += 1
trainer.step_id = step_i
accuracy, = trainer.run(feed=feeder.feed(data),
fetch=["accuracy_0.tmp_0"])
if step_i % 100 == 0:
print("Epoch: {0}, step: {1}, accuracy: {2}".format(
print("{} Epoch {} start train, step: {}, accuracy: {}".format(time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())),
epoch_id, step_i, accuracy[0]))
print(step_i)
avg_loss_val, acc_val = train_test(
train_test_program=test_program,
train_test_reader=test_reader,
......@@ -106,7 +101,7 @@ while not trainer.stop():
print("Test with Epoch %d, avg_cost: %s, acc: %s" %
(epoch_id, avg_loss_val, acc_val))
if epoch_id > 40:
if epoch_id > 5:
break
if epoch_id % 5 == 0:
trainer.save_inference_program(output_folder)
#!/bin/bash
unset http_proxy
unset https_proxy
ps -ef | grep -E fl_ | grep -v grep | awk '{print $2}' | xargs kill -9
if [ ! -d log ];then
mkdir log
fi
log_dir=${1:-"logs"}
mkdir -p ${log_dir}
python fl_master.py
python fl_master.py > ${log_dir}/master.log 2>&1 &
sleep 2
python -u fl_server.py >log/server0.log &
python -u fl_scheduler.py > ${log_dir}/scheduler.log 2>&1 &
sleep 5
python -u fl_server.py > ${log_dir}/server0.log 2>&1 &
sleep 2
python -u fl_scheduler.py > log/scheduler.log &
sleep 2
python -u fl_trainer.py 0 >log/trainer0.log &
sleep 2
python -u fl_trainer.py 1 >log/trainer1.log &
for ((i=0;i<2;i++))
do
python -u fl_trainer.py $i > ${log_dir}/trainer$i.log 2>&1 &
sleep 2
done