Commit 383137c8 authored by: J jhjiangcs

add some mpc ops and fix some bugs.

Parent 6c66b67b
...@@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.15)
project(PaddleEncrypted)
add_compile_options(-msse4.2 -fPIC -DPADDLE_WITH_MKLDNN -O2)
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
set(CMAKE_CXX_STANDARD 11)
...@@ -57,6 +57,10 @@ option(WITH_TESTING "Compile with unit testing" ON)
option(WITH_PSI "Compile with psi lib" ON)
option(USE_AES_NI "Compile with AES NI" ON)
option(USE_OPENMP "Compile with OpenMP" ON)
########################### the project build part ###############################
message(STATUS "Using paddlepaddle installation of ${paddle_version}")
message(STATUS "paddlepaddle include directory: ${PADDLE_INCLUDE}")
...@@ -70,6 +74,15 @@ include_directories(.)
include_directories(${PADDLE_INCLUDE})
include_directories(${PADDLE_INCLUDE}/third_party)

if (USE_AES_NI)
    add_compile_definitions(USE_AES_NI)
    add_compile_options(-maes)
endif (USE_AES_NI)

if (USE_OPENMP)
    add_compile_options(-fopenmp)
    find_package(OpenMP REQUIRED)
endif(USE_OPENMP)

add_subdirectory(core/privc3)
add_subdirectory(core/paddlefl_mpc/mpc_protocol)
......
add_compile_options(-msse4.2 -maes)
set(PYBIND_SRCS
    "./data_utils.cc"
)
if (NOT PYTHON_INCLUDE_DIRS)
    find_package(PythonLibs REQUIRED)
endif()
......
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <atomic>
#include <set>
#include <string>
...@@ -21,8 +24,8 @@
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>

#include "core/paddlefl_mpc/mpc_protocol/aby3_operators.h"
#include "core/privc3/fixedpoint_util.h"
#include "core/psi/psi_api.h"

namespace py = pybind11;
...@@ -30,68 +33,87 @@ namespace py = pybind11;
namespace aby3 {

// split plaintext into three shares.
template<typename T, size_t N>
py::array_t<T> share(double input) {
    size_t share_num = 3;
    auto shares = py::array_t<T>(share_num);
    py::buffer_info shares_buf = shares.request();
    T* shares_buf_ptr = (T*)shares_buf.ptr;
    T* ret_ptr[share_num];
    for (size_t i = 0; i < share_num; ++i) {
        ret_ptr[i] = &shares_buf_ptr[i];
    }

    FixedPointUtil<T, N>::share(input, ret_ptr);

    return shares;
}

// combine three shares to reveal plaintext.
template<typename T, size_t N>
double reveal(py::array_t<T> shares) {
    size_t share_num = 3;
    py::buffer_info shares_buf = shares.request();
    T* shares_buf_ptr = (T*)shares_buf.ptr;
    T* ret[share_num];

    for (size_t idx = 0; idx < share_num; ++idx) {
        ret[idx] = &shares_buf_ptr[idx];
    }

    double result = FixedPointUtil<T, N>::reveal(ret);

    return result;
}

// call psi_send
int send_psi(int port, const std::set<std::string>& input) {
    std::atomic<int> prog(0);
    return psi::psi_send(port, input, &prog);
}

// call psi_recv
std::vector<std::string> recv_psi(const std::string& remote_ip,
                                  int port,
                                  const std::set<std::string>& input) {
    std::vector<std::string> output;
    std::atomic<int> prog(0);
    int ret = psi::psi_recv(remote_ip, port, input, &output, &prog);
    if (ret != 0) {
        output.clear();
        return output;
    }
    return output;
}
PYBIND11_MODULE(mpc_data_utils, m)
{
    // optional module docstring
    m.doc() = "pybind11 paddle-mpc plugin: data_utils (share, reveal, psi)";

    m.def("share", &share<long long, paddle::mpc::ABY3_SCALING_FACTOR>,
          "split plaintext into three shares.");
    m.def("reveal", &reveal<long long, paddle::mpc::ABY3_SCALING_FACTOR>,
          "combine three shares to reveal plaintext.");
    m.def("send_psi", &send_psi, "Send input in two party PSI.");
    m.def("recv_psi", &recv_psi,
          "Send input and return PSI result as output in two party PSI.");

    m.attr("mpc_one_share") = (1 << paddle::mpc::ABY3_SCALING_FACTOR) / 3;
}

} // namespace aby3
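For reference, the arithmetic behind share(), reveal() and the mpc_one_share attribute above is plain fixed-point secret sharing: a value is encoded as its fixed-point representation with scaling factor ABY3_SCALING_FACTOR (16) and split into three shares that sum back to that encoding. A minimal standalone sketch, assuming additive 3-out-of-3 sharing modulo 2^64; plain_share/plain_reveal are illustrative names, not the FixedPointUtil API, and the real library may draw randomness and lay out shares differently.

// Minimal sketch of the fixed-point sharing arithmetic (assumption: additive
// 3-of-3 sharing over 2^64; not the actual FixedPointUtil implementation).
#include <array>
#include <cstdint>
#include <iostream>
#include <random>

constexpr size_t kScalingFactor = 16;  // same value as ABY3_SCALING_FACTOR

std::array<uint64_t, 3> plain_share(double input) {
    // encode as fixed point: truncation of input * 2^16
    uint64_t fixed = static_cast<uint64_t>(static_cast<int64_t>(input * (1ULL << kScalingFactor)));
    std::mt19937_64 rng(std::random_device{}());
    std::array<uint64_t, 3> shares;
    shares[0] = rng();
    shares[1] = rng();
    shares[2] = fixed - shares[0] - shares[1];  // wraps modulo 2^64
    return shares;
}

double plain_reveal(const std::array<uint64_t, 3>& shares) {
    uint64_t fixed = shares[0] + shares[1] + shares[2];  // wraps modulo 2^64
    return static_cast<double>(static_cast<int64_t>(fixed)) / (1ULL << kScalingFactor);
}

int main() {
    auto s = plain_share(3.25);
    std::cout << plain_reveal(s) << std::endl;  // prints 3.25
    // mpc_one_share above equals (1 << 16) / 3, i.e. roughly one third of the
    // fixed-point encoding of 1.0, so three such shares reveal approximately 1.0.
    return 0;
}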
add_compile_options(-msse4.2 -maes)
set(PROTO_SRCS
    "./aby3_protocol.cc"
    "./mesh_network.cc"
...@@ -17,3 +15,5 @@ target_link_libraries(mpc_protocol fluid_framework gloo hiredis privc3)
cc_test(mesh_network_test SRCS mesh_network_test.cc DEPS mpc_protocol)
cc_test(mpc_protocol_test SRCS mpc_protocol_test.cc DEPS mpc_protocol)
cc_test(mpc_instance_test SRCS mpc_instance_test.cc DEPS mpc_protocol)
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

// Description: implementations of each virtual op according to ABY3 protocol

...@@ -21,9 +21,9 @@
#include "context_holder.h"
#include "mpc_operators.h"
#include "paddle/fluid/framework/tensor.h"
#include "core/privc3/boolean_tensor.h"
#include "core/privc3/circuit_context.h"
#include "core/privc3/fixedpoint_tensor.h"
#include "core/privc3/paddle_tensor.h"

namespace paddle {
...@@ -32,259 +32,344 @@ namespace mpc {
using paddle::framework::Tensor;
using aby3::CircuitContext;

// TODO: decide scaling factor
const size_t ABY3_SCALING_FACTOR = FIXED_POINTER_SCALING_FACTOR;

using FixedTensor = aby3::FixedPointTensor<int64_t, ABY3_SCALING_FACTOR>;
using BoolTensor = aby3::BooleanTensor<int64_t>;
using PaddleTensor = aby3::PaddleTensor<int64_t>;

class Aby3OperatorsImpl : public MpcOperators {
public:

    void add(const Tensor *lhs, const Tensor *rhs, Tensor *out) override {
        auto lhs_tuple = from_tensor(lhs);
        auto rhs_tuple = from_tensor(rhs);
        auto out_tuple = from_tensor(out);

        auto lhs_ = std::get<0>(lhs_tuple).get();
        auto rhs_ = std::get<0>(rhs_tuple).get();
        auto out_ = std::get<0>(out_tuple).get();

        lhs_->add(rhs_, out_);
    }

    // TODO: override
    void sub(const Tensor *lhs, const Tensor *rhs, Tensor *out) override {
        auto lhs_tuple = from_tensor(lhs);
        auto rhs_tuple = from_tensor(rhs);
        auto out_tuple = from_tensor(out);

        auto lhs_ = std::get<0>(lhs_tuple).get();
        auto rhs_ = std::get<0>(rhs_tuple).get();
        auto out_ = std::get<0>(out_tuple).get();

        lhs_->sub(rhs_, out_);
    }

    void neg(const Tensor *op, Tensor *out) override {
        auto op_tuple = from_tensor(op);
        auto out_tuple = from_tensor(out);

        auto op_ = std::get<0>(op_tuple).get();
        auto out_ = std::get<0>(out_tuple).get();

        op_->negative(out_);
    }

    void sum(const Tensor *op, Tensor *out) override {
        auto op_tuple = from_tensor(op);
        auto out_tuple = from_tensor(out);

        auto op_ = std::get<0>(op_tuple).get();
        auto out_ = std::get<0>(out_tuple).get();

        op_->sum(out_);
    }

    void mul(const Tensor *lhs, const Tensor *rhs, Tensor *out) override {
        auto lhs_tuple = from_tensor(lhs);
        auto rhs_tuple = from_tensor(rhs);
        auto out_tuple = from_tensor(out);

        auto lhs_ = std::get<0>(lhs_tuple).get();
        auto rhs_ = std::get<0>(rhs_tuple).get();
        auto out_ = std::get<0>(out_tuple).get();

        lhs_->mul(rhs_, out_);
    }

    void matmul(const Tensor *lhs, const Tensor *rhs, Tensor *out) override {
        auto lhs_tuple = from_tensor(lhs);
        auto rhs_tuple = from_tensor(rhs);
        auto out_tuple = from_tensor(out);

        auto lhs_ = std::get<0>(lhs_tuple).get();
        auto rhs_ = std::get<0>(rhs_tuple).get();
        auto out_ = std::get<0>(out_tuple).get();

        lhs_->mat_mul(rhs_, out_);
    }

    void scale(const Tensor *lhs, const double factor, Tensor *out) override {
        auto lhs_tuple = from_tensor(lhs);
        auto out_tuple = from_tensor(out);

        auto lhs_ = std::get<0>(lhs_tuple).get();
        auto out_ = std::get<0>(out_tuple).get();

        PaddleTensor scale_tensor(ContextHolder::device_ctx());
        scale_tensor.from_float_point_scalar(factor, lhs_->shape(), ABY3_SCALING_FACTOR);

        lhs_->mul(&scale_tensor, out_);
    }

    void relu(const Tensor *op, Tensor *out) override {
        auto op_tuple = from_tensor(op);
        auto out_tuple = from_tensor(out);

        auto op_ = std::get<0>(op_tuple).get();
        auto out_ = std::get<0>(out_tuple).get();

        op_->relu(out_);
    }

    void relu_with_derivative(const Tensor *op, Tensor *out, Tensor *derivative) override {
        auto op_tuple = from_tensor(op);
        auto out_tuple = from_tensor(out);
        auto der_tuple = from_tensor<BoolTensor>(derivative);

        auto op_ = std::get<0>(op_tuple).get();
        auto out_ = std::get<0>(out_tuple).get();
        auto der_ = std::get<0>(der_tuple).get();

        op_->relu_with_derivative(out_, der_);
    }

    void sigmoid(const Tensor *op, Tensor *out) override {
        auto op_tuple = from_tensor(op);
        auto out_tuple = from_tensor(out);

        auto op_ = std::get<0>(op_tuple).get();
        auto out_ = std::get<0>(out_tuple).get();

        op_->sigmoid(out_);
    }

    void sigmoid_enhanced(const Tensor *op, Tensor *out) override {
        auto op_tuple = from_tensor(op);
        auto out_tuple = from_tensor(out);

        auto op_ = std::get<0>(op_tuple).get();
        auto out_ = std::get<0>(out_tuple).get();

        op_->sigmoid_enhanced(out_);
    }

    void sigmoid_chebyshev(const Tensor *op, Tensor *out) override {
        auto op_tuple = from_tensor(op);
        auto out_tuple = from_tensor(out);

        auto op_ = std::get<0>(op_tuple).get();
        auto out_ = std::get<0>(out_tuple).get();

        op_->sigmoid_chebyshev(out_);
    }

    void softmax(const Tensor *op, Tensor *out, bool use_relu, bool use_long_div) override {
        auto op_tuple = from_tensor(op);
        auto out_tuple = from_tensor(out);

        auto op_ = std::get<0>(op_tuple).get();
        auto out_ = std::get<0>(out_tuple).get();

        op_->softmax(out_, use_relu, use_long_div);
    }

    void gt(const Tensor *lhs, const Tensor *rhs, Tensor *out) override {
        auto lhs_tuple = from_tensor(lhs);
        auto lhs_ = std::get<0>(lhs_tuple).get();

        PaddleTensor rhs_(ContextHolder::device_ctx());
        rhs_.from_float_point_type<float>(*rhs, ABY3_SCALING_FACTOR);

        PaddleTensor out_(ContextHolder::device_ctx(), *out);

        auto tmp0 = ContextHolder::tensor_factory()->create_int64_t(rhs_.shape());
        auto tmp1 = ContextHolder::tensor_factory()->create_int64_t(rhs_.shape());

        BoolTensor bool_out(tmp0.get(), tmp1.get());

        lhs_->gt(&rhs_, &bool_out);

        bool_out.reveal(&out_);
    }

    void geq(const Tensor *lhs, const Tensor *rhs, Tensor *out) override {
        lt(lhs, rhs, out);
        std::transform(out->data<int64_t>(), out->data<int64_t>() + out->numel(),
                       out->data<int64_t>(), [](int64_t b) { return 1 - b; });
    }

    void lt(const Tensor *lhs, const Tensor *rhs, Tensor *out) override {
        auto lhs_tuple = from_tensor(lhs);
        auto lhs_ = std::get<0>(lhs_tuple).get();

        PaddleTensor rhs_(ContextHolder::device_ctx(), *rhs);
        rhs_.from_float_point_type<float>(*rhs, ABY3_SCALING_FACTOR);

        PaddleTensor out_(ContextHolder::device_ctx(), *out);

        auto tmp0 = ContextHolder::tensor_factory()->create_int64_t(rhs_.shape());
        auto tmp1 = ContextHolder::tensor_factory()->create_int64_t(rhs_.shape());

        BoolTensor bool_out(tmp0.get(), tmp1.get());

        lhs_->lt(&rhs_, &bool_out);

        bool_out.reveal(&out_);
    }

    void leq(const Tensor *lhs, const Tensor *rhs, Tensor *out) override {
        gt(lhs, rhs, out);
        std::transform(out->data<int64_t>(), out->data<int64_t>() + out->numel(),
                       out->data<int64_t>(), [](int64_t b) { return 1 - b; });
    }

    void eq(const Tensor *lhs, const Tensor *rhs, Tensor *out) override {
        auto lhs_tuple = from_tensor(lhs);
        auto lhs_ = std::get<0>(lhs_tuple).get();

        PaddleTensor rhs_(ContextHolder::device_ctx(), *rhs);
        rhs_.from_float_point_type<float>(*rhs, ABY3_SCALING_FACTOR);

        PaddleTensor out_(ContextHolder::device_ctx(), *out);

        auto tmp0 = ContextHolder::tensor_factory()->create_int64_t(rhs_.shape());
        auto tmp1 = ContextHolder::tensor_factory()->create_int64_t(rhs_.shape());

        BoolTensor bool_out(tmp0.get(), tmp1.get());

        lhs_->eq(&rhs_, &bool_out);

        bool_out.reveal(&out_);
    }

    void neq(const Tensor *lhs, const Tensor *rhs, Tensor *out) override {
        eq(lhs, rhs, out);
        std::transform(out->data<int64_t>(), out->data<int64_t>() + out->numel(),
                       out->data<int64_t>(), [](int64_t b) { return 1 - b; });
    }

    void relu_grad(const Tensor *y, const Tensor *dy,
                   Tensor *dx, float point = 0.0f) override {
        auto y_tuple = from_tensor(y);
        auto y_ = std::get<0>(y_tuple).get();

        PaddleTensor point_(ContextHolder::device_ctx());
        point_.from_float_point_scalar<float>(point, y_->shape(), ABY3_SCALING_FACTOR);

        auto tmp0 = ContextHolder::tensor_factory()->create_int64_t(y_->shape());
        auto tmp1 = ContextHolder::tensor_factory()->create_int64_t(y_->shape());

        BoolTensor bool_out(tmp0.get(), tmp1.get());

        y_->gt(&point_, &bool_out);

        auto out_tuple = from_tensor(dx);
        auto out_ = std::get<0>(out_tuple).get();

        auto dy_tuple = from_tensor(dy);
        auto dy_ = std::get<0>(dy_tuple).get();

        bool_out.mul(dy_, out_);
    }

    void arith_bool_mul(const Tensor* op_a, const Tensor* op_b, Tensor* out) override {
        auto a_tuple = from_tensor(op_a);
        auto a_ = std::get<0>(a_tuple).get();

        auto b_tuple = from_tensor<BoolTensor>(op_b);
        auto b_ = std::get<0>(b_tuple).get();

        auto out_tuple = from_tensor(out);
        auto out_ = std::get<0>(out_tuple).get();

        b_->mul(a_, out_);
    }

    void max_pooling(const Tensor* in, Tensor* out, Tensor* pos_info) override {
        auto a_tuple = from_tensor(in);
        auto a_ = std::get<0>(a_tuple).get();

        auto b_tuple = from_tensor<BoolTensor>(pos_info);
        auto b_ = std::get<0>(b_tuple).get();

        auto out_tuple = from_tensor(out);
        auto out_ = std::get<0>(out_tuple).get();

        a_->max_pooling(out_, b_);
    }

    void inverse_square_root(const Tensor* in, Tensor* out) override {
        auto x_tuple = from_tensor(in);
        auto x_ = std::get<0>(x_tuple).get();

        auto y_tuple = from_tensor(out);
        auto y_ = std::get<0>(y_tuple).get();

        x_->inverse_square_root(y_);
    }

private:

    template <typename T>
    std::tuple<
        std::shared_ptr<T>,
        std::shared_ptr<PaddleTensor>,
        std::shared_ptr<PaddleTensor> > from_tensor(const Tensor* t) {

        PADDLE_ENFORCE_EQ(t->dims()[0], 2);

        auto pt0 = std::make_shared<PaddleTensor>(ContextHolder::device_ctx(), t->Slice(0, 1));
        auto pt1 = std::make_shared<PaddleTensor>(ContextHolder::device_ctx(), t->Slice(1, 2));

        // remove leading 1 in shape
        auto shape = pt0->shape();
        shape.erase(shape.begin());
        pt0->reshape(shape);
        pt1->reshape(shape);

        aby3::TensorAdapter<int64_t>* pt_array[2] = {pt0.get(), pt1.get()};

        auto ft = std::make_shared<T>(pt_array);

        return std::make_tuple(ft, pt0, pt1);
    }

    std::tuple<
        std::shared_ptr<FixedTensor>,
        std::shared_ptr<PaddleTensor>,
        std::shared_ptr<PaddleTensor> > from_tensor(const Tensor* t) {
        return from_tensor<FixedTensor>(t);
    }

};

} // mpc
......
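A note on the tensor layout from_tensor() above relies on: each mpc Tensor stores a party's two shares along a leading dimension of size 2, which is sliced off (Slice(0, 1) / Slice(1, 2)) and the leftover leading 1 erased before the two slices are wrapped into a FixedPointTensor or BooleanTensor. A plaintext sketch of that shape bookkeeping, with std::vector standing in for the Paddle dims (illustrative only, no Paddle dependency):

// Shape bookkeeping sketch for the share layout expected by from_tensor().
#include <cassert>
#include <cstdint>
#include <vector>

int main() {
    std::vector<int64_t> dims = {2, 3, 4};  // [share_dim, rows, cols]
    assert(dims[0] == 2);                   // mirrors PADDLE_ENFORCE_EQ(t->dims()[0], 2)

    // Slice(0, 1) and Slice(1, 2) each keep a leading dimension of 1 ...
    std::vector<int64_t> share0 = {1, 3, 4};
    std::vector<int64_t> share1 = {1, 3, 4};

    // ... which from_tensor() erases before wrapping the two slices.
    share0.erase(share0.begin());
    share1.erase(share1.begin());
    assert((share0 == std::vector<int64_t>{3, 4}));
    assert((share1 == std::vector<int64_t>{3, 4}));
    return 0;
}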
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

// Description:
// abstract mpc operation interface

...@@ -24,43 +24,67 @@ namespace mpc {
using paddle::framework::Tensor;

// TODO: decide scaling factor
const size_t FIXED_POINTER_SCALING_FACTOR = 16;

class MpcOperators {
public:
    virtual void add(const Tensor *lhs, const Tensor *rhs, Tensor *out) = 0;

    virtual void sub(const Tensor *lhs, const Tensor *rhs, Tensor *out) = 0;

    virtual void neg(const Tensor *op, Tensor *out) = 0;

    virtual void sum(const Tensor *op, Tensor *out) = 0;

    virtual void mul(const Tensor *lhs, const Tensor *rhs, Tensor *out) = 0;

    virtual void matmul(const Tensor *lhs, const Tensor *rhs, Tensor *out) = 0;

    virtual void scale(const Tensor *lhs, const double factor, Tensor *out) = 0;

    virtual void relu(const Tensor *op, Tensor *out) = 0;

    virtual void relu_with_derivative(const Tensor *op, Tensor *out, Tensor *derivative) = 0;

    virtual void sigmoid(const Tensor *op, Tensor *out) = 0;

    virtual void sigmoid_enhanced(const Tensor *op, Tensor *out) = 0;

    virtual void sigmoid_chebyshev(const Tensor *op, Tensor *out) = 0;

    virtual void softmax(const Tensor *op, Tensor *out, bool use_relu, bool use_long_div) = 0;

    virtual void gt(const Tensor *lhs, const Tensor *rhs, Tensor *out) = 0;

    virtual void geq(const Tensor *lhs, const Tensor *rhs, Tensor *out) = 0;

    virtual void lt(const Tensor *lhs, const Tensor *rhs, Tensor *out) = 0;

    virtual void leq(const Tensor *lhs, const Tensor *rhs, Tensor *out) = 0;

    virtual void eq(const Tensor *lhs, const Tensor *rhs, Tensor *out) = 0;

    virtual void neq(const Tensor *lhs, const Tensor *rhs, Tensor *out) = 0;

    virtual void relu_grad(const Tensor *y, const Tensor *dy, Tensor *dx, const float point) = 0;

    // arithmetic tensor mult boolean tensor, element-wisely
    // see [ABY3, sec 5.4.1]
    // for aby3 only
    // example (in plaintext):
    // [1, 2, 3, 4] * [0, 0, 1, 0] = [0, 0, 3, 0]
    virtual void arith_bool_mul(const Tensor* op_a, const Tensor* op_b, Tensor* out) {}

    // max pooling in which shape of filter is nx1
    // pos_info keeps which element is max in a col, for backward grad
    // for filter in other shape, reshape input first
    virtual void max_pooling(const Tensor* in, Tensor* out, Tensor* pos_info) {}

    virtual void inverse_square_root(const Tensor* in, Tensor* out) = 0;
};

} // mpc
} // paddle
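The comment above arith_bool_mul() documents its plaintext meaning: multiply an arithmetic tensor element-wise by a 0/1 mask. A plaintext sketch of that semantics, with plain vectors standing in for the secret-shared tensors (the MPC version computes the same result on shares):

// Plaintext analogue of arith_bool_mul(): [1, 2, 3, 4] * [0, 0, 1, 0] = [0, 0, 3, 0].
#include <cstdint>
#include <iostream>
#include <vector>

int main() {
    std::vector<int64_t> a = {1, 2, 3, 4};
    std::vector<int64_t> mask = {0, 0, 1, 0};  // the boolean tensor in the interface above

    std::vector<int64_t> out(a.size());
    for (size_t i = 0; i < a.size(); ++i) {
        out[i] = a[i] * mask[i];
    }

    for (int64_t v : out) {
        std::cout << v << " ";  // prints: 0 0 3 0
    }
    std::cout << std::endl;
    return 0;
}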
add_compile_options(-msse4.2 -maes)
aux_source_directory(. DIR_SRCS)
aux_source_directory(./math MATH_SRCS)
add_library(mpc_ops_o OBJECT ${DIR_SRCS} ${MATH_SRCS})
add_dependencies(mpc_ops_o fluid_framework gloo)
add_library(mpc_ops STATIC $<TARGET_OBJECTS:mpc_ops_o>)
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "./conv_op.h"
#include <memory>
#include <string>
#include <vector>
namespace paddle {
namespace operators {
std::vector<int64_t> ConvOp::ComputeOutputShape(
framework::InferShapeContext* ctx) const {
OP_INOUT_CHECK(ctx->HasInput("Input"), "Input", "Input", "Conv");
OP_INOUT_CHECK(ctx->HasInput("Filter"), "Input", "Filter", "Conv");
auto in_dims = ctx->GetInputDim("Input");
auto filter_dims = ctx->GetInputDim("Filter");
std::vector<int> strides = ctx->Attrs().Get<std::vector<int>>("strides");
std::vector<int> paddings = ctx->Attrs().Get<std::vector<int>>("paddings");
std::string padding_algorithm =
ctx->Attrs().Get<std::string>("padding_algorithm");
int groups = ctx->Attrs().Get<int>("groups");
std::vector<int> dilations = ctx->Attrs().Get<std::vector<int>>("dilations");
const std::string data_format = ctx->Attrs().Get<std::string>("data_format");
// MKL-DNN Kernels are using NCHW order of dims description
// so we ignore data_format consideration for MKL-DNN kernel
const bool channel_last = (this->IsMKLDNNType() == false) &&
(data_format == "NHWC" || data_format == "NDHWC");
PADDLE_ENFORCE_EQ(
// 1 for share dim
in_dims.size() == 4 + 1 || in_dims.size() == 5 + 1, true,
platform::errors::InvalidArgument(
"The input of Op(Conv) should be a 4-D or 5-D Tensor. But "
"received: input's dimension is %u, input's shape is [%s].",
in_dims.size(), in_dims));
PADDLE_ENFORCE_EQ(
in_dims.size(), filter_dims.size(),
platform::errors::InvalidArgument(
"The input's dimension and filter's dimension of "
"Op(Conv) should be equal. But received: the input's shape is [%s], "
"the input's dimension is %d; the filter's shape is [%s], "
"the filter's dimension is %d.",
in_dims, in_dims.size(), filter_dims, filter_dims.size()));
int in_sub_stride_size = in_dims.size() - strides.size();
PADDLE_ENFORCE_EQ(
in_dims.size(), strides.size() + 2U + 1,
platform::errors::InvalidArgument(
"The difference of input's dimension and Attr(strides)'s "
"length must be equal to 2 for Op(Conv). "
"But received: input's dimension is %d, input's shape is [%s]; "
"Attr(stride)'s length is %d, Attr(stride) is [%s]; "
"difference of input's dimension and Attr(strides)'s length = %u.",
in_dims.size(), in_dims, strides.size(),
framework::make_ddim(strides), in_sub_stride_size));
const auto input_channels =
channel_last ? in_dims[in_dims.size() - 1] : in_dims[1 + 1];
PADDLE_ENFORCE_EQ(
input_channels, filter_dims[1 + 1] * groups,
platform::errors::InvalidArgument(
"The number of input's channels should be equal to filter's channels "
"* groups for Op(Conv). But received: the input's channels is %d, "
"the input's shape is [%s]; the filter's channels is %d, the "
"filter's shape is [%s]; the groups is %d, the data_format is %s. "
"The error may come from wrong data_format setting.",
input_channels, in_dims, filter_dims[1 + 1], filter_dims, groups,
data_format));
PADDLE_ENFORCE_EQ(
filter_dims[0 + 1] % groups, 0,
platform::errors::InvalidArgument(
"The number of output's channels (filter's first dimension) of "
"Op(Conv) should be divided by groups. But received: "
"the output channels is %d, the filter's shape is [%s], "
"the groups is %d.",
filter_dims[0 + 1], filter_dims, groups));
framework::DDim in_data_dims;
if (channel_last) {
in_data_dims = framework::slice_ddim(in_dims, 1 + 1, in_dims.size() - 1);
} else {
in_data_dims = framework::slice_ddim(in_dims, 2 + 1, in_dims.size());
}
framework::DDim filter_data_dims =
framework::slice_ddim(filter_dims, 2 + 1, filter_dims.size());
std::vector<int> ksize = framework::vectorize<int>(filter_data_dims);
UpdatePaddingAndDilation(&paddings, &dilations, padding_algorithm,
in_data_dims, strides, ksize);
std::vector<int64_t> output_shape({in_dims[0], in_dims[1]});
if (!channel_last) {
output_shape.push_back(filter_dims[0 + 1]);
}
for (int i = 0; i < in_data_dims.size(); ++i) {
if ((!ctx->IsRuntime()) &&
(in_data_dims[i] <= 0 || filter_dims[i + 2] <= 0)) {
output_shape.push_back(-1);
} else {
output_shape.push_back(
ConvOutputSize(in_data_dims[i], filter_data_dims[i], dilations[i],
paddings[2 * i], paddings[2 * i + 1], strides[i]));
}
}
if (channel_last) {
output_shape.push_back(filter_dims[1]);
}
return output_shape;
}
framework::OpKernelType ConvOp::GetExpectedKernelType(
const framework::ExecutionContext& ctx) const {
int customized_type_value =
framework::OpKernelType::kDefaultCustomizedTypeValue;
framework::LibraryType library{framework::LibraryType::kPlain};
// TODO(pzelazko-intel): enable MKLDNN layout when it's ready
auto input_data_type = OperatorWithKernel::IndicateVarDataType(ctx, "Input");
std::string data_format =
"AnyLayout"; // todo enable data layout when it's ready
framework::DataLayout layout = framework::StringToDataLayout(data_format);
if (input_data_type != framework::proto::VarType::INT8 &&
input_data_type != framework::proto::VarType::UINT8) {
auto filter_data_type = ctx.Input<Tensor>("Filter")->type();
PADDLE_ENFORCE_EQ(input_data_type, filter_data_type,
platform::errors::InvalidArgument(
"input and filter data type should be consistent"));
}
if (input_data_type == framework::proto::VarType::FP16) {
PADDLE_ENFORCE_EQ(library, framework::LibraryType::kCUDNN,
platform::errors::InvalidArgument(
"float16 can only be used when CUDNN is used"));
}
auto type = framework::OpKernelType(input_data_type, ctx.GetPlace(), layout,
library, customized_type_value);
return type;
}
framework::OpKernelType ConvOp::GetKernelTypeForVar(
const std::string& var_name, const Tensor& tensor,
const framework::OpKernelType& expected_kernel_type) const {
return framework::OpKernelType(expected_kernel_type.data_type_,
tensor.place(), tensor.layout());
}
void Conv2DOpMaker::Make() {
AddAttr<bool>("is_test",
"(bool, default false) Set to true for inference only, false "
"for training. Some layers may run faster when this is true.")
.SetDefault(false);
AddInput("Input",
"(Tensor) The input tensor of convolution operator. "
"The format of input tensor is NCHW or NHWC, where N is batch size, "
"C is the "
"number of channels, H is the height of the feature, "
"and W is the width of the feature.");
AddInput("Filter",
"(Tensor) The filter tensor of convolution operator. "
"The format of the filter tensor is MCHW, where M is the number of "
"output image channels, C is the number of input image channels, "
"H is the height of the filter, and W is the width of the filter. "
"If the groups attribute is greater than 1, C equals the number of "
"input image channels divided by the groups.");
AddInput("Bias",
"(Tensor) Bias to be added to each output of filter application."
"The format of output tensor is X (one-dimensional) of size equal"
"to the number of output channels. Only used with MKL-DNN.")
.AsDispensable();
AddOutput("Output",
"(Tensor) The output tensor of convolution operator. "
"It has the same data format and data type as the Input.");
AddAttr<std::vector<int>>("strides",
"(vector<int> default:{1, 1}), the "
"strides(h_stride, w_stride) of "
"convolution operator.")
.SetDefault({1, 1});
AddAttr<std::vector<int>>("paddings",
"(vector<int> default:{0, 0}), the "
"paddings(pad_height_top, pad_height_bottom, "
"pad_width_left, pad_width_right) of "
"convolution operator.")
.SetDefault({0, 0});
AddAttr<std::string>(
"padding_algorithm",
"(string, default \"EXPLICIT\") An optional string from: \"EXPLICIT\","
"\"SAME\",\"VALID\". Set to \"EXPLICIT\" for explicit padding. "
"Set to \"SAME\" or \"VALID\" for algorithm of padding. ")
.SetDefault("EXPLICIT");
AddAttr<int>(
"groups",
"(int default:1), the groups number of the convolution operator. "
"According to grouped convolution in Alex Krizhevsky's Deep CNN paper: "
"when group=2, the first half of the filters is only connected to the "
"first half of the input channels, while the second half of the filters "
"is only connected to the second half of the input channels.")
.SetDefault(1);
AddAttr<std::vector<int>>("dilations",
"(vector<int> default:{1, 1}), the "
"dilations(h_dilation, w_dilation) of "
"convolution operator.")
.SetDefault({1, 1});
AddAttr<bool>("use_quantizer",
"(bool, default false) "
"Set to true for operators that should be quantized and use "
"int8 kernel. "
"Only used on CPU.")
.SetDefault(false);
AddAttr<float>("Scale_in",
"Scale_in to be used for int8 input data."
"Only used with MKL-DNN INT8.")
.SetDefault(1.0f);
AddAttr<float>("Scale_out",
"Scale_out to be used for int8 output data."
"Only used with MKL-DNN INT8.")
.SetDefault(1.0f);
AddAttr<float>("Scale_in_eltwise",
"Scale_in_eltwise to be used for int8 eltwise input data."
"Only used with MKL-DNN INT8.")
.SetDefault(1.0f);
AddAttr<std::vector<float>>("Scale_weights",
"Scale_weights to be used for int8 weights data."
"Only used with MKL-DNN INT8.")
.SetDefault({1.0f});
AddAttr<bool>("force_fp32_output",
"(bool, default false) Force INT8 kernel output FP32, only "
"used in MKL-DNN INT8")
.SetDefault(false);
AddAttr<std::string>(
"data_format",
"(string, default NCHW) An optional string from: \"NHWC\", \"NCHW\". "
"Defaults to \"NCHW\". Specify the data format of the output data, "
"the input will be transformed automatically. ")
.SetDefault("NCHW");
// TODO(dzhwinter): need to registered layout transform function
AddAttr<bool>("exhaustive_search",
"(bool, default false) cuDNN has many algorithms for calculating "
"convolution, whether to enable exhaustive search "
"for cuDNN convolution or not, default is False.")
.SetDefault(false);
AddComment(R"DOC(
Convolution Operator.
The convolution operation calculates the output based on the input, filter
and strides, paddings, dilations, groups parameters. The size of each dimension of the
parameters is checked in the infer-shape.
Input(Input) and Output(Output) are in NCHW or NHWC format. Where N is batch
size, C is the number of channels, H is the height of the feature, and W is
the width of the feature.
Filters(Input) is MCHW format. Where M is the number of output image channels, C is
the number of input image channels, H is the height of the filter, and W
is the width of the filter.
Parameters(strides, paddings, dilations) are two-element vectors. These two elements represent
height and width, respectively.
The input(X) size and output(Out) size may be different.
Example:
Input:
Input shape: $(N, C_{in}, H_{in}, W_{in})$
Filter shape: $(C_{out}, C_{in}, H_f, W_f)$
Output:
Output shape: $(N, C_{out}, H_{out}, W_{out})$
Where
$$
H_{out}= \frac{(H_{in} + pad_height_top + pad_height_bottom - (dilations[0] * (H_f - 1) + 1))}{strides[0]}+ 1 \\
W_{out}= \frac{(W_{in} + pad_width_left + pad_width_right - (dilations[1] * (W_f - 1) + 1))}{strides[1]}+ 1
$$
)DOC");
Apply();
}
void ConvOpGrad::InferShape(framework::InferShapeContext* ctx) const {
auto in_dims = ctx->GetInputDim("Input");
auto filter_dims = ctx->GetInputDim("Filter");
if (ctx->HasOutput(framework::GradVarName("Input"))) {
ctx->SetOutputDim(framework::GradVarName("Input"), in_dims);
}
if (ctx->HasOutput(framework::GradVarName("Filter"))) {
ctx->SetOutputDim(framework::GradVarName("Filter"), filter_dims);
}
}
framework::OpKernelType ConvOpGrad::GetExpectedKernelType(
const framework::ExecutionContext& ctx) const {
int customized_type_value =
framework::OpKernelType::kDefaultCustomizedTypeValue;
framework::LibraryType library_{framework::LibraryType::kPlain};
// TODO(pzelazko-intel): enable MKLDNN layout when it's ready
std::string data_format = "AnyLayout";
framework::DataLayout layout_ = framework::StringToDataLayout(data_format);
auto type = framework::OpKernelType(
OperatorWithKernel::IndicateVarDataType(ctx, "Input"), ctx.GetPlace(),
layout_, library_, customized_type_value);
return type;
}
framework::OpKernelType ConvOpGrad::GetKernelTypeForVar(
const std::string& var_name, const Tensor& tensor,
const framework::OpKernelType& expected_kernel_type) const {
return framework::OpKernelType(expected_kernel_type.data_type_,
tensor.place(), tensor.layout());
}
template <typename T>
class Conv2DGradMaker : public framework::SingleGradOpMaker<T> {
public:
using framework::SingleGradOpMaker<T>::SingleGradOpMaker;
void Apply(GradOpPtr<T> op) const override {
op->SetType(this->ForwardOpType() + "_grad");
op->SetInput("Input", this->Input("Input"));
op->SetInput("Filter", this->Input("Filter"));
op->SetInput("Bias", this->Input("Bias"));
op->SetInput(framework::GradVarName("Output"), this->OutputGrad("Output"));
op->SetOutput(framework::GradVarName("Input"), this->InputGrad("Input"));
op->SetOutput(framework::GradVarName("Filter"), this->InputGrad("Filter"));
op->SetOutput(framework::GradVarName("Bias"), this->InputGrad("Bias"));
op->SetAttrMap(this->Attrs());
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OPERATOR(mpc_conv2d, ops::ConvOp, ops::Conv2DOpMaker,
ops::ConvOpInferVarType,
ops::Conv2DGradMaker<paddle::framework::OpDesc>,
ops::Conv2DGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(mpc_conv2d_grad, ops::ConvOpGrad);
REGISTER_OP_CPU_KERNEL(
mpc_conv2d, ops::GemmConvKernel<paddle::platform::CPUDeviceContext, int64_t>);
REGISTER_OP_CPU_KERNEL(
mpc_conv2d_grad,
ops::GemmConvGradKernel<paddle::platform::CPUDeviceContext, int64_t>);
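The output-shape arithmetic in ComputeOutputShape() above follows the formula in the operator's DOC string, applied per spatial dimension, with one extra leading share dimension of size 2 in front of the usual NCHW layout. A worked sketch; conv_out_size() is an illustrative stand-in for the ConvOutputSize() helper the operator calls:

// Output-size sketch: out = (in + pad_0 + pad_1 - (dilation * (k - 1) + 1)) / stride + 1.
#include <iostream>

int conv_out_size(int in_size, int k, int dilation, int pad_0, int pad_1, int stride) {
    int dkernel = dilation * (k - 1) + 1;
    return (in_size + pad_0 + pad_1 - dkernel) / stride + 1;
}

int main() {
    // Shapes follow the convention above: input [2, N, C_in, H, W] = [2, 1, 3, 32, 32],
    // filter [2, C_out, C_in, 3, 3] = [2, 8, 3, 3, 3], stride 1, padding 1, dilation 1.
    int h_out = conv_out_size(32, 3, /*dilation=*/1, /*pad_top=*/1, /*pad_bottom=*/1, /*stride=*/1);
    int w_out = conv_out_size(32, 3, /*dilation=*/1, /*pad_left=*/1, /*pad_right=*/1, /*stride=*/1);
    std::cout << "output dims: [2, 1, 8, " << h_out << ", " << w_out << "]" << std::endl;  // 32, 32
    return 0;
}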
This diff is collapsed.
/* Copyright (c) 2020 paddlepaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "core/paddlefl_mpc/operators/math/concat_and_split.h"
#include <vector>
namespace paddle {
namespace operators {
namespace math {
/*
* All tensors' dimension should be the same and the values of
* each dimension must be the same, except the axis dimension.
*/
template <typename T>
class ConcatFunctor<platform::CPUDeviceContext, T> {
public:
void operator()(const platform::CPUDeviceContext& context,
const std::vector<framework::Tensor>& input, int axis,
framework::Tensor* output) {
// TODO(zcd): Add input data validity checking
int num = input.size();
int rows = 1;
auto dim_0 = input[0].dims();
for (int i = 0; i < axis; ++i) {
rows *= dim_0[i];
}
int out_rows = rows, out_cols = 0;
std::vector<int64_t> input_cols(input.size());
for (int i = 0; i < num; ++i) {
int t_cols = input[i].numel() / rows;
out_cols += t_cols;
input_cols[i] = t_cols;
}
auto cpu_place = boost::get<platform::CPUPlace>(context.GetPlace());
// computation
auto output_data = output->data<T>();
int col_idx = 0;
for (int j = 0; j < num; ++j) {
int col_len = input_cols[j];
auto input_data = input[j].data<T>();
for (int k = 0; k < out_rows; ++k) {
memory::Copy(cpu_place, output_data + k * out_cols + col_idx, cpu_place,
input_data + k * col_len, sizeof(T) * col_len);
}
col_idx += col_len;
}
}
};
/*
* All tensors' dimension should be the same and the values of
* each dimension must be the same, except the axis dimension.
*/
template <typename T>
class SplitFunctor<platform::CPUDeviceContext, T> {
public:
void operator()(const platform::CPUDeviceContext& context,
const framework::Tensor& input,
const std::vector<const framework::Tensor*>& ref_inputs,
const int axis, std::vector<framework::Tensor*>* outputs) {
// TODO(zcd): Add input data validity checking
size_t num = outputs->size();
int input_rows = 1;
auto dim_0 = ref_inputs[0]->dims();
for (int i = 0; i < axis; ++i) {
input_rows *= dim_0[i];
}
int input_cols = 0;
std::vector<int64_t> output_cols(outputs->size());
for (size_t i = 0; i < num; ++i) {
int t_cols = ref_inputs[i]->numel() / input_rows;
input_cols += t_cols;
output_cols[i] = t_cols;
}
auto cpu_place = boost::get<platform::CPUPlace>(context.GetPlace());
// computation
for (int k = 0; k < input_rows; ++k) {
const T* src_ptr = input.data<T>() + k * input_cols;
int col_idx = 0;
for (size_t j = 0; j < num; ++j) {
int col_len = output_cols[j];
auto* out_tensor = outputs->at(j);
if (out_tensor != nullptr) {
T* dst_ptr = out_tensor->data<T>() + k * col_len;
memory::Copy(cpu_place, dst_ptr, cpu_place, src_ptr + col_idx,
sizeof(T) * col_len);
}
col_idx += col_len;
}
}
}
};
#define DEFINE_FUNCTOR(type) \
template class ConcatFunctor<platform::CPUDeviceContext, type>; \
template class SplitFunctor<platform::CPUDeviceContext, type>;
FOR_ALL_TYPES(DEFINE_FUNCTOR);
} // namespace math
} // namespace operators
} // namespace paddle
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <vector>
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/framework/lod_tensor.h"
namespace paddle {
namespace operators {
namespace math {
/*
* \brief Concatenate the input tensors along the dimension axis.
* TODO(zcd): maybe it needs to be more detailed.
* Examples:
* Input[0] = [[1,2],[3,4]]
* Input[1] = [[5,6]]
* axis = 0
*
* Output = [[1,2],
* [3,4],
* [5,6]]
*/
template <typename DeviceContext, typename T>
class ConcatFunctor {
public:
void operator()(const DeviceContext& context,
const std::vector<framework::Tensor>& input, int axis,
framework::Tensor* output);
};
/*
* \brief Split the input tensors along the dimension axis into outputs.
* TODO(zcd): maybe it needs to be more detailed.
* Examples:
* Input = [[1,2],
* [3,4],
* [5,6]]
* axis = 0
*
* Output[0] = [[1,2],[3,4]]
* Output[1] = [[5,6]]
*/
template <typename DeviceContext, typename T>
class SplitFunctor {
public:
void operator()(const DeviceContext& context, const framework::Tensor& input,
const std::vector<const framework::Tensor*>& ref_inputs,
int axis, std::vector<framework::Tensor*>* outputs);
};
} // namespace math
} // namespace operators
} // namespace paddle
#define FOR_ALL_TYPES(macro) \
macro(int64_t);
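The comments in the header above spell out the intended semantics; a plaintext sketch of the documented concat example (inputs [[1,2],[3,4]] and [[5,6]], axis 0), with nested std::vectors standing in for framework::Tensor:

// Concat-along-axis-0 sketch matching the documented example.
#include <cstdint>
#include <iostream>
#include <vector>

int main() {
    std::vector<std::vector<int64_t>> input0 = {{1, 2}, {3, 4}};
    std::vector<std::vector<int64_t>> input1 = {{5, 6}};

    // axis = 0: each input's rows are appended back to back, which is what
    // ConcatFunctor's row/column copy loop achieves on flat Tensor storage.
    std::vector<std::vector<int64_t>> output;
    for (const auto& row : input0) output.push_back(row);
    for (const auto& row : input1) output.push_back(row);

    for (const auto& row : output) {
        for (int64_t v : row) std::cout << v << " ";
        std::cout << std::endl;  // prints rows: 1 2 / 3 4 / 5 6
    }
    return 0;
}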
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "./im2col.h"
#include <vector>
#include "./im2col_cfo_cpu.h"
namespace paddle {
namespace operators {
namespace math {
/*
* im = [input_channels, input_height, input_width]
* col =
* [input_channels, filter_height, filter_width, output_height, output_width]
*/
template <class T>
class Im2ColFunctor<paddle::operators::math::ColFormat::kCFO,
platform::CPUDeviceContext, T> {
public:
void operator()(const platform::CPUDeviceContext& context,
const framework::Tensor& im, const std::vector<int>& dilation,
const std::vector<int>& stride,
const std::vector<int>& padding, framework::Tensor* col,
const DataLayout data_layout) {
PADDLE_ENFORCE_EQ(im.dims().size(), 3, "The dimension of im should be 3.");
PADDLE_ENFORCE_EQ(col->dims().size(), 5,
"The dimension of col should be 5.");
if (stride[0] == 1 && stride[1] == 1 && dilation[0] == 1 &&
dilation[1] == 1) {
if (padding[0] == 0 && padding[1] == 0 && padding[2] == 0 &&
padding[3] == 0) {
im2col_sh1sw1dh1dw1ph0pw0<T>(im, col, data_layout);
return;
} else if (padding[0] == 1 && padding[1] == 1 && padding[2] == 1 &&
padding[3] == 1) {
im2col_sh1sw1dh1dw1ph1pw1<T>(im, col, data_layout);
return;
}
// TODO(TJ): complete padding >=2
}
im2col_common<T>(im, dilation, stride, padding, col, data_layout);
}
};
/*
* im = [input_channels, input_height, input_width]
* col =
* [input_channels, filter_height, filter_width, output_height, output_width]
*/
template <class T>
class Col2ImFunctor<paddle::operators::math::ColFormat::kCFO,
platform::CPUDeviceContext, T> {
public:
void operator()(const platform::CPUDeviceContext& context,
const framework::Tensor& col,
const std::vector<int>& dilation,
const std::vector<int>& stride,
const std::vector<int>& padding, framework::Tensor* im,
const DataLayout data_layout) {
PADDLE_ENFORCE_EQ(im->dims().size(), 3, "The dimension of im should be 3.");
PADDLE_ENFORCE_EQ(col.dims().size(), 5,
"The dimension of col should be 5.");
int im_channels =
(data_layout != DataLayout::kNHWC ? im->dims()[0] : im->dims()[2]);
int im_height =
(data_layout != DataLayout::kNHWC ? im->dims()[1] : im->dims()[0]);
int im_width =
(data_layout != DataLayout::kNHWC ? im->dims()[2] : im->dims()[1]);
int filter_height = col.dims()[1];
int filter_width = col.dims()[2];
int col_height = col.dims()[3];
int col_width = col.dims()[4];
PADDLE_ENFORCE_EQ((im_height + padding[0] + padding[2] -
((dilation[0] * (filter_height - 1) + 1))) /
stride[0] +
1,
col_height,
"Output_height and padding(padding_up, padding_down) are "
"inconsistent.");
PADDLE_ENFORCE_EQ((im_width + padding[1] + padding[3] -
((dilation[1] * (filter_width - 1) + 1))) /
stride[1] +
1,
col_width,
"col_width and padding(padding_left, padding_right) are "
"inconsistent.");
int channels_col = im_channels * filter_height * filter_width;
T* im_data = im->data<T>();
const T* col_data = col.data<T>();
for (int c = 0; c < channels_col; ++c) {
int w_offset = c % filter_width;
int h_offset = (c / filter_width) % filter_height;
int c_im = c / (filter_width * filter_height);
for (int h = 0; h < col_height; ++h) {
int im_row_idx = h * stride[0] - padding[0] + h_offset * dilation[0];
for (int w = 0; w < col_width; ++w) {
int im_col_idx = w * stride[1] - padding[1] + w_offset * dilation[1];
if ((im_row_idx) >= 0 && (im_row_idx) < im_height &&
(im_col_idx) >= 0 && (im_col_idx) < im_width) {
int im_offset;
if (data_layout != DataLayout::kNHWC) {
im_offset =
(c_im * im_height + im_row_idx) * im_width + im_col_idx;
} else {
im_offset =
(im_row_idx * im_width + im_col_idx) * im_channels + c_im;
}
im_data[im_offset] +=
col_data[(c * col_height + h) * col_width + w];
}
}
}
}
}
};
template class Im2ColFunctor<paddle::operators::math::ColFormat::kCFO,
platform::CPUDeviceContext, int64_t>;
template class Col2ImFunctor<paddle::operators::math::ColFormat::kCFO,
platform::CPUDeviceContext, int64_t>;
/*
* im = [input_channels, input_height, input_width]
* col =
* [output_height, output_width, input_channels, filter_height, filter_width]
*/
template <class T>
class Im2ColFunctor<paddle::operators::math::ColFormat::kOCF,
platform::CPUDeviceContext, T> {
public:
void operator()(const platform::CPUDeviceContext& context,
const framework::Tensor& im, const std::vector<int>& dilation,
const std::vector<int>& stride,
const std::vector<int>& padding, framework::Tensor* col,
const DataLayout data_layout) {
PADDLE_ENFORCE_EQ(im.dims().size(), 3, "The dimension of im should be 3.");
PADDLE_ENFORCE_EQ(col->dims().size(), 5,
"The dimension of col should be 5.");
int im_channels = im.dims()[0];
int im_height = im.dims()[1];
int im_width = im.dims()[2];
int filter_height = col->dims()[3];
int filter_width = col->dims()[4];
int col_height = col->dims()[0];
int col_width = col->dims()[1];
const T* im_data = im.data<T>();
T* col_data = col->data<T>();
for (int col_row_idx = 0; col_row_idx < col_height; ++col_row_idx) {
for (int col_col_idx = 0; col_col_idx < col_width; ++col_col_idx) {
for (int channel = 0; channel < im_channels; ++channel) {
for (int filter_row_idx = 0; filter_row_idx < filter_height;
++filter_row_idx) {
int im_row_offset =
col_row_idx * stride[0] + filter_row_idx - padding[0];
for (int filter_col_idx = 0; filter_col_idx < filter_width;
++filter_col_idx) {
int im_col_offset =
col_col_idx * stride[1] + filter_col_idx - padding[1];
int col_offset =
((((col_row_idx)*col_width + col_col_idx) * im_channels +
channel) *
filter_height +
filter_row_idx) *
filter_width +
filter_col_idx;
int im_offset = (channel * im_height + im_row_offset) * im_width +
im_col_offset;
col_data[col_offset] =
(im_row_offset < 0 || im_row_offset >= im_height ||
im_col_offset < 0 || im_col_offset >= im_width)
? static_cast<T>(0)
: im_data[im_offset];
}
}
}
}
}
}
};
/*
* im = [input_channels, input_height, input_width]
* col =
* [output_height, output_width, input_channels, filter_height, filter_width]
*/
template <class T>
class Col2ImFunctor<paddle::operators::math::ColFormat::kOCF,
platform::CPUDeviceContext, T> {
public:
void operator()(const platform::CPUDeviceContext& context,
const framework::Tensor& col,
const std::vector<int>& dilation,
const std::vector<int>& stride,
const std::vector<int>& padding, framework::Tensor* im,
const DataLayout data_layout) {
PADDLE_ENFORCE_EQ(im->dims().size(), 3, "The dimension of im should be 3.");
PADDLE_ENFORCE_EQ(col.dims().size(), 5,
"The dimension of col should be 5.");
int im_channels = im->dims()[0];
int im_height = im->dims()[1];
int im_width = im->dims()[2];
int filter_height = col.dims()[3];
int filter_width = col.dims()[4];
int col_height = col.dims()[0];
int col_width = col.dims()[1];
PADDLE_ENFORCE_EQ(
(im_height + padding[0] + padding[2] - filter_height) / stride[0] + 1,
col_height,
"Output_height and padding(padding_up, padding_down) are "
"inconsistent.");
PADDLE_ENFORCE_EQ(
(im_width + padding[1] + padding[3] - filter_width) / stride[1] + 1,
col_width,
"col_width and padding(padding_left, padding_right) are "
"inconsistent.");
T* im_data = im->data<T>();
const T* col_data = col.data<T>();
for (int col_row_idx = 0; col_row_idx < col_height; ++col_row_idx) {
for (int col_col_idx = 0; col_col_idx < col_width; ++col_col_idx) {
for (int channel = 0; channel < im_channels; ++channel) {
for (int filter_row_idx = 0; filter_row_idx < filter_height;
++filter_row_idx) {
int im_row_offset =
col_row_idx * stride[0] + filter_row_idx - padding[0];
for (int filter_col_idx = 0; filter_col_idx < filter_width;
++filter_col_idx) {
int im_col_offset =
col_col_idx * stride[1] + filter_col_idx - padding[1];
int col_offset =
(((col_row_idx * col_width + col_col_idx) * im_channels +
channel) *
filter_height +
filter_row_idx) *
filter_width +
filter_col_idx;
if (im_row_offset >= 0 && im_row_offset < im_height &&
im_col_offset >= 0 && im_col_offset < im_width) {
int im_offset =
(channel * im_height + im_row_offset) * im_width +
im_col_offset;
im_data[im_offset] += col_data[col_offset];
}
}
}
}
}
}
}
};
template class Im2ColFunctor<paddle::operators::math::ColFormat::kOCF,
platform::CPUDeviceContext, int64_t>;
template class Col2ImFunctor<paddle::operators::math::ColFormat::kOCF,
platform::CPUDeviceContext, int64_t>;
} // namespace math
} // namespace operators
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <vector>
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/platform/device_context.h"
namespace paddle {
namespace operators {
namespace math {
using DataLayout = framework::DataLayout;
/* The storage format of the coldata in the Im2ColFunctor and Col2ImFunctor. */
enum class ColFormat { kCFO = 0, kOCF = 1 };
/*
 * \brief Converts three-dimensional image data (CHW) into five-dimensional
 * colData in the Im2ColFunctor calculation;
 * the Col2ImFunctor calculation reverses the transformation.
*
* \param imData Image data.
* \param imShape The shape of imData,
* [input_channels, input_height, input_width].
* \param colData Column data.
* \param colShape The shape of colData.
*
* \param dilations dilation data.
* \param 2-dimension [dilation_height, dilation_width].
*
* \param strides stride data.
* \param 2-dimension [stride_height, stride_width].
*
* \param paddings padding data.
* \param 4-dimension [up_pad, left_pad, down_pad, right_pad].
*
* If the template argument Format is kCFO, the shape of colData is:
* [input_channels, filter_height, filter_width, output_height, output_width]
* So, it is easy to reshape into a convolution matrix for convolution
* calculation based on matrix multiplication.
 * The shape of the convolution matrix is [height, width], where the height
 * equals input_channels * filter_height * filter_width and the width equals
 * output_height * output_width.
*
* Reshape:
* shape of colData shape of convolution matrix
* [input_channels,
* filter_height,
* filter_width, ======> [height, width]
* output_height,
* output_width]
*
* If the template argument Format is kOCF, the shape of colData is:
* [output_height, output_width, input_channels, filter_height, filter_width]
* So, it is easy to reshape into a sequence matrix for rnn calculation.
 * The shape of the sequence matrix is [seq_length, step_size], where
 * seq_length equals output_height * output_width and step_size equals
 * input_channels * filter_height * filter_width.
*
* Reshape:
* shape of colData shape of sequence matrix
* [output_height,
* output_width,
* input_channels, ======> [seqLength, stepSize]
* filter_height,
* filter_width]
*
* \note The caller needs to ensure that imShape.inputChannels is equal to
* colShape.inputChannels.
*/
template <ColFormat Format, typename DeviceContext, typename T>
class Im2ColFunctor {
public:
void operator()(const DeviceContext& context, const framework::Tensor& im,
const std::vector<int>& dilation,
const std::vector<int>& stride,
const std::vector<int>& padding, framework::Tensor* col,
const DataLayout data_layout = DataLayout::kNCHW);
};
template <ColFormat Format, typename DeviceContext, typename T>
class Col2ImFunctor {
public:
void operator()(const DeviceContext& context, const framework::Tensor& col,
const std::vector<int>& dilation,
const std::vector<int>& stride,
const std::vector<int>& padding, framework::Tensor* im,
const DataLayout data_layout = DataLayout::kNCHW);
};
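// A minimal usage sketch, assuming a CHW image tensor and a pre-allocated
// col tensor in the kCFO layout described above; the function name and the
// concrete shapes are illustrative only and are not part of the library API.
// The int64_t instantiation is the one provided by im2col.cc.
inline void im2col_kcfo_usage_sketch(const platform::CPUDeviceContext& ctx,
                                     const framework::Tensor& im,
                                     framework::Tensor* col) {
  // Unit stride and dilation, no padding: [up, left, down, right].
  std::vector<int> dilation{1, 1};
  std::vector<int> stride{1, 1};
  std::vector<int> padding{0, 0, 0, 0};
  Im2ColFunctor<ColFormat::kCFO, platform::CPUDeviceContext, int64_t> im2col;
  im2col(ctx, im, dilation, stride, padding, col, DataLayout::kNCHW);
  // Col2ImFunctor reverses the transform with the same arguments; values from
  // overlapping windows are accumulated back into the image tensor.
}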
} // namespace math
} // namespace operators
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <vector>
#include "paddle/fluid/framework/tensor.h"
namespace paddle {
namespace operators {
namespace math {
/**
* The most common im2col algorithm.
 * It supports dilation, stride and padding.
*/
template <typename T>
inline void im2col_common(const framework::Tensor& im,
const std::vector<int>& dilation,
const std::vector<int>& stride,
const std::vector<int>& padding,
framework::Tensor* col,
const DataLayout data_layout = DataLayout::kNCHW) {
int im_channels =
(data_layout != DataLayout::kNHWC ? im.dims()[0] : im.dims()[2]);
int im_height =
(data_layout != DataLayout::kNHWC ? im.dims()[1] : im.dims()[0]);
int im_width =
(data_layout != DataLayout::kNHWC ? im.dims()[2] : im.dims()[1]);
int filter_height = col->dims()[1];
int filter_width = col->dims()[2];
int output_height = col->dims()[3];
int output_width = col->dims()[4];
int channels_col = im_channels * filter_height * filter_width;
const T* im_data = im.data<T>();
T* col_data = col->data<T>();
for (int c = 0; c < channels_col; ++c) {
int w_offset = c % filter_width;
int h_offset = (c / filter_width) % filter_height;
int c_im = c / (filter_width * filter_height);
for (int h = 0; h < output_height; ++h) {
int im_row_idx = h * stride[0] - padding[0] + h_offset * dilation[0];
for (int w = 0; w < output_width; ++w) {
int im_col_idx = w * stride[1] - padding[1] + w_offset * dilation[1];
int im_idx;
if (data_layout != DataLayout::kNHWC) {
im_idx = (im_row_idx + c_im * im_height) * im_width + im_col_idx;
} else {
im_idx = (im_row_idx * im_width + im_col_idx) * im_channels + c_im;
}
int col_idx = (c * output_height + h) * output_width + w;
col_data[col_idx] = (im_row_idx < 0 || im_row_idx >= im_height ||
im_col_idx < 0 || im_col_idx >= im_width)
? static_cast<T>(0)
: im_data[im_idx];
}
}
}
}
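// A small helper sketch for the shape relationship that im2col_common relies
// on: col->dims()[3] and col->dims()[4] must equal the value computed below
// for the height and width dimensions respectively. The helper is
// illustrative only and is not called by the kernels in this file.
inline int im2col_expected_output_size(int im_size, int filter_size,
                                       int dilation, int pad_before,
                                       int pad_after, int stride) {
  // Effective filter extent once dilation is applied.
  int dkernel = dilation * (filter_size - 1) + 1;
  return (im_size + pad_before + pad_after - dkernel) / stride + 1;
}
// Example: a 5x5 image with a 3x3 filter, stride 1, dilation 1 and no padding
// gives (5 + 0 + 0 - 3) / 1 + 1 = 3 outputs along each spatial dimension.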
/**
* im2col algorithm with strides == 1, dilations == 1, paddings == 0
*/
template <typename T>
inline void im2col_sh1sw1dh1dw1ph0pw0(
const framework::Tensor& im, framework::Tensor* col,
const DataLayout data_layout = DataLayout::kNCHW) {
int im_channels =
(data_layout != DataLayout::kNHWC ? im.dims()[0] : im.dims()[2]);
int im_height =
(data_layout != DataLayout::kNHWC ? im.dims()[1] : im.dims()[0]);
int im_width =
(data_layout != DataLayout::kNHWC ? im.dims()[2] : im.dims()[1]);
int filter_height = col->dims()[1];
int filter_width = col->dims()[2];
int output_height = col->dims()[3];
int output_width = col->dims()[4];
const T* im_data = im.data<T>();
T* col_data = col->data<T>();
int col_matrix_width = output_width * output_height;
int im_size = im_height * im_width;
size_t copy_size = sizeof(T) * output_width;
const T* im_data_oh = im_data;
T* dst_data_oh = col_data;
for (int oh = 0; oh < output_height; ++oh) {
const T* src_data_ic = im_data_oh;
T* dst_data = dst_data_oh;
for (int ic = 0; ic < im_channels; ++ic) {
const T* src_data = src_data_ic;
for (int kh = 0; kh < filter_height; ++kh) {
for (int kw = 0; kw < filter_width; ++kw) {
if (data_layout != DataLayout::kNHWC) {
std::memcpy(dst_data, src_data + kw, copy_size);
} else {
for (int kow = 0; kow < output_width; ++kow) {
dst_data[kow] =
im_data[((oh + kh) * im_width + kw + kow) * im_channels + ic];
}
}
dst_data = dst_data + col_matrix_width;
}
src_data = src_data + im_width;
}
src_data_ic = src_data_ic + im_size;
}
im_data_oh = im_data_oh + im_width;
dst_data_oh = dst_data_oh + output_width;
}
}
/**
 * im2col algorithm with strides == 1, dilations == 1, paddings == 1;
 * the filter_width == 1 case has a dedicated fast path below
*/
template <typename T>
inline void im2col_sh1sw1dh1dw1ph1pw1(const framework::Tensor& im,
framework::Tensor* col,
const DataLayout data_layout) {
int im_channels =
(data_layout != DataLayout::kNHWC ? im.dims()[0] : im.dims()[2]);
int im_height =
(data_layout != DataLayout::kNHWC ? im.dims()[1] : im.dims()[0]);
int im_width =
(data_layout != DataLayout::kNHWC ? im.dims()[2] : im.dims()[1]);
int filter_height = col->dims()[1];
int filter_width = col->dims()[2];
int output_height = col->dims()[3];
int output_width = col->dims()[4];
constexpr int plh = 1;
constexpr int prh = 1;
constexpr int plw = 1;
constexpr int prw = 1;
const T* im_data = im.data<T>();
T* col_data = col->data<T>();
int im_size = im_height * im_width;
int col_matrix_width = output_width * output_height;
int col_block_fh = filter_width * col_matrix_width; // fw*oh*ow
int col_block_ic = filter_height * col_block_fh; // fh*fw*oh*ow
// fill height padding
{
size_t copy_size = sizeof(T) * output_width;
T* col_start_l = col_data;
T* col_start_r = col_data + (filter_height - 1) * col_block_fh +
col_matrix_width - output_width;
for (int ic = 0; ic < im_channels; ++ic) {
T* dst_data_l = col_start_l;
T* dst_data_r = col_start_r;
for (int kw = 0; kw < filter_width; ++kw) {
std::memset(dst_data_l, 0, copy_size);
std::memset(dst_data_r, 0, copy_size);
dst_data_l = dst_data_l + col_matrix_width;
dst_data_r = dst_data_r + col_matrix_width;
}
col_start_l = col_start_l + col_block_ic;
col_start_r = col_start_r + col_block_ic;
}
}
auto pad = static_cast<T>(0);
if (filter_width == 1) {
// fill width padding
T* dst_data_ic = col_data;
for (int ic = 0; ic < im_channels; ++ic) {
T* dst_data_kh = dst_data_ic;
for (int kh = 0; kh < filter_height; ++kh) {
T* dst_data = dst_data_kh;
for (int oh = 0; oh < output_height; ++oh) {
*dst_data = pad;
dst_data = dst_data + output_width - 1;
*dst_data = pad;
++dst_data;
}
dst_data_kh = dst_data_kh + col_block_fh;
}
dst_data_ic = dst_data_ic + col_block_ic;
}
// fill core
size_t copy_size = sizeof(T) * (output_width - plw - prw);
for (int oh = 0; oh < output_height; ++oh) {
const T* im_data_start =
im_data + (oh - plh > 0 ? oh - plh : 0) * im_width;
T* dst_data = col_data + oh * output_width;
for (int ic = 0; ic < im_channels; ++ic) {
const T* src_data = im_data_start + ic * im_size;
for (int kh = 0; kh < filter_height; ++kh) {
if ((oh < plh && kh < plh) || (oh > (output_height - prh - 1) &&
kh > (filter_height - prh - 1))) {
dst_data = dst_data + col_matrix_width;
continue;
}
if (data_layout != DataLayout::kNHWC) {
std::memcpy(dst_data + plw, src_data, copy_size);
} else {
for (int kow = 0; kow < output_width - plw - prw; ++kow) {
dst_data[plw + kow] =
im_data[(((oh - plh > 0 ? oh - plh : 0) + kh) * im_width +
kow) *
im_channels +
ic];
}
}
dst_data = dst_data + col_matrix_width;
src_data = src_data + im_width;
}
}
}
return;
}
// filter_width != 1
// fill width padding
T* dst_data_ic = col_data;
for (int ic = 0; ic < im_channels; ++ic) {
T* dst_data_kh = dst_data_ic;
for (int kh = 0; kh < filter_height; ++kh) {
for (T* dst_data :
{dst_data_kh, dst_data_kh + (filter_width - prw) * col_matrix_width +
output_width - 1}) {
// TODO(TJ): from plh, saving repeated assignment
for (int oh = 0; oh < output_height; ++oh) {
*dst_data = pad;
dst_data = dst_data + output_width;
}
}
dst_data_kh = dst_data_kh + col_block_fh;
}
dst_data_ic = dst_data_ic + col_block_ic;
}
// TODO(TJ): use array like: size_t copy_size[kw]={sizeof(T) *
// (output_width-1)}
  // The length of copy_size equals kw.
for (int oh = 0; oh < output_height; ++oh) {
const T* im_data_start = im_data + (oh - plh > 0 ? oh - plh : 0) * im_width;
T* dst_data = col_data + oh * output_width;
for (int ic = 0; ic < im_channels; ++ic) {
const T* src_data = im_data_start + ic * im_size;
for (int kh = 0; kh < filter_height; ++kh) {
if ((oh < plh && kh < plh) || (oh > (output_height - prh - 1) &&
kh > (filter_height - prh - 1))) {
dst_data = dst_data + filter_width * col_matrix_width;
continue;
}
// TODO(TJ): reuse plw-kw outside this for
// try to unify
for (int kw = 0; kw < plw; ++kw) {
if (data_layout != DataLayout::kNHWC) {
std::memcpy(dst_data + (plw - kw), src_data,
sizeof(T) * (output_width - (plw - kw)));
} else {
for (int kow = 0; kow < output_width - (plw - kw); ++kow) {
dst_data[plw - kw + kow] =
im_data[(((oh - plh > 0 ? oh - plh : 0) + kh) * im_width +
kow) *
im_channels +
ic];
}
}
dst_data = dst_data + col_matrix_width;
}
for (int kw = plw; kw < filter_width - prw; ++kw) {
if (data_layout != DataLayout::kNHWC) {
std::memcpy(dst_data, src_data + (kw - plw),
sizeof(T) * output_width);
} else {
for (int kow = 0; kow < output_width; ++kow) {
dst_data[kow] =
im_data[(((oh - plh > 0 ? oh - plh : 0) + kh) * im_width +
kw - plw + kow) *
im_channels +
ic];
}
}
dst_data = dst_data + col_matrix_width;
}
int i = 1;
for (int kw = filter_width - prw; kw < filter_width; ++kw, ++i) {
if (data_layout != DataLayout::kNHWC) {
std::memcpy(dst_data, src_data + (kw - plw),
sizeof(T) * (output_width - i));
} else {
for (int kow = 0; kow < output_width - i; ++kow) {
dst_data[kow] =
im_data[(((oh - plh > 0 ? oh - plh : 0) + kh) * im_width +
kw - plw + kow) *
im_channels +
ic];
}
}
dst_data = dst_data + col_matrix_width;
}
src_data = src_data + im_width;
}
}
}
}
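// A dispatch sketch, assuming the caller follows the common convention of
// selecting a specialized kernel only when stride and dilation are 1 and the
// padding is uniformly 0 or 1; the actual dispatch lives in the Im2ColFunctor
// specialization and may differ, so this helper is illustrative only.
template <typename T>
inline void im2col_dispatch_sketch(const framework::Tensor& im,
                                   const std::vector<int>& dilation,
                                   const std::vector<int>& stride,
                                   const std::vector<int>& padding,
                                   framework::Tensor* col,
                                   const DataLayout data_layout) {
  bool unit = stride[0] == 1 && stride[1] == 1 && dilation[0] == 1 &&
              dilation[1] == 1;
  bool pad0 = padding[0] == 0 && padding[1] == 0 && padding[2] == 0 &&
              padding[3] == 0;
  bool pad1 = padding[0] == 1 && padding[1] == 1 && padding[2] == 1 &&
              padding[3] == 1;
  if (unit && pad0) {
    im2col_sh1sw1dh1dw1ph0pw0<T>(im, col, data_layout);
  } else if (unit && pad1) {
    im2col_sh1sw1dh1dw1ph1pw1<T>(im, col, data_layout);
  } else {
    im2col_common<T>(im, dilation, stride, padding, col, data_layout);
  }
}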
} // namespace math
} // namespace operators
} // namespace paddle
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "core/paddlefl_mpc/operators/math/math_function.h"
#include <vector>
#include "paddle/fluid/framework/data_type.h"
namespace paddle {
namespace operators {
namespace math {
template <typename T>
struct RowwiseAdd<platform::CPUDeviceContext, T> {
void operator()(const platform::CPUDeviceContext& context,
const framework::Tensor& input,
const framework::Tensor& vector, framework::Tensor* output) {
auto in_dims = input.dims();
auto size = input.numel() / in_dims[0];
PADDLE_ENFORCE_EQ(vector.numel(), size);
PADDLE_ENFORCE_EQ(output->dims(), in_dims);
auto in = framework::EigenMatrix<T>::From(input);
auto vec = framework::EigenVector<T>::Flatten(vector);
auto out = framework::EigenMatrix<T>::From(*output);
for (int64_t i = 0; i < in_dims[0]; ++i) {
out.chip(i, 0) = in.chip(i, 0) + vec;
}
}
};
template struct RowwiseAdd<platform::CPUDeviceContext, int64_t>;
using float16 = paddle::platform::float16;
template struct SetConstant<platform::CPUDeviceContext, platform::float16>;
template struct SetConstant<platform::CPUDeviceContext, float>;
template struct SetConstant<platform::CPUDeviceContext, double>;
template struct SetConstant<platform::CPUDeviceContext, int>;
template struct SetConstant<platform::CPUDeviceContext, int64_t>;
template struct SetConstant<platform::CPUDeviceContext, bool>;
template struct SetConstant<platform::CPUDeviceContext, uint8_t>;
#define DEFINE_CPU_TRANS(RANK) \
template struct Transpose<platform::CPUDeviceContext, platform::float16, \
RANK>; \
template struct Transpose<platform::CPUDeviceContext, float, RANK>; \
template struct Transpose<platform::CPUDeviceContext, double, RANK>; \
template struct Transpose<platform::CPUDeviceContext, int, RANK>; \
template struct Transpose<platform::CPUDeviceContext, int64_t, RANK>; \
template struct Transpose<platform::CPUDeviceContext, bool, RANK>; \
template struct Transpose<platform::CPUDeviceContext, int16_t, RANK>; \
template struct Transpose<platform::CPUDeviceContext, uint8_t, RANK>; \
template struct Transpose<platform::CPUDeviceContext, int8_t, RANK>;
DEFINE_CPU_TRANS(1);
DEFINE_CPU_TRANS(2);
DEFINE_CPU_TRANS(3);
DEFINE_CPU_TRANS(4);
DEFINE_CPU_TRANS(5);
DEFINE_CPU_TRANS(6);
} // namespace math
} // namespace operators
} // namespace paddle
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <cmath>
#include <vector>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/enforce.h"
namespace paddle {
namespace operators {
namespace math {
template <typename DeviceContext, typename T>
struct RowwiseAdd {
void operator()(const DeviceContext& context, const framework::Tensor& input,
const framework::Tensor& vec, framework::Tensor* output);
};
template <typename DeviceContext, typename T>
struct SetConstant {
void operator()(const DeviceContext& context, framework::Tensor* tensor,
T num);
};
template <typename DeviceContext, typename T, int Rank>
struct Transpose {
void operator()(const DeviceContext& context, const framework::Tensor& in,
framework::Tensor* out, const std::vector<int>& axis);
};
template <typename DeviceContext, typename T>
struct ColwiseSum {
void operator()(const DeviceContext& context, const framework::Tensor& input,
framework::Tensor* vec);
};
} // namespace math
} // namespace operators
} // namespace paddle
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <vector>
#include "paddle/fluid/framework/data_type.h"
#include "core/paddlefl_mpc/operators/math/math_function.h"
namespace paddle {
namespace operators {
namespace math {
template <typename DeviceContext, typename T>
void SetConstant<DeviceContext, T>::operator()(const DeviceContext& context,
framework::Tensor* tensor,
T num) {
auto t = framework::EigenVector<T>::Flatten(*tensor);
t.device(*context.eigen_device()) = t.constant(static_cast<T>(num));
}
template <typename DeviceContext, typename T, int Rank>
void Transpose<DeviceContext, T, Rank>::operator()(
const DeviceContext& context, const framework::Tensor& in,
framework::Tensor* out, const std::vector<int>& axis) {
Eigen::array<int, Rank> permute;
for (int i = 0; i < Rank; i++) {
permute[i] = axis[i];
}
auto eigen_in = framework::EigenTensor<T, Rank>::From(in);
auto eigen_out = framework::EigenTensor<T, Rank>::From(*out);
auto* dev = context.eigen_device();
eigen_out.device(*dev) = eigen_in.shuffle(permute);
}
template <typename DeviceContext, typename T>
void ColwiseSum<DeviceContext, T>::operator()(const DeviceContext& context,
const framework::Tensor& input,
framework::Tensor* out) {
auto in_dims = input.dims();
auto size = input.numel() / in_dims[0];
PADDLE_ENFORCE_EQ(out->numel(), size);
auto in = framework::EigenMatrix<T>::From(input);
auto vec = framework::EigenVector<T>::Flatten(*out);
vec.device(*context.eigen_device()) = in.sum(Eigen::array<int, 1>({{0}}));
}
// Specialize for CPU, since Eigen implements a general reduce. However,
// column-wise sum can be implemented directly, and the general reduce has a
// large overhead on CPU.
template <typename T>
class ColwiseSum<platform::CPUDeviceContext, T> {
public:
void operator()(const platform::CPUDeviceContext& context,
const framework::Tensor& input, framework::Tensor* out) {
auto& in_dims = input.dims();
auto height = in_dims[0];
auto size = in_dims[1];
PADDLE_ENFORCE_EQ(out->numel(), size);
T* out_buf = out->mutable_data<T>(out->place());
const T* in_buf = input.data<T>();
for (size_t i = 0; i < static_cast<size_t>(height); ++i) {
for (size_t j = 0; j < static_cast<size_t>(size); ++j) {
if (i == 0) {
out_buf[j] = in_buf[i * size + j];
} else {
out_buf[j] += in_buf[i * size + j];
}
}
}
}
};
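// A plain-buffer sketch of the same accumulation, kept free of any tensor
// plumbing so the reduction pattern is easy to verify in isolation; the
// helper is hypothetical and is not used by the operators.
template <typename T>
inline void colwise_sum_sketch(const T* in, int height, int width, T* out) {
  for (int j = 0; j < width; ++j) {
    out[j] = static_cast<T>(0);
  }
  for (int i = 0; i < height; ++i) {
    for (int j = 0; j < width; ++j) {
      out[j] += in[i * width + j];  // accumulate row i into the column sums
    }
  }
}
// Example: in = {1, 2, 3, 4, 5, 6} viewed as a 2 x 3 matrix gives
// out = {5, 7, 9}.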
} // namespace math
} // namespace operators
} // namespace paddle
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "core/paddlefl_mpc/operators/math/sequence2batch.h"
namespace paddle {
namespace operators {
namespace math {
template <typename T>
class CopyMatrixRowsFunctor<platform::CPUDeviceContext, T> {
public:
void operator()(const platform::CPUDeviceContext& context,
const framework::Tensor& src,
framework::Vector<size_t> index_lod, framework::Tensor* dst,
bool is_src_index) {
size_t* index = index_lod.data();
auto src_dims = src.dims();
auto dst_dims = dst->dims();
PADDLE_ENFORCE_EQ(src_dims.size(), 2UL,
"The src must be matrix with rank 2.");
PADDLE_ENFORCE_EQ(dst_dims.size(), 2UL,
"The dst must be matrix with rank 2.");
PADDLE_ENFORCE_EQ(src_dims[1], dst_dims[1],
"The width of src and dst must be same.");
auto height = dst_dims[0];
auto width = dst_dims[1];
auto* src_data = src.data<T>();
auto* dst_data = dst->data<T>();
const int sz = width * sizeof(T);
if (is_src_index) {
for (int i = 0; i < height; ++i) {
memcpy(dst_data + i * width, src_data + index[i] * width, sz);
}
} else {
for (int i = 0; i < height; ++i) {
memcpy(dst_data + index[i] * width, src_data + i * width, sz);
}
}
}
};
template class CopyMatrixRowsFunctor<platform::CPUDeviceContext, int64_t>;
template class LoDTensor2BatchFunctor<platform::CPUDeviceContext, int64_t>;
template class Batch2LoDTensorFunctor<platform::CPUDeviceContext, int64_t>;
} // namespace math
} // namespace operators
} // namespace paddle
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <algorithm>
#include <vector>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/platform/device_context.h"
namespace paddle {
namespace operators {
namespace math {
template <typename T, int MajorType = Eigen::RowMajor,
typename IndexType = Eigen::DenseIndex>
using EigenMatrix = framework::EigenMatrix<T, MajorType, IndexType>;
template <typename DeviceContext, typename T>
class CopyMatrixRowsFunctor {
public:
// If is_src_index is true,
// copy the indexed rows of input src to the output dst.
// If is_src_index is false,
// copy the input src to the indexed rows of output dst.
// The indexed rows are based on the input index.
void operator()(const DeviceContext& context, const framework::Tensor& src,
framework::Vector<size_t> index_lod, framework::Tensor* dst,
bool is_src_index);
};
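// A plain-buffer sketch of the two copy directions described above; the
// helper is hypothetical and is not used by the functor. With
// is_src_index == true rows are gathered from src by index, otherwise rows
// are scattered into dst by index.
template <typename T>
inline void copy_matrix_rows_sketch(const T* src, const size_t* index,
                                    int height, int width, bool is_src_index,
                                    T* dst) {
  for (int i = 0; i < height; ++i) {
    const T* from = src + (is_src_index ? index[i] : i) * width;
    T* to = dst + (is_src_index ? i : index[i]) * width;
    for (int j = 0; j < width; ++j) {
      to[j] = from[j];
    }
  }
}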
template <typename DeviceContext, typename T>
class LoDTensor2BatchFunctor {
// Calculate the length of each sequence and
// sort sequence index by the length.
// example: sequences = {s0, s1, s2}
// s0: 0 0 0 0, s1: 1 1 1 1 1, s2: 2 2 2
// seq_info[3] = {(4, 5, 1), (0, 4, 0), (9, 3, 2)}
//
struct SeqInfo {
SeqInfo(size_t start, size_t length, size_t seq_idx)
: start(start), length(length), seq_idx(seq_idx) {}
size_t start;
size_t length;
size_t seq_idx;
};
public:
void operator()(const DeviceContext& context,
const framework::LoDTensor& lod_tensor,
framework::LoDTensor* batch, bool is_cal_batch_lod,
bool is_reverse = false) const {
if (!is_cal_batch_lod) {
auto lods = batch->lod();
PADDLE_ENFORCE_GT(lods.size(), 2UL,
"The LoD of LoDTensor should inlcude at least 2-level "
"sequence information.");
PADDLE_ENFORCE_EQ(
lods[1].size(), static_cast<size_t>(lod_tensor.dims()[0]),
"The LoD information should be consistent with the dims.");
CopyMatrixRowsFunctor<DeviceContext, T> to_batch;
to_batch(context, lod_tensor, lods[1], batch, true);
return;
}
auto lods = lod_tensor.lod();
PADDLE_ENFORCE_EQ(lods.size(), 1UL, "Only support one level sequence now.");
const auto& lod = lods[0];
std::vector<SeqInfo> seq_info;
for (size_t seq_id = 0; seq_id < lod.size() - 1; ++seq_id) {
size_t length = lod[seq_id + 1] - lod[seq_id];
seq_info.emplace_back(lod[seq_id], length, seq_id);
}
std::sort(seq_info.begin(), seq_info.end(),
[](SeqInfo a, SeqInfo b) {
return a.length > b.length;
});
// Calculate the start position of each batch.
// example: sequences = {s0, s1, s2}
// s0: 0 0 0 0, s1: 1 1 1 1 1, s2: 2 2 2
// max_seqlen = 5,
// batchIndex = {b0, b1, b2, b3, b4}
// b0: 1 0 2, b1: 1 0 2, b2: 1 0 2, b3: 1 0, b4: 1
// batch_start_positions[6] = {0, 3, 6, 9, 11, 12}
// batch_start_positions[0] = len(b0)
// batch_start_positions[1] = len(b0) + len(b1)
// batch_start_positions[2] = len(b0) + len(b1) + len(b2)
// ...
// seq2batch_idx[12] = {4, 0, 9,
// 5, 1, 10,
// 6, 2, 11,
// 7, 3,
// 8}
// seq_order = {1, 0, 2}, the sort order.
// where 1 is the second sequence,
// 0 is the first sequence,
// 2 is the third sequence.
// The max_seqlen represents batch size after rearranging the
// input LodTensor. It is also the maximum length of input sequence.
paddle::framework::LoD batch_lods;
batch_lods.emplace_back(std::vector<size_t> {0});
batch_lods.emplace_back(std::vector<size_t> {0});
batch_lods.emplace_back(std::vector<size_t> {0});
// batch_lods[0] is the start positions for batch LoDTensor
size_t max_seqlen = seq_info[0].length;
batch_lods[0].resize(max_seqlen + 1);
// batch_lods[1] is the raw index in the input LoDTensor
batch_lods[1].resize(static_cast<size_t>(lod_tensor.dims()[0]));
// batch_lods[2] is the sort order for the input LoDTensor.
batch_lods[2].resize(seq_info.size());
size_t* batch_starts = batch_lods[0].data();
size_t* seq2batch_idx = batch_lods[1].data();
batch_starts[0] = 0;
for (size_t n = 0; n < max_seqlen; n++) {
size_t batch_id = batch_starts[n];
for (size_t i = 0; i < seq_info.size(); ++i) {
size_t seq_len = seq_info[i].length;
size_t start = seq_info[i].start;
if (n < seq_len) {
seq2batch_idx[batch_id] =
is_reverse ? start + seq_len - 1 - n : start + n;
batch_id++;
} else {
break;
}
}
batch_starts[n + 1] = batch_id;
}
size_t* seq_order = batch_lods[2].data();
for (size_t i = 0; i < seq_info.size(); ++i) {
seq_order[i] = seq_info[i].seq_idx;
}
batch->set_lod(batch_lods);
CopyMatrixRowsFunctor<DeviceContext, T> to_batch;
to_batch(context, lod_tensor, batch_lods[1], batch, true);
}
};
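// A standalone sketch of the index calculation above; the helper is
// hypothetical and is not used by the functor. Given sequence starts and
// lengths already sorted by decreasing length (as done after std::sort), it
// reproduces batch_starts and seq2batch_idx for the non-reversed case. For
// the example in the comment (starts {4, 0, 9}, lengths {5, 4, 3}) it yields
// batch_starts = {0, 3, 6, 9, 11, 12} and
// seq2batch_idx = {4, 0, 9, 5, 1, 10, 6, 2, 11, 7, 3, 8}.
inline void seq_to_batch_index_sketch(const std::vector<size_t>& starts,
                                      const std::vector<size_t>& lengths,
                                      std::vector<size_t>* batch_starts,
                                      std::vector<size_t>* seq2batch_idx) {
  size_t max_seqlen = lengths.empty() ? 0 : lengths[0];
  size_t total = 0;
  for (size_t len : lengths) {
    total += len;
  }
  batch_starts->assign(max_seqlen + 1, 0);
  seq2batch_idx->assign(total, 0);
  for (size_t n = 0; n < max_seqlen; ++n) {
    size_t batch_id = (*batch_starts)[n];
    for (size_t i = 0; i < lengths.size(); ++i) {
      if (n >= lengths[i]) {
        break;  // remaining sequences are shorter, stop this time step
      }
      (*seq2batch_idx)[batch_id++] = starts[i] + n;
    }
    (*batch_starts)[n + 1] = batch_id;
  }
}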
template <typename DeviceContext, typename T>
class Batch2LoDTensorFunctor {
public:
void operator()(const DeviceContext& context,
const framework::LoDTensor& batch,
framework::LoDTensor* lod_tensor) const {
auto in_lod = batch.lod();
PADDLE_ENFORCE_GT(in_lod.size(), 2UL,
"The LoD of LoDTensor should inlcude at least 2-level "
"sequence information.");
PADDLE_ENFORCE_EQ(
in_lod[1].size(), static_cast<size_t>(lod_tensor->dims()[0]),
"The LoD information should be consistent with the dims.");
CopyMatrixRowsFunctor<DeviceContext, T> to_seq;
to_seq(context, batch, in_lod[1], lod_tensor, false);
}
};
} // namespace math
} // namespace operators
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "./vol2col.h"
#include <vector>
namespace paddle {
namespace operators {
namespace math {
/*
* vol = [input_channels, input_depth, input_height, input_width]
* col =
* [input_channels, filter_depth, filter_height, filter_width,
* output_depth, output_height, output_width]
*/
template <class T>
class Vol2ColFunctor<platform::CPUDeviceContext, T> {
public:
void operator()(const platform::CPUDeviceContext& context,
const framework::Tensor& vol,
const std::vector<int>& dilations,
const std::vector<int>& strides,
const std::vector<int>& paddings, framework::Tensor* col,
const DataLayout data_layout) const {
PADDLE_ENFORCE_EQ(vol.dims().size(), 4,
"The dimension of vol should be 4.");
PADDLE_ENFORCE_EQ(col->dims().size(), 7,
"The dimension of col should be 7.");
int input_channels =
(data_layout != DataLayout::kNHWC ? vol.dims()[0] : vol.dims()[3]);
int input_depth =
(data_layout != DataLayout::kNHWC ? vol.dims()[1] : vol.dims()[0]);
int input_height =
(data_layout != DataLayout::kNHWC ? vol.dims()[2] : vol.dims()[1]);
int input_width =
(data_layout != DataLayout::kNHWC ? vol.dims()[3] : vol.dims()[2]);
int filter_depth = col->dims()[1];
int filter_height = col->dims()[2];
int filter_width = col->dims()[3];
int output_depth = col->dims()[4];
int output_height = col->dims()[5];
int output_width = col->dims()[6];
int channels_col =
input_channels * filter_depth * filter_height * filter_width;
    // paddings may contain either 6 values [front, back, up, down, left,
    // right] or 3 values [depth, height, width].
bool paddings_size_is_6 = (paddings.size() == 6);
int pad_d_forth = paddings_size_is_6 ? paddings[0] : paddings[0];
int pad_d_back = paddings_size_is_6 ? paddings[1] : paddings[0];
int pad_h_up = paddings_size_is_6 ? paddings[2] : paddings[1];
int pad_h_down = paddings_size_is_6 ? paddings[3] : paddings[1];
int pad_w_left = paddings_size_is_6 ? paddings[4] : paddings[2];
int pad_w_right = paddings_size_is_6 ? paddings[5] : paddings[2];
PADDLE_ENFORCE_EQ((input_depth + pad_d_forth + pad_d_back -
((dilations[0] * (filter_depth - 1) + 1))) /
strides[0] +
1,
output_depth,
"input_depth and output_depth are "
"mismatching.");
PADDLE_ENFORCE_EQ((input_height + pad_h_up + pad_h_down -
((dilations[1] * (filter_height - 1) + 1))) /
strides[1] +
1,
output_height,
"input_height and output_height are "
"mismatching.");
PADDLE_ENFORCE_EQ((input_width + pad_w_left + pad_w_right -
((dilations[2] * (filter_width - 1) + 1))) /
strides[2] +
1,
output_width,
"input_width and output_width are "
"mismatching.");
const T* vol_data = vol.data<T>();
T* col_data = col->data<T>();
for (int c = 0; c < channels_col; ++c) {
int w_offset = c % filter_width;
int h_offset = (c / filter_width) % filter_height;
int d_offset = (c / filter_width / filter_height) % filter_depth;
int c_in = c / filter_width / filter_height / filter_depth;
for (int d = 0; d < output_depth; ++d) {
int d_pad = d * strides[0] - pad_d_forth + d_offset * dilations[0];
for (int h = 0; h < output_height; ++h) {
int h_pad = h * strides[1] - pad_h_up + h_offset * dilations[1];
for (int w = 0; w < output_width; ++w) {
int w_pad = w * strides[2] - pad_w_left + w_offset * dilations[2];
int col_idx =
((c * output_depth + d) * output_height + h) * output_width + w;
int vol_idx;
if (data_layout != DataLayout::kNHWC) {
vol_idx = ((c_in * input_depth + d_pad) * input_height + h_pad) *
input_width +
w_pad;
} else {
vol_idx = ((d_pad * input_height + h_pad) * input_width + w_pad) *
input_channels +
c_in;
}
col_data[col_idx] =
(h_pad < 0 || h_pad >= input_height || w_pad < 0 ||
w_pad >= input_width || d_pad < 0 || d_pad >= input_depth)
? static_cast<T>(0)
: vol_data[vol_idx];
}
}
}
}
}
};
/*
* vol = [input_channels,input_depth, input_height, input_width]
* col =
* [input_channels, filter_depth, filter_height, filter_width,
* output_depth, output_height, output_width]
*/
template <class T>
class Col2VolFunctor<platform::CPUDeviceContext, T> {
public:
void operator()(const platform::CPUDeviceContext& context,
const framework::Tensor& col,
const std::vector<int>& dilations,
const std::vector<int>& strides,
const std::vector<int>& paddings, framework::Tensor* vol,
const DataLayout data_layout) const {
PADDLE_ENFORCE_EQ(vol->dims().size(), 4,
"The dimension of vol should be 4.");
PADDLE_ENFORCE_EQ(col.dims().size(), 7,
"The dimension of col should be 7.");
int input_channels =
(data_layout != DataLayout::kNHWC ? vol->dims()[0] : vol->dims()[3]);
int input_depth =
(data_layout != DataLayout::kNHWC ? vol->dims()[1] : vol->dims()[0]);
int input_height =
(data_layout != DataLayout::kNHWC ? vol->dims()[2] : vol->dims()[1]);
int input_width =
(data_layout != DataLayout::kNHWC ? vol->dims()[3] : vol->dims()[2]);
int filter_depth = col.dims()[1];
int filter_height = col.dims()[2];
int filter_width = col.dims()[3];
int output_depth = col.dims()[4];
int output_height = col.dims()[5];
int output_width = col.dims()[6];
int channels_col =
input_channels * filter_depth * filter_height * filter_width;
bool paddings_size_is_6 = (paddings.size() == 6);
int pad_d_forth = paddings_size_is_6 ? paddings[0] : paddings[0];
int pad_d_back = paddings_size_is_6 ? paddings[1] : paddings[0];
int pad_h_up = paddings_size_is_6 ? paddings[2] : paddings[1];
int pad_h_down = paddings_size_is_6 ? paddings[3] : paddings[1];
int pad_w_left = paddings_size_is_6 ? paddings[4] : paddings[2];
int pad_w_right = paddings_size_is_6 ? paddings[5] : paddings[2];
PADDLE_ENFORCE_EQ((input_depth + pad_d_forth + pad_d_back -
((dilations[0] * (filter_depth - 1) + 1))) /
strides[0] +
1,
output_depth,
"input_depth and output_depth are "
"mismatching.");
PADDLE_ENFORCE_EQ((input_height + pad_h_up + pad_h_down -
((dilations[1] * (filter_height - 1) + 1))) /
strides[1] +
1,
output_height,
"input_height and output_height are "
"mismatching.");
PADDLE_ENFORCE_EQ((input_width + pad_w_left + pad_w_right -
((dilations[2] * (filter_width - 1) + 1))) /
strides[2] +
1,
output_width,
"input_width and output_width are "
"mismatching.");
T* vol_data = vol->data<T>();
const T* col_data = col.data<T>();
for (int c = 0; c < channels_col; ++c) {
int w_offset = c % filter_width;
int h_offset = (c / filter_width) % filter_height;
int d_offset = (c / filter_width / filter_height) % filter_depth;
int cIm = c / filter_width / filter_height / filter_depth;
for (int d = 0; d < output_depth; ++d) {
int d_pad = d * strides[0] - pad_d_forth + d_offset * dilations[0];
for (int h = 0; h < output_height; ++h) {
int h_pad = h * strides[1] - pad_h_up + h_offset * dilations[1];
for (int w = 0; w < output_width; ++w) {
int w_pad = w * strides[2] - pad_w_left + w_offset * dilations[2];
if (h_pad >= 0 && h_pad < input_height && w_pad >= 0 &&
w_pad < input_width && d_pad >= 0 && d_pad < input_depth) {
int vol_idx;
if (data_layout != DataLayout::kNHWC) {
vol_idx = ((cIm * input_depth + d_pad) * input_height + h_pad) *
input_width +
w_pad;
} else {
vol_idx =
((d_pad * input_height + h_pad) * input_width + w_pad) *
input_channels +
cIm;
}
int col_idx =
((c * output_depth + d) * output_height + h) * output_width +
w;
vol_data[vol_idx] += col_data[col_idx];
}
}
}
}
}
}
};
template class Vol2ColFunctor<platform::CPUDeviceContext, int64_t>;
template class Col2VolFunctor<platform::CPUDeviceContext, int64_t>;
} // namespace math
} // namespace operators
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <vector>
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/platform/device_context.h"
namespace paddle {
namespace operators {
namespace math {
using DataLayout = framework::DataLayout;
/*
 * \brief Converts four-dimensional feature data (CDHW) into seven-dimensional
 * colData in the Vol2ColFunctor calculation;
 * the Col2VolFunctor calculation reverses the transformation.
*
* \param volData Vol data.
* \param volShape The shape of volData,
* [input_channels, input_depth, input_height, input_width].
* \param colData Column data.
* \param colShape The shape of colData.
*
* \param dilations dilation data.
* \param 3-dimension [dilation_depth, dilation_height, dilation_width].
*
* \param strides stride data.
* \param 3-dimension [stride_depth, stride_height, stride_width].
*
* \param paddings padding data.
* \param 3-dimension [d_pad, h_pad, w_pad].
*
* The shape of colData is:
* [input_channels, filter_depth, filter_height, filter_width, output_depth,
* output_height, output_width]
* So, it is easy to reshape into a convolution matrix for convolution
* calculation based on matrix multiplication.
* The shape of convolution matrix is [height, width], where the height is equal
* input_channels * filter_depth * filter_height * filter_width, and the width
* is equal output_depth * output_height * output_width.
*
* Reshape:
* shape of colData shape of convolution matrix
* [input_channels,
* filter_depth,
* filter_height,
* filter_width, ======> [height, width]
* output_depth,
* output_height,
* output_width]
*
* \note The caller needs to ensure that volShape.inputChannels is equal to
* colShape.inputChannels.
*/
template <typename DeviceContext, typename T>
class Vol2ColFunctor {
public:
void operator()(const DeviceContext& context, const framework::Tensor& vol,
const std::vector<int>& dilations,
const std::vector<int>& strides,
const std::vector<int>& paddings, framework::Tensor* col,
const DataLayout data_layout = DataLayout::kNCHW) const;
};
template <typename DeviceContext, typename T>
class Col2VolFunctor {
public:
void operator()(const DeviceContext& context, const framework::Tensor& col,
const std::vector<int>& dilations,
const std::vector<int>& strides,
const std::vector<int>& paddings, framework::Tensor* vol,
const DataLayout data_layout = DataLayout::kNCHW) const;
};
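// A minimal usage sketch, assuming a CDHW volume tensor and a pre-allocated
// col tensor in the seven-dimensional layout described above; the function
// name and concrete shapes are illustrative only. The int64_t instantiation
// is the one provided by vol2col.cc.
inline void vol2col_usage_sketch(const platform::CPUDeviceContext& ctx,
                                 const framework::Tensor& vol,
                                 framework::Tensor* col) {
  // Unit stride and dilation, no padding: [d_pad, h_pad, w_pad].
  std::vector<int> dilations{1, 1, 1};
  std::vector<int> strides{1, 1, 1};
  std::vector<int> paddings{0, 0, 0};
  Vol2ColFunctor<platform::CPUDeviceContext, int64_t> vol2col;
  vol2col(ctx, vol, dilations, strides, paddings, col, DataLayout::kNCHW);
  // Col2VolFunctor reverses the transform and accumulates overlapping
  // windows back into the volume.
}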
} // namespace math
} // namespace operators
} // namespace paddle
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "mpc_adam_op.h"
#include "paddle/fluid/framework/op_registry.h"
#include <string>
namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
class MpcAdamOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override;
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override;
framework::OpKernelType GetKernelTypeForVar(
const std::string& var_name, const framework::Tensor& tensor,
const framework::OpKernelType& expected_kernel_type) const override;
};
void MpcAdamOp::InferShape(framework::InferShapeContext *ctx) const {
PADDLE_ENFORCE_EQ(
ctx->HasInput("Param"), true,
platform::errors::NotFound("Input(Param) of AdamOp should not be null."));
PADDLE_ENFORCE_EQ(
ctx->HasInput("Grad"), true,
platform::errors::NotFound("Input(Grad) of AdamOp should not be null."));
PADDLE_ENFORCE_EQ(ctx->HasInput("Moment1"), true,
platform::errors::NotFound(
"Input(Moment1) of AdamOp should not be null."));
PADDLE_ENFORCE_EQ(ctx->HasInput("Moment2"), true,
platform::errors::NotFound(
"Input(Moment2) of AdamOp should not be null."));
PADDLE_ENFORCE_EQ(ctx->HasInput("LearningRate"), true,
platform::errors::NotFound(
"Input(LearningRate) of AdamOp should not be null."));
PADDLE_ENFORCE_EQ(ctx->HasInput("Beta1Pow"), true,
platform::errors::NotFound(
"Input(Beta1Pow) of AdamOp should not be null."));
PADDLE_ENFORCE_EQ(ctx->HasInput("Beta2Pow"), true,
platform::errors::NotFound(
"Input(Beta2Pow) of AdamOp should not be null."));
PADDLE_ENFORCE_EQ(ctx->HasOutput("ParamOut"), true,
platform::errors::NotFound(
"Output(ParamOut) of AdamOp should not be null."));
PADDLE_ENFORCE_EQ(ctx->HasOutput("Moment1Out"), true,
platform::errors::NotFound(
"Output(Moment1Out) of AdamOp should not be null."));
PADDLE_ENFORCE_EQ(ctx->HasOutput("Moment2Out"), true,
platform::errors::NotFound(
"Output(Moment2Out) of AdamOp should not be null."));
auto lr_dims = ctx->GetInputDim("LearningRate");
PADDLE_ENFORCE_NE(
framework::product(lr_dims), 0,
platform::errors::InvalidArgument(
"The number of LearningRate shall not be 0, but received %d. Maybe "
"the Input variable LearningRate has not "
"been initialized. You may need to confirm "
"if you put exe.run(startup_program) "
"after optimizer.minimize function.",
framework::product(lr_dims)));
PADDLE_ENFORCE_EQ(
framework::product(lr_dims), 1,
platform::errors::InvalidArgument(
"Learning rate should have 1 dimension, but received %d",
framework::product(lr_dims)));
auto beta1_pow_dims = ctx->GetInputDim("Beta1Pow");
VLOG(3) << "dims of Beta1Pow : [" << beta1_pow_dims << "]";
PADDLE_ENFORCE_GE(framework::product(beta1_pow_dims), 1,
platform::errors::InvalidArgument(
"The size of Beta1 power accumulator should be greater "
"than 0, but received %d.",
framework::product(beta1_pow_dims)));
auto beta2_pow_dims = ctx->GetInputDim("Beta2Pow");
VLOG(3) << "dims of Beta2Pow : [" << beta2_pow_dims << "]";
PADDLE_ENFORCE_GE(framework::product(beta2_pow_dims), 1,
platform::errors::InvalidArgument(
"The size of Beta2 power accumulator should be greater "
"than 0, but received %d.",
framework::product(beta2_pow_dims)));
auto param_dims = ctx->GetInputDim("Param");
if (ctx->GetInputsVarType("Grad")[0] ==
framework::proto::VarType::LOD_TENSOR) {
PADDLE_ENFORCE_EQ(
param_dims, ctx->GetInputDim("Grad"),
platform::errors::InvalidArgument(
"Param and Grad input of AdamOp should have same dimension. But "
"received Param dims: [%s], Grad dims: [%s].",
param_dims, ctx->GetInputDim("Grad")));
}
PADDLE_ENFORCE_EQ(
param_dims, ctx->GetInputDim("Moment1"),
platform::errors::InvalidArgument(
"Param and Moment1 input of AdamOp should have same dimension. But "
"received Param dims: [%s], Moment1 dims: [%s].",
param_dims, ctx->GetInputDim("Moment1")));
PADDLE_ENFORCE_EQ(
param_dims, ctx->GetInputDim("Moment2"),
platform::errors::InvalidArgument(
"Param and Moment2 input of AdamOp should have same dimension. But "
"received Param dims: [%s], Moment2 dims: [%s].",
param_dims, ctx->GetInputDim("Moment2")));
ctx->SetOutputDim("ParamOut", param_dims);
ctx->SetOutputDim("Moment1Out", param_dims);
ctx->SetOutputDim("Moment2Out", param_dims);
ctx->SetOutputDim("Beta1PowOut", beta1_pow_dims);
ctx->SetOutputDim("Beta2PowOut", beta2_pow_dims);
}
framework::OpKernelType MpcAdamOp::GetExpectedKernelType(
const framework::ExecutionContext &ctx) const {
auto input_data_type = OperatorWithKernel::IndicateVarDataType(ctx, "Param");
return framework::OpKernelType(input_data_type, ctx.GetPlace());
}
framework::OpKernelType MpcAdamOp::GetKernelTypeForVar(
const std::string &var_name, const framework::Tensor &tensor,
const framework::OpKernelType &expected_kernel_type) const {
if (var_name == "Beta1Pow" || var_name == "Beta2Pow") {
return expected_kernel_type;
} else {
return framework::OpKernelType(expected_kernel_type.data_type_,
tensor.place(), tensor.layout());
}
}
class MpcAdamOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
AddInput("Param", "(Tensor) Input parameter");
AddInput("Grad", "(Tensor) Input gradient");
AddInput("LearningRate", "(Tensor) Learning rate");
AddInput("Moment1", "(Tensor) Input first moment");
AddInput("Moment2", "(Tensor) Input second moment");
AddInput("Beta1Pow", "(Tensor) Input beta1 power accumulator");
AddInput("Beta2Pow", "(Tensor) Input beta2 power accumulator");
AddInput("Beta1Tensor",
"(Tensor<float32>, optional) If provided, Adam will use this "
"as beta1, this has a higher priority than attr(beta1), the "
"shape of this tensor MUST BE [1].")
.AsDispensable();
AddInput("Beta2Tensor",
"(Tensor<float32>, optional) If provided, Adam will use this "
"as beta2, this has a higher priority than attr(beta2), the "
"shape of this tensor MUST BE [1].")
.AsDispensable();
AddOutput("ParamOut", "(Tensor) Output parameter");
AddOutput("Moment1Out", "(Tensor) Output first moment");
AddOutput("Moment2Out", "(Tensor) Output second moment");
AddOutput("Beta1PowOut", "(Tensor) Output beta1 power accumulator");
AddOutput("Beta2PowOut", "(Tensor) Output beta2 power accumulator");
AddAttr<float>("beta1",
"(float, default 0.9) "
"Exponential decay rate for the "
"first moment estimates.")
.SetDefault(0.9f);
AddAttr<float>("beta2",
"(float, default 0.999) "
"exponential decay rate for the "
"second moment estimates.")
.SetDefault(0.999f);
AddAttr<float>("epsilon",
"(float, default 1.0e-4) "
"Constant for numerical stability")
.SetDefault(1.0e-4f);
AddComment(R"DOC(
Adam Optimizer.
This implements the Adam optimizer from Section 2 of the Adam
paper : https://arxiv.org/abs/1412.6980.
Adam is a first-order gradient-based optimization method based on
adaptive estimates of lower-order moments.
Adam updates:
$$
moment\_1\_out = \beta_1 * moment\_1 + (1 - \beta_1) * grad \\
moment\_2\_out = \beta_2 * moment\_2 + (1 - \beta_2) * grad * grad \\
learning\_rate = learning\_rate *
\frac{\sqrt{1 - \beta_{2\_pow}}}{1 - \beta_{1\_pow}} \\
param\_out = param - learning\_rate * \frac{moment\_1}{\sqrt{moment\_2} + \epsilon}
$$
)DOC");
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OPERATOR(
mpc_adam, ops::MpcAdamOp, ops::MpcAdamOpMaker);
REGISTER_OP_CPU_KERNEL(
mpc_adam,
ops::MpcAdamOpKernel<paddle::platform::CPUDeviceContext, int64_t, float>);
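// A plain floating-point reference of one Adam step, following the update
// equations in the op comment above; it is illustrative only and is not used
// by the operator. The MPC kernel in mpc_adam_op.h performs the same update
// on secret shares and folds epsilon into the inverse square root instead of
// adding it afterwards.
inline void adam_step_reference_sketch(float grad, float lr, float beta1,
                                       float beta2, float epsilon,
                                       float beta1_pow, float beta2_pow,
                                       float* param, float* moment1,
                                       float* moment2) {
  *moment1 = beta1 * (*moment1) + (1.0f - beta1) * grad;
  *moment2 = beta2 * (*moment2) + (1.0f - beta2) * grad * grad;
  // Bias-corrected learning rate, as in the op comment.
  float lr_t = lr * sqrt(1.0f - beta2_pow) / (1.0f - beta1_pow);
  *param = *param - lr_t * (*moment1) / (sqrt(*moment2) + epsilon);
}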
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "mpc_op.h"
#include <math.h>
#include "./math/math_function.h"
#include "core/paddlefl_mpc/mpc_protocol/aby3_operators.h"
namespace paddle {
namespace operators {
static inline float GetAttrFromTensor(const framework::Tensor* tensor) {
  // The tensor is expected to reside on CPU, so its first element can be read
  // directly.
  const float* tensor_data = tensor->data<float>();
  return tensor_data[0];
}
template <typename DeviceContext, typename T, typename T1>
class MpcAdamOpKernel : public MpcOpKernel<T> {
public:
void ComputeImpl(const framework::ExecutionContext &ctx) const override{
const auto* param_var = ctx.InputVar("Param");
PADDLE_ENFORCE(param_var->IsType<framework::LoDTensor>(),
"The Var(%s)'s type should be LoDTensor, "
"but the received is %s",
ctx.InputNames("Param").front(),
framework::ToTypeName(param_var->Type()));
using paddle::framework::LoDTensor;
T1 epsilon = static_cast<T1>(ctx.Attr<float>("epsilon"));
auto* param = ctx.Input<LoDTensor>("Param");
auto* grad_var = ctx.InputVar("Grad");
auto* mom1 = ctx.Input<LoDTensor>("Moment1");
auto* mom2 = ctx.Input<LoDTensor>("Moment2");
auto* lr = ctx.Input<LoDTensor>("LearningRate");
auto* beta1_pow = ctx.Input<LoDTensor>("Beta1Pow");
auto* beta2_pow = ctx.Input<LoDTensor>("Beta2Pow");
auto* param_out = ctx.Output<LoDTensor>("ParamOut");
auto* mom1_out = ctx.Output<LoDTensor>("Moment1Out");
auto* mom2_out = ctx.Output<LoDTensor>("Moment2Out");
auto* beta1_pow_out = ctx.Output<LoDTensor>("Beta1PowOut");
auto* beta2_pow_out = ctx.Output<LoDTensor>("Beta2PowOut");
T1 beta1 = static_cast<T1>(ctx.Attr<float>("beta1"));
if (ctx.HasInput("Beta1Tensor")) {
auto* beta1_tensor = ctx.Input<framework::Tensor>("Beta1Tensor");
PADDLE_ENFORCE_EQ(beta1_tensor->numel(), 1,
platform::errors::InvalidArgument(
"Input(Beta1Tensor) size must be 1, but get %d",
beta1_tensor->numel()));
beta1 = static_cast<T1>(GetAttrFromTensor(beta1_tensor));
}
T1 beta2 = static_cast<T1>(ctx.Attr<float>("beta2"));
if (ctx.HasInput("Beta2Tensor")) {
auto* beta2_tensor = ctx.Input<framework::Tensor>("Beta2Tensor");
PADDLE_ENFORCE_EQ(beta2_tensor->numel(), 1,
platform::errors::InvalidArgument(
"Input(Beta2Tensor) size must be 1, but get %d",
beta2_tensor->numel()));
beta2 = static_cast<T1>(GetAttrFromTensor(beta2_tensor));
}
VLOG(3) << "beta1_pow.numel() : " << beta1_pow->numel()
<< "beta2_pow.numel() : " << beta2_pow->numel();
VLOG(3) << "param.numel(): " << param->numel();
PADDLE_ENFORCE_EQ(beta1_pow_out->numel(), 1,
platform::errors::InvalidArgument(
"beta1 pow output size should be 1, but received "
"value is:%d.",
beta1_pow_out->numel()));
PADDLE_ENFORCE_EQ(beta2_pow_out->numel(), 1,
platform::errors::InvalidArgument(
"beta2 pow output size should be 1, but received "
"value is:%d.",
beta2_pow_out->numel()));
if (grad_var->IsType<framework::LoDTensor>()) {
auto* grad = ctx.Input<LoDTensor>("Grad");
// AdamFunctor<T, CPUAdam> functor(
// beta1, beta2, epsilon, beta1_pow->data<T>(), beta2_pow->data<T>(),
// mom1->data<T>(), mom1_out->mutable_data<T>(ctx.GetPlace()),
// mom2->data<T>(), mom2_out->mutable_data<T>(ctx.GetPlace()),
// lr->data<T>(), grad->data<T>(), param->data<T>(),
// param_out->mutable_data<T>(ctx.GetPlace()));
// functor(param->numel());
T1 lr_value = *lr->template data<T1>();
T1 beta1_pow_ = *beta1_pow->template data<T1>();
T1 beta2_pow_ = *beta2_pow->template data<T1>();
double lr_ = lr_value * sqrt(1 - beta2_pow_) / (1 - beta1_pow_);
framework::Tensor temp;
temp.mutable_data<T>(param->dims(), ctx.GetPlace());
mpc::MpcInstance::mpc_instance()->mpc_protocol()->mpc_operators()->scale(grad, (1 - beta1), &temp);
mpc::MpcInstance::mpc_instance()->mpc_protocol()->mpc_operators()->scale(mom1, beta1, mom1_out);
mpc::MpcInstance::mpc_instance()->mpc_protocol()->mpc_operators()->add(mom1_out, &temp, mom1_out);
mpc::MpcInstance::mpc_instance()->mpc_protocol()->mpc_operators()->scale(grad, (1 - beta2), &temp);
mpc::MpcInstance::mpc_instance()->mpc_protocol()->mpc_operators()->mul(grad, &temp, &temp);
mpc::MpcInstance::mpc_instance()->mpc_protocol()->mpc_operators()->scale(mom2, beta2, mom2_out);
mpc::MpcInstance::mpc_instance()->mpc_protocol()->mpc_operators()->add(mom2_out, &temp, mom2_out);
// mpc::MpcInstance::mpc_instance()->mpc_protocol()->mpc_operators()->scale(grad, lr[0], &temp);
// mpc::MpcInstance::mpc_instance()->mpc_protocol()->mpc_operators()->sub(param, &temp, param_out);
math::SetConstant<DeviceContext, T> set_const;
auto& dev_ctx = ctx.template device_context<DeviceContext>();
set_const(
dev_ctx,
&temp,
T(epsilon * pow(2, mpc::ABY3_SCALING_FACTOR) / 3));
// temp = epsilon + mom2_out
mpc::MpcInstance::mpc_instance()->mpc_protocol()->mpc_operators()->add(mom2_out, &temp, &temp);
// temp = 1 / sqrt(epsilon + mom2_out)
mpc::MpcInstance::mpc_instance()->mpc_protocol()->mpc_operators()->inverse_square_root(&temp, &temp);
// temp = mom1_out / sqrt(epsilon + mom2_out)
mpc::MpcInstance::mpc_instance()->mpc_protocol()->mpc_operators()->mul(mom1_out, &temp, &temp);
// temp = lr * mom1_out / sqrt(epsilon + mom2_out)
mpc::MpcInstance::mpc_instance()->mpc_protocol()->mpc_operators()->scale(&temp, lr_, &temp);
mpc::MpcInstance::mpc_instance()->mpc_protocol()->mpc_operators()->sub(param, &temp, param_out);
beta1_pow_out->mutable_data<T1>(ctx.GetPlace())[0] =
beta1 * beta1_pow->template data<T1>()[0];
beta2_pow_out->mutable_data<T1>(ctx.GetPlace())[0] =
beta2 * beta2_pow->template data<T1>()[0];
} else {
PADDLE_THROW("Variable type not supported by adam_op");
}
}
};
} // namespace operators
} // namespace paddle
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/data_layout.h"
#include <memory>
#include <string>
#include <unordered_map>
#include "mpc_batch_norm_op.h"
#ifdef PADDLE_WITH_MKLDNN
#include "paddle/fluid/platform/mkldnn_helper.h"
#endif
namespace paddle {
namespace operators {
class MpcBatchNormOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override{
OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "BatchNorm");
OP_INOUT_CHECK(ctx->HasInput("Scale"), "Input", "Scale", "BatchNorm");
OP_INOUT_CHECK(ctx->HasInput("Bias"), "Input", "Bias", "BatchNorm");
OP_INOUT_CHECK(ctx->HasInput("Mean"), "Input", "Mean", "BatchNorm");
OP_INOUT_CHECK(ctx->HasInput("Variance"), "Input", "Variance", "BatchNorm");
OP_INOUT_CHECK(ctx->HasOutput("Y"), "Output", "Y", "BatchNorm");
bool is_test = ctx->Attrs().Get<bool>("is_test");
bool trainable_stats = ctx->Attrs().Get<bool>("trainable_statistics");
bool test_mode = is_test && (!trainable_stats);
if (!test_mode) {
OP_INOUT_CHECK(ctx->HasOutput("MeanOut"), "Output", "MeanOut", "BatchNorm");
OP_INOUT_CHECK(ctx->HasOutput("VarianceOut"), "Output", "VarianceOut",
"BatchNorm");
OP_INOUT_CHECK(ctx->HasOutput("SavedMean"), "Output", "SavedMean",
"BatchNorm");
OP_INOUT_CHECK(ctx->HasOutput("SavedVariance"), "Output", "SavedVariance",
"BatchNorm");
}
// make sure Mean/MeanOut and Variance/VarianceOut share memory in Python
PADDLE_ENFORCE_EQ(ctx->Inputs("Mean")[0], ctx->Outputs("MeanOut")[0],
platform::errors::InvalidArgument(
"Mean and MeanOut should share the same memory"));
PADDLE_ENFORCE_EQ(
ctx->Inputs("Variance")[0], ctx->Outputs("VarianceOut")[0],
platform::errors::InvalidArgument(
"Variance and VarianceOut should share the same memory"));
const auto x_dims = ctx->GetInputDim("X");
const DataLayout data_layout = framework::StringToDataLayout(
ctx->Attrs().Get<std::string>("data_layout"));
if (ctx->IsRuntime() && ctx->HasInput("MomentumTensor")) {
auto mom = ctx->Inputs("MomentumTensor");
PADDLE_ENFORCE_EQ(mom.size(), 1,
platform::errors::InvalidArgument(
"The input tensor MomentumTensor's size must be 1"
"But received: MomentumTensor's size is [%d]",
mom.size()));
}
PADDLE_ENFORCE_GE(
x_dims.size(), 3,
platform::errors::InvalidArgument(
"ShapeError: the dimension of input "
"X must greater than or equal to 3. But received: the shape of input "
"X = [%s], the dimension of input X =[%d]",
x_dims, x_dims.size()));
PADDLE_ENFORCE_LE(
x_dims.size(), 6,
platform::errors::InvalidArgument(
"ShapeError: the dimension of input X "
"must smaller than or equal to 6. But received: the shape of input X "
"= [%s], the dimension of input X = [%d]",
x_dims, x_dims.size()));
const int64_t C =
((this->IsMKLDNNType() == true) || (data_layout == DataLayout::kNCHW)
? x_dims[2]
: x_dims[x_dims.size() - 1]);
auto scale_dim = ctx->GetInputDim("Scale");
auto bias_dim = ctx->GetInputDim("Bias");
VLOG(3) << "*** scale_dims: " << scale_dim;
VLOG(3) << "*** bias_dims: " << bias_dim;
VLOG(3) << "*** mean_dims: " << ctx->GetInputDim("Mean");
VLOG(3) << "*** variance_dims: " << ctx->GetInputDim("Variance");
//VLOG(3) << "*** Y_dims: " << ctx->GetInputDim("Y");
PADDLE_ENFORCE_EQ(
scale_dim.size(), 2UL,
platform::errors::InvalidArgument(
"ShapeError: the dimension of scale must equal to 2."
"But received: the shape of scale is [%s], the dimension "
"of scale is [%d]",
scale_dim, scale_dim.size()));
PADDLE_ENFORCE_EQ(bias_dim.size(), 2UL,
platform::errors::InvalidArgument(
"ShapeError: the dimension of bias must equal to 2."
"But received: the shape of bias is [%s],the dimension "
"of bias is [%d]",
bias_dim, bias_dim.size()));
bool check = true;
if ((!ctx->IsRuntime()) && (framework::product(scale_dim) <= 0 ||
framework::product(bias_dim) <= 0)) {
check = false;
}
if (check) {
PADDLE_ENFORCE_EQ(scale_dim[1], C,
platform::errors::InvalidArgument(
"ShapeError: the shape of scale must equal to [%d]"
"But received: the shape of scale is [%d]",
C, scale_dim[1]));
PADDLE_ENFORCE_EQ(bias_dim[1], C,
platform::errors::InvalidArgument(
"ShapeError: the shape of bias must equal to [%d]"
"But received: the shape of bias is [%d]",
C, bias_dim[1]));
}
ctx->SetOutputDim("Y", x_dims);
ctx->SetOutputDim("MeanOut", {2, C}); // 2: share_num
ctx->SetOutputDim("VarianceOut", {2, C});
ctx->SetOutputDim("SavedMean", {2, C});
ctx->SetOutputDim("SavedVariance", {2, C});
ctx->ShareLoD("X", "Y");
}
protected:
framework::OpKernelType GetExpectedKernelType(const framework::ExecutionContext& ctx) const {
framework::LibraryType library_{framework::LibraryType::kPlain};
std::string data_format = "AnyLayout";
framework::DataLayout layout_ = framework::StringToDataLayout(data_format);
return framework::OpKernelType(
OperatorWithKernel::IndicateVarDataType(ctx, "X"), ctx.GetPlace(),
layout_, library_);
}
framework::OpKernelType GetKernelTypeForVar(
const std::string& var_name, const Tensor& tensor,
const framework::OpKernelType& expected_kernel_type) const {
return framework::OpKernelType(expected_kernel_type.data_type_,
tensor.place(), tensor.layout());
}
};
class MpcBatchNormGradOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override{
// check input
OP_INOUT_CHECK(ctx->HasInput("Scale"), "Input", "Scale", "BatchNormGrad");
OP_INOUT_CHECK(ctx->HasInput(framework::GradVarName("Y")), "Input",
framework::GradVarName("Y"), "BatchNormGrad");
OP_INOUT_CHECK(ctx->HasInput("SavedMean"), "Input", "SavedMean",
"BatchNormGrad");
OP_INOUT_CHECK(ctx->HasInput("SavedVariance"), "Input", "SavedVariance",
"BatchNormGrad");
// check output
OP_INOUT_CHECK(ctx->HasOutput(framework::GradVarName("X")), "Output",
framework::GradVarName("X"), "BatchNormGrad");
const bool has_scale_grad = ctx->HasOutput(framework::GradVarName("Scale"));
const bool has_bias_grad = ctx->HasOutput(framework::GradVarName("Bias"));
PADDLE_ENFORCE_EQ((has_scale_grad == has_bias_grad), true,
platform::errors::NotFound(
"Output(Scale@GRAD) and Output(Bias@GRAD) must be null "
"or not be null at same time. But now, "
"has Scale@Grad=[%d], has Bias@GRAD=[%d]",
has_scale_grad, has_bias_grad));
const bool use_global_stats = ctx->Attrs().Get<bool>("use_global_stats");
if (use_global_stats) {
PADDLE_ENFORCE_EQ(
!ctx->Attrs().Get<bool>("use_mkldnn"), true,
platform::errors::InvalidArgument(
"Using global stats during training is not supported "
"in gradient op kernel of batch_norm_mkldnn_op now."));
}
OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "BatchNormGrad");
const auto x_dims = ctx->GetInputDim("X");
const DataLayout data_layout = framework::StringToDataLayout(
ctx->Attrs().Get<std::string>("data_layout"));
const int C =
((this->IsMKLDNNType() == true) || (data_layout == DataLayout::kNCHW)
? x_dims[2]
: x_dims[x_dims.size() - 1]);
ctx->SetOutputDim(framework::GradVarName("X"), x_dims);
// has_scale_grad == has_bias_grad, judge has_scale_grad is enough
if (has_scale_grad) {
ctx->SetOutputDim(framework::GradVarName("Scale"), {2, C}); // 2: share_num
ctx->SetOutputDim(framework::GradVarName("Bias"), {2, C});
}
}
protected:
framework::OpKernelType GetExpectedKernelType(const framework::ExecutionContext& ctx) const {
framework::LibraryType library_{framework::LibraryType::kPlain};
std::string data_format = "AnyLayout";
framework::DataLayout layout_ = framework::StringToDataLayout(data_format);
auto input_data_type = OperatorWithKernel::IndicateVarDataType(ctx, "X");
return framework::OpKernelType(input_data_type, ctx.GetPlace(), layout_, library_);
}
framework::OpKernelType GetKernelTypeForVar(
const std::string& var_name, const Tensor& tensor,
const framework::OpKernelType& expected_kernel_type) const {
return framework::OpKernelType(expected_kernel_type.data_type_,
tensor.place(), tensor.layout());
}
};
class MpcBatchNormOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() {
AddAttr<bool>("is_test",
"(bool, default false) Set to true for inference only, false "
"for training. Some layers may run faster when this is true.")
.SetDefault(false);
AddAttr<float>("momentum", "").SetDefault(0.9);
AddAttr<float>("epsilon", "")
.SetDefault(1e-5)
.AddCustomChecker([](const float &epsilon) {
PADDLE_ENFORCE_GE(
epsilon, 0.0f,
platform::errors::InvalidArgument(
"'epsilon' should be greater or equal than 0.0."));
PADDLE_ENFORCE_LE(epsilon, 0.001f,
platform::errors::InvalidArgument(
"'epsilon' should be less or equal than 0.001."));
});
AddAttr<std::string>("data_layout", "").SetDefault("NCHW");
AddInput("X", "The input tensor");
AddInput("Scale",
"Scale is a 1-dimensional tensor of size C "
"that is applied to the output");
AddInput("Bias",
"Bias is a 1-dimensional tensor of size C "
"that is applied to the output");
AddInput("Mean",
"The global mean (for training) or "
"estimated mean (for testing)");
AddInput("Variance",
"The global variance (for training) "
"or estimated Variance (for testing)");
AddInput("MomentumTensor",
"(Tensor<float32>, optional) If provided, batch_norm will "
"use this as momentum, this has a higher priority than "
"attr(momentum), the shape of this tensor MUST BE [1].")
.AsDispensable();
AddOutput("Y", "result after normalization");
AddOutput("MeanOut",
"Share memory with Mean. "
"Store the global mean when training");
AddOutput("VarianceOut",
"Share memory with Variance. "
"Store the global Variance when training");
AddOutput("SavedMean",
"Mean of the current mini batch, "
"will apply to output when training")
.AsIntermediate();
AddOutput("SavedVariance",
"Variance of the current mini batch, "
"will apply to output when training")
.AsIntermediate();
AddOutput("ReserveSpace",
"Reserve GPU space for triggering the new semi-persistent "
"NHWC kernel")
.AsDispensable();
AddAttr<bool>("use_mkldnn",
"(bool, default false) Only used in mkldnn kernel")
.SetDefault(false);
AddAttr<bool>("fuse_with_relu",
"(bool, default false) Only used in mkldnn kernel")
.SetDefault(false);
AddAttr<bool>("use_global_stats",
"(bool, default false) Whether to use global mean and "
"variance. In inference or test mode, set use_global_stats "
"to true or is_test true. the behavior is equivalent. "
"In train mode, when setting use_global_stats True, the "
"global mean and variance are also used during train time, "
"the BN acts as scaling and shiffting.")
.SetDefault(false);
AddAttr<bool>("trainable_statistics",
"(bool, default false) Whether to calculate mean and variance "
"in test mode. If setting true in test mode, mean and variace "
"will be calculated by current batch statistics.")
.SetDefault(false);
AddComment(R"DOC(
Batch Normalization.
Batch Norm has been implemented as discussed in the paper:
https://arxiv.org/pdf/1502.03167.pdf
Can be used as a normalizer function for conv2d and fully_connected operations.
The required data format for this layer is one of the following:
1. NHWC `[batch, in_height, in_width, in_channels]`
2. NCHW `[batch, in_channels, in_height, in_width]`
)DOC");
}
};
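The DOC block above describes batch normalization only informally; the plaintext sketch below (our own illustration, not part of this operator, and with the leading share dimension of 2 dropped) spells out the per-channel transform for NCHW data.

// Plaintext batch-norm forward that the MPC kernel evaluates on shares.
#include <cmath>
#include <vector>

// x holds an [N, C, H*W] tensor flattened row-major; scale/bias/mean/var have size C.
void batch_norm_nchw(std::vector<float>& x, int N, int C, int HxW,
                     const std::vector<float>& scale, const std::vector<float>& bias,
                     const std::vector<float>& mean, const std::vector<float>& var,
                     float epsilon) {
    for (int n = 0; n < N; ++n) {
        for (int c = 0; c < C; ++c) {
            const float inv_std = 1.0f / std::sqrt(var[c] + epsilon);
            for (int i = 0; i < HxW; ++i) {
                float& v = x[(n * C + c) * HxW + i];
                v = scale[c] * (v - mean[c]) * inv_std + bias[c];  // y = scale * x_hat + bias
            }
        }
    }
}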
template <typename T>
class MpcBatchNormGradOpMaker : public framework::SingleGradOpMaker<T> {
public:
using framework::SingleGradOpMaker<T>::SingleGradOpMaker;
protected:
void Apply(GradOpPtr<T> op) const {
op->SetType(this->ForwardOpType() + "_grad");
op->SetInput("X", this->Input("X"));
op->SetInput(framework::GradVarName("Y"), this->OutputGrad("Y"));
op->SetInput("Scale", this->Input("Scale"));
op->SetInput("Bias", this->Input("Bias"));
op->SetInput("SavedMean", this->Output("SavedMean"));
op->SetInput("SavedVariance", this->Output("SavedVariance"));
if (this->HasOutput("ReserveSpace")) {
op->SetInput("ReserveSpace", this->Output("ReserveSpace"));
}
// used when setting use_global_stats True during training
if (boost::get<bool>(this->GetAttr("use_global_stats"))) {
op->SetInput("Mean", this->Output("MeanOut"));
op->SetInput("Variance", this->Output("VarianceOut"));
}
op->SetAttrMap(this->Attrs());
op->SetOutput(framework::GradVarName("X"), this->InputGrad("X"));
op->SetOutput(framework::GradVarName("Scale"), this->InputGrad("Scale"));
op->SetOutput(framework::GradVarName("Bias"), this->InputGrad("Bias"));
}
};
class MpcBatchNormOpInferVarType : public framework::PassInDtypeAndVarTypeToOutput {
protected:
std::unordered_map<std::string, std::string>& GetInputOutputWithSameType() const override {
static std::unordered_map<std::string, std::string> m{{"X", /*->*/ "Y"}};
return m;
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OPERATOR(
mpc_batch_norm, ops::MpcBatchNormOp, ops::MpcBatchNormOpMaker,
ops::MpcBatchNormOpInferVarType,
ops::MpcBatchNormGradOpMaker<paddle::framework::OpDesc>,
ops::MpcBatchNormGradOpMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(mpc_batch_norm_grad, ops::MpcBatchNormGradOp);
REGISTER_OP_CPU_KERNEL(
mpc_batch_norm, ops::MpcBatchNormKernel<paddle::platform::CPUDeviceContext, int64_t>);
REGISTER_OP_CPU_KERNEL(
mpc_batch_norm_grad, ops::MpcBatchNormGradKernel<paddle::platform::CPUDeviceContext, int64_t>);
@@ -69,6 +69,119 @@ private:
    int64_t n_;
};
template <typename T, typename DeviceContext>
class MidWiseTransformIterator;
template <typename T>
class MidWiseTransformIterator<T, platform::CPUDeviceContext>
: public std::iterator<std::random_access_iterator_tag, T, std::ptrdiff_t,
T *, T &> {
public:
MidWiseTransformIterator(const T *ptr, int n, int post)
: ptr_(ptr), i_(0), j_(0), n_(n), post_(post) {}
MidWiseTransformIterator<T, platform::CPUDeviceContext> &operator++() {
++j_;
if (UNLIKELY(j_ == post_)) {
++i_;
j_ = 0;
if (UNLIKELY(i_ == n_)) {
i_ = 0;
}
}
return *this;
}
MidWiseTransformIterator<T, platform::CPUDeviceContext> &operator+(int n) {
while (n-- > 0) {
++j_;
if (UNLIKELY(j_ == post_)) {
++i_;
j_ = 0;
if (UNLIKELY(i_ == n_)) {
i_ = 0;
}
}
}
return *this;
}
bool operator==(const MidWiseTransformIterator<T, platform::CPUDeviceContext>
&rhs) const {
return (ptr_ + i_) == &(*rhs);
}
bool operator!=(const MidWiseTransformIterator<T, platform::CPUDeviceContext>
&rhs) const {
return (ptr_ + i_) != &(*rhs);
}
const T &operator*() { return ptr_[i_]; }
private:
const T *ptr_;
int64_t i_;
int64_t j_;
int64_t n_;
int64_t post_;
};
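A quick way to read MidWiseTransformIterator: dereferencing yields each of the n broadcast values post times in a row, then wraps around. The standalone snippet below (ours, for illustration only) replays the operator++ logic and prints the expected visiting order.

// Expected visiting order for y = {10, 20}, n = 2, post = 3:
// 10 10 10 20 20 20 10 10 10
#include <iostream>

int main() {
    const int y[] = {10, 20};
    const int n = 2, post = 3;
    int i = 0, j = 0;            // same state as i_ and j_ in the iterator
    for (int step = 0; step < 9; ++step) {
        std::cout << y[i] << ' ';
        ++j;                     // advance exactly like operator++ above
        if (j == post) {
            j = 0;
            if (++i == n) i = 0;
        }
    }
    std::cout << '\n';
    return 0;
}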
template <typename Functor, typename T, typename DeviceContext,
typename OutType = T>
class TransformFunctor {
public:
TransformFunctor(const framework::Tensor *x, const framework::Tensor *y,
framework::Tensor *z, const DeviceContext &ctx, Functor func,
const bool is_xsize_larger = true)
: x_(x->data<T>()),
y_(y->data<T>()),
z_(z->mutable_data<OutType>(ctx.GetPlace())),
nx_(x->numel()),
ctx_(ctx),
func_(func),
is_xsize_larger_(is_xsize_larger) {
if (is_xsize_larger_ == false) {
nx_ = y->numel();
}
}
inline void Run() const {
platform::Transform<DeviceContext> trans;
trans(ctx_, x_, x_ + nx_, y_, z_, func_);
}
inline void RunRowWise(int n, int pre) const {
platform::Transform<DeviceContext> trans;
if (is_xsize_larger_) {
trans(ctx_, x_, x_ + nx_,
RowwiseTransformIterator<T, DeviceContext>(y_, n), z_, func_);
} else {
trans(ctx_, y_, y_ + nx_,
RowwiseTransformIterator<T, DeviceContext>(x_, n), z_, func_);
}
}
inline void RunMidWise(int n, int pre, int post) const {
platform::Transform<DeviceContext> trans;
if (is_xsize_larger_) {
trans(ctx_, x_, x_ + nx_,
MidWiseTransformIterator<T, DeviceContext>(y_, n, post), z_, func_);
} else {
trans(ctx_, y_, y_ + nx_,
MidWiseTransformIterator<T, DeviceContext>(x_, n, post), z_, func_);
}
}
private:
const T *x_;
const T *y_;
OutType *z_;
int64_t nx_;
const DeviceContext &ctx_;
Functor func_;
bool is_xsize_larger_;
};
template <typename T>
struct AddFunctor {
    inline HOSTDEVICE T operator()(T x, T y) { return x + y; }

@@ -114,38 +227,45 @@ public:
        if (in_x_t->dims() == in_y_t->dims()) {
            mpc::MpcInstance::mpc_instance()->mpc_protocol()->mpc_operators()->add(in_x_t, in_y_t, out_t);
        } else {
            Tensor in_x_t_slice;
            Tensor in_y_t_slice;
            Tensor out_t_slice;

            for (size_t i = 0; i < SHARE_NUM; ++i) {
                in_x_t_slice = in_x_t->Slice(i, i + 1);
                in_y_t_slice = in_y_t->Slice(i, i + 1);
                out_t_slice = out_t->Slice(i, i + 1);

                auto x_dims = in_x_t_slice.dims();
                auto y_dims = in_y_t_slice.dims();

                axis = (axis == -1 ? x_dims.size() - y_dims.size() : axis);

                PADDLE_ENFORCE(axis >= 0 && axis < x_dims.size(),
                               "Axis should be in range [0, x_dims)");

                int pre, n, post;
                GetMidDims get_mid_dims;
                get_mid_dims(x_dims, y_dims, axis, &pre, &n, &post);
-               PADDLE_ENFORCE_EQ(post, 1,
-                                 "post should be equal 1, but received post is [%s]", post);

                auto x_ = in_x_t_slice.data<T>();
                auto y_ = in_y_t_slice.data<T>();
                auto out_ = out_t_slice.data<T>();
                auto nx_ = in_x_t_slice.numel();

                paddle::platform::Transform<DeviceContext> trans;
-               trans(ctx.template device_context<DeviceContext>(), x_, x_ + nx_,
-                     RowwiseTransformIterator<T, DeviceContext>(y_, n),
-                     out_, AddFunctor<T>());
+               if (post == 1) {
+                   trans(ctx.template device_context<DeviceContext>(), x_, x_ + nx_,
+                         RowwiseTransformIterator<T, DeviceContext>(y_, n),
+                         out_, AddFunctor<T>());
+               } else {
+                   trans(ctx.template device_context<DeviceContext>(), x_, x_ + nx_,
+                         MidWiseTransformIterator<T, DeviceContext>(y_, n, post),
+                         out_, AddFunctor<T>());
+               }
            }
        }
    }
};
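The pre/n/post convention used above comes from Paddle's GetMidDims helper. The self-contained sketch below is a simplified re-implementation for illustration only (not the library routine); it shows the split for two concrete shapes and why post == 1 selects the row-wise path while post > 1 now takes the new mid-wise path.

// Simplified pre/n/post split for broadcasting y into x at a given axis.
#include <cassert>
#include <vector>

void get_mid_dims_demo(const std::vector<int>& x, const std::vector<int>& y, int axis,
                       int* pre, int* n, int* post) {
    *pre = 1; *n = 1; *post = 1;
    for (int i = 0; i < axis; ++i) *pre *= x[i];
    for (size_t i = 0; i < y.size(); ++i) *n *= y[i];
    for (size_t i = axis + y.size(); i < x.size(); ++i) *post *= x[i];
}

int main() {
    int pre, n, post;
    get_mid_dims_demo({3, 4, 5}, {4}, /*axis=*/1, &pre, &n, &post);
    assert(pre == 3 && n == 4 && post == 5);  // mid-wise case: y broadcast over a middle dim
    get_mid_dims_demo({3, 4}, {4}, /*axis=*/1, &pre, &n, &post);
    assert(pre == 3 && n == 4 && post == 1);  // row-wise case handled as before
    return 0;
}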
@@ -185,17 +305,15 @@ public:
                int pre, n, post;
                GetMidDims get_mid_dims;
                get_mid_dims(x_dims, y_dims, axis, &pre, &n, &post);
-               PADDLE_ENFORCE_EQ(post, 1,
-                                 "post should be equal 1, but received post is [%s]", post);
+               std::fill(dy_data, dy_data + dy->numel(), static_cast<T>(0));

                for (size_t i = 0; i < SHARE_NUM; ++i) {
                    int y_offset = i * n;
                    for (size_t j = 0; j < pre; ++j) {
                        for (size_t k = 0; k < n; ++k) {
-                           int out_offset = i * pre * n + j * n + k;
-                           if (0 == j) {
-                               dy_data[k + y_offset] = dout_data[out_offset];
-                           } else {
-                               dy_data[k + y_offset] += dout_data[out_offset];
-                           }
+                           for (size_t m = 0; m < post; ++m) {
+                               int out_offset = i * pre * n * post + j * n * post + k * post + m;
+                               dy_data[k + y_offset] += dout_data[out_offset];
+                           }
                        }
                    }
...
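The rewritten out_offset is just the row-major linearization of (share, j, k, m) over a [SHARE_NUM, pre, n, post] layout, so dy[k] now accumulates dout over both the pre and post dimensions instead of requiring post == 1. A tiny self-check (sizes chosen arbitrarily for illustration):

// Check the flattened offset against an explicit row-major [i][j][k][m] index.
#include <cassert>

int main() {
    const int share_num = 2, pre = 3, n = 4, post = 5;
    for (int i = 0; i < share_num; ++i)
        for (int j = 0; j < pre; ++j)
            for (int k = 0; k < n; ++k)
                for (int m = 0; m < post; ++m) {
                    int out_offset = i * pre * n * post + j * n * post + k * post + m;
                    int expected = ((i * pre + j) * n + k) * post + m;
                    assert(out_offset == expected);
                }
    return 0;
}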
@@ -150,6 +150,7 @@ public:
        if (dx) {
            dx->mutable_data<T>(ctx.GetPlace());
+           auto dx_dim = dx->dims();
            if (dx->dims().size() > 3) {
                dx->Resize({2, x_mat_width, x_mat_height});
            }
@@ -160,7 +161,6 @@ public:
            // dx = dout * y'. dx: M x K, dout : M x N, y : K x N
            mpc::MpcInstance::mpc_instance()->mpc_protocol()->mpc_operators()->matmul(
                &dout_matrix, &y_matrix_trans, dx);
-           auto dx_dim = dx->dims();
            if (dx_dim.size() > 3) {
                dx->Resize(dx_dim);
            }
@@ -168,6 +168,7 @@ public:
        if (dy) {
            dy->mutable_data<T>(ctx.GetPlace());
+           auto dy_dim = dy->dims();
            if (dy->dims().size() > 3) {
                dy->Resize({2, y_mat_width, y_mat_height});
            }
@@ -179,7 +180,6 @@ public:
            // dy = x' * dout. dy K x N, dout : M x N, x : M x K
            mpc::MpcInstance::mpc_instance()->mpc_protocol()->mpc_operators()->matmul(
                &x_matrix_trans, &dout_matrix, dy);
-           auto dy_dim = dy->dims();
            if (dy_dim.size() > 3) {
                dy->Resize(dy_dim);
            }
...
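The moved dx_dim / dy_dim lines fix an ordering bug: dims() read after the temporary 3-D Resize can no longer tell whether the gradient originally had more than three dimensions, so the original shape was never restored. The toy snippet below (a made-up stand-in type, not Paddle's Tensor) shows the save, reshape, restore pattern.

// Save the original dims before the temporary reshape, restore them afterwards.
#include <cassert>
#include <vector>

struct FakeTensor {
    std::vector<int> dims;
    void Resize(std::vector<int> d) { dims = std::move(d); }
};

int main() {
    FakeTensor dx;
    dx.dims = {2, 8, 3, 4};                     // more than 3 dims, as in the guarded branch
    auto dx_dim = dx.dims;                      // saved BEFORE the temporary reshape (the fix)
    if (dx.dims.size() > 3) dx.Resize({2, 8, 12});
    // ... matmul writes into the flattened view here ...
    if (dx_dim.size() > 3) dx.Resize(dx_dim);   // restore the original shape
    assert(dx.dims == (std::vector<int>{2, 8, 3, 4}));
    return 0;
}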