Merge pull request #105 from jhjiangcs/smc612

fix conflict bugs.

Merge pull request #105 from jhjiangcs/smc612
fix conflict bugs.
7eb82e3f · Qinghe JING · GitHub · 280ebac9 · 3d195298 · 7eb82e3f
9 changed files
--- a/core/paddlefl_mpc/data_utils/data_utils.cc
+++ b/core/paddlefl_mpc/data_utils/data_utils.cc
-<<<<<<< HEAD
 /* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.

 Licensed under the Apache License, Version 2.0 (the "License");
@@ -13,8 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

-=======
->>>>>>> 5a09665c36ffb7eae2288b3f837d3be18091c259
 #include <atomic>
 #include <set>
 #include <string>
@@ -90,7 +87,6 @@ PYBIND11_MODULE(mpc_data_utils, m)
 {
    // optional module docstring
    m.doc() = "pybind11 paddle-mpc plugin: data_utils (share, reveal, psi)";
-<<<<<<< HEAD

    m.def("share", &share<long long, paddle::mpc::ABY3_SCALING_FACTOR>,
          "split plaintext into three shares.");
@@ -100,17 +96,6 @@ PYBIND11_MODULE(mpc_data_utils, m)
    m.def("send_psi", &send_psi, "Send input in two party PSI.");
    m.def("recv_psi", &recv_psi, "Send input and return PSI result as output in two party PSI.");

-=======
-
-    m.def("share", &share<long long, paddle::mpc::ABY3_SCALING_FACTOR>,
-          "split plaintext into three shares.");
-    m.def("reveal", &reveal<long long, paddle::mpc::ABY3_SCALING_FACTOR>,
-          "combine three shares to reveal plaintext.");
-
-    m.def("send_psi", &send_psi, "Send input in two party PSI.");
-    m.def("recv_psi", &recv_psi, "Send input and return PSI result as output in two party PSI.");
-
->>>>>>> 5a09665c36ffb7eae2288b3f837d3be18091c259
    m.attr("mpc_one_share") = (1 << paddle::mpc::ABY3_SCALING_FACTOR) / 3;
 }


--- a/core/paddlefl_mpc/operators/mpc_relu_op.cc
+++ b/core/paddlefl_mpc/operators/mpc_relu_op.cc
@@ -25,11 +25,7 @@ class MpcReluOp : public framework::OperatorWithKernel {

  void InferShape(framework::InferShapeContext* ctx) const override {
    auto in_dims = ctx->GetInputDim("X");
-<<<<<<< HEAD
    ctx->SetOutputDim("Out", in_dims);
-=======
-    ctx->SetOutputDim("Y", in_dims);
->>>>>>> 5a09665c36ffb7eae2288b3f837d3be18091c259
    ctx->SetOutputDim("Derivative", in_dims);
  }
 };
@@ -39,11 +35,7 @@ class MpcReluOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput("X", "The input tensor.");
-<<<<<<< HEAD
    AddOutput("Out", "Output of relu_op");
-=======
-    AddOutput("Y", "Output of relu_op");
->>>>>>> 5a09665c36ffb7eae2288b3f837d3be18091c259
    AddOutput("Derivative", "Derivative of relu_op");
    AddComment(R"DOC(
 Mpc Relu Operator.
@@ -71,15 +63,9 @@ public:
 protected:
    void Apply(GradOpPtr<T> grad) const override {
        grad->SetType("mpc_relu_grad");
-<<<<<<< HEAD
        grad->SetInput("Out", this->Output("Out"));
        grad->SetInput("Derivative", this->Output("Derivative"));
        grad->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out"));
-=======
-        grad->SetInput("Y", this->Output("Y"));
-        grad->SetInput("Derivative", this->Output("Derivative"));
-        grad->SetInput(framework::GradVarName("Y"), this->OutputGrad("Y"));
->>>>>>> 5a09665c36ffb7eae2288b3f837d3be18091c259
        grad->SetAttrMap(this->Attrs());
        grad->SetOutput(framework::GradVarName("X"), this->InputGrad("X"));
    }

--- a/core/paddlefl_mpc/operators/mpc_relu_op.h
+++ b/core/paddlefl_mpc/operators/mpc_relu_op.h
@@ -25,11 +25,7 @@ class MpcReluKernel : public MpcOpKernel<T> {
 public:
    void ComputeImpl(const framework::ExecutionContext& ctx) const override {
        const Tensor* in_t = ctx.Input<Tensor>("X");
-<<<<<<< HEAD
        Tensor* out_t = ctx.Output<Tensor>("Out");
-=======
-        Tensor* out_t = ctx.Output<Tensor>("Y");
->>>>>>> 5a09665c36ffb7eae2288b3f837d3be18091c259
        Tensor* der_t = ctx.Output<Tensor>("Derivative");
        auto x = in_t->data<T>();
        auto y = out_t->mutable_data<T>(ctx.GetPlace());
@@ -45,13 +41,8 @@ template <typename DeviceContext, typename T>
 class MpcReluGradKernel : public MpcOpKernel<T> {
 public:
    void ComputeImpl(const framework::ExecutionContext& ctx) const override {
-<<<<<<< HEAD
        auto* dy_t = ctx.Input<Tensor>(framework::GradVarName("Out"));
        auto* y_t = ctx.Input<Tensor>("Out");
-=======
-        auto* dy_t = ctx.Input<Tensor>(framework::GradVarName("Y"));
-        auto* y_t = ctx.Input<Tensor>("Y");
->>>>>>> 5a09665c36ffb7eae2288b3f837d3be18091c259
        auto* der_t = ctx.Input<Tensor>("Derivative");
        auto* dx_t = ctx.Output<Tensor>(framework::GradVarName("X"));
        auto dx = dx_t->mutable_data<T>(ctx.GetPlace());

--- a/core/privc3/circuit_context.h
+++ b/core/privc3/circuit_context.h
@@ -15,7 +15,6 @@

 #include <algorithm>
 #include <memory>
-#include <algorithm>

 #include "core/paddlefl_mpc/mpc_protocol/abstract_network.h"
 #include "prng_utils.h"
@@ -81,85 +80,6 @@ public:
    void set_network(std::shared_ptr<AbstractNetwork> network) {
        _network = network;
    }
-<<<<<<< HEAD
-
-    AbstractNetwork* network() {
-        return _network.get();
-    }
-
-    void set_random_seed(const block& seed, size_t idx) {
-        if (idx >= 3) {
-            // exception handling
-        }
-        _prng[idx].set_seed(seed);
-    }
-
-    size_t party() const {
-        return _party;
-    }
-
-    size_t pre_party() const {
-        return (_party + 3 - 1) % 3;
-    }
-
-    size_t next_party() const {
-        return (_party + 1) % 3;
-    }
-
-    template <typename T>
-    T gen_random(bool next) {
-        return _prng[next].get<T>();
-    }
-
-    template<typename T, template <typename> class Tensor>
-    void gen_random(Tensor<T>& tensor, bool next) {
-        std::for_each(tensor.data(), tensor.data() + tensor.numel(),
-                      [this, next](T& val) {
-                          val = this->template gen_random<T>(next);
-                      });
-    }
-
-    template <typename T>
-    T gen_random_private() {
-        return _prng[2].get<T>();
-    }
-
-    template<typename T, template <typename> class Tensor>
-    void gen_random_private(Tensor<T>& tensor) {
-        std::for_each(tensor.data(), tensor.data() + tensor.numel(),
-                      [this](T& val) {
-                          val = this->template gen_random_private<T>();
-                      });
-    }
-
-    template <typename T>
-    T gen_zero_sharing_arithmetic() {
-        return _prng[0].get<T>() - _prng[1].get<T>();
-    }
-
-    template<typename T, template <typename> class Tensor>
-    void gen_zero_sharing_arithmetic(Tensor<T>& tensor) {
-        std::for_each(tensor.data(), tensor.data() + tensor.numel(),
-                      [this](T& val) {
-                          val = this->template gen_zero_sharing_arithmetic<T>();
-                      });
-    }
-
-    template <typename T>
-    T gen_zero_sharing_boolean() {
-        return _prng[0].get<T>() ^ _prng[1].get<T>();
-    }
-
-    template<typename T, template <typename> class Tensor>
-    void gen_zero_sharing_boolean(Tensor<T>& tensor) {
-        std::for_each(tensor.data(), tensor.data() + tensor.numel(),
-                      [this](T& val) {
-                          val = this->template gen_zero_sharing_boolean<T>();
-                      });
-    }
-
-    template<typename T, template <typename> class Tensor>
-=======

    AbstractNetwork* network() {
        return _network.get();
@@ -237,7 +157,6 @@ public:
    }

    template<typename T, template <typename> class Tensor>
->>>>>>> 5a09665c36ffb7eae2288b3f837d3be18091c259
    void ot(size_t sender, size_t receiver, size_t helper,
            const Tensor<T>* choice, const Tensor<T>* m[2],
            Tensor<T>* buffer[2], Tensor<T>* ret) {

--- a/core/privc3/fixedpoint_tensor.h
+++ b/core/privc3/fixedpoint_tensor.h
@@ -28,7 +28,6 @@ class FixedPointTensor {

 public:
    explicit FixedPointTensor(TensorAdapter<T>* share_tensor[2]);
-<<<<<<< HEAD

    explicit FixedPointTensor(TensorAdapter<T>* share_tensor_0,
                              TensorAdapter<T>* share_tensor_1);
@@ -162,141 +161,6 @@ public:
            size_t... N1>
    void gt(const CTensor<T, N1...>* rhs, BooleanTensor<T>* ret) const;

-=======
-
-    explicit FixedPointTensor(TensorAdapter<T>* share_tensor_0,
-                              TensorAdapter<T>* share_tensor_1);
-
-    ~FixedPointTensor() {};
-
-    //get mutable shape of tensor
-    TensorAdapter<T>* mutable_share(size_t idx);
-
-    const TensorAdapter<T>* share(size_t idx) const;
-
-    size_t numel() const {
-        return _share[0]->numel();
-    }
-
-    // reveal fixedpointtensor to one party
-    void reveal_to_one(size_t party, TensorAdapter<T>* ret) const;
-
-    // reveal fixedpointtensor to all parties
-    void reveal(TensorAdapter<T>* ret) const;
-
-    const std::vector<size_t> shape() const;
-
-    //convert TensorAdapter to shares
-    static void share(const TensorAdapter<T>* input,
-                      TensorAdapter<T>* output_shares[3],
-                      block seed = g_zero_block);
-
-    // element-wise add with FixedPointTensor
-    void add(const FixedPointTensor* rhs, FixedPointTensor* ret) const;
-
-    // element-wise add with TensorAdapter
-
-    void add(const TensorAdapter<T>* rhs, FixedPointTensor* ret) const;
-
-    // element-wise sub with FixedPointTensor
-    void sub(const FixedPointTensor* rhs, FixedPointTensor* ret) const;
-
-    // element-wise sub with TensorAdapter
-    void sub(const TensorAdapter<T>* rhs, FixedPointTensor* ret) const;
-
-    // negative
-    void negative(FixedPointTensor* ret) const;
-
-    // element-wise mul with FixedPointTensor using truncate1
-    void mul(const FixedPointTensor* rhs, FixedPointTensor* ret) const;
-
-    // element-wise mul with TensorAdapter
-    void mul(const TensorAdapter<T>* rhs, FixedPointTensor* ret) const;
-
-    // div by TensorAdapter
-    void div(const TensorAdapter<T>* rhs, FixedPointTensor* ret) const;
-
-    // div by FixedPointedTensor
-    // TODO@yqy : not surport operator rhs <= 0 now
-    void div(const FixedPointTensor* rhs, FixedPointTensor* ret,
-             size_t iter = 16, double x0 = pow(2, -15)) const;
-
-    // long div by boolean circuit
-    // res_int_len: estimated bit len of the integer part of result
-    void long_div(const FixedPointTensor* rhs,
-                  FixedPointTensor* ret, size_t res_int_len = 20) const;
-
-    void inverse_square_root(FixedPointTensor* ret,
-                             size_t iter = 16, double x0 = 0x1p-10) const;
-
-    // dot_mul
-    template<template<typename U, size_t...> class CTensor,
-            size_t... N1>
-    void dot_mul(const CTensor<T, N1...>* rhs, FixedPointTensor* ret) const;
-
-    //sum all element
-    void sum(FixedPointTensor* ret) const;
-
-    // mat_mul with FixedPointTensor
-    void mat_mul(const FixedPointTensor* rhs, FixedPointTensor* ret) const;
-
-    // mat_mul with TensorAdapter
-    void mat_mul(const TensorAdapter<T>* rhs, FixedPointTensor* ret) const;
-
-    // exp approximate: exp(x) = \lim_{n->inf} (1+x/n)^n
-    // where n = 2^ite
-    void exp(FixedPointTensor* ret, size_t iter = 8) const;
-
-    // element-wise relu
-    void relu(FixedPointTensor* ret) const;
-
-    // element-wise relu with relu'
-    void relu_with_derivative(FixedPointTensor* ret, BooleanTensor<T>* derivative) const;
-
-    // element-wise sigmoid using 3 piecewise polynomials
-    void sigmoid(FixedPointTensor* ret) const;
-
-    // element-wise sigmoid using 5 pieces polynomial
-    // see paper [Privacy-preserving collaborative machine learning
-    //            on genomic data using TensorFlow]
-    void sigmoid_enhanced(FixedPointTensor* ret) const;
-
-    // element-wise sigmoid using Chebyshev polynomial approximation
-    // implemented with ref to tfe[https://github.com/tf-encrypted/tf-encrypted]
-    void sigmoid_chebyshev(FixedPointTensor* ret) const;
-
-    // softmax axis = -1
-    void softmax(FixedPointTensor* ret,
-                 bool use_relu = false,
-                 bool use_long_div = true) const;
-
-    // element-wise polynomial
-    void polynomial(const TensorAdapter<T>* coeff,
-                    FixedPointTensor* ret) const;
-
-    // element-wise piecewise polynomial
-    void polynomial_piecewise(
-                const TensorAdapter<T>* coeff,
-                const TensorAdapter<T>* break_point,
-                FixedPointTensor* ret) const;
-
-    // element-wise compare
-    // <
-    template<template<typename U, size_t...> class CTensor,
-            size_t... N1>
-    void lt(const CTensor<T, N1...>* rhs, BooleanTensor<T>* ret) const;
-
-    // <=
-    template<template<typename U, size_t...> class CTensor,
-            size_t... N1>
-    void leq(const CTensor<T, N1...>* rhs, BooleanTensor<T>* ret) const;
-
-    // >
-    template<template<typename U, size_t...> class CTensor,
-            size_t... N1>
-    void gt(const CTensor<T, N1...>* rhs, BooleanTensor<T>* ret) const;
-
->>>>>>> 5a09665c36ffb7eae2288b3f837d3be18091c259
    // >=
    template<template<typename U, size_t...> class CTensor,
            size_t... N1>
@@ -332,7 +196,6 @@ private:
    static inline std::shared_ptr<CircuitContext> aby3_ctx() {
        return paddle::mpc::ContextHolder::mpc_ctx();
    }
-<<<<<<< HEAD

    static inline std::shared_ptr<TensorAdapterFactory> tensor_factory() {
        return paddle::mpc::ContextHolder::tensor_factory();
@@ -368,38 +231,6 @@ private:
        return aby3_ctx()->next_party();
    }

-=======
-
-    static inline std::shared_ptr<TensorAdapterFactory> tensor_factory() {
-        return paddle::mpc::ContextHolder::tensor_factory();
-    }
-
-    static void truncate(const FixedPointTensor* op, FixedPointTensor* ret,
-                          size_t scaling_factor);
-
-    template<typename MulFunc>
-    static void mul_trunc(const FixedPointTensor<T, N>* lhs,
-                          const FixedPointTensor<T, N>* rhs,
-                          FixedPointTensor<T, N>* ret,
-                          MulFunc mul_func);
-
-    // reduce last dim
-    static void reduce(FixedPointTensor<T, N>* input,
-                       FixedPointTensor<T, N>* ret);
-
-    static size_t party() {
-        return aby3_ctx()->party();
-    }
-
-    static size_t pre_party() {
-        return aby3_ctx()->pre_party();
-    }
-
-    static size_t next_party() {
-        return aby3_ctx()->next_party();
-    }
-
->>>>>>> 5a09665c36ffb7eae2288b3f837d3be18091c259
    static void reshare(const TensorAdapter<T>* send_val,
                 TensorAdapter<T>* recv_val) {
        if (party() == 0) {

--- a/core/privc3/fixedpoint_tensor_imp.h
+++ b/core/privc3/fixedpoint_tensor_imp.h
@@ -208,7 +208,6 @@ void FixedPointTensor<T, N>::truncate(const FixedPointTensor<T, N>* op,
    return;
 }

-<<<<<<< HEAD
 // Protocol. `truncate3`
 // P2 randomly generates r' \in (-2^62, 2^62), randomly generates r'_0, r_0, r_1 in Z_{2^64},
 // P2 compute r'_1 = r' - r'_0, r_2 = r'/2^N - r_0 - r_1, let x2 = r_2
@@ -550,249 +549,6 @@ void FixedPointTensor<T, N>::sigmoid_chebyshev(FixedPointTensor<T, N>* ret) cons
 }

 template< typename T, size_t N>
-=======
-template<typename T, size_t N>
-template<typename MulFunc>
-void FixedPointTensor<T, N>::mul_trunc(const FixedPointTensor<T, N>* lhs,
-                                        const FixedPointTensor<T, N>* rhs,
-                                        FixedPointTensor<T, N>* ret,
-                                        MulFunc mul_func) {
-
-    auto r_zero = tensor_factory()->template create<T>(ret->shape());
-    aby3_ctx()->gen_zero_sharing_arithmetic(*r_zero.get());
-
-    // temp = _share[0]->mul(rhs->_share[0]) +
-    //        _share[0]->mul(rhs->_share[1]) +
-    //        _share[1]->mul(rhs->_share[0]) +
-    //        r_zero
-    auto temp = tensor_factory()->template create<T>(ret->shape());
-    auto temp1 = tensor_factory()->template create<T>(ret->shape());
-
-    // use mul_func to fit both element_wise mul and mat mul
-    (lhs->share(0)->*mul_func)(rhs->share(0), temp.get());
-    (lhs->share(0)->*mul_func)(rhs->share(1), temp1.get());
-    temp1->add(temp.get(), temp1.get());
-
-    (lhs->share(1)->*mul_func)(rhs->share(0), temp.get());
-    temp1->add(r_zero.get(), temp1.get());
-    temp->add(temp1.get(), temp.get());
-
-    auto temp2 = tensor_factory()->template create<T>(ret->shape());
-    auto temp3 = tensor_factory()->template create<T>(ret->shape());
-
-    TensorAdapter<int64_t>* temp_array[2] = {temp2.get(), temp3.get()};
-
-    std::shared_ptr<FixedPointTensor<T, N>> ret_no_trunc =
-            std::make_shared<FixedPointTensor<T, N>>(temp_array);
-
-    temp->copy(ret_no_trunc->_share[0]);
-    reshare(temp.get(), ret_no_trunc->_share[1]);
-
-    truncate(ret_no_trunc.get(), ret, N);
-}
-
-template<typename T, size_t N>
-void FixedPointTensor<T, N>::mul(const TensorAdapter<T>* rhs,
-                                 FixedPointTensor<T, N>* ret) const {
-    // PADDLE_ENFORCE_EQ(N, rhs->scaling_factor(),
-    //                   "no match scaling factor");
-    auto temp0 = tensor_factory()->template create<T>(this->shape());
-    auto temp1 = tensor_factory()->template create<T>(this->shape());
-    std::shared_ptr<FixedPointTensor<T, N>> temp =
-        std::make_shared<FixedPointTensor<T, N>>(temp0.get(), temp1.get());
-
-    _share[0]->mul(rhs, temp->_share[0]);
-    _share[1]->mul(rhs, temp->_share[1]);
-    truncate(temp.get(), ret, rhs->scaling_factor());
-}
-
-template<typename T, size_t N>
-void FixedPointTensor<T, N>::sum(FixedPointTensor<T, N>* ret) const {
-    PADDLE_ENFORCE_EQ(ret->numel(), 1, "output size should be 1.");
-    T sum1 = (T) 0;
-    T sum2 = (T) 0;
-    T* iter_0 = _share[0]->data();
-    T* iter_1 = _share[1]->data();
-    for (int i = 0; i < this->numel(); ++i) {
-        sum1 += *(iter_0 + i);
-        sum2 += *(iter_1 + i);
-    }
-    assign_to_tensor(ret->_share[0], sum1);
-    assign_to_tensor(ret->_share[1], sum2);
-}
-
-template<typename T, size_t N>
-template<template<typename U, size_t...> class CTensor,
-            size_t... N1>
-void FixedPointTensor<T, N>::dot_mul(const CTensor<T, N1...>* rhs,
-                                     FixedPointTensor<T, N>* ret) const {
-    PADDLE_ENFORCE_EQ(ret->numel(), 1, "output size should be 1.");
-
-    auto temp0 = tensor_factory()->template create<T>(this->shape());
-    auto temp1 = tensor_factory()->template create<T>(this->shape());
-    std::shared_ptr<FixedPointTensor<T, N>> temp =
-            std::make_shared<FixedPointTensor<T, N>>(temp0.get(), temp1.get());
-    this->mul(rhs, temp.get());
-    temp->sum(ret);
-}
-
-template<typename T, size_t N>
-void FixedPointTensor<T, N>::mat_mul(const FixedPointTensor<T, N>* rhs,
-                                     FixedPointTensor<T, N>* ret) const {
-    mul_trunc(this, rhs, ret, &TensorAdapter<T>::mat_mul);
-}
-
-template<typename T, size_t N>
-void FixedPointTensor<T, N>::mat_mul(const TensorAdapter<T>* rhs,
-                                     FixedPointTensor<T, N>* ret) const {
-    _share[0]->mat_mul(rhs, ret->_share[0]);
-    _share[1]->mat_mul(rhs, ret->_share[1]);
-    truncate(ret, ret, rhs->scaling_factor());
-}
-
-template< typename T, size_t N>
-void FixedPointTensor<T, N>::div(const TensorAdapter<T>* rhs,
-                                 FixedPointTensor<T, N>* ret) const {
-    PADDLE_ENFORCE_EQ(N, rhs->scaling_factor(),
-                        "no match scaling factor");
-
-    auto temp = tensor_factory()->template create<T>(this->shape());
-
-    double scale = std::pow(2, rhs->scaling_factor());
-    auto inverse = [scale](T d) -> T {
-                    return 1.0 * scale / d * scale; };
-    std::transform(rhs->data(), rhs->data() + rhs->numel(),
-                                temp->data(), inverse);
-    temp->scaling_factor() = rhs->scaling_factor();
-
-    this->mul(temp.get(), ret);
-}
-
-template<typename T, size_t N>
-void FixedPointTensor<T, N>::div(const FixedPointTensor<T, N>* rhs,
-                                 FixedPointTensor<T, N>* ret,
-                                 size_t iter, double x0) const {
-    auto temp0 = tensor_factory()->template create<T>(ret->shape());
-    auto temp1 = tensor_factory()->template create<T>(ret->shape());
-    std::shared_ptr<FixedPointTensor<T, N>> temp =
-        std::make_shared<FixedPointTensor<T, N>>(temp0.get(), temp1.get());
-    reciprocal(rhs, temp.get(), iter, x0);
-    this->mul(temp.get(), ret);
-}
-
-template<typename T, size_t N>
-void FixedPointTensor<T, N>::exp(FixedPointTensor<T, N>* ret,
-                                 size_t iter) const {
-    // exp approximate: exp(x) = \lim_{n->inf} (1+x/n)^n
-    // where n = 2^ite
-    auto pow_iter = tensor_factory()->template create<T>(this->shape());
-    assign_to_tensor(pow_iter.get(), (T) (pow(2, N -iter)));
-    pow_iter->scaling_factor() = N;
-
-    auto tensor_one = tensor_factory()->template create<T>(this->shape());
-    assign_to_tensor(tensor_one.get(), (T) 1 << N);
-    tensor_one->scaling_factor() = N;
-
-    this->mul(pow_iter.get(), ret);
-
-    ret->add(tensor_one.get(), ret);
-
-    for (int i = 0; i < iter; ++i) {
-        ret->mul(ret, ret);
-    }
-}
-
-template< typename T, size_t N>
-void FixedPointTensor<T, N>::relu(FixedPointTensor<T, N>* ret) const {
-    //utilize polynomial_piecewise
-    // break_point = {0}, coeff[0] = {0, 0}, coeff[1] = {0, 1}
-    // break_point.shape = {1, this->shape}, coeff.shape = {2, 2, this->shape}
-
-    auto shape_ = shape();
-    //construct break_point
-    auto b_shape = shape_;
-    b_shape.insert(b_shape.begin(), 1);
-
-    auto break_point = tensor_factory()->template create<T>(b_shape);
-
-    T* b_ptr = break_point->data();
-    for (size_t i = 0; i < break_point->numel(); ++i) {
-        b_ptr[i] = 0;
-    }
-    break_point->scaling_factor() = N;
-
-    //contruct coeff
-    std::vector<size_t> c_shape = {2, 2};
-    c_shape.insert(c_shape.end(), shape_.begin(), shape_.end());
-
-    auto coeff = tensor_factory()->template create<T>(c_shape);
-
-    T* c_ptr = coeff->data();
-
-    for (size_t i = 0; i < 3 * this->numel(); ++i) {
-        c_ptr[i] = 0;
-    }
-    for (size_t i = 3 * this->numel(); i < 4 * this->numel(); ++i) {
-        c_ptr[i] = (T) 1 << N;
-    }
-    coeff->scaling_factor() = N;
-
-    this->polynomial_piecewise(coeff.get(), break_point.get(), ret);
-}
-
-template< typename T, size_t N>
-void FixedPointTensor<T, N>::relu_with_derivative(
-    FixedPointTensor<T, N>* ret, BooleanTensor<T>* derivative) const {
-
-    auto shape_ = shape();
-    auto zero = tensor_factory()->template create<T>(shape_);
-
-    assign_to_tensor(zero.get(), (T)0);
-    zero->scaling_factor() = N;
-
-    auto tmp0 = tensor_factory()->template create<T>(shape_);
-    auto tmp1 = tensor_factory()->template create<T>(shape_);
-
-    BooleanTensor<T> der(tmp0.get(), tmp1.get());
-
-    gt(zero.get(), &der);
-
-    der.mul(this, ret);
-
-    if (derivative) {
-        der.share(0)->copy(derivative->share(0));
-        der.share(1)->copy(derivative->share(1));
-    }
-}
-
-template< typename T, size_t N>
-void FixedPointTensor<T, N>::sigmoid_chebyshev(FixedPointTensor<T, N>* ret) const {
-    //utilize Chebyshev polynomial approximation
-    // more accurate in small range, such as [-4, 4]
-    auto shape = ret->shape();
-    std::vector<size_t> shape_ = shape;
-    shape_.insert(shape_.begin(), 10);
-    auto numel = ret->numel();
-    auto coeff = tensor_factory()->template create<T>(shape_);
-    std::vector<double> w;
-    w.resize(10, 0.0f);
-    w[0] = 0.5;
-    w[1] = 0.2159198015;
-    w[3] = -0.0082176259;
-    w[5] = 0.0001825597;
-    w[7] = -0.0000018848;
-    w[9] = 0.0000000072;
-    for (int i = 0; i < 10; ++i) {
-        for (int j = 0; j < numel; ++j) {
-            *(coeff->data() + i * numel + j) = (T) (w[i] * pow(2, N));
-        }
-    }
-    coeff->scaling_factor() = N;
-    polynomial(coeff.get(), ret);
-}
-
-template< typename T, size_t N>
->>>>>>> 5a09665c36ffb7eae2288b3f837d3be18091c259
 void FixedPointTensor<T, N>::sigmoid(FixedPointTensor<T, N>* ret) const {
    //utilize polynomial_piecewise
    // break_point = {-2.5, 2.5}
@@ -823,25 +579,6 @@ void FixedPointTensor<T, N>::sigmoid(FixedPointTensor<T, N>* ret) const {
    //contruct coeff
    std::vector<size_t> c_shape = {3, 2};
    c_shape.insert(c_shape.end(), shape_.begin(), shape_.end());
-<<<<<<< HEAD
-
-    auto coeff = tensor_factory()->template create<T>(c_shape);
-
-    T* c_ptr = coeff->data();
-
-    size_t numel = this->numel();
-    double scale = std::pow(2, N);
-    for (size_t i = 0; i < numel; ++i) {
-        c_ptr[i] = 0.0001 * scale;
-        c_ptr[i + numel] = 0;
-        c_ptr[i + 2 * numel] = 0.5 * scale;
-        c_ptr[i + 3 * numel] = 0.17 * scale;
-        c_ptr[i + 4 * numel] = (1 - 0.0001) * scale;
-        c_ptr[i + 5 * numel] = 0;
-    }
-    coeff->scaling_factor() = N;
-
-=======

    auto coeff = tensor_factory()->template create<T>(c_shape);

@@ -859,7 +596,6 @@ void FixedPointTensor<T, N>::sigmoid(FixedPointTensor<T, N>* ret) const {
    }
    coeff->scaling_factor() = N;

->>>>>>> 5a09665c36ffb7eae2288b3f837d3be18091c259
    this->polynomial_piecewise(coeff.get(), break_point.get(), ret);
 }

@@ -947,41 +683,6 @@ void FixedPointTensor<T, N>::softmax(FixedPointTensor<T, N>* ret,
    temp[8]->reshape({row, col});
    temp[9]->reshape({row, col});
    FixedPointTensor<T, N> max_x_broadcast(temp[8].get(), temp[9].get());
-<<<<<<< HEAD
-
-    temp[10]->reshape({row, col});
-    auto exp_lower_bound = temp[10].get();
-
-    auto transpose = [](const TensorAdapter<T>* in, TensorAdapter<T>* out) {
-        // suppose input dims = 2
-        const size_t col = in->shape()[1];
-        const size_t row = in->shape()[0];
-        const size_t numel = in->numel();
-
-        for (size_t k = 0; k < numel; ++k) {
-            size_t i = k / row;
-            size_t j = k % row;
-            out->data()[k] = in->data()[j * col + i];
-        }
-    };
-
-    auto broadcast = [](const TensorAdapter<T>* in, TensorAdapter<T>* out) {
-        // suppose input dims = 2
-        // in shape = [row, 1]
-        const size_t col = out->shape()[1];
-        const size_t row = out->shape()[0];
-        for (size_t k = 0; k < out->numel(); ++k) {
-            size_t i = k / col;
-            out->data()[k] = in->data()[i];
-        }
-    };
-
-    share(0)->copy(x.mutable_share(0));
-    share(1)->copy(x.mutable_share(1));
-
-    if (use_relu) {
-
-=======

    temp[10]->reshape({row, col});
    auto exp_lower_bound = temp[10].get();
@@ -1015,7 +716,6 @@ void FixedPointTensor<T, N>::softmax(FixedPointTensor<T, N>* ret,

    if (use_relu) {

->>>>>>> 5a09665c36ffb7eae2288b3f837d3be18091c259
        x.relu(&x);

    } else { // use exp
@@ -1087,7 +787,6 @@ void FixedPointTensor<T, N>::long_div(const FixedPointTensor<T, N>* rhs,

    assign_to_tensor(cmp_res_all.share(0), (T)0);
    assign_to_tensor(cmp_res_all.share(1), (T)0);
-<<<<<<< HEAD

    const size_t msb = sizeof(T) * 8 - 1;
    sign_lhs.bit_extract(msb, this);
@@ -1121,41 +820,6 @@ void FixedPointTensor<T, N>::long_div(const FixedPointTensor<T, N>* rhs,
        lshift(&abs_rhs, i, &sub_rhs);


-=======
-
-    const size_t msb = sizeof(T) * 8 - 1;
-    sign_lhs.bit_extract(msb, this);
-    sign_rhs.bit_extract(msb, rhs);
-    sign_lhs.bitwise_xor(&sign_rhs, &sign_ret);
-
-    auto lshift = []  (const FixedPointTensor<T, N>* in,
-                       size_t rhs,
-                       FixedPointTensor<T, N>* out) {
-        in->share(0)->lshift(rhs, out->mutable_share(0));
-        in->share(1)->lshift(rhs, out->mutable_share(1));
-    };
-
-    // abs = val - 2 * sign * val
-    auto abs = [lshift] (const FixedPointTensor<T, N>* in,
-                   const BooleanTensor<T>* sign,
-                   FixedPointTensor<T, N>* out) {
-        lshift(in, 1, out);
-        sign->mul(out, out);
-        in->sub(out, out);
-    };
-
-    auto out0 = tensor_factory()->template create<T>(ret->shape());
-
-    abs(this, &sign_lhs, &abs_lhs);
-
-    abs(rhs, &sign_rhs, &abs_rhs);
-
-
-    for (ssize_t i = int_len - 1; i >= 0; --i) {
-        lshift(&abs_rhs, i, &sub_rhs);
-
-
->>>>>>> 5a09665c36ffb7eae2288b3f837d3be18091c259
        abs_lhs.gt(&sub_rhs, &cmp_res);


@@ -1167,11 +831,7 @@ void FixedPointTensor<T, N>::long_div(const FixedPointTensor<T, N>* rhs,
    }

    for (size_t i = 1; i <= N; ++i) {
-<<<<<<< HEAD
        truncate3(&abs_rhs, &sub_rhs, i);
-=======
-        truncate(&abs_rhs, &sub_rhs, i);
->>>>>>> 5a09665c36ffb7eae2288b3f837d3be18091c259
        abs_lhs.gt(&sub_rhs, &cmp_res);
        cmp_res.mul(&sub_rhs, &sub_rhs);
        cmp_res.lshift(N - i, &cmp_res);
@@ -1312,16 +972,6 @@ void FixedPointTensor<T, N>::polynomial_piecewise(
                                    temp[temp_index++].get()));
        msb[i]->bit_extract(sizeof(T) * 8 - 1, temp1[i].get());
    }
-<<<<<<< HEAD
-
-    // b[0] = msb[0], b[i + 1] = ~ msb[i] & msb[i + 1]
-    std::vector<std::shared_ptr<BooleanTensor<T>>> b;
-    b.emplace_back(std::make_shared<BooleanTensor<T>>(
-                                    temp[temp_index++].get(),
-                                    temp[temp_index++].get()));
-    b[0] = msb[0];
-
-=======

    // b[0] = msb[0], b[i + 1] = ~ msb[i] & msb[i + 1]
    std::vector<std::shared_ptr<BooleanTensor<T>>> b;
@@ -1330,7 +980,6 @@ void FixedPointTensor<T, N>::polynomial_piecewise(
                                    temp[temp_index++].get()));
    b[0] = msb[0];

->>>>>>> 5a09665c36ffb7eae2288b3f837d3be18091c259
    for (int i = 0; i < len_break_point - 1; ++i) {
        b.emplace_back(std::make_shared<BooleanTensor<T>>(
                                    temp[temp_index++].get(),
@@ -1535,11 +1184,7 @@ void FixedPointTensor<T, N>::inverse_square_root(const FixedPointTensor* op,
    std::shared_ptr<FixedPointTensor<T, N>> x2 =
        std::make_shared<FixedPointTensor<T, N>>(temp[2].get(), temp[3].get());
    // x2 = 0.5 * op
-<<<<<<< HEAD
    truncate3(op, x2.get(), 1);
-=======
-    truncate(op, x2.get(), 1);
->>>>>>> 5a09665c36ffb7eae2288b3f837d3be18091c259

    assign_to_tensor(y->mutable_share(0), (T)(x0 * pow(2, N)));
    assign_to_tensor(y->mutable_share(1), (T)(x0 * pow(2, N)));

--- a/core/privc3/fixedpoint_tensor_test.cc
+++ b/core/privc3/fixedpoint_tensor_test.cc
@@ -1223,10 +1223,7 @@ TEST_F(FixedTensorTest, mulfixed) {
    std::vector<double> in0_val = {1.0, 1.0, 1.0, 1.0};
    std::vector<double> in1_val = {2.0, 2.0, 2.0, 2.0};
    std::vector<double> res_val = {2.0, 2.0, 2.0, 2.0};
-<<<<<<< HEAD

-=======
->>>>>>> 5a09665c36ffb7eae2288b3f837d3be18091c259
    std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in =
                            {gen(shape), gen(shape)};

@@ -1270,7 +1267,6 @@ TEST_F(FixedTensorTest, mulfixed) {
    EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), &result));
 }

-<<<<<<< HEAD
 TEST_F(FixedTensorTest, mulfixed_multi_times) {

    std::vector<size_t> shape = {100000, 1};
@@ -1289,118 +1285,6 @@ TEST_F(FixedTensorTest, mulfixed_multi_times) {
                        [] (double& a, double& b){ return a * b;});
        };
    fill_mul_data();
-=======
-TEST_F(FixedTensorTest, mulfixed_overflow) {
-
-    std::vector<size_t> shape = {1};
-    // result greater than 2^32 is overflow
-    // notice: multiplier larger than 2^20 may lead to error result
-    // as 2^l << 2^k [ stated in ABY3]
-    std::vector<double> in0_val = {0x1p16};
-    std::vector<double> in1_val = {0x1p16};
-    std::vector<double> res_val = {0};
-    std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in =
-                            {gen(shape), gen(shape)};
-
-    test_fixedt_gen_paddle_tensor<int64_t, 16>(in0_val,
-                                shape, _cpu_ctx).copy(in[0].get());
-    test_fixedt_gen_paddle_tensor<int64_t, 16>(in1_val,
-                                shape, _cpu_ctx).copy(in[1].get());
-
-    auto out0 = _s_tensor_factory->create<int64_t>(shape);
-    auto out1 = _s_tensor_factory->create<int64_t>(shape);
-    auto out2 = _s_tensor_factory->create<int64_t>(shape);
-
-    PaddleTensor<int64_t> result =
-            test_fixedt_gen_paddle_tensor<int64_t, 16>(res_val, shape, _cpu_ctx);
-
-    _t[0] = std::thread([this, in, out0]() mutable {
-        g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[0], [&](){
-            test_fixedt_mul_fixed(0, in, out0.get());
-        });
-
-    });
-    _t[1] = std::thread([this, in, out1]() mutable {
-        g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[1], [&](){
-            test_fixedt_mul_fixed(1, in, out1.get());
-        });
-
-    });
-    _t[2] = std::thread([this, in, out2]() mutable {
-        g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[2], [&](){
-            test_fixedt_mul_fixed(2, in, out2.get());
-        });
-
-    });
-
-    _t[0].join();
-    _t[1].join();
-    _t[2].join();
-
-    EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), out1.get()));
-    EXPECT_TRUE(test_fixedt_check_tensor_eq(out1.get(), out2.get()));
-    EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), &result));
-}
-
-TEST_F(FixedTensorTest, mulfixed_upper_bound) {
-
-    std::vector<size_t> shape = {1, 2};
-    // recommend each input less than 2^20
-    // larger than 2^20 may lead to error result
-    // as 2^l << 2^k [stated in ABY3]
-    std::vector<double> in0_val = {1.0, 1.0};
-    std::vector<double> in1_val = {0x1p20, -0x1p20};
-    std::vector<double> res_val = {0x1p20, -0x1p20};
-    std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in =
-                            {gen(shape), gen(shape)};
-
-    test_fixedt_gen_paddle_tensor<int64_t, 16>(in0_val,
-                                shape, _cpu_ctx).copy(in[0].get());
-    test_fixedt_gen_paddle_tensor<int64_t, 16>(in1_val,
-                                shape, _cpu_ctx).copy(in[1].get());
-
-    auto out0 = _s_tensor_factory->create<int64_t>(shape);
-    auto out1 = _s_tensor_factory->create<int64_t>(shape);
-    auto out2 = _s_tensor_factory->create<int64_t>(shape);
-
-    PaddleTensor<int64_t> result =
-            test_fixedt_gen_paddle_tensor<int64_t, 16>(res_val, shape, _cpu_ctx);
-
-    _t[0] = std::thread([this, in, out0]() mutable {
-        g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[0], [&](){
-            test_fixedt_mul_fixed(0, in, out0.get());
-        });
-
-    });
-    _t[1] = std::thread([this, in, out1]() mutable {
-        g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[1], [&](){
-            test_fixedt_mul_fixed(1, in, out1.get());
-        });
-
-    });
-    _t[2] = std::thread([this, in, out2]() mutable {
-        g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[2], [&](){
-            test_fixedt_mul_fixed(2, in, out2.get());
-        });
-
-    });
-
-    _t[0].join();
-    _t[1].join();
-    _t[2].join();
-
-    EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), out1.get()));
-    EXPECT_TRUE(test_fixedt_check_tensor_eq(out1.get(), out2.get()));
-    EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), &result));
-}
-
-TEST_F(FixedTensorTest, mulfixed_low_bound) {
-
-    std::vector<size_t> shape = {1};
-    std::vector<double> in0_val = {1.0};
-    std::vector<double> in1_val = {0x1p-16};
-    std::vector<double> res_val = {0};
->>>>>>> 5a09665c36ffb7eae2288b3f837d3be18091c259
    std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in =
                            {gen(shape), gen(shape)};

@@ -1446,7 +1330,6 @@ TEST_F(FixedTensorTest, mulfixed_low_bound) {

 TEST_F(FixedTensorTest, mulfixed_overflow) {

-<<<<<<< HEAD
    std::vector<size_t> shape = {1};
    // result greater than 2^32 is overflow
    // notice: multiplier larger than 2^20 may lead to error result
@@ -1454,12 +1337,6 @@ TEST_F(FixedTensorTest, mulfixed_overflow) {
    std::vector<double> in0_val = {0x1p16};
    std::vector<double> in1_val = {0x1p16};
    std::vector<double> res_val = {0};
-=======
-    std::vector<size_t> shape = {2, 2};
-    std::vector<double> in0_val = {1.0, 1.0, 1.0, 1.0};
-    std::vector<double> in1_val = {2.0, 2.0, 2.0, 2.0};
-    std::vector<double> res_val = {2.0, 2.0, 2.0, 2.0};
->>>>>>> 5a09665c36ffb7eae2288b3f837d3be18091c259
    std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in =
                            {gen(shape), gen(shape)};

@@ -1505,7 +1382,6 @@ TEST_F(FixedTensorTest, mulfixed_overflow) {

 TEST_F(FixedTensorTest, mulfixed_upper_bound) {

-<<<<<<< HEAD
    std::vector<size_t> shape = {1, 2};
    // recommend each input less than 2^20
    // larger than 2^20 may lead to error result
@@ -1513,12 +1389,6 @@ TEST_F(FixedTensorTest, mulfixed_upper_bound) {
    std::vector<double> in0_val = {1.0, 1.0};
    std::vector<double> in1_val = {0x1p20, -0x1p20};
    std::vector<double> res_val = {0x1p20, -0x1p20};
-=======
-    std::vector<size_t> shape = {2, 2};
-    std::vector<double> in0_val = {1.0, 1.0, 1.0, 1.0};
-    std::vector<double> in1_val = {2.0, 2.0, 2.0, 2.0};
-    std::vector<double> res_val = {0.5, 0.5, 0.5, 0.5};
->>>>>>> 5a09665c36ffb7eae2288b3f837d3be18091c259
    std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in =
                            {gen(shape), gen(shape)};

@@ -1562,21 +1432,12 @@ TEST_F(FixedTensorTest, mulfixed_upper_bound) {
    EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), &result));
 }

-<<<<<<< HEAD
 TEST_F(FixedTensorTest, mulfixed_low_bound) {

    std::vector<size_t> shape = {1};
    std::vector<double> in0_val = {1.0};
    std::vector<double> in1_val = {0x1p-16};
    std::vector<double> res_val = {0};
-=======
-TEST_F(FixedTensorTest, divfixed) {
-
-    std::vector<size_t> shape = {2, 2};
-    std::vector<double> in0_val = {1.0, 1.0, 1.0, 1.0};
-    std::vector<double> in1_val = {1.0, 10.0, 1000.0, 700.0};
-    std::vector<double> res_val = {1.0, 0.1, 0.001, 1.0 / 700};
->>>>>>> 5a09665c36ffb7eae2288b3f837d3be18091c259
    std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in =
                            {gen(shape), gen(shape)};

@@ -1594,31 +1455,19 @@ TEST_F(FixedTensorTest, divfixed) {

    _t[0] = std::thread([this, in, out0]() mutable {
        g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[0], [&](){
-<<<<<<< HEAD
            test_fixedt_mul_fixed(0, in, out0.get());
-=======
-            test_fixedt_div_fixed(0, in, out0.get());
->>>>>>> 5a09665c36ffb7eae2288b3f837d3be18091c259
        });

    });
    _t[1] = std::thread([this, in, out1]() mutable {
        g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[1], [&](){
-<<<<<<< HEAD
            test_fixedt_mul_fixed(1, in, out1.get());
-=======
-            test_fixedt_div_fixed(1, in, out1.get());
->>>>>>> 5a09665c36ffb7eae2288b3f837d3be18091c259
        });

    });
    _t[2] = std::thread([this, in, out2]() mutable {
        g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[2], [&](){
-<<<<<<< HEAD
            test_fixedt_mul_fixed(2, in, out2.get());
-=======
-            test_fixedt_div_fixed(2, in, out2.get());
->>>>>>> 5a09665c36ffb7eae2288b3f837d3be18091c259
        });

    });
@@ -1629,25 +1478,15 @@ TEST_F(FixedTensorTest, divfixed) {

    EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), out1.get()));
    EXPECT_TRUE(test_fixedt_check_tensor_eq(out1.get(), out2.get()));
-    EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), &result, 0.2, true));
+    EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), &result));
 }

-<<<<<<< HEAD
 TEST_F(FixedTensorTest, mulplain) {

    std::vector<size_t> shape = {2, 2};
    std::vector<double> in0_val = {1.0, 1.0, 1.0, 1.0};
    std::vector<double> in1_val = {2.0, 2.0, 2.0, 2.0};
    std::vector<double> res_val = {2.0, 2.0, 2.0, 2.0};
-=======
-TEST_F(FixedTensorTest, divfixed_low_bound) {
-
-    std::vector<size_t> shape = {1};
-    std::vector<double> in0_val = {1.0};
-    // divisor > 1/x0, default x0 = 2^-15
-    std::vector<double> in1_val = {0x1p15};
-    std::vector<double> res_val = {0x1p-15};
->>>>>>> 5a09665c36ffb7eae2288b3f837d3be18091c259
    std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in =
                            {gen(shape), gen(shape)};

@@ -1669,83 +1508,19 @@ TEST_F(FixedTensorTest, divfixed_low_bound) {

    _t[0] = std::thread([this, in, out0]() mutable {
        g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[0], [&](){
-<<<<<<< HEAD
            test_fixedt_mul_plain(0, in, out0.get());
-=======
-            test_fixedt_div_fixed(0, in, out0.get());
-        });
-
-    });
-    _t[1] = std::thread([this, in, out1]() mutable {
-        g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[1], [&](){
-            test_fixedt_div_fixed(1, in, out1.get());
-        });
-
-    });
-    _t[2] = std::thread([this, in, out2]() mutable {
-        g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[2], [&](){
-            test_fixedt_div_fixed(2, in, out2.get());
-        });
-
-    });
-
-    _t[0].join();
-    _t[1].join();
-    _t[2].join();
-
-    EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), out1.get()));
-    EXPECT_TRUE(test_fixedt_check_tensor_eq(out1.get(), out2.get()));
-    EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), &result, 0.0001, true));
-}
-
-TEST_F(FixedTensorTest, sum) {
-
-    std::vector<size_t> shape = {2, 2};
-    std::vector<double> in0_val = {1.0, 1.0, 1.0, 1.0};
-    //std::vector<double> in1_val = {2.0, 2.0, 2.0, 2.0};
-    std::vector<double> res_val = {4.0};
-    std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in = {gen(shape)};
-
-    test_fixedt_gen_paddle_tensor<int64_t, 16>(in0_val,
-                                shape, _cpu_ctx).copy(in[0].get());
-    //not copy scaling factor in copy funtion
-    dynamic_cast<PaddleTensor<int64_t>*>(in[0].get())->
-                                scaling_factor() = 16;
-
-    std::vector<size_t> ret_shape = {1};
-    auto out0 = _s_tensor_factory->create<int64_t>(ret_shape);
-    auto out1 = _s_tensor_factory->create<int64_t>(ret_shape);
-    auto out2 = _s_tensor_factory->create<int64_t>(ret_shape);
-
-
-
-    PaddleTensor<int64_t> result =
-            test_fixedt_gen_paddle_tensor<int64_t, 16>(res_val, ret_shape, _cpu_ctx);
-
-    _t[0] = std::thread([this, in, out0]() mutable {
-        g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[0], [&](){
-            test_fixedt_sum_fixed(0, in, out0.get());
->>>>>>> 5a09665c36ffb7eae2288b3f837d3be18091c259
        });

    });
    _t[1] = std::thread([this, in, out1]() mutable {
        g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[1], [&](){
-<<<<<<< HEAD
            test_fixedt_mul_plain(1, in, out1.get());
-=======
-            test_fixedt_sum_fixed(1, in, out1.get());
->>>>>>> 5a09665c36ffb7eae2288b3f837d3be18091c259
        });

    });
    _t[2] = std::thread([this, in, out2]() mutable {
        g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[2], [&](){
-<<<<<<< HEAD
            test_fixedt_mul_plain(2, in, out2.get());
-=======
-            test_fixedt_sum_fixed(2, in, out2.get());
->>>>>>> 5a09665c36ffb7eae2288b3f837d3be18091c259
        });

    });
@@ -1764,11 +1539,7 @@ TEST_F(FixedTensorTest, divplain) {
    std::vector<size_t> shape = {2, 2};
    std::vector<double> in0_val = {1.0, 1.0, 1.0, 1.0};
    std::vector<double> in1_val = {2.0, 2.0, 2.0, 2.0};
-<<<<<<< HEAD
    std::vector<double> res_val = {0.5, 0.5, 0.5, 0.5};
-=======
-    std::vector<double> res_val = {4.0, 4.0, 4.0, 4.0};
->>>>>>> 5a09665c36ffb7eae2288b3f837d3be18091c259
    std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in =
                            {gen(shape), gen(shape)};

@@ -1820,13 +1591,8 @@ TEST_F(FixedTensorTest, divfixed) {

    std::vector<size_t> shape = {2, 2};
    std::vector<double> in0_val = {1.0, 1.0, 1.0, 1.0};
-<<<<<<< HEAD
    std::vector<double> in1_val = {1.0, 10.0, 1000.0, 700.0};
    std::vector<double> res_val = {1.0, 0.1, 0.001, 1.0 / 700};
-=======
-    std::vector<double> in1_val = {2.0, 2.0, 2.0, 2.0};
-    std::vector<double> res_val = {8.0};
->>>>>>> 5a09665c36ffb7eae2288b3f837d3be18091c259
    std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in =
                            {gen(shape), gen(shape)};

@@ -1872,18 +1638,11 @@ TEST_F(FixedTensorTest, divfixed) {

 TEST_F(FixedTensorTest, divfixed_low_bound) {

-<<<<<<< HEAD
    std::vector<size_t> shape = {1};
    std::vector<double> in0_val = {1.0};
    // divisor > 1/x0, default x0 = 2^-15
    std::vector<double> in1_val = {0x1p15};
    std::vector<double> res_val = {0x1p-15};
-=======
-    std::vector<size_t> shape = {2, 2};
-    std::vector<double> in0_val = {1.0, 1.0, 1.0, 1.0};
-    std::vector<double> in1_val = {2.0, 2.0, 2.0, 2.0};
-    std::vector<double> res_val = {8.0};
->>>>>>> 5a09665c36ffb7eae2288b3f837d3be18091c259
    std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in =
                            {gen(shape), gen(shape)};

@@ -1930,18 +1689,10 @@ TEST_F(FixedTensorTest, divfixed_low_bound) {
 TEST_F(FixedTensorTest, sum) {

    std::vector<size_t> shape = {2, 2};
-<<<<<<< HEAD
    std::vector<double> in0_val = {1.0, 1.0, 1.0, 1.0};
    //std::vector<double> in1_val = {2.0, 2.0, 2.0, 2.0};
    std::vector<double> res_val = {4.0};
    std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in = {gen(shape)};
-=======
-    std::vector<double> in0_val = {3.0, 3.0, 3.0, 3.0};
-    std::vector<double> in1_val = {2.0, 2.0, 2.0, 2.0};
-    std::vector<double> res_val = {1 / pow(2, 16), 1 / pow(2, 16), 1 / pow(2, 16), 1 / pow(2, 16)};
-    std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in =
-                            {gen(shape), gen(shape)};
->>>>>>> 5a09665c36ffb7eae2288b3f837d3be18091c259

    test_fixedt_gen_paddle_tensor<int64_t, 16>(in0_val,
                                shape, _cpu_ctx).copy(in[0].get());
@@ -1990,15 +1741,9 @@ TEST_F(FixedTensorTest, sum) {
 TEST_F(FixedTensorTest, mat_mulplain) {

    std::vector<size_t> shape = {2, 2};
-<<<<<<< HEAD
    std::vector<double> in0_val = {1.0, 1.0, 1.0, 1.0};
    std::vector<double> in1_val = {2.0, 2.0, 2.0, 2.0};
    std::vector<double> res_val = {4.0, 4.0, 4.0, 4.0};
-=======
-    std::vector<double> in0_val = {3.0, 3.0, 3.0, 3.0};
-    std::vector<double> in1_val = {2.0, 2.0, 2.0, 2.0};
-    std::vector<double> res_val = {1 / pow(2, 16), 1 / pow(2, 16), 1 / pow(2, 16), 1 / pow(2, 16)};
->>>>>>> 5a09665c36ffb7eae2288b3f837d3be18091c259
    std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in =
                            {gen(shape), gen(shape)};

@@ -2049,15 +1794,9 @@ TEST_F(FixedTensorTest, mat_mulplain) {
 TEST_F(FixedTensorTest, dot_mul_fixed) {

    std::vector<size_t> shape = {2, 2};
-<<<<<<< HEAD
    std::vector<double> in0_val = {1.0, 1.0, 1.0, 1.0};
    std::vector<double> in1_val = {2.0, 2.0, 2.0, 2.0};
    std::vector<double> res_val = {8.0};
-=======
-    std::vector<double> in0_val = {2.0, 2.0, 3.0, 3.0};
-    std::vector<double> in1_val = {3.0, 3.0, 2.0, 2.0};
-    std::vector<double> res_val = {1 / pow(2, 16), 1 / pow(2, 16), 0, 0};
->>>>>>> 5a09665c36ffb7eae2288b3f837d3be18091c259
    std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in =
                            {gen(shape), gen(shape)};

@@ -2856,8 +2595,6 @@ TEST_F(FixedTensorTest, exp_fixed_low_bound) {
    // exp(-511) = exp(-1), exp(-256) = 0
    std::vector<double> in0_val = {-512, -511, -256};
    std::vector<double> res_val = {1, 0.367879, 0};
-<<<<<<< HEAD
-=======
    std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in = {gen(shape)};

    test_fixedt_gen_paddle_tensor<int64_t, 16>(in0_val,
@@ -2955,7 +2692,6 @@ TEST_F(FixedTensorTest, polynomial) {
    std::vector<double> in0_val = {-1.0, 2.0, 2.0, 2.0};
    //std::vector<double> in1_val = {2.0, 2.0, 2.0, 2.0};
    std::vector<double> res_val = {0.0, 3.0, 3.0, 3.0};
->>>>>>> 5a09665c36ffb7eae2288b3f837d3be18091c259
    std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in = {gen(shape)};

    test_fixedt_gen_paddle_tensor<int64_t, 16>(in0_val,
@@ -2973,19 +2709,19 @@ TEST_F(FixedTensorTest, polynomial) {

    _t[0] = std::thread([this, in, out0]() mutable {
        g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[0], [&](){
-            test_fixedt_exp_fixed(0, in, out0.get());
+            test_fixedt_poly_fixed(0, in, out0.get());
        });

    });
    _t[1] = std::thread([this, in, out1]() mutable {
        g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[1], [&](){
-            test_fixedt_exp_fixed(1, in, out1.get());
+            test_fixedt_poly_fixed(1, in, out1.get());
        });

    });
    _t[2] = std::thread([this, in, out2]() mutable {
        g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[2], [&](){
-            test_fixedt_exp_fixed(2, in, out2.get());
+            test_fixedt_poly_fixed(2, in, out2.get());
        });

    });
@@ -2994,18 +2730,11 @@ TEST_F(FixedTensorTest, polynomial) {
    _t[1].join();
    _t[2].join();

-    EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), out1.get(), 0.01, true));
-    EXPECT_TRUE(test_fixedt_check_tensor_eq(out1.get(), out2.get(), 0.01, true));
-    EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), &result, 0.01, true));
+    EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), out1.get()));
+    EXPECT_TRUE(test_fixedt_check_tensor_eq(out1.get(), out2.get()));
+    EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), &result));
 }

-<<<<<<< HEAD
-TEST_F(FixedTensorTest, exp_fixed_upper_bound) {
-    std::vector<size_t> shape = {1};
-    // input large than 15 may get error result because of multiplication error
-    std::vector<double> in0_val = {15};
-    std::vector<double> res_val = {3269017.37};
-=======
 TEST_F(FixedTensorTest, polynomial_wise) {
    // y = x + 1 (x >= 0)
    // y = 1 (x < 0)
@@ -3013,7 +2742,6 @@ TEST_F(FixedTensorTest, polynomial_wise) {
    std::vector<double> in0_val = {-1.0, 1.0, 2.0, 2.0};
    //std::vector<double> in1_val = {2.0, 2.0, 2.0, 2.0};
    std::vector<double> res_val = {1.0, 2.0, 3.0, 3.0};
->>>>>>> 5a09665c36ffb7eae2288b3f837d3be18091c259
    std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in = {gen(shape)};

    test_fixedt_gen_paddle_tensor<int64_t, 16>(in0_val,
@@ -3031,19 +2759,19 @@ TEST_F(FixedTensorTest, polynomial_wise) {

    _t[0] = std::thread([this, in, out0]() mutable {
        g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[0], [&](){
-            test_fixedt_exp_fixed(0, in, out0.get());
+            test_fixedt_poly_wise_fixed(0, in, out0.get());
        });

    });
    _t[1] = std::thread([this, in, out1]() mutable {
        g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[1], [&](){
-            test_fixedt_exp_fixed(1, in, out1.get());
+            test_fixedt_poly_wise_fixed(1, in, out1.get());
        });

    });
    _t[2] = std::thread([this, in, out2]() mutable {
        g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[2], [&](){
-            test_fixedt_exp_fixed(2, in, out2.get());
+            test_fixedt_poly_wise_fixed(2, in, out2.get());
        });

    });
@@ -3052,19 +2780,14 @@ TEST_F(FixedTensorTest, polynomial_wise) {
    _t[1].join();
    _t[2].join();

-    EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), out1.get(), 0.4, true));
-    EXPECT_TRUE(test_fixedt_check_tensor_eq(out1.get(), out2.get(), 0.4, true));
-    EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), &result, 0.4, true));
+    EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), out1.get()));
+    EXPECT_TRUE(test_fixedt_check_tensor_eq(out1.get(), out2.get()));
+    EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), &result));
 }

-TEST_F(FixedTensorTest, polynomial) {
-    // y = 1 + x
+TEST_F(FixedTensorTest, relu) {
+
    std::vector<size_t> shape = {2, 2};
-<<<<<<< HEAD
-    std::vector<double> in0_val = {-1.0, 2.0, 2.0, 2.0};
-    //std::vector<double> in1_val = {2.0, 2.0, 2.0, 2.0};
-    std::vector<double> res_val = {0.0, 3.0, 3.0, 3.0};
-=======
    std::vector<double> in0_val = {1.0, -1.0, -2, 2};
    //std::vector<double> in1_val = {2.0, 2.0, 2.0, 2.0};
    std::vector<double> res_val = {1.0, 0.0, 0.0, 2};
@@ -3164,7 +2887,6 @@ TEST_F(FixedTensorTest, relu_upper_bound) {
    std::vector<size_t> shape = {1};
    std::vector<double> in0_val = {0x1p20};
    std::vector<double> res_val = {0x1p20};
->>>>>>> 5a09665c36ffb7eae2288b3f837d3be18091c259
    std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in = {gen(shape)};

    test_fixedt_gen_paddle_tensor<int64_t, 16>(in0_val,
@@ -3182,19 +2904,19 @@ TEST_F(FixedTensorTest, relu_upper_bound) {

    _t[0] = std::thread([this, in, out0]() mutable {
        g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[0], [&](){
-            test_fixedt_poly_fixed(0, in, out0.get());
+            test_fixedt_relu_fixed(0, in, out0.get());
        });

    });
    _t[1] = std::thread([this, in, out1]() mutable {
        g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[1], [&](){
-            test_fixedt_poly_fixed(1, in, out1.get());
+            test_fixedt_relu_fixed(1, in, out1.get());
        });

    });
    _t[2] = std::thread([this, in, out2]() mutable {
        g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[2], [&](){
-            test_fixedt_poly_fixed(2, in, out2.get());
+            test_fixedt_relu_fixed(2, in, out2.get());
        });

    });
@@ -3208,17 +2930,8 @@ TEST_F(FixedTensorTest, relu_upper_bound) {
    EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), &result));
 }

-<<<<<<< HEAD
-TEST_F(FixedTensorTest, polynomial_wise) {
-    // y = x + 1 (x >= 0)
-    // y = 1 (x < 0)
-    std::vector<size_t> shape = {2, 2};
-    std::vector<double> in0_val = {-1.0, 1.0, 2.0, 2.0};
-    //std::vector<double> in1_val = {2.0, 2.0, 2.0, 2.0};
-    std::vector<double> res_val = {1.0, 2.0, 3.0, 3.0};
-=======
-TEST_F(FixedTensorTest, relu2) {
-
+TEST_F(FixedTensorTest, relu2) {
+
    std::vector<size_t> shape = {2, 2};
    std::vector<double> in0_val = {1.0, -1.0, -2, 2};
    //std::vector<double> in1_val = {2.0, 2.0, 2.0, 2.0};
@@ -3272,7 +2985,6 @@ TEST_F(FixedTensorTest, softmax) {
    std::vector<double> in0_val = {1.0, 1.0, 1, 1};
    //std::vector<double> in1_val = {2.0, 2.0, 2.0, 2.0};
    std::vector<double> res_val = {0.5, 0.5, 0.5, 0.5};
->>>>>>> 5a09665c36ffb7eae2288b3f837d3be18091c259
    std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in = {gen(shape)};

    test_fixedt_gen_paddle_tensor<int64_t, 16>(in0_val,
@@ -3290,19 +3002,19 @@ TEST_F(FixedTensorTest, softmax) {

    _t[0] = std::thread([this, in, out0]() mutable {
        g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[0], [&](){
-            test_fixedt_poly_wise_fixed(0, in, out0.get());
+            test_fixedt_softmax_fixed(0, in, out0.get());
        });

    });
    _t[1] = std::thread([this, in, out1]() mutable {
        g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[1], [&](){
-            test_fixedt_poly_wise_fixed(1, in, out1.get());
+            test_fixedt_softmax_fixed(1, in, out1.get());
        });

    });
    _t[2] = std::thread([this, in, out2]() mutable {
        g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[2], [&](){
-            test_fixedt_poly_wise_fixed(2, in, out2.get());
+            test_fixedt_softmax_fixed(2, in, out2.get());
        });

    });
@@ -3365,613 +3077,6 @@ TEST_F(FixedTensorTest, sigmoid_chebyshev) {
    EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), &result, 0.03));
 }

-TEST_F(FixedTensorTest, relu) {
-
-    std::vector<size_t> shape = {2, 2};
-<<<<<<< HEAD
-    std::vector<double> in0_val = {1.0, -1.0, -2, 2};
-    //std::vector<double> in1_val = {2.0, 2.0, 2.0, 2.0};
-    std::vector<double> res_val = {1.0, 0.0, 0.0, 2};
-=======
-    std::vector<double> in0_val = {0.0, 3, 7, 0.5};
-    std::vector<double> res_val = {0.5, 0.9525, 0.999, 0.6225};
->>>>>>> 5a09665c36ffb7eae2288b3f837d3be18091c259
-    std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in = {gen(shape)};
-
-    test_fixedt_gen_paddle_tensor<int64_t, 16>(in0_val,
-                                shape, _cpu_ctx).copy(in[0].get());
-    //not copy scaling factor in copy funtion
-    dynamic_cast<PaddleTensor<int64_t>*>(in[0].get())->
-                                scaling_factor() = 16;
-
-    auto out0 = _s_tensor_factory->create<int64_t>(shape);
-    auto out1 = _s_tensor_factory->create<int64_t>(shape);
-    auto out2 = _s_tensor_factory->create<int64_t>(shape);
-
-    PaddleTensor<int64_t> result =
-            test_fixedt_gen_paddle_tensor<int64_t, 16>(res_val, shape, _cpu_ctx);
-
-    _t[0] = std::thread([this, in, out0]() mutable {
-        g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[0], [&](){
-            test_fixedt_relu_fixed(0, in, out0.get());
-        });
-
-    });
-    _t[1] = std::thread([this, in, out1]() mutable {
-        g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[1], [&](){
-            test_fixedt_relu_fixed(1, in, out1.get());
-        });
-
-    });
-    _t[2] = std::thread([this, in, out2]() mutable {
-        g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[2], [&](){
-            test_fixedt_relu_fixed(2, in, out2.get());
-        });
-
-    });
-
-    _t[0].join();
-    _t[1].join();
-    _t[2].join();
-
-    EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), out1.get()));
-    EXPECT_TRUE(test_fixedt_check_tensor_eq(out1.get(), out2.get()));
-    EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), &result));
-}
-
-TEST_F(FixedTensorTest, relu_low_bound) {
-
-    std::vector<size_t> shape = {1};
-    std::vector<double> in0_val = {-0x1p-20};
-    std::vector<double> res_val = {0.0};
-    std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in = {gen(shape)};
-
-    test_fixedt_gen_paddle_tensor<int64_t, 16>(in0_val,
-                                shape, _cpu_ctx).copy(in[0].get());
-    //not copy scaling factor in copy funtion
-    dynamic_cast<PaddleTensor<int64_t>*>(in[0].get())->
-                                scaling_factor() = 16;
-
-    auto out0 = _s_tensor_factory->create<int64_t>(shape);
-    auto out1 = _s_tensor_factory->create<int64_t>(shape);
-    auto out2 = _s_tensor_factory->create<int64_t>(shape);
-
-    PaddleTensor<int64_t> result =
-            test_fixedt_gen_paddle_tensor<int64_t, 16>(res_val, shape, _cpu_ctx);
-
-    _t[0] = std::thread([this, in, out0]() mutable {
-        g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[0], [&](){
-            test_fixedt_relu_fixed(0, in, out0.get());
-        });
-
-    });
-    _t[1] = std::thread([this, in, out1]() mutable {
-        g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[1], [&](){
-            test_fixedt_relu_fixed(1, in, out1.get());
-        });
-
-    });
-    _t[2] = std::thread([this, in, out2]() mutable {
-        g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[2], [&](){
-            test_fixedt_relu_fixed(2, in, out2.get());
-        });
-
-    });
-
-    _t[0].join();
-    _t[1].join();
-    _t[2].join();
-
-    EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), out1.get()));
-    EXPECT_TRUE(test_fixedt_check_tensor_eq(out1.get(), out2.get()));
-    EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), &result));
-}
-
-TEST_F(FixedTensorTest, relu_upper_bound) {
-
-    std::vector<size_t> shape = {1};
-    std::vector<double> in0_val = {0x1p20};
-    std::vector<double> res_val = {0x1p20};
-    std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in = {gen(shape)};
-
-    test_fixedt_gen_paddle_tensor<int64_t, 16>(in0_val,
-                                shape, _cpu_ctx).copy(in[0].get());
-    //not copy scaling factor in copy funtion
-    dynamic_cast<PaddleTensor<int64_t>*>(in[0].get())->
-                                scaling_factor() = 16;
-
-    auto out0 = _s_tensor_factory->create<int64_t>(shape);
-    auto out1 = _s_tensor_factory->create<int64_t>(shape);
-    auto out2 = _s_tensor_factory->create<int64_t>(shape);
-
-    PaddleTensor<int64_t> result =
-            test_fixedt_gen_paddle_tensor<int64_t, 16>(res_val, shape, _cpu_ctx);
-
-    _t[0] = std::thread([this, in, out0]() mutable {
-        g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[0], [&](){
-            test_fixedt_relu_fixed(0, in, out0.get());
-        });
-
-    });
-    _t[1] = std::thread([this, in, out1]() mutable {
-        g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[1], [&](){
-            test_fixedt_relu_fixed(1, in, out1.get());
-        });
-
-    });
-    _t[2] = std::thread([this, in, out2]() mutable {
-        g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[2], [&](){
-            test_fixedt_relu_fixed(2, in, out2.get());
-        });
-
-    });
-
-    _t[0].join();
-    _t[1].join();
-    _t[2].join();
-
-    EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), out1.get()));
-    EXPECT_TRUE(test_fixedt_check_tensor_eq(out1.get(), out2.get()));
-    EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), &result));
-}
-
-TEST_F(FixedTensorTest, relu2) {
-
-    std::vector<size_t> shape = {2, 2};
-    std::vector<double> in0_val = {1.0, -1.0, -2, 2};
-    //std::vector<double> in1_val = {2.0, 2.0, 2.0, 2.0};
-    std::vector<double> res_val = {1.0, 0.0, 0.0, 2};
-    std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in = {gen(shape)};
-
-    test_fixedt_gen_paddle_tensor<int64_t, 16>(in0_val,
-                                shape, _cpu_ctx).copy(in[0].get());
-    //not copy scaling factor in copy funtion
-    dynamic_cast<PaddleTensor<int64_t>*>(in[0].get())->
-                                scaling_factor() = 16;
-
-    auto out0 = _s_tensor_factory->create<int64_t>(shape);
-    auto out1 = _s_tensor_factory->create<int64_t>(shape);
-    auto out2 = _s_tensor_factory->create<int64_t>(shape);
-
-    PaddleTensor<int64_t> result =
-            test_fixedt_gen_paddle_tensor<int64_t, 16>(res_val, shape, _cpu_ctx);
-
-    _t[0] = std::thread([this, in, out0]() mutable {
-        g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[0], [&](){
-            test_fixedt_relu2_fixed(0, in, out0.get());
-        });
-
-    });
-    _t[1] = std::thread([this, in, out1]() mutable {
-        g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[1], [&](){
-            test_fixedt_relu2_fixed(1, in, out1.get());
-        });
-
-    });
-    _t[2] = std::thread([this, in, out2]() mutable {
-        g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[2], [&](){
-            test_fixedt_relu2_fixed(2, in, out2.get());
-        });
-
-    });
-
-    _t[0].join();
-    _t[1].join();
-    _t[2].join();
-
-    EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), out1.get()));
-    EXPECT_TRUE(test_fixedt_check_tensor_eq(out1.get(), out2.get()));
-    EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), &result));
-}
-
-TEST_F(FixedTensorTest, softmax) {
-
-    std::vector<size_t> shape = {2, 2};
-    std::vector<double> in0_val = {1.0, 1.0, 1, 1};
-    //std::vector<double> in1_val = {2.0, 2.0, 2.0, 2.0};
-    std::vector<double> res_val = {0.5, 0.5, 0.5, 0.5};
-    std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in = {gen(shape)};
-
-    test_fixedt_gen_paddle_tensor<int64_t, 16>(in0_val,
-                                shape, _cpu_ctx).copy(in[0].get());
-    //not copy scaling factor in copy funtion
-    dynamic_cast<PaddleTensor<int64_t>*>(in[0].get())->
-                                scaling_factor() = 16;
-
-    auto out0 = _s_tensor_factory->create<int64_t>(shape);
-    auto out1 = _s_tensor_factory->create<int64_t>(shape);
-    auto out2 = _s_tensor_factory->create<int64_t>(shape);
-
-    PaddleTensor<int64_t> result =
-            test_fixedt_gen_paddle_tensor<int64_t, 16>(res_val, shape, _cpu_ctx);
-
-    _t[0] = std::thread([this, in, out0]() mutable {
-        g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[0], [&](){
-            test_fixedt_softmax_fixed(0, in, out0.get());
-        });
-
-    });
-    _t[1] = std::thread([this, in, out1]() mutable {
-        g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[1], [&](){
-            test_fixedt_softmax_fixed(1, in, out1.get());
-        });
-
-    });
-    _t[2] = std::thread([this, in, out2]() mutable {
-        g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[2], [&](){
-            test_fixedt_softmax_fixed(2, in, out2.get());
-        });
-
-    });
-
-    _t[0].join();
-    _t[1].join();
-    _t[2].join();
-
-    EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), out1.get(), 0.08));
-    EXPECT_TRUE(test_fixedt_check_tensor_eq(out1.get(), out2.get(), 0.08));
-    EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), &result, 0.08));
-}
-
-TEST_F(FixedTensorTest, sigmoid_enhanced) {
-
-    std::vector<size_t> shape = {2, 2};
-    std::vector<double> in0_val = {0.0, 3, 7, 0.5};
-    std::vector<double> res_val = {0.5, 0.9525, 0.999, 0.6225};
-    std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in = {gen(shape)};
-
-    test_fixedt_gen_paddle_tensor<int64_t, 16>(in0_val,
-                                shape, _cpu_ctx).copy(in[0].get());
-    //not copy scaling factor in copy funtion
-    dynamic_cast<PaddleTensor<int64_t>*>(in[0].get())->
-                                scaling_factor() = 16;
-
-    auto out0 = _s_tensor_factory->create<int64_t>(shape);
-    auto out1 = _s_tensor_factory->create<int64_t>(shape);
-    auto out2 = _s_tensor_factory->create<int64_t>(shape);
-
-    PaddleTensor<int64_t> result =
-            test_fixedt_gen_paddle_tensor<int64_t, 16>(res_val, shape, _cpu_ctx);
-
-    _t[0] = std::thread([this, in, out0]() mutable {
-        g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[0], [&](){
-            test_fixedt_sigmoid_enhanced_fixed(0, in, out0.get());
-        });
-
-    });
-    _t[1] = std::thread([this, in, out1]() mutable {
-        g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[1], [&](){
-            test_fixedt_sigmoid_enhanced_fixed(1, in, out1.get());
-        });
-
-    });
-    _t[2] = std::thread([this, in, out2]() mutable {
-        g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[2], [&](){
-            test_fixedt_sigmoid_enhanced_fixed(2, in, out2.get());
-        });
-
-    });
-
-    _t[0].join();
-    _t[1].join();
-    _t[2].join();
-
-    EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), out1.get(), 0.08));
-    EXPECT_TRUE(test_fixedt_check_tensor_eq(out1.get(), out2.get(), 0.08));
-    EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), &result, 0.08));
-}
-
-TEST_F(FixedTensorTest, max_test) {
-    std::vector<size_t> shape = { 1 };
-    std::shared_ptr<TensorAdapter<int64_t>> sl[3] = { gen(shape), gen(shape), gen(shape) };
-    std::shared_ptr<TensorAdapter<int64_t>> sr[3] = { gen(shape), gen(shape), gen(shape) };
-
-    std::shared_ptr<TensorAdapter<int64_t>> sout[6] = { gen(shape), gen(shape), gen(shape),
-                                                        gen(shape), gen(shape), gen(shape)};
-
-    std::shared_ptr<TensorAdapter<int64_t>> sbout[6] = {
-        gen(shape), gen(shape), gen(shape), gen(shape), gen(shape), gen(shape)};
-
-    // lhs = 6 = 1 + 2 + 3
-    sl[0]->data()[0] = 1;
-    sl[1]->data()[0] = 2;
-    sl[2]->data()[0] = 3;
-    // rhs = 15 = 4 + 5 + 6
-    sr[0]->data()[0] = 4;
-    sr[1]->data()[0] = 5;
-    sr[2]->data()[0] = 6;
-    Fix64N16 fl0(sl[0].get(), sl[1].get());
-    Fix64N16 fl1(sl[1].get(), sl[2].get());
-    Fix64N16 fl2(sl[2].get(), sl[0].get());
-    Fix64N16 fr0(sr[0].get(), sr[1].get());
-    Fix64N16 fr1(sr[1].get(), sr[2].get());
-    Fix64N16 fr2(sr[2].get(), sr[0].get());
-    Fix64N16 fout0(sout[0].get(), sout[1].get());
-    Fix64N16 fout1(sout[2].get(), sout[3].get());
-    Fix64N16 fout2(sout[4].get(), sout[5].get());
-    BooleanTensor<int64_t> bout0(sbout[0].get(), sbout[1].get());
-    BooleanTensor<int64_t> bout1(sbout[2].get(), sbout[3].get());
-    BooleanTensor<int64_t> bout2(sbout[4].get(), sbout[5].get());
-
-    auto p = gen(shape);
-    auto pb = gen(shape);
-
-    _t[0] = std::thread(
-        [&] () {
-        g_ctx_holder::template run_with_context(
-            _exec_ctx.get(), _mpc_ctx[0], [&](){
-                fl0.max(&fr0, &fout0, &bout0);
-                fout0.reveal_to_one(0, p.get());
-                bout0.reveal_to_one(0, pb.get());
-            });
-        }
-    );
-    _t[1] = std::thread(
-        [&] () {
-        g_ctx_holder::template run_with_context(
-            _exec_ctx.get(), _mpc_ctx[1], [&](){
-                fl1.max(&fr1, &fout1, &bout1);
-                fout1.reveal_to_one(0, nullptr);
-                bout1.reveal_to_one(0, nullptr);
-            });
-        }
-    );
-    _t[2] = std::thread(
-        [&] () {
-        g_ctx_holder::template run_with_context(
-            _exec_ctx.get(), _mpc_ctx[2], [&](){
-                fl2.max(&fr2, &fout2, &bout2);
-                fout2.reveal_to_one(0, nullptr);
-                bout2.reveal_to_one(0, nullptr);
-            });
-        }
-    );
-    for (auto &t: _t) {
-        t.join();
-    }
-    EXPECT_EQ(std::max(6, 15), p->data()[0]);
-    EXPECT_EQ(1, pb->data()[0]);
-}
-
-TEST_F(FixedTensorTest, max_test2) {
-    std::vector<size_t> shape = { 1 };
-    std::shared_ptr<TensorAdapter<int64_t>> sl[3] = { gen(shape), gen(shape), gen(shape) };
-    std::shared_ptr<TensorAdapter<int64_t>> sout[6] = { gen(shape), gen(shape), gen(shape),
-                                                        gen(shape), gen(shape), gen(shape)};
-    // lhs = 6 = 1 + 2 + 3
-    sl[0]->data()[0] = 1 << 16;
-    sl[1]->data()[0] = 2 << 16;
-    sl[2]->data()[0] = 3 << 16;
-
-    auto pr = gen(shape);
-
-    // rhs = 15
-    pr->data()[0] = 15 << 16;
-    pr->scaling_factor() = 16;
-    Fix64N16 fl0(sl[0].get(), sl[1].get());
-    Fix64N16 fl1(sl[1].get(), sl[2].get());
-    Fix64N16 fl2(sl[2].get(), sl[0].get());
-    Fix64N16 fout0(sout[0].get(), sout[1].get());
-    Fix64N16 fout1(sout[2].get(), sout[3].get());
-    Fix64N16 fout2(sout[4].get(), sout[5].get());
-
-    auto p = gen(shape);
-
-    _t[0] = std::thread(
-        [&] () {
-        g_ctx_holder::template run_with_context(
-            _exec_ctx.get(), _mpc_ctx[0], [&](){
-                fl0.max(pr.get(), &fout0);
-                fout0.reveal_to_one(0, p.get());
-            });
-        }
-    );
-    _t[1] = std::thread(
-        [&] () {
-        g_ctx_holder::template run_with_context(
-            _exec_ctx.get(), _mpc_ctx[1], [&](){
-                fl1.max(pr.get(), &fout1);
-                fout1.reveal_to_one(0, nullptr);
-            });
-        }
-    );
-    _t[2] = std::thread(
-        [&] () {
-        g_ctx_holder::template run_with_context(
-            _exec_ctx.get(), _mpc_ctx[2], [&](){
-                fl2.max(pr.get(), &fout2);
-                fout2.reveal_to_one(0, nullptr);
-            });
-        }
-    );
-    for (auto &t: _t) {
-        t.join();
-    }
-    EXPECT_EQ(std::max(6, 15), p->data()[0] >> 16);
-}
-
-TEST_F(FixedTensorTest, max_pooling_test) {
-    std::vector<size_t> shape = { 4, 1 };
-    std::vector<size_t> shape_ = { 1, 1 };
-
-    std::shared_ptr<TensorAdapter<int64_t>> sl[3] = { gen(shape), gen(shape), gen(shape) };
-    std::shared_ptr<TensorAdapter<int64_t>> sfout[6] = {
-        gen(shape_), gen(shape_), gen(shape_), gen(shape_), gen(shape_), gen(shape_)};
-    std::shared_ptr<TensorAdapter<int64_t>> sbout[6] = {
-        gen(shape), gen(shape), gen(shape), gen(shape), gen(shape), gen(shape)};
-
-    assign_to_tensor(sl[1].get(), 0l);
-    assign_to_tensor(sl[2].get(), 0l);
-    sl[0]->data()[0] = 2;
-    sl[0]->data()[1] = 1;
-    sl[0]->data()[2] = 4;
-    sl[0]->data()[3] = 3;
-    // input [2 1 4 3]
-
-    auto pmax = gen(shape_);
-    auto ppos = gen(shape);
-
-    Fix64N16 fl0(sl[0].get(), sl[1].get());
-    Fix64N16 fl1(sl[1].get(), sl[2].get());
-    Fix64N16 fl2(sl[2].get(), sl[0].get());
-
-    Fix64N16 fout0(sfout[0].get(), sfout[1].get());
-    Fix64N16 fout1(sfout[2].get(), sfout[3].get());
-    Fix64N16 fout2(sfout[4].get(), sfout[5].get());
-
-    BooleanTensor<int64_t> bout0(sbout[0].get(), sbout[1].get());
-    BooleanTensor<int64_t> bout1(sbout[2].get(), sbout[3].get());
-    BooleanTensor<int64_t> bout2(sbout[4].get(), sbout[5].get());
-
-    _t[0] = std::thread(
-        [&] () {
-        g_ctx_holder::template run_with_context(
-            _exec_ctx.get(), _mpc_ctx[0], [&](){
-                fl0.max_pooling(&fout0, &bout0);
-                fout0.reveal_to_one(0, pmax.get());
-                bout0.reveal_to_one(0, ppos.get());
-            });
-        }
-    );
-    _t[1] = std::thread(
-        [&] () {
-        g_ctx_holder::template run_with_context(
-            _exec_ctx.get(), _mpc_ctx[1], [&](){
-                fl1.max_pooling(&fout1, &bout1);
-                fout1.reveal_to_one(0, nullptr);
-                bout1.reveal_to_one(0, nullptr);
-            });
-        }
-    );
-    _t[2] = std::thread(
-        [&] () {
-        g_ctx_holder::template run_with_context(
-            _exec_ctx.get(), _mpc_ctx[2], [&](){
-                fl2.max_pooling(&fout2, &bout2);
-                fout2.reveal_to_one(0, nullptr);
-                bout2.reveal_to_one(0, nullptr);
-            });
-        }
-    );
-    for (auto &t: _t) {
-        t.join();
-    }
-
-    EXPECT_EQ(4, pmax->data()[0]);
-
-    EXPECT_EQ(0, ppos->data()[0]);
-    EXPECT_EQ(0, ppos->data()[1]);
-    EXPECT_EQ(1, ppos->data()[2]);
-    EXPECT_EQ(0, ppos->data()[3]);
-}
-
-TEST_F(FixedTensorTest, inv_sqrt_test) {
-    std::vector<size_t> shape = { 1 };
-
-    std::shared_ptr<TensorAdapter<int64_t>> sl[3] = { gen(shape), gen(shape), gen(shape) };
-    std::shared_ptr<TensorAdapter<int64_t>> sfout[6] = {
-        gen(shape), gen(shape), gen(shape), gen(shape), gen(shape), gen(shape)};
-
-    sl[0]->data()[0] = 0x4p16;
-    sl[1]->data()[0] = 0;
-    sl[2]->data()[0] = 0;
-    // input [4]
-
-    auto p = gen(shape);
-
-    Fix64N16 fl0(sl[0].get(), sl[1].get());
-    Fix64N16 fl1(sl[1].get(), sl[2].get());
-    Fix64N16 fl2(sl[2].get(), sl[0].get());
-
-    Fix64N16 fout0(sfout[0].get(), sfout[1].get());
-    Fix64N16 fout1(sfout[2].get(), sfout[3].get());
-    Fix64N16 fout2(sfout[4].get(), sfout[5].get());
-
-    _t[0] = std::thread(
-        [&] () {
-        g_ctx_holder::template run_with_context(
-            _exec_ctx.get(), _mpc_ctx[0], [&](){
-                fl0.inverse_square_root(&fout0);
-                fout0.reveal_to_one(0, p.get());
-            });
-        }
-    );
-    _t[1] = std::thread(
-        [&] () {
-        g_ctx_holder::template run_with_context(
-            _exec_ctx.get(), _mpc_ctx[1], [&](){
-                fl1.inverse_square_root(&fout1);
-                fout1.reveal_to_one(0, nullptr);
-            });
-        }
-    );
-    _t[2] = std::thread(
-        [&] () {
-        g_ctx_holder::template run_with_context(
-            _exec_ctx.get(), _mpc_ctx[2], [&](){
-                fl2.inverse_square_root(&fout2);
-                fout2.reveal_to_one(0, nullptr);
-            });
-        }
-    );
-    for (auto &t: _t) {
-        t.join();
-    }
-
-    // inv_sqrt(4) = 1/2
-    EXPECT_NEAR(0.5, p->data()[0] / 0x1p16f, 2 / 0x1p16f);
-
-}
-
-TEST_F(FixedTensorTest, sigmoid_chebyshev) {
-
-    std::vector<size_t> shape = {2, 2};
-    // larger error when input < -3 or >4
-    std::vector<double> in0_val = {1.0, 2.0, -3.0, 4.0};
-    std::vector<double> res_val = {0.73105, 0.88079, 0.0474, 0.9820};
-    std::vector<std::shared_ptr<TensorAdapter<int64_t>>> in = {gen(shape)};
-
-    test_fixedt_gen_paddle_tensor<int64_t, 16>(in0_val,
-                                shape, _cpu_ctx).copy(in[0].get());
-    //not copy scaling factor in copy funtion
-    dynamic_cast<PaddleTensor<int64_t>*>(in[0].get())->
-                                scaling_factor() = 16;
-
-    auto out0 = _s_tensor_factory->create<int64_t>(shape);
-    auto out1 = _s_tensor_factory->create<int64_t>(shape);
-    auto out2 = _s_tensor_factory->create<int64_t>(shape);
-
-    PaddleTensor<int64_t> result =
-            test_fixedt_gen_paddle_tensor<int64_t, 16>(res_val, shape, _cpu_ctx);
-
-    _t[0] = std::thread([this, in, out0]() mutable {
-        g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[0], [&](){
-            test_fixedt_sigmoid_chebyshev_fixed(0, in, out0.get());
-        });
-
-    });
-    _t[1] = std::thread([this, in, out1]() mutable {
-        g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[1], [&](){
-            test_fixedt_sigmoid_chebyshev_fixed(1, in, out1.get());
-        });
-
-    });
-    _t[2] = std::thread([this, in, out2]() mutable {
-        g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[2], [&](){
-            test_fixedt_sigmoid_chebyshev_fixed(2, in, out2.get());
-        });
-
-    });
-
-    _t[0].join();
-    _t[1].join();
-    _t[2].join();
-
-    EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), out1.get(), 0.03));
-    EXPECT_TRUE(test_fixedt_check_tensor_eq(out1.get(), out2.get(), 0.03));
-    EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), &result, 0.03));
-}
-
 TEST_F(FixedTensorTest, sigmoid) {

    std::vector<size_t> shape = {2, 2};

--- a/python/paddle_fl/mpc/layers/ml.py
+++ b/python/paddle_fl/mpc/layers/ml.py
@@ -234,11 +234,7 @@ def relu(input, name=None):
        type="mpc_relu",
        inputs={"X": input},
        outputs={
-<<<<<<< HEAD
            "Out": out,
-=======
-            "Y": out,
->>>>>>> 5a09665c36ffb7eae2288b3f837d3be18091c259
            "Derivative": derivative}
        )
    return out

--- a/python/paddle_fl/mpc/mpc_layer_helper.py
+++ b/python/paddle_fl/mpc/mpc_layer_helper.py
@@ -221,11 +221,7 @@ class MpcLayerHelper(LayerHelper):
        self.append_op(
            type="mpc_" + act_type,
            inputs={"X": [input_var]},
-<<<<<<< HEAD
            outputs={"Out": [tmp],
-=======
-            outputs={"Y": [tmp],
->>>>>>> 5a09665c36ffb7eae2288b3f837d3be18091c259
                     "Derivative": [derivative]},
            attrs=act)
        return tmp