From a8576fd51d718181674878734f925b62bf3342d8 Mon Sep 17 00:00:00 2001 From: yangqingyou Date: Wed, 2 Sep 2020 02:20:23 +0000 Subject: [PATCH] amend --- core/privc/fixedpoint_tensor_imp.h | 1132 ---------------------------- 1 file changed, 1132 deletions(-) diff --git a/core/privc/fixedpoint_tensor_imp.h b/core/privc/fixedpoint_tensor_imp.h index 1bf282a..f516950 100644 --- a/core/privc/fixedpoint_tensor_imp.h +++ b/core/privc/fixedpoint_tensor_imp.h @@ -95,8 +95,6 @@ void FixedPointTensor::add(const FixedPointTensor* rhs, template void FixedPointTensor::add(const TensorAdapter* rhs, FixedPointTensor* ret) const { - PADDLE_ENFORCE_EQ(N, rhs->scaling_factor(), - "no match scaling factor"); if (party() == 0) { _share->add(rhs, ret->_share); } else { @@ -113,8 +111,6 @@ void FixedPointTensor::sub(const FixedPointTensor* rhs, template void FixedPointTensor::sub(const TensorAdapter* rhs, FixedPointTensor* ret) const { - PADDLE_ENFORCE_EQ(N, rhs->scaling_factor(), - "no match scaling factor"); if (party() == 0) { _share->sub(rhs, ret->_share); } else { @@ -182,1132 +178,4 @@ void FixedPointTensor::mul_impl(const FixedPointTensor* rhs, } } -/* -template -void FixedPointTensor::truncate(const FixedPointTensor* op, - FixedPointTensor* ret, - size_t scaling_factor) { - if (scaling_factor == 0) { - op->share(0)->copy(ret->mutable_share(0)); - op->share(1)->copy(ret->mutable_share(1)); - } - // implement ABY3's truncate1 algorithm - if (party() == 0) { - // party0 - op->_share[0]->rshift(scaling_factor, ret->_share[0]); - privc_ctx()->network()->template recv(1, *(ret->_share[1])); - - } else if (party() == 1) { - // party1 - auto r_12 = tensor_factory()->template create(op->shape()); - privc_ctx()->template gen_random(*r_12.get(), true); - - op->_share[0]->add(op->_share[1], ret->_share[0]); - // trunc from [SecureML, Thm.1] - ret->_share[0]->negative(ret->_share[0]); - ret->_share[0]->rshift(scaling_factor, ret->_share[0]); - ret->_share[0]->negative(ret->_share[0]); - ret->_share[0]->sub(r_12.get(), ret->_share[0]); - - privc_ctx()->network()->template send(0, *(ret->_share[0])); - r_12->copy(ret->_share[1]); - - } else { - // party2 - op->_share[1]->rshift(scaling_factor, ret->_share[1]); - - auto r_21 = tensor_factory()->template create(op->shape()); - privc_ctx()->template gen_random(*r_21.get(), false); - - r_21->copy(ret->_share[0]); - } - - return; -} - -// Protocol. `truncate3` -// P2 randomly generates r' \in (-2^62, 2^62), randomly generates r'_0, r_0, r_1 in Z_{2^64}, -// P2 compute r'_1 = r' - r'_0, r_2 = r'/2^N - r_0 - r_1, let x2 = r_2 -// P2 send r_0, r'_0 to P0, send r_1, r'_1 to P1 -// P1 and P0 execute "reveal x - r' to P1" -// P1 compute x1 = (x - r') / 2^N + r_1 -// P0 set x0 = r_0 -// P0, P1, P2 invoke reshare() with inputs x0, x1, x2 respectively. 
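// Illustrative sketch (hypothetical standalone code, not part of this patch):
// simulating the truncate3 share arithmetic above with plain integers, all
// operations mod 2^64. Reconstruction differs from x >> N by at most the
// one-bit carry that the implementation compensates for afterwards.
#include <cstdint>
#include <random>
#include <cassert>
int main() {
    const int N = 16;                               // scaling factor
    std::mt19937_64 gen(42);
    int64_t x = (int64_t)(3.5 * (1 << N));          // fixed-point 3.5
    int64_t rp = (int64_t)(gen() >> 2);             // r' in [0, 2^62) for brevity
    uint64_t r0 = gen(), r1 = gen();
    uint64_t x2 = (uint64_t)(rp >> N) - r0 - r1;    // P2: x2 = r'/2^N - r_0 - r_1
    uint64_t x1 = (uint64_t)((int64_t)((uint64_t)x - (uint64_t)rp) >> N) + r1;
    uint64_t x0 = r0;                               // P0: x0 = r_0
    int64_t diff = (int64_t)(x0 + x1 + x2) - (x >> N);
    assert(diff == 0 || diff == -1);                // off-by-one carry only
    return 0;
}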
-template -void FixedPointTensor::truncate3(const FixedPointTensor* op, - FixedPointTensor* ret, - size_t scaling_factor) { - if (scaling_factor == 0) { - op->share(0)->copy(ret->mutable_share(0)); - op->share(1)->copy(ret->mutable_share(1)); - return; - } - std::vector>> temp; - if (party() == 2) { - for (int i = 0; i < 7; ++i) { - temp.emplace_back( - tensor_factory()->template create(op->shape())); - } - // r', contraint in (-2^62, 2^62) - // notice : when r' is contrainted in (-2^62, 2^62), - // the SD (statistical distance) of x - r' between this - // and r' in Z_{2^64} is equal to |X| / (2^63 + |X|) - // according to http://yuyu.hk/files/ho2.pdf - privc_ctx()->template gen_random_private(*temp[0]); - int64_t contraint_upper = ~((uint64_t) 1 << 62); - int64_t contraint_low = (uint64_t) 1 << 62; - std::for_each(temp[0]->data(), temp[0]->data() + temp[0]->numel(), - [&contraint_upper, &contraint_low] (T& a) { - // contraint -2^62 < a < 2^62 - if (a >= 0) { - a &= contraint_upper; - } else { - a |= contraint_low; - } - }); - - //r'_0, r'_1 - privc_ctx()->template gen_random_private(*temp[1]); - temp[0]->sub(temp[1].get(), temp[2].get()); - // r, r_0, r_1 - temp[0]->rshift(scaling_factor, temp[3].get()); - privc_ctx()->template gen_random_private(*temp[4]); - privc_ctx()->template gen_random_private(*temp[5]); - // r_2 - temp[3]->sub(temp[4].get(), temp[6].get()); - temp[6]->sub(temp[5].get(), temp[6].get()); - - privc_ctx()->network()->template send(1, *temp[2]); - privc_ctx()->network()->template send(1, *temp[5]); - privc_ctx()->network()->template send(0, *temp[1]); - privc_ctx()->network()->template send(0, *temp[4]); - - temp[6]->copy(ret->mutable_share(0)); - - } else if (party() == 1) { - for (int i = 0; i < 4; ++i) { - temp.emplace_back( - tensor_factory()->template create(op->shape())); - } - // r'_1, r_1 - privc_ctx()->network()->template recv(2, *temp[0]); - privc_ctx()->network()->template recv(2, *temp[1]); - // recv x0 - r'_0 from party 0 - privc_ctx()->network()->template recv(0, *temp[2]); - //reveal x - r' to party 1 - op->share(0)->add(op->share(1), temp[3].get()); - temp[3]->add(temp[2].get(), temp[3].get()); - temp[3]->sub(temp[0].get(), temp[3].get()); - // truncate x-r' - temp[3]->rshift(scaling_factor, temp[3].get()); - - temp[3]->add(temp[1].get(), ret->mutable_share(0)); - } else { - for (int i = 0; i < 3; ++i) { - temp.emplace_back( - tensor_factory()->template create(op->shape())); - } - // r'_0, r_0 - privc_ctx()->network()->template recv(2, *temp[0]); - privc_ctx()->network()->template recv(2, *temp[1]); - //send x0 - r'_0 to party 1 - op->share(0)->sub(temp[0].get(), temp[2].get()); - privc_ctx()->network()->template send(1, *temp[2]); - temp[1]->copy(ret->mutable_share(0)); - } - - reshare(ret->share(0), ret->mutable_share(1)); - - // compensation for carry in - auto tensor_carry_in = tensor_factory()->template create(ret->shape()); - assign_to_tensor(tensor_carry_in.get(), (T)1); - tensor_carry_in->scaling_factor() = N; - ret->add(tensor_carry_in.get(), ret); -} - -template -template -void FixedPointTensor::mul_trunc(const FixedPointTensor* lhs, - const FixedPointTensor* rhs, - FixedPointTensor* ret, - MulFunc mul_func) { - - auto r_zero = tensor_factory()->template create(ret->shape()); - privc_ctx()->gen_zero_sharing_arithmetic(*r_zero.get()); - - // temp = _share[0]->mul(rhs->_share[0]) + - // _share[0]->mul(rhs->_share[1]) + - // _share[1]->mul(rhs->_share[0]) + - // r_zero - auto temp = tensor_factory()->template create(ret->shape()); - auto temp1 = 
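// Aside: an illustrative, hypothetical standalone check of the bit trick
// used above to constrain r' to (-2^62, 2^62): clear bit 62 of
// non-negative values, set it on negative ones.
//
//   #include <cstdint>
//   #include <cassert>
//   int main() {
//       int64_t upper = ~((uint64_t)1 << 62);    // as in contraint_upper
//       int64_t lower = (uint64_t)1 << 62;       // as in contraint_low
//       int64_t a = INT64_MAX, b = INT64_MIN;
//       a &= upper;                              // now 2^62 - 1, below 2^62
//       b |= lower;                              // now exactly -2^62
//       assert(a < ((int64_t)1 << 62) && b >= -((int64_t)1 << 62));
//       return 0;
//   }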
tensor_factory()->template create(ret->shape()); - - // use mul_func to fit both element_wise mul and mat mul - (lhs->share(0)->*mul_func)(rhs->share(0), temp.get()); - (lhs->share(0)->*mul_func)(rhs->share(1), temp1.get()); - temp1->add(temp.get(), temp1.get()); - - (lhs->share(1)->*mul_func)(rhs->share(0), temp.get()); - temp1->add(r_zero.get(), temp1.get()); - temp->add(temp1.get(), temp.get()); - - auto temp2 = tensor_factory()->template create(ret->shape()); - auto temp3 = tensor_factory()->template create(ret->shape()); - - TensorAdapter* temp_array[2] = {temp2.get(), temp3.get()}; - - std::shared_ptr> ret_no_trunc = - std::make_shared>(temp_array); - - temp->copy(ret_no_trunc->_share[0]); - reshare(temp.get(), ret_no_trunc->_share[1]); - - truncate3(ret_no_trunc.get(), ret, N); -} - -template -void FixedPointTensor::mul(const TensorAdapter* rhs, - FixedPointTensor* ret) const { - // PADDLE_ENFORCE_EQ(N, rhs->scaling_factor(), - // "no match scaling factor"); - auto temp0 = tensor_factory()->template create(this->shape()); - auto temp1 = tensor_factory()->template create(this->shape()); - std::shared_ptr> temp = - std::make_shared>(temp0.get(), temp1.get()); - - _share[0]->mul(rhs, temp->_share[0]); - _share[1]->mul(rhs, temp->_share[1]); - truncate3(temp.get(), ret, rhs->scaling_factor()); -} - -template -void FixedPointTensor::sum(FixedPointTensor* ret) const { - PADDLE_ENFORCE_EQ(ret->numel(), 1, "output size should be 1."); - T sum1 = (T) 0; - T sum2 = (T) 0; - T* iter_0 = _share[0]->data(); - T* iter_1 = _share[1]->data(); - for (int i = 0; i < this->numel(); ++i) { - sum1 += *(iter_0 + i); - sum2 += *(iter_1 + i); - } - assign_to_tensor(ret->_share[0], sum1); - assign_to_tensor(ret->_share[1], sum2); -} - -template -template class CTensor, - size_t... 
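// Aside: hypothetical standalone check of the replicated-sharing product
// rule that mul_trunc relies on: with x = x0+x1+x2 and y = y0+y1+y2
// (mod 2^64), summing each party's local term
//   x_i*y_i + x_i*y_{i+1} + x_{i+1}*y_i
// over i covers all nine cross products exactly once.
//
//   #include <cstdint>
//   #include <random>
//   #include <cassert>
//   int main() {
//       std::mt19937_64 gen(1);
//       uint64_t x = 123, y = 456;
//       uint64_t xs[3] = {gen(), gen(), 0}, ys[3] = {gen(), gen(), 0};
//       xs[2] = x - xs[0] - xs[1];
//       ys[2] = y - ys[0] - ys[1];
//       uint64_t z = 0;
//       for (int i = 0; i < 3; ++i) {
//           int j = (i + 1) % 3;
//           z += xs[i] * ys[i] + xs[i] * ys[j] + xs[j] * ys[i];
//       }
//       assert(z == x * y);    // the zero sharing r_zero would add 0
//       return 0;
//   }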
N1> -void FixedPointTensor::dot_mul(const CTensor* rhs, - FixedPointTensor* ret) const { - PADDLE_ENFORCE_EQ(ret->numel(), 1, "output size should be 1."); - - auto temp0 = tensor_factory()->template create(this->shape()); - auto temp1 = tensor_factory()->template create(this->shape()); - std::shared_ptr> temp = - std::make_shared>(temp0.get(), temp1.get()); - this->mul(rhs, temp.get()); - temp->sum(ret); -} - -template -void FixedPointTensor::mat_mul(const FixedPointTensor* rhs, - FixedPointTensor* ret) const { - mul_trunc(this, rhs, ret, &TensorAdapter::mat_mul); -} - -template -void FixedPointTensor::mat_mul(const TensorAdapter* rhs, - FixedPointTensor* ret) const { - _share[0]->mat_mul(rhs, ret->_share[0]); - _share[1]->mat_mul(rhs, ret->_share[1]); - truncate3(ret, ret, rhs->scaling_factor()); -} - -template< typename T, size_t N> -void FixedPointTensor::div(const TensorAdapter* rhs, - FixedPointTensor* ret) const { - PADDLE_ENFORCE_EQ(N, rhs->scaling_factor(), - "no match scaling factor"); - - auto temp = tensor_factory()->template create(this->shape()); - - double scale = std::pow(2, rhs->scaling_factor()); - auto inverse = [scale](T d) -> T { - return 1.0 * scale / d * scale; }; - std::transform(rhs->data(), rhs->data() + rhs->numel(), - temp->data(), inverse); - temp->scaling_factor() = rhs->scaling_factor(); - - this->mul(temp.get(), ret); -} - -template -void FixedPointTensor::div(const FixedPointTensor* rhs, - FixedPointTensor* ret, - size_t iter, double x0) const { - auto temp0 = tensor_factory()->template create(ret->shape()); - auto temp1 = tensor_factory()->template create(ret->shape()); - std::shared_ptr> temp = - std::make_shared>(temp0.get(), temp1.get()); - reciprocal(rhs, temp.get(), iter, x0); - this->mul(temp.get(), ret); -} - -template -void FixedPointTensor::exp(FixedPointTensor* ret, - size_t iter) const { - // exp approximate: exp(x) = \lim_{n->inf} (1+x/n)^n - // where n = 2^ite - auto pow_iter = tensor_factory()->template create(this->shape()); - assign_to_tensor(pow_iter.get(), (T) (pow(2, N -iter))); - pow_iter->scaling_factor() = N; - - auto tensor_one = tensor_factory()->template create(this->shape()); - assign_to_tensor(tensor_one.get(), (T) 1 << N); - tensor_one->scaling_factor() = N; - - this->mul(pow_iter.get(), ret); - - ret->add(tensor_one.get(), ret); - - for (int i = 0; i < iter; ++i) { - ret->mul(ret, ret); - } -} - -template< typename T, size_t N> -void FixedPointTensor::relu(FixedPointTensor* ret) const { - //utilize polynomial_piecewise - // break_point = {0}, coeff[0] = {0, 0}, coeff[1] = {0, 1} - // break_point.shape = {1, this->shape}, coeff.shape = {2, 2, this->shape} - - auto shape_ = shape(); - //construct break_point - auto b_shape = shape_; - b_shape.insert(b_shape.begin(), 1); - - auto break_point = tensor_factory()->template create(b_shape); - - T* b_ptr = break_point->data(); - for (size_t i = 0; i < break_point->numel(); ++i) { - b_ptr[i] = 0; - } - break_point->scaling_factor() = N; - - //contruct coeff - std::vector c_shape = {2, 2}; - c_shape.insert(c_shape.end(), shape_.begin(), shape_.end()); - - auto coeff = tensor_factory()->template create(c_shape); - - T* c_ptr = coeff->data(); - - for (size_t i = 0; i < 3 * this->numel(); ++i) { - c_ptr[i] = 0; - } - for (size_t i = 3 * this->numel(); i < 4 * this->numel(); ++i) { - c_ptr[i] = (T) 1 << N; - } - coeff->scaling_factor() = N; - - this->polynomial_piecewise(coeff.get(), break_point.get(), ret); -} - -template< typename T, size_t N> -void FixedPointTensor::relu_with_derivative( - 
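// Aside: the exp() approximation above in plain double — a hypothetical
// standalone sketch of exp(x) ~ (1 + x/2^k)^(2^k) via k squarings.
//
//   #include <cmath>
//   #include <cassert>
//   int main() {
//       const int k = 8;                  // "iter"
//       double x = 1.0;
//       double y = 1.0 + x / (1 << k);    // 1 + x/n with n = 2^k
//       for (int i = 0; i < k; ++i)
//           y *= y;                       // raise to the 2^k-th power
//       assert(std::fabs(y - std::exp(1.0)) < 6e-3);   // ~2.7130 vs 2.71828
//       return 0;
//   }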
FixedPointTensor* ret, BooleanTensor* derivative) const { - - auto shape_ = shape(); - auto zero = tensor_factory()->template create(shape_); - - assign_to_tensor(zero.get(), (T)0); - zero->scaling_factor() = N; - - auto tmp0 = tensor_factory()->template create(shape_); - auto tmp1 = tensor_factory()->template create(shape_); - - BooleanTensor der(tmp0.get(), tmp1.get()); - - gt(zero.get(), &der); - - der.mul(this, ret); - - if (derivative) { - der.share(0)->copy(derivative->share(0)); - der.share(1)->copy(derivative->share(1)); - } -} - -template< typename T, size_t N> -void FixedPointTensor::sigmoid_chebyshev(FixedPointTensor* ret) const { - //utilize Chebyshev polynomial approximation - // more accurate in small range, such as [-4, 4] - auto shape = ret->shape(); - std::vector shape_ = shape; - shape_.insert(shape_.begin(), 10); - auto numel = ret->numel(); - auto coeff = tensor_factory()->template create(shape_); - std::vector w; - w.resize(10, 0.0f); - w[0] = 0.5; - w[1] = 0.2159198015; - w[3] = -0.0082176259; - w[5] = 0.0001825597; - w[7] = -0.0000018848; - w[9] = 0.0000000072; - for (int i = 0; i < 10; ++i) { - for (int j = 0; j < numel; ++j) { - *(coeff->data() + i * numel + j) = (T) (w[i] * pow(2, N)); - } - } - coeff->scaling_factor() = N; - polynomial(coeff.get(), ret); -} - -template< typename T, size_t N> -void FixedPointTensor::sigmoid(FixedPointTensor* ret) const { - //utilize polynomial_piecewise - // break_point = {-2.5, 2.5} - // coeff[0] = {10^-4, 0}, coeff[1] = {0.5, 0.17} - // coeff[2] = {1 - 10^-4, 0} - // break_point.shape = {2, this->shape}, coeff.shape = {3, 2, this->shape} - - //construct break_point - auto shape_ = shape(); - //construct break_point - auto b_shape = shape_; - b_shape.insert(b_shape.begin(), 2); - - auto break_point = tensor_factory()->template create(b_shape); - - T* b_ptr = break_point->data(); - for (size_t i = 0; i < break_point->numel(); ++i) { - b_ptr[i] = 0; - } - for (size_t i = 0; i < break_point->numel() / 2; ++i) { - b_ptr[i] = (T) (-2.5 * pow(2, N)); - } - for (size_t i = break_point->numel() / 2; i < break_point->numel(); ++i) { - b_ptr[i] = (T) (2.5 * pow(2, N)); - } - break_point->scaling_factor() = N; - - //contruct coeff - std::vector c_shape = {3, 2}; - c_shape.insert(c_shape.end(), shape_.begin(), shape_.end()); - - auto coeff = tensor_factory()->template create(c_shape); - - T* c_ptr = coeff->data(); - - size_t numel = this->numel(); - double scale = std::pow(2, N); - for (size_t i = 0; i < numel; ++i) { - c_ptr[i] = 0.0001 * scale; - c_ptr[i + numel] = 0; - c_ptr[i + 2 * numel] = 0.5 * scale; - c_ptr[i + 3 * numel] = 0.17 * scale; - c_ptr[i + 4 * numel] = (1 - 0.0001) * scale; - c_ptr[i + 5 * numel] = 0; - } - coeff->scaling_factor() = N; - - this->polynomial_piecewise(coeff.get(), break_point.get(), ret); -} - -template< typename T, size_t N> -void FixedPointTensor::sigmoid_enhanced(FixedPointTensor* ret) const { - //utilize polynomial_piecewise - // break_point = {-5, -2.5, 2.5, 5} - // coeff[0] = {10^-4, 0}, coeff[1] = {0.145, 0.02776} - // coeff[2] = {0.5, 0.17}, coeff[3] = {0.85498, 0.02776}, coeff[4] = {0.9999, 0} - // break_point.shape = {4, this->shape}, coeff.shape = {5, 2, this->shape} - - //construct break_point - auto shape_ = shape(); - //construct break_point - auto b_shape = shape_; - b_shape.insert(b_shape.begin(), 4); - - auto break_point = tensor_factory()->template create(b_shape); - - T* b_ptr = break_point->data(); - auto numel = ret->numel(); - double scale = std::pow(2, N); - for (size_t i = 0; i < 
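// Aside: hypothetical standalone evaluation of the odd-degree polynomial
// used by sigmoid_chebyshev, checked against the exact sigmoid at x = 2;
// the fit is good to roughly 1e-2 on [-4, 4].
//
//   #include <cmath>
//   #include <cassert>
//   int main() {
//       const double w[10] = {0.5, 0.2159198015, 0, -0.0082176259, 0,
//                             0.0001825597, 0, -0.0000018848, 0, 0.0000000072};
//       double x = 2.0, acc = 0.0, p = 1.0;
//       for (int i = 0; i < 10; ++i) { acc += w[i] * p; p *= x; }
//       assert(std::fabs(acc - 1.0 / (1.0 + std::exp(-x))) < 0.02);
//       return 0;
//   }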
numel; ++i) { - b_ptr[i] = (T) (-5 * scale); - b_ptr[i + numel] = (T) (-2.5 * scale); - b_ptr[i + 2 * numel] = (T) (2.5 * scale); - b_ptr[i + 3 * numel] = (T) (5 * scale); - } - break_point->scaling_factor() = N; - - //contruct coeff - std::vector c_shape = {5, 2}; - c_shape.insert(c_shape.end(), shape_.begin(), shape_.end()); - auto coeff = tensor_factory()->template create(c_shape); - T* c_ptr = coeff->data(); - for (size_t i = 0; i < numel; ++i) { - c_ptr[i] = 0.0001 * scale; - c_ptr[i + numel] = 0; - c_ptr[i + 2 * numel] = 0.145 * scale; - c_ptr[i + 3 * numel] = 0.02776 * scale; - c_ptr[i + 4 * numel] = 0.5 * scale; - c_ptr[i + 5 * numel] = 0.17 * scale; - c_ptr[i + 6 * numel] = 0.85498 * scale; - c_ptr[i + 7 * numel] = 0.02776 * scale; - c_ptr[i + 8 * numel] = 0.9999 * scale; - c_ptr[i + 9 * numel] = 0 * scale; - } - coeff->scaling_factor() = N; - - this->polynomial_piecewise(coeff.get(), break_point.get(), ret); -} - -template< typename T, size_t N> -void FixedPointTensor::softmax(FixedPointTensor* ret, - bool use_relu, bool use_long_div) const { - // softmax axis = -1 - const size_t col = *(shape().end() - 1); - const size_t row = numel() / col; - - std::vector>> temp; - // 11 for allocating temp tensor - for (size_t i = 0; i < 11; ++i) { - temp.emplace_back( - tensor_factory()->template create()); - } - - temp[0]->reshape({row, col}); - temp[1]->reshape({row, col}); - FixedPointTensor x(temp[0].get(), temp[1].get()); - - if (!use_relu) { - temp[2]->reshape({col, row}); - temp[3]->reshape({col, row}); - - temp[4]->reshape({1, row}); - temp[5]->reshape({1, row}); - } - FixedPointTensor x_t(temp[2].get(), temp[3].get()); - FixedPointTensor max_x_t(temp[4].get(), temp[5].get()); - - temp[6]->reshape({row, 1}); - temp[7]->reshape({row, 1}); - FixedPointTensor max_x(temp[6].get(), temp[7].get()); - - temp[8]->reshape({row, col}); - temp[9]->reshape({row, col}); - FixedPointTensor max_x_broadcast(temp[8].get(), temp[9].get()); - - temp[10]->reshape({row, col}); - auto exp_lower_bound = temp[10].get(); - - auto transpose = [](const TensorAdapter* in, TensorAdapter* out) { - // suppose input dims = 2 - const size_t col = in->shape()[1]; - const size_t row = in->shape()[0]; - const size_t numel = in->numel(); - - for (size_t k = 0; k < numel; ++k) { - size_t i = k / row; - size_t j = k % row; - out->data()[k] = in->data()[j * col + i]; - } - }; - - auto broadcast = [](const TensorAdapter* in, TensorAdapter* out) { - // suppose input dims = 2 - // in shape = [row, 1] - const size_t col = out->shape()[1]; - const size_t row = out->shape()[0]; - for (size_t k = 0; k < out->numel(); ++k) { - size_t i = k / col; - out->data()[k] = in->data()[i]; - } - }; - - share(0)->copy(x.mutable_share(0)); - share(1)->copy(x.mutable_share(1)); - - if (use_relu) { - - x.relu(&x); - - } else { // use exp - transpose(x.share(0), x_t.mutable_share(0)); - transpose(x.share(1), x_t.mutable_share(1)); - - // x = max(input - max(input), exp_lower_bound) - x_t.max_pooling(&max_x_t); - - transpose(max_x_t.share(0), max_x.mutable_share(0)); - transpose(max_x_t.share(1), max_x.mutable_share(1)); - - broadcast(max_x.share(0), max_x_broadcast.mutable_share(0)); - broadcast(max_x.share(1), max_x_broadcast.mutable_share(1)); - - x.sub(&max_x_broadcast, &x); - - // n = 64, see exp - assign_to_tensor(exp_lower_bound, (T)(-64 * (1 << N))); - exp_lower_bound->scaling_factor() = N; - - x.sub(exp_lower_bound, &x); - x.relu(&x); - x.add(exp_lower_bound, &x); - - x.exp(&x); - } - - // reuse max_x as sum - reduce(&x, &max_x); - - if 
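// Aside: the stabilization used in the exp branch of softmax, sketched in
// double (hypothetical standalone code): subtract the row max before
// exponentiating so exp() stays in range, then normalize.
//
//   #include <cmath>
//   #include <cassert>
//   int main() {
//       double row[3] = {1.0, 2.0, 3.0};
//       double mx = std::fmax(row[0], std::fmax(row[1], row[2]));
//       double e[3], sum = 0.0;
//       for (int i = 0; i < 3; ++i) { e[i] = std::exp(row[i] - mx); sum += e[i]; }
//       assert(std::fabs(e[2] / sum - 0.66524096) < 1e-6);
//       return 0;
//   }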
(!use_long_div) { // invert sum by Newton's method - // divisor range = [1/col, 1.0] - // TODO: find better iter num & init val - reciprocal(&max_x, &max_x, 16, 0.5 / col); - } - - broadcast(max_x.share(0), max_x_broadcast.mutable_share(0)); - broadcast(max_x.share(1), max_x_broadcast.mutable_share(1)); - - if (use_long_div) { - x.long_div(&max_x_broadcast, &x, 1); - } else { - x.mul(&max_x_broadcast, &x); - } - - x.share(0)->copy(ret->mutable_share(0)); - x.share(1)->copy(ret->mutable_share(1)); -} - -template -void FixedPointTensor::long_div(const FixedPointTensor* rhs, - FixedPointTensor* ret, - size_t int_len) const { - std::vector>> temp; - for (int i = 0; i < 16; ++i) { - temp.emplace_back( - tensor_factory()->template create(ret->shape())); - } - - BooleanTensor sign_lhs(temp[0].get(), temp[1].get()); - BooleanTensor sign_rhs(temp[2].get(), temp[3].get()); - BooleanTensor sign_ret(temp[4].get(), temp[5].get()); - FixedPointTensor abs_lhs(temp[6].get(), temp[7].get()); - FixedPointTensor abs_rhs(temp[8].get(), temp[9].get()); - FixedPointTensor sub_rhs(temp[10].get(), temp[11].get()); - BooleanTensor cmp_res(temp[12].get(), temp[13].get()); - BooleanTensor cmp_res_all(temp[14].get(), temp[15].get()); - - assign_to_tensor(cmp_res_all.share(0), (T)0); - assign_to_tensor(cmp_res_all.share(1), (T)0); - - const size_t msb = sizeof(T) * 8 - 1; - sign_lhs.bit_extract(msb, this); - sign_rhs.bit_extract(msb, rhs); - sign_lhs.bitwise_xor(&sign_rhs, &sign_ret); - - auto lshift = [] (const FixedPointTensor* in, - size_t rhs, - FixedPointTensor* out) { - in->share(0)->lshift(rhs, out->mutable_share(0)); - in->share(1)->lshift(rhs, out->mutable_share(1)); - }; - - // abs = val - 2 * sign * val - auto abs = [lshift] (const FixedPointTensor* in, - const BooleanTensor* sign, - FixedPointTensor* out) { - lshift(in, 1, out); - sign->mul(out, out); - in->sub(out, out); - }; - - auto out0 = tensor_factory()->template create(ret->shape()); - - abs(this, &sign_lhs, &abs_lhs); - - abs(rhs, &sign_rhs, &abs_rhs); - - - for (ssize_t i = int_len - 1; i >= 0; --i) { - lshift(&abs_rhs, i, &sub_rhs); - - - abs_lhs.gt(&sub_rhs, &cmp_res); - - - cmp_res.mul(&sub_rhs, &sub_rhs); - cmp_res.lshift(N + i, &cmp_res); - abs_lhs.sub(&sub_rhs, &abs_lhs); - cmp_res.bitwise_xor(&cmp_res_all, &cmp_res_all); - - } - - for (size_t i = 1; i <= N; ++i) { - truncate3(&abs_rhs, &sub_rhs, i); - abs_lhs.gt(&sub_rhs, &cmp_res); - cmp_res.mul(&sub_rhs, &sub_rhs); - cmp_res.lshift(N - i, &cmp_res); - abs_lhs.sub(&sub_rhs, &abs_lhs); - cmp_res.bitwise_xor(&cmp_res_all, &cmp_res_all); - } - - // use abs_lhs as buffer - cmp_res_all.b2a(&abs_lhs); - - abs(&abs_lhs, &sign_ret, ret); -} - -// reduce last dim -template -void FixedPointTensor::reduce(FixedPointTensor* input, - FixedPointTensor* ret) { - //enfoce shape: input->shape[0 ... 
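// Aside: the bit-by-bit schedule of long_div above, run on plain unsigned
// fixed-point values (hypothetical standalone code; the shared version
// replaces each comparison with gt() and each subtraction with a
// conditional one driven by the comparison bit).
//
//   #include <cstdint>
//   #include <cassert>
//   int main() {
//       const int N = 16, int_len = 8;
//       uint64_t lhs = (uint64_t)(7.5 * (1 << N));    // 7.5
//       uint64_t rhs = (uint64_t)(2.0 * (1 << N));    // 2.0
//       uint64_t q = 0;
//       for (int i = int_len - 1; i >= 0; --i)        // integer-part bits
//           if (lhs >= (rhs << i)) { lhs -= rhs << i; q |= (uint64_t)1 << (N + i); }
//       for (int i = 1; i <= N; ++i)                  // fractional bits
//           if (lhs >= (rhs >> i)) { lhs -= rhs >> i; q |= (uint64_t)1 << (N - i); }
//       assert(q == (uint64_t)(3.75 * (1 << N)));     // 7.5 / 2.0
//       return 0;
//   }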
(n-2)] == ret shape - auto& shape = input->shape(); - size_t ite_size = shape[shape.size() - 1]; - - T* ret_begin_ptr_0 = ret->_share[0]->data(); - T* ret_begin_ptr_1 = ret->_share[1]->data(); - - T* input_begin_ptr_0 = input->_share[0]->data(); - T* input_begin_ptr_1 = input->_share[1]->data(); - - for (int j = 0; j < ret->numel(); ++j) { - *(ret_begin_ptr_0 + j) = *(input_begin_ptr_0 + j * ite_size); - *(ret_begin_ptr_1 + j) = *(input_begin_ptr_1 + j * ite_size); - for (int i = 1; i < ite_size; ++i) { - *(ret_begin_ptr_0 + j) += - *(input_begin_ptr_0 + j * ite_size + i); - *(ret_begin_ptr_1 + j) += - *(input_begin_ptr_1 + j * ite_size + i); - } - } -} - -template< typename T, size_t N> -void FixedPointTensor::polynomial(const TensorAdapter* coeff, - FixedPointTensor* ret) const { - - // e.g., x.shape = {2, 3}, coeff.shape = {n, 2, 3} (n: polynomial power) - - //TODO: improve performance: [ABY3] - std::vector>> temp; - for (int i = 0; i < 7; ++i) { - temp.emplace_back( - tensor_factory()->template create(this->shape())); - } - std::shared_ptr> x_pow_i = - std::make_shared>( - temp[0].get(), temp[1].get()); - std::shared_ptr> temp_fixed = - std::make_shared>( - temp[2].get(), temp[3].get()); - std::shared_ptr> result = - std::make_shared>( - temp[5].get(), temp[6].get()); - assign_to_tensor(result->_share[0], (T) 0); - assign_to_tensor(result->_share[1], (T) 0); - - //x_pow_i.get() = 1; - assign_to_tensor(x_pow_i.get()->_share[0], (T) 0); - assign_to_tensor(x_pow_i.get()->_share[1], (T) 0); - assign_to_tensor(temp[4].get(), (T) 1 << N); - temp[4]->scaling_factor() = N; - x_pow_i->add(temp[4].get(), x_pow_i.get()); - - for (int i = 0; i < coeff->shape()[0]; ++i) { - auto t = tensor_factory()->template create(); - coeff->slice(i, i + 1, t.get()); - auto t_shape = t->shape(); - // remove leading 1 - t_shape.erase(t_shape.begin()); - t->reshape(t_shape); - x_pow_i->mul(t.get(), temp_fixed.get()); - result->add(temp_fixed.get(), result.get()); - x_pow_i->mul(this, x_pow_i.get()); - } - result->share(0)->copy(ret->mutable_share(0)); - result->share(1)->copy(ret->mutable_share(1)); -} - -template< typename T, size_t N> -void FixedPointTensor::polynomial_piecewise( - const TensorAdapter* coeff, - const TensorAdapter* break_point, - FixedPointTensor* ret) const { - - // e.g., x.shape = {2, 3}, - // break_point.shape = {k, 2, 3} (k: num of break point) - // coeff.shape = {k + 1, n, 2, 3} (n: poly power) - - // copy ret - auto ret_cpy_s0 = tensor_factory()->create_int64_t(ret->share(0)->shape()); - ret->share(0)->copy(ret_cpy_s0.get()); - auto ret_cpy_s1 = tensor_factory()->create_int64_t(ret->share(1)->shape()); - ret->share(1)->copy(ret_cpy_s1.get()); - std::shared_ptr> ret_cpy{new FixedPointTensor(ret_cpy_s0.get(), ret_cpy_s1.get())}; - - std::vector>> msb; - - int len_break_point = break_point->shape()[0]; - int len_coeff = coeff->shape()[0]; - - //number of temp tensor used - int temp_total = 4 * len_break_point + 2 + - 2 * (len_break_point - 1) + 2 + 4 * len_coeff; - std::vector>> temp; - for (int i = 0; i < temp_total; ++i) { - temp.emplace_back(tensor_factory()-> - template create(this->shape())); - } - int temp_index = 0; - - // std::vector>> paddle_t_break; - std::vector>> temp1; - - for (int i = 0; i < break_point->shape()[0]; ++i) { - // msb[i] = msb(x - break_point[i]) - auto t_break = tensor_factory()->template create(); - break_point->slice(i, i + 1, t_break.get()); - - auto t_shape = t_break->shape(); - // remove leading 1 - t_shape.erase(t_shape.begin()); - t_break->reshape(t_shape); 
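// Aside: the power-accumulation scheme of polynomial() on plain
// fixed-point scalars (hypothetical standalone code): keep a running
// x^i, truncating by N bits after every multiply.
//
//   #include <cstdint>
//   #include <cassert>
//   int main() {
//       const int N = 16;
//       int64_t coeff[3] = {1 << N, 2 << N, 3 << N};   // 1 + 2x + 3x^2
//       int64_t x = (int64_t)1 << (N - 1);             // x = 0.5
//       int64_t x_pow_i = (int64_t)1 << N;             // x^0 = 1
//       int64_t result = 0;
//       for (int i = 0; i < 3; ++i) {
//           result += (x_pow_i * coeff[i]) >> N;       // += coeff[i] * x^i
//           x_pow_i = (x_pow_i * x) >> N;              // next power, truncated
//       }
//       assert(result == (int64_t)(2.75 * (1 << N)));  // 1 + 1 + 0.75
//       return 0;
//   }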
- - temp1.emplace_back( - std::make_shared>( - temp[temp_index++].get(), - temp[temp_index++].get())); - this->sub(t_break.get(), temp1[i].get()); - msb.emplace_back(std::make_shared>( - temp[temp_index++].get(), - temp[temp_index++].get())); - msb[i]->bit_extract(sizeof(T) * 8 - 1, temp1[i].get()); - } - - // b[0] = msb[0], b[i + 1] = ~ msb[i] & msb[i + 1] - std::vector>> b; - b.emplace_back(std::make_shared>( - temp[temp_index++].get(), - temp[temp_index++].get())); - b[0] = msb[0]; - - for (int i = 0; i < len_break_point - 1; ++i) { - b.emplace_back(std::make_shared>( - temp[temp_index++].get(), - temp[temp_index++].get())); - - msb[i]->bitwise_not(b[i + 1].get()); - b[i + 1]->bitwise_and(msb[i + 1].get(), b[i + 1].get()); - } - - b.emplace_back(std::make_shared>( - temp[temp_index++].get(), - temp[temp_index++].get())); - msb[len_break_point - 1]->bitwise_not(b[len_break_point].get()); - - // ret += b[i].mul(polynomial(coeff[i])) - std::vector>> temp_fixed; - std::vector>> temp_fixed1; - - assign_to_tensor(ret_cpy->_share[0], (T) 0); - assign_to_tensor(ret_cpy->_share[1], (T) 0); - - for (int i = 0; i < len_coeff; ++i) { - temp_fixed.emplace_back( - std::make_shared>( - temp[temp_index++].get(), - temp[temp_index++].get())); - temp_fixed1.emplace_back( - std::make_shared>( - temp[temp_index++].get(), - temp[temp_index++].get())); - auto t = tensor_factory()->template create(); - coeff->slice(i, i + 1, t.get()); - auto t_shape = t->shape(); - // remove leading 1 - t_shape.erase(t_shape.begin()); - t->reshape(t_shape);; - this->polynomial(t.get(), temp_fixed[i].get()); - b[i]->bit_extract(0, b[i].get()); - b[i]->mul(temp_fixed[i].get(), temp_fixed1[i].get()); - ret_cpy->add(temp_fixed1[i].get(), ret_cpy.get()); - } - ret_cpy->share(0)->copy(ret->mutable_share(0)); - ret_cpy->share(1)->copy(ret->mutable_share(1)); -} - -template -template class CTensor, - size_t... N1> -void FixedPointTensor::lt(const CTensor* rhs, - BooleanTensor* ret) const { - - std::vector>> temp; - for (int i = 0; i < 2; ++i) { - temp.emplace_back( - tensor_factory()->template create(this->shape())); - } - std::shared_ptr> sub_result = - std::make_shared>( - temp[0].get(), temp[1].get()); - this->sub(rhs, sub_result.get()); - ret->bit_extract(sizeof(T) * 8 - 1, sub_result.get()); -} - -template -template class CTensor, - size_t... N1> -void FixedPointTensor::leq(const CTensor* rhs, - BooleanTensor* ret) const { - - this->gt(rhs, ret); - auto tensor_one = tensor_factory()-> - template create(this->shape()); - - assign_to_tensor(tensor_one.get(), (T) 1); - ret->bitwise_xor(tensor_one.get(), ret); -} - -template -template class CTensor, - size_t... N1> -void FixedPointTensor::gt(const CTensor* rhs, - BooleanTensor* ret) const { - - std::vector>> temp; - for (int i = 0; i < 2; ++i) { - temp.emplace_back( - tensor_factory()->template create(this->shape())); - } - std::shared_ptr> sub_result = - std::make_shared>( - temp[0].get(), temp[1].get()); - this->sub(rhs, sub_result.get()); - sub_result->negative(sub_result.get()); - ret->template bit_extract(sizeof(T) * 8 - 1, sub_result.get()); -} - -template -template class CTensor, - size_t... N1> -void FixedPointTensor::geq(const CTensor* rhs, - BooleanTensor* ret) const { - - this->lt(rhs, ret); - auto tensor_one = tensor_factory()-> - template create(this->shape()); - - assign_to_tensor(tensor_one.get(), (T) 1); - ret->bitwise_xor(tensor_one.get(), ret); -} - -template -template class CTensor, - size_t... 
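// Aside: the indicator construction used above, on plain bools
// (hypothetical standalone code): with msb[i] = (x < break_point[i]),
// b[0] = msb[0], b[i+1] = ~msb[i] & msb[i+1], b[k] = ~msb[k-1], so exactly
// one interval indicator fires.
//
//   #include <cassert>
//   int main() {
//       double bp[2] = {-2.5, 2.5}, x = 0.7;      // sigmoid's break points
//       bool msb[2] = {x < bp[0], x < bp[1]};
//       bool b[3] = {msb[0], !msb[0] && msb[1], !msb[1]};
//       assert(b[0] + b[1] + b[2] == 1 && b[1]);  // x lies in [-2.5, 2.5)
//       return 0;
//   }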
N1> -void FixedPointTensor::eq(const CTensor* rhs, - BooleanTensor* ret) const { - - this->neq(rhs, ret); - auto tensor_one = tensor_factory()->template create(this->shape()); - assign_to_tensor(tensor_one.get(), (T) 1); - ret->bitwise_xor(tensor_one.get(), ret); -} - -template -template class CTensor, - size_t... N1> -void FixedPointTensor::neq(const CTensor* rhs, - BooleanTensor* ret) const { - std::vector>> temp; - for (int i = 0; i < 4; i ++) { - temp.emplace_back(tensor_factory()-> - template create(this->shape())); - } - std::shared_ptr> lt = - std::make_shared>( - temp[0].get(), temp[1].get()); - std::shared_ptr> gt = - std::make_shared>( - temp[2].get(), temp[3].get()); - - this->lt(rhs, lt.get()); - this->gt(rhs, gt.get()); - lt->bitwise_or(gt.get(), ret); -} - -template -void FixedPointTensor::reciprocal(const FixedPointTensor* op, FixedPointTensor* ret, - size_t iter, double x0) { - auto temp0 = tensor_factory()->template create(ret->shape()); - auto temp1 = tensor_factory()->template create(ret->shape()); - auto temp2 = tensor_factory()->template create(ret->shape()); - auto temp3 = tensor_factory()->template create(ret->shape()); - std::shared_ptr> result = - std::make_shared>(temp0.get(), temp1.get()); - std::shared_ptr> x_copy = - std::make_shared>(temp2.get(), temp3.get()); - assign_to_tensor(result->mutable_share(0), (T) 0); - assign_to_tensor(result->mutable_share(1), (T) 0); - auto tensor_x0 = tensor_factory()->template create(op->shape()); - assign_to_tensor(tensor_x0.get(), (T)(x0 * pow(2, N))); - tensor_x0->scaling_factor() = N; - result->add(tensor_x0.get(), result.get()); - auto tensor_2 = tensor_factory()->template create(op->shape()); - tensor_2->scaling_factor() = N; - assign_to_tensor(tensor_2.get(), (T)(2 << N)); - for (int i = 0; i < iter; ++i) { - result->share(0)->copy(x_copy->mutable_share(0)); - result->share(1)->copy(x_copy->mutable_share(1)); - auto res_ptr = result.get(); - op->mul(res_ptr, res_ptr); - result->negative(res_ptr); - result->add(tensor_2.get(), res_ptr); - x_copy->mul(res_ptr, res_ptr); - } - result->share(0)->copy(ret->mutable_share(0)); - result->share(1)->copy(ret->mutable_share(1)); -} - -template -void FixedPointTensor::inverse_square_root(FixedPointTensor* ret, - size_t iter, - double x0) const { - inverse_square_root(this, ret, iter, x0); -} - -// Newton's method, var naming from Quake III Arena: Q_rsqrt -// float threehalfs = 1.5F; -// x2 = number * 0.5F; -// y = x0; // since 0x5f3759df does not fit fixed-point arithmetic -// y = y * ( threehalfs - ( x2 * y * y ) ); // iteration of Newton's method -template -void FixedPointTensor::inverse_square_root(const FixedPointTensor* op, - FixedPointTensor* ret, - size_t iter, - double x0) { - std::vector>> temp; - for (int i = 0; i < 7; ++i) { - temp.emplace_back( - tensor_factory()->template create(op->shape())); - } - std::shared_ptr> y = - std::make_shared>(temp[0].get(), temp[1].get()); - std::shared_ptr> x2 = - std::make_shared>(temp[2].get(), temp[3].get()); - // x2 = 0.5 * op - truncate3(op, x2.get(), 1); - - assign_to_tensor(y->mutable_share(0), (T)(x0 * pow(2, N))); - assign_to_tensor(y->mutable_share(1), (T)(x0 * pow(2, N))); - - // threehalfs - temp[4]->scaling_factor() = N; - assign_to_tensor(temp[4].get(), T(1.5 * pow(2, N))); - - std::shared_ptr> y_copy = - std::make_shared>(temp[5].get(), temp[6].get()); - - for (int i = 0; i < iter; ++i) { - y->share(0)->copy(y_copy->mutable_share(0)); - y->share(1)->copy(y_copy->mutable_share(1)); - y->mul(y.get(), y.get()); - 
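// Aside: the Newton iteration behind reciprocal(), in double
// (hypothetical standalone code): x <- x * (2 - a*x) converges
// quadratically to 1/a whenever 0 < a*x0 < 2.
//
//   #include <cmath>
//   #include <cassert>
//   int main() {
//       double a = 7.0, x = 0.1;          // cf. the iter and x0 parameters
//       for (int i = 0; i < 16; ++i)
//           x = x * (2.0 - a * x);        // one Newton step
//       assert(std::fabs(x - 1.0 / 7.0) < 1e-12);
//       return 0;
//   }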
y->mul(x2.get(), y.get()); - y->negative(y.get()); - y->add(temp[4].get(), y.get()); - y_copy->mul(y.get(), y.get()); - } - y->share(0)->copy(ret->mutable_share(0)); - y->share(1)->copy(ret->mutable_share(1)); -} - -template -template class CTensor, - size_t... N1> -void FixedPointTensor::max(const CTensor* rhs, - FixedPointTensor* ret, - BooleanTensor* cmp) const { - // max = lhs + (rhs - lhs) if rhs > lhs else lhs - std::vector>> temp; - bool output_cmp = cmp != nullptr; - // if cmp is not null, store cmp results in cmp - // else, store them in tmp tensors - for (int i = 0; i < 2 + 2 * (!output_cmp); ++i) { - temp.emplace_back( - tensor_factory()->template create(this->shape())); - } - FixedPointTensor delta(temp[0].get(), temp[1].get()); - sub(rhs, &delta); - BooleanTensor sign; - if (output_cmp) { - sign = *cmp; - } else { - sign = BooleanTensor(temp[2].get(), temp[3].get()); - } - sign.template bit_extract(sizeof(T) * 8 - 1, &delta); - delta.negative(&delta); - sign.mul(&delta, &delta); - add(&delta, ret); -} - -template -void FixedPointTensor::max_pooling(FixedPointTensor* ret, - BooleanTensor* pos) const { - size_t k = shape()[0]; - std::vector>> tmp; - for (int i = 0; i < 4; ++i) { - tmp.emplace_back( - tensor_factory()->template create()); - } - - FixedPointTensor now(tmp[0].get(), tmp[1].get()); - BooleanTensor cmp(tmp[2].get(), tmp[3].get()); - auto cmp_ptr = pos ? &cmp : nullptr; - - share(0)->slice(0, 1, tmp[0].get()); - share(1)->slice(0, 1, tmp[1].get()); - - tmp[0]->copy(ret->mutable_share(0)); - tmp[1]->copy(ret->mutable_share(1)); - - if (pos) { - pos->share(0)->slice(0, 1, tmp[2].get()); - pos->share(1)->slice(0, 1, tmp[3].get()); - - // set init 1, slice_0 is larger than null - if (party() == 0 || party() == 2) { - size_t idx = 2 + (party() == 2); - assign_to_tensor(tmp[idx].get(), T(1)); - assign_to_tensor(tmp[5 - idx].get(), T(0)); - } else { - assign_to_tensor(tmp[2].get(), T(0)); - assign_to_tensor(tmp[3].get(), T(0)); - } - - } - - for (size_t i = 1; i < k; ++i) { - share(0)->slice(i, i + 1, tmp[0].get()); - share(1)->slice(i, i + 1, tmp[1].get()); - - if (pos) { - pos->share(0)->slice(i, i + 1, tmp[2].get()); - pos->share(1)->slice(i, i + 1, tmp[3].get()); - } - - ret->max(&now, ret, cmp_ptr); - - } - - if (pos) { - pos->onehot_from_cmp(); - } - -} -*/ } // namespace privc -- GitLab