Commit a948eea3 authored by xutianbing

clean unused code.

Parent 58827e3e
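For orientation: besides deleting the unused hl_cossim / cosSim matrix code, the commit reworks how CosSimBackwardFunc::calc receives its arguments — the read-only tensors (output grad, output value, the two forward input values) now arrive through inputs, outputs is empty, and the two gradients to be accumulated travel through inouts. A minimal sketch of the new call layout, mirroring the CosSimLayer::backward call further down in this diff; the Matrix pointers outG, outV, inV1, inV2, inG1 and inG2 are assumed to be the layer's usual output/input buffers:

// inputs : {out grad, out value, in1 value, in2 value}  -- read-only
// outputs: {}
// inouts : {in1 grad, in2 grad}                          -- accumulated in place
backward_[0]->calc(
    {Tensor(outG->getData(), Dims{outG->getHeight(), outG->getWidth()}),
     Tensor(outV->getData(), Dims{outV->getHeight(), outV->getWidth()}),
     Tensor(inV1->getData(), Dims{inV1->getHeight(), inV1->getWidth()}),
     Tensor(inV2->getData(), Dims{inV2->getHeight(), inV2->getWidth()})},
    {},
    {Tensor(inG1->getData(), Dims{inG1->getHeight(), inG1->getWidth()}),
     Tensor(inG2->getData(), Dims{inG2->getHeight(), inG2->getWidth()})});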
...@@ -188,48 +188,6 @@ extern void hl_param_relu_backward_diff(real* grad_o,
int width,
int height,
int partial_sum);
/**
* @brief cos sim forward
*
* @param[out] output output data
* @param[in] input1 input1 data(matrix)
* @param[in] input2 input2 data(matrix or vector)
* @param[in] width matrix width
* @param[in] input1_height input1_height
* @param[in] input2_height input2_height
* @param[in] scale scale factor
*/
extern void hl_cossim(real* output,
real* input1,
real* input2,
int width,
int input1_height,
int input2_height,
real scale);
/**
* @brief cos sim derivate
*
* @param[in] grad output grad
* @param[in] output output data
* @param[in] prevOutX input1 data
* @param[in] prevOutY input2 data
* @param[out] prevGradX input1 grad
* @param[out] prevGradY input2 grad
* @param[in] width matrix width
* @param[in] input1_height input1 height
* @param[in] input2_height input2 height
* @param[in] scale scale factor
*/
extern void hl_cossim_derivative(real* grad,
real* output,
real* prevOutX,
real* prevOutY,
real* prevGradX,
real* prevGradY,
int width,
int input1_height,
int input2_height,
real scale);
/**
* @brief Matrix addition: A_d[i][j] += scale * B_d[j/channel].
...
...@@ -74,25 +74,6 @@ inline void hl_param_relu_backward_diff(real* grad_o,
int height,
int partial_sum) {}
inline void hl_cossim(real* output,
real* input1,
real* input2,
int width,
int input1_height,
int input2_height,
real scale) {}
inline void hl_cossim_derivative(real* grad,
real* output,
real* prevOutX,
real* prevOutY,
real* prevGradX,
real* prevGradY,
int width,
int input1_height,
int input2_height,
real scale) {}
inline void hl_matrix_add_shared_bias(real* A_d,
real* B_d,
const int channel,
...
...@@ -584,177 +584,6 @@ void hl_param_relu_backward_diff(real* grad_o,
CHECK_SYNC("hl_param_relu_backward_diff failed");
}
template<int blockSize>
__global__ void KeCosSim(real* output,
real* input1,
real* input2,
int width,
int input1_height,
int input2_height,
real scale) {
const int ty = blockIdx.y;
int tid = threadIdx.x;
__shared__ real xx[blockSize];
__shared__ real yy[blockSize];
__shared__ real xy[blockSize];
xx[tid] = 0.0;
yy[tid] = 0.0;
xy[tid] = 0.0;
__syncthreads();
input1 += ty * width;
if (input2_height > 1) {
input2 += ty * width;
}
for (int index = tid; index < width; index += blockSize) {
real x = input1[index];
real y = input2[index];
xx[tid] += x * x;
yy[tid] += y * y;
xy[tid] += x * y;
}
__syncthreads();
for (int s = blockSize / 2; s > 0; s >>= 1) {
if (tid < s) {
xx[tid] += xx[tid + s];
yy[tid] += yy[tid + s];
xy[tid] += xy[tid + s];
}
__syncthreads();
}
if (tid == 0) {
output[ty] = scale * xy[0] / (sqrt(xx[0]) * sqrt(yy[0]));
}
}
void hl_cossim(real* output,
real* input1,
real* input2,
int width,
int input1_height,
int input2_height,
real scale) {
CHECK_NOTNULL(output);
CHECK_NOTNULL(input1);
CHECK_NOTNULL(input2);
const int blockSize = 256;
dim3 threads(blockSize, 1);
dim3 grid(1, input1_height);
KeCosSim<blockSize><<<grid, threads, 0, STREAM_DEFAULT>>>
(output, input1, input2, width, input1_height, input2_height, scale);
CHECK_SYNC("hl_cossim failed");
}
template<int blockSize>
__global__ void KeCosSimDerivative(real* grad,
real* output,
real* prevOutX,
real* prevOutY,
real* prevGradX,
real* prevGradY,
int width,
int input1_height,
int input2_height,
real scale) {
const int ty = blockIdx.y;
int tid = threadIdx.x;
__shared__ real xx[blockSize];
__shared__ real yy[blockSize];
__shared__ real xy[blockSize];
xx[tid] = 0.0;
yy[tid] = 0.0;
xy[tid] = 0.0;
__syncthreads();
prevOutX += ty * width;
prevGradX += ty * width;
if (input2_height > 1) {
prevOutY += ty * width;
prevGradY += ty * width;
}
for (int index = tid; index < width; index += blockSize) {
real x = prevOutX[index];
real y = prevOutY[index];
xx[tid] += x * x;
yy[tid] += y * y;
xy[tid] += x * y;
}
__syncthreads();
for (int s = blockSize / 2; s > 0; s >>= 1) {
if (tid < s) {
xx[tid] += xx[tid + s];
yy[tid] += yy[tid + s];
xy[tid] += xy[tid + s];
}
__syncthreads();
}
if (xy[0] == 0) {
real reciprocal = 1.0 / (sqrt(xx[0]) * sqrt(yy[0]));
for (int index = tid; index < width; index += blockSize) {
prevGradX[index] +=
scale * grad[ty] * prevOutY[index] * reciprocal;
if (input2_height > 1) {
prevGradY[index] +=
scale * grad[ty] * prevOutX[index] * reciprocal;
} else {
paddle::paddleAtomicAdd(prevGradY + index,
scale * grad[ty] * prevOutX[index] * reciprocal);
}
}
} else {
real reciprocalXY = 1.0 / xy[0];
real reciprocalSquareSumX = 1.0 / xx[0];
real reciprocalSquareSumY = 1.0 / yy[0];
for (int index = tid; index < width; index += blockSize) {
prevGradX[index] += output[ty] * grad[ty] *
(prevOutY[index] * reciprocalXY -
prevOutX[index] * reciprocalSquareSumX);
if (input2_height > 1) {
prevGradY[index] += output[ty] * grad[ty] *
(prevOutX[index] * reciprocalXY -
prevOutY[index] * reciprocalSquareSumY);
} else {
paddle::paddleAtomicAdd(prevGradY + index, output[ty] * grad[ty] *
(prevOutX[index] * reciprocalXY -
prevOutY[index] * reciprocalSquareSumY));
}
}
}
}
void hl_cossim_derivative(real* grad,
real* output,
real* prevOutX,
real* prevOutY,
real* prevGradX,
real* prevGradY,
int width,
int input1_height,
int input2_height,
real scale) {
CHECK_NOTNULL(grad);
CHECK_NOTNULL(output);
CHECK_NOTNULL(prevOutX);
CHECK_NOTNULL(prevOutY);
CHECK_NOTNULL(prevGradX);
CHECK_NOTNULL(prevGradY);
const int blockSize = 256;
dim3 threads(blockSize, 1);
dim3 grid(1, input1_height);
KeCosSimDerivative<blockSize><<<grid, threads, 0, STREAM_DEFAULT>>>
(grad, output, prevOutX, prevOutY, prevGradX, prevGradY, width,
input1_height, input2_height, scale);
CHECK_SYNC("hl_cossim_derivate failed");
}
__global__ void KeMatrixAddSharedBias(real* A,
real* B,
const int channel,
...
...@@ -34,7 +34,6 @@ void CosSimForward<DEVICE_TYPE_CPU>(CpuMatrix* out_mat,
CHECK(in2_mat->getHeight() == 1LU || in2_mat->getHeight() == num_samples);
size_t inc = (in2_mat->getHeight() == 1LU) ? 0 : dim;
for (size_t i = 0; i < num_samples; ++i, x += dim, y += inc) {
/// for each row, todo(tianbing), use TensorExpression square2 ?
real square_sum_x = 0;
real square_sum_y = 0;
real xy = 0;
...@@ -147,12 +146,15 @@ void CosSimBackward<DEVICE_TYPE_CPU>(const CpuMatrix* out_grad,
}
/**
- * \param inputs[0] output value 1, size: nSamples * 1.
- * \param inputs[1] input value 1, size: nSamples * dim.
- * \param inputs[2] input value 2, size: n2 * dim (n2 == 1 or n2 == nSamples).
- * \param inputs[3] input grad 1, size: nSamples * dim.
- * \param inputs[4] input grad 2, size: n2 * dim (n2 == 1 or n2 == nSamples).
- * \param outputs[0] output grad, size : nSamples * 1.
+ * \param inouts[0] forward input grad 1, size: nSamples * dim.
+ * \param inouts[1] forward input grad 2,
+ *                  size: n2 * dim (n2 == 1 or n2 == nSamples).
+ *
+ * \param inputs[0] backward loss output grad, size : nSamples * 1.
+ * \param inputs[1] forward output value, size: nSamples * 1.
+ * \param inputs[2] forward input value 1, size: nSamples * dim.
+ * \param inputs[3] forward input value 2,
+ *                  size: n2 * dim (n2 == 1 or n2 == nSamples).
*/
template <DeviceType Device>
class CosSimBackwardFunc : public FunctionBase {
...@@ -163,35 +165,35 @@ class CosSimBackwardFunc : public FunctionBase {
void calc(const Arguments& inputs,
const Arguments& outputs,
const Arguments& inouts) override {
- CHECK_EQ(inputs.size(), 5);
- CHECK_EQ(outputs.size(), 1);
- CHECK_EQ(inouts.size(), 0);
+ CHECK_EQ(inputs.size(), 4);
+ CHECK_EQ(outputs.size(), 0);
+ CHECK_EQ(inouts.size(), 2);
/// dim of out_grad and out_val == 1, column vector
- CHECK_EQ(outputs[0].dims_[1], 1UL);
CHECK_EQ(inputs[0].dims_[1], 1UL);
+ CHECK_EQ(inputs[1].dims_[1], 1UL);
/// nSamples of out_grad == out_val == in_val1 == in_grad1
- CHECK_EQ(inputs[0].dims_[0], outputs[0].dims_[0]);
- CHECK_EQ(inputs[1].dims_[0], outputs[0].dims_[0]);
- CHECK_EQ(inputs[3].dims_[0], outputs[0].dims_[0]);
+ CHECK_EQ(inputs[1].dims_[0], inputs[0].dims_[0]);
+ CHECK_EQ(inputs[0].dims_[0], inputs[0].dims_[0]);
+ CHECK_EQ(inouts[0].dims_[0], inputs[0].dims_[0]);
/// dim of in1_val1 == in_val2 == in_grad1 == in_grad2
- CHECK_EQ(inputs[2].dims_[1], inputs[1].dims_[1]);
- CHECK_EQ(inputs[3].dims_[1], inputs[1].dims_[1]);
- CHECK_EQ(inputs[4].dims_[1], inputs[1].dims_[1]);
- CHECK(outputs[0].getData() && inputs[0].getData() && inputs[1].getData() &&
-       inputs[2].getData() && inputs[3].getData() && inputs[4].getData());
+ CHECK_EQ(inputs[3].dims_[1], inputs[2].dims_[1]);
+ CHECK_EQ(inouts[0].dims_[1], inputs[2].dims_[1]);
+ CHECK_EQ(inouts[1].dims_[1], inputs[2].dims_[1]);
+ CHECK(inputs[0].getData() && inputs[1].getData() && inputs[2].getData() &&
+       inputs[3].getData() && inouts[0].getData() && inouts[1].getData());
- const auto out_grad = std::make_shared<typename MatrixT<Device>::type>(
-     outputs[0].getData(), outputs[0].dims_[0], outputs[0].dims_[1]);
- const auto out_val = std::make_shared<typename MatrixT<Device>::type>(
-     inputs[0].getData(), inputs[0].dims_[0], inputs[0].dims_[1]);
- const auto in1_val = std::make_shared<typename MatrixT<Device>::type>(
-     inputs[1].getData(), inputs[1].dims_[0], inputs[1].dims_[1]);
- const auto in2_val = std::make_shared<typename MatrixT<Device>::type>(
-     inputs[2].getData(), inputs[2].dims_[0], inputs[2].dims_[1]);
- auto in1_grad = std::make_shared<typename MatrixT<Device>::type>(
-     inputs[3].getData(), inputs[3].dims_[0], inputs[3].dims_[1]);
- auto in2_grad = std::make_shared<typename MatrixT<Device>::type>(
-     inputs[4].getData(), inputs[4].dims_[0], inputs[4].dims_[1]);
+ const auto out_grad = std::make_shared<typename MatrixT<Device>::type>(
+     inputs[0].getData(), inputs[0].dims_[0], inputs[0].dims_[1]);
+ const auto out_val = std::make_shared<typename MatrixT<Device>::type>(
+     inputs[1].getData(), inputs[1].dims_[0], inputs[1].dims_[1]);
+ const auto in1_val = std::make_shared<typename MatrixT<Device>::type>(
+     inputs[2].getData(), inputs[2].dims_[0], inputs[2].dims_[1]);
+ const auto in2_val = std::make_shared<typename MatrixT<Device>::type>(
+     inputs[3].getData(), inputs[3].dims_[0], inputs[3].dims_[1]);
+ auto in1_grad = std::make_shared<typename MatrixT<Device>::type>(
+     inouts[0].getData(), inouts[0].dims_[0], inouts[0].dims_[1]);
+ auto in2_grad = std::make_shared<typename MatrixT<Device>::type>(
+     inouts[1].getData(), inouts[1].dims_[0], inouts[1].dims_[1]);
CosSimBackward<Device>(out_grad.get(),
out_val.get(),
...
...@@ -25,9 +25,9 @@ namespace paddle {
* = scale * \sum_j (in1[i][j] * in2[i][j]) /
* sqrt(sum_j (in1[i][j]^2) * sum_j (in2[i][j])^2)
*
- * \param[out] output output data.
- * \param[in] intput1 input data.
- * \param[in] intput2 input data.
+ * \param[out] output output value.
+ * \param[in] intput1 input value.
+ * \param[in] intput2 input value.
* \param[in] scale default 1.0.
*
*/
...@@ -40,12 +40,12 @@ void CosSimForward(typename MatrixT<Device>::type* output,
/**
* \brief Cosine Similarity BackWard for Derivative.
*
- * \param[out] output1 backward loss output grad.
- * \param[in] input1 forward-output value.
- * \param[in] input2 forward input value 1.
- * \param[in] input3 forward input value 2.
- * \param[in] input4 forward input grad 1.
- * \param[in] input5 forward input grad 2.
+ * \param[in] output grad backward loss output grad.
+ * \param[in] output val forward-output value.
+ * \param[in] input val1 forward input value 1.
+ * \param[in] input val2 forward input value 2.
+ * \param[in/out] input grad forward input grad 1.
+ * \param[in/out] input grad forward input grad 2.
* \param[in] scale default 1.0.
*
*/
...
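Stated as equations, this is what the reciprocalXY / reciprocalSquareSum terms in the cosine-similarity backward code (the removed cosSimDerivative kernels above and their CosSimBackward replacement) compute; the symbols o_i, g_i and s below are introduced here for the forward output, the output grad and the scale:

\[
o_i = s\,\frac{x_i \cdot y_i}{\lVert x_i \rVert\,\lVert y_i \rVert}, \qquad
\frac{\partial L}{\partial x_{ij}} = g_i\, o_i \left( \frac{y_{ij}}{x_i \cdot y_i} - \frac{x_{ij}}{\lVert x_i \rVert^2} \right), \qquad
\frac{\partial L}{\partial y_{ij}} = g_i\, o_i \left( \frac{x_{ij}}{x_i \cdot y_i} - \frac{y_{ij}}{\lVert y_i \rVert^2} \right).
\]

The x_i · y_i = 0 branch in the kernels uses the limiting form g_i s y_{ij} / (‖x_i‖ ‖y_i‖) for the first gradient (and symmetrically for the second), since o_i / (x_i · y_i) reduces to s / (‖x_i‖ ‖y_i‖) and the second term vanishes.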
...@@ -97,22 +97,22 @@ void testCosSimBackward(size_t height_x,
gpu_in2_grad.copyFrom(cpu_in2_grad);
compare.getCpuFunction()->calc(
- {Tensor(cpu_out_val.getData(), Dims{height_x, 1}),
-  Tensor(cpu_in1_val.getData(), Dims{height_x, width}),
-  Tensor(cpu_in2_val.getData(), Dims{height_x, width}),
-  Tensor(cpu_in1_grad.getData(), Dims{height_x, width}),
-  Tensor(cpu_in2_grad.getData(), Dims{height_x, width})},
- {Tensor(cpu_out_grad.getData(), Dims{height_x, 1})},
- {});
+ {Tensor(cpu_out_grad.getData(), Dims{height_x, 1}),
+  Tensor(cpu_out_val.getData(), Dims{height_x, 1}),
+  Tensor(cpu_in1_val.getData(), Dims{height_x, width}),
+  Tensor(cpu_in2_val.getData(), Dims{height_x, width})},
+ {},
+ {Tensor(cpu_in1_grad.getData(), Dims{height_x, width}),
+  Tensor(cpu_in2_grad.getData(), Dims{height_x, width})});
compare.getGpuFunction()->calc(
- {Tensor(gpu_out_val.getData(), Dims{height_x, 1}),
-  Tensor(gpu_in1_val.getData(), Dims{height_x, width}),
-  Tensor(gpu_in2_val.getData(), Dims{height_x, width}),
-  Tensor(gpu_in1_grad.getData(), Dims{height_x, width}),
-  Tensor(gpu_in2_grad.getData(), Dims{height_x, width})},
- {Tensor(gpu_out_grad.getData(), Dims{height_x, 1})},
- {});
+ {Tensor(gpu_out_grad.getData(), Dims{height_x, 1}),
+  Tensor(gpu_out_val.getData(), Dims{height_x, 1}),
+  Tensor(gpu_in1_val.getData(), Dims{height_x, width}),
+  Tensor(gpu_in2_val.getData(), Dims{height_x, width})},
+ {},
+ {Tensor(gpu_in1_grad.getData(), Dims{height_x, width}),
+  Tensor(gpu_in2_grad.getData(), Dims{height_x, width})});
autotest::TensorCheckErr(cpu_in1_grad, gpu_in1_grad);
autotest::TensorCheckErr(cpu_in2_grad, gpu_in2_grad);
...
...@@ -79,13 +79,13 @@ void CosSimLayer::backward(const UpdateCallback& callback) {
auto inG2 = this->getInputGrad(1);
CHECK(outG && outV && inV1 && inV2 && inG1 && inG2);
backward_[0]->calc(
- {Tensor(outV->getData(), Dims{outV->getHeight(), outV->getWidth()}),
-  Tensor(inV1->getData(), Dims{inV1->getHeight(), inV1->getWidth()}),
-  Tensor(inV2->getData(), Dims{inV2->getHeight(), inV2->getWidth()}),
-  Tensor(inG1->getData(), Dims{inG1->getHeight(), inG1->getWidth()}),
-  Tensor(inG2->getData(), Dims{inG2->getHeight(), inG2->getWidth()})},
- {Tensor(outG->getData(), Dims{outG->getHeight(), outG->getWidth()})},
- {});
+ {Tensor(outG->getData(), Dims{outG->getHeight(), outG->getWidth()}),
+  Tensor(outV->getData(), Dims{outV->getHeight(), outV->getWidth()}),
+  Tensor(inV1->getData(), Dims{inV1->getHeight(), inV1->getWidth()}),
+  Tensor(inV2->getData(), Dims{inV2->getHeight(), inV2->getWidth()})},
+ {},
+ {Tensor(inG1->getData(), Dims{inG1->getHeight(), inG1->getWidth()}),
+  Tensor(inG2->getData(), Dims{inG2->getHeight(), inG2->getWidth()})});
}
}
...
...@@ -169,19 +169,19 @@ void CosSimVecMatLayer::backward(const UpdateCallback& callback) {
tmpRow3->setData(outG->rowBuf(i));
backward_[0]->calc(
- {Tensor(tmpRow2->getData(),
-         Dims{tmpRow2->getHeight(), tmpRow2->getWidth()}),
-  Tensor(tmpMtx0->getData(),
-         Dims{tmpMtx0->getHeight(), tmpMtx0->getWidth()}),
-  Tensor(tmpRow0->getData(),
-         Dims{tmpRow0->getHeight(), tmpRow0->getWidth()}),
-  Tensor(tmpMtx1->getData(),
-         Dims{tmpMtx1->getHeight(), tmpMtx1->getWidth()}),
-  Tensor(tmpRow1->getData(),
-         Dims{tmpRow1->getHeight(), tmpRow1->getWidth()})},
- {Tensor(tmpRow3->getData(),
-         Dims{tmpRow3->getHeight(), tmpRow3->getWidth()})},
- {});
+ {Tensor(tmpRow3->getData(),
+         Dims{tmpRow3->getHeight(), tmpRow3->getWidth()}),
+  Tensor(tmpRow2->getData(),
+         Dims{tmpRow2->getHeight(), tmpRow2->getWidth()}),
+  Tensor(tmpMtx0->getData(),
+         Dims{tmpMtx0->getHeight(), tmpMtx0->getWidth()}),
+  Tensor(tmpRow0->getData(),
+         Dims{tmpRow0->getHeight(), tmpRow0->getWidth()})},
+ {},
+ {Tensor(tmpMtx1->getData(),
+         Dims{tmpMtx1->getHeight(), tmpMtx1->getWidth()}),
+  Tensor(tmpRow1->getData(),
+         Dims{tmpRow1->getHeight(), tmpRow1->getWidth()})});
}
}
...
...@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
+ #pragma once
#include "Layer.h"
#include "paddle/math/Matrix.h"
...
...@@ -941,59 +941,6 @@ void GpuMatrix::softreluDerivative(Matrix& output) {
void GpuMatrix::scaledTanh(Matrix& output, real p1, real p2) {
BaseMatrix::scaledTanh(output, p1, p2);
}
void GpuMatrix::cosSim(Matrix& output1, Matrix& output2, real scale) {
CHECK(output1.useGpu_ == true && output2.useGpu_ == true)
<< "Matrix type are not equal";
size_t numSamples = getHeight();
size_t dim = output1.getWidth();
CHECK_EQ(getWidth(), 1UL);
CHECK_EQ(output1.getHeight(), numSamples);
CHECK_EQ(output1.getWidth(), output2.getWidth());
real* out = getData();
real* x = output1.getData();
real* y = output2.getData();
hl_cossim(out, x, y, dim, output1.getHeight(), output2.getHeight(), scale);
}
void GpuMatrix::cosSimDerivative(Matrix& output,
Matrix& prevOut1,
Matrix& prevOut2,
Matrix& prevGrad1,
Matrix& prevGrad2,
real scale) {
CHECK(output.useGpu_ == true && prevOut1.useGpu_ == true &&
prevOut2.useGpu_ == true && prevGrad1.useGpu_ == true &&
prevGrad2.useGpu_ == true)
<< "Matrix type are not equal";
CHECK_EQ(getWidth(), 1UL);
CHECK_EQ(output.getWidth(), 1UL);
size_t numSamples = getHeight();
CHECK_EQ(output.getHeight(), numSamples);
CHECK_EQ(prevOut1.getHeight(), numSamples);
CHECK_EQ(prevGrad1.getHeight(), numSamples);
size_t dim = prevOut1.getWidth();
CHECK_EQ(prevOut2.getWidth(), dim);
CHECK_EQ(prevGrad1.getWidth(), dim);
CHECK_EQ(prevGrad2.getWidth(), dim);
real* grad = getData();
real* out = output.getData();
real* prevOutX = prevOut1.getData();
real* prevOutY = prevOut2.getData();
real* prevGradX = prevGrad1.getData();
real* prevGradY = prevGrad2.getData();
hl_cossim_derivative(grad,
out,
prevOutX,
prevOutY,
prevGradX,
prevGradY,
dim,
prevOut1.getHeight(),
prevOut2.getHeight(),
scale);
}
void GpuMatrix::randomizeUniform() {
CHECK(isContiguous());
...@@ -3470,105 +3417,6 @@ void CpuMatrix::softmaxDerivative(Matrix& output, Matrix& sftmaxSum) {
}
}
void CpuMatrix::cosSim(Matrix& output1, Matrix& output2, real scale) {
size_t numSamples = getHeight();
size_t dim = output1.getWidth();
CHECK_EQ(getWidth(), 1UL);
CHECK_EQ(output1.getHeight(), numSamples);
CHECK_EQ(output1.getWidth(), output2.getWidth());
real* out = getData();
const real* x = output1.getData();
const real* y = output2.getData();
size_t yInc = dim;
if (output2.getHeight() == 1LU) {
yInc = 0;
} else {
CHECK_EQ(output2.getHeight(), numSamples);
}
for (size_t i = 0; i < numSamples; ++i, x += dim, y += yInc) {
real squareSumX = 0;
real squareSumY = 0;
real xy = 0;
for (size_t j = 0; j < dim; ++j) {
squareSumX += _square(x[j]);
squareSumY += _square(y[j]);
xy += x[j] * y[j];
}
CHECK(squareSumX > 0 && squareSumY > 0);
out[i] = scale * xy / (std::sqrt(squareSumX) * std::sqrt(squareSumY));
}
}
void CpuMatrix::cosSimDerivative(Matrix& output,
Matrix& prevOut1,
Matrix& prevOut2,
Matrix& prevGrad1,
Matrix& prevGrad2,
real scale) {
CHECK(output.useGpu_ == false) << "Matrix type are not equal";
CHECK_EQ(getWidth(), 1UL);
CHECK_EQ(output.getWidth(), 1UL);
size_t numSamples = getHeight();
CHECK_EQ(output.getHeight(), numSamples);
CHECK_EQ(prevOut1.getHeight(), numSamples);
CHECK_EQ(prevGrad1.getHeight(), numSamples);
size_t dim = prevOut1.getWidth();
CHECK_EQ(prevOut2.getWidth(), dim);
CHECK_EQ(prevGrad1.getWidth(), dim);
CHECK_EQ(prevGrad2.getWidth(), dim);
const real* grad = getData();
const real* out = output.getData();
const real* prevOutX = prevOut1.getData();
const real* prevOutY = prevOut2.getData();
real* prevGradX = prevGrad1.getData();
real* prevGradY = prevGrad2.getData();
size_t yInc = dim;
if (prevOut2.getHeight() == 1LU) {
yInc = 0;
CHECK_EQ(prevGrad2.getHeight(), 1LU);
} else {
CHECK_EQ(prevOut2.getHeight(), numSamples);
CHECK_EQ(prevGrad2.getHeight(), numSamples);
}
for (size_t i = 0; i < numSamples; ++i,
prevOutX += dim,
prevOutY += yInc,
prevGradX += dim,
prevGradY += yInc) {
real squareSumX = 0;
real squareSumY = 0;
real xy = 0;
for (size_t j = 0; j < dim; ++j) {
squareSumX += _square(prevOutX[j]);
squareSumY += _square(prevOutY[j]);
xy += prevOutX[j] * prevOutY[j];
}
CHECK(squareSumX > 0 && squareSumY > 0);
if (xy == 0) {
real reciprocal = 1.0f / (std::sqrt(squareSumX) * std::sqrt(squareSumY));
for (size_t j = 0; j < dim; ++j) {
prevGradX[j] += scale * grad[i] * prevOutY[j] * reciprocal;
prevGradY[j] += scale * grad[i] * prevOutX[j] * reciprocal;
}
} else {
real reciprocalXY = 1.0f / xy;
real reciprocalSquareSumX = 1.0f / squareSumX;
real reciprocalSquareSumY = 1.0f / squareSumY;
for (size_t j = 0; j < dim; ++j) {
prevGradX[j] += out[i] * grad[i] * (prevOutY[j] * reciprocalXY -
prevOutX[j] * reciprocalSquareSumX);
prevGradY[j] += out[i] * grad[i] * (prevOutX[j] * reciprocalXY -
prevOutY[j] * reciprocalSquareSumY);
}
}
}
}
void CpuMatrix::sumOfSquares(Matrix& output, Matrix& label) {
CHECK(output.useGpu_ == false && label.useGpu_ == false)
<< "Matrix type are not equal";
...
...@@ -799,26 +799,6 @@ public:
LOG(FATAL) << "Not implemented";
}
/**
* cosine similarity, for each row i,
* this[i] = cos(output1[i], output2[i])
*
* output2 can only have one row, then for each row i,
* this[i] = cos(output1[i], output2[0])
*/
virtual void cosSim(Matrix& output1, Matrix& output2, real scale = 1.0f) {
LOG(FATAL) << "Not implemented";
}
virtual void cosSimDerivative(Matrix& output,
Matrix& prevOut1,
Matrix& prevOut2,
Matrix& prevGrad1,
Matrix& prevGrad2,
real scale = 1.0f) {
LOG(FATAL) << "Not implemented";
}
/// print out the values of elements to os
virtual void print(std::ostream& os) const {
LOG(FATAL) << "Not implemented";
...@@ -1324,14 +1304,6 @@ public:
void softreluDerivative(Matrix& output);
void scaledTanh(Matrix& output, real p1, real p2);
void cosSim(Matrix& output1, Matrix& output2, real scale);
void cosSimDerivative(Matrix& output,
Matrix& prevOut1,
Matrix& prevOut2,
Matrix& prevGrad1,
Matrix& prevGrad2,
real scale);
virtual void print(std::ostream& os) const;
virtual void print(std::ostream& os, size_t height, size_t width) const;
...@@ -1752,14 +1724,6 @@ public:
void softreluDerivative(Matrix& output);
void scaledTanh(Matrix& output, real p1, real p2);
void cosSim(Matrix& output1, Matrix& output2, real scale);
void cosSimDerivative(Matrix& output,
Matrix& prevOut1,
Matrix& prevOut2,
Matrix& prevGrad1,
Matrix& prevGrad2,
real scale);
void print(std::ostream& os) const;
void print(std::ostream& os, size_t height, size_t width) const;
void printOneRow(std::ostream& os, size_t idx) const;
...
...@@ -181,28 +181,6 @@ TEST(Matrix, copyByRowIndex) {
}
}
void testCosSim(int heightX, int heightY, int width, real scale) {
AutoCompare test(heightX, 1);
CpuMatrix arg1(heightX, width);
CpuMatrix arg2(heightY, width);
arg1.randomizeUniform();
arg2.randomizeUniform();
arg2.add(-0.5);
test.cmpWithArg(&Matrix::cosSim, arg1, arg2, scale);
}
TEST(Matrix, cosSim) {
for (auto heightX : {10, 100, 1000}) {
for (auto heightY : {1, heightX}) {
for (auto width : {10, 100, 1000}) {
for (auto scale : {1.0, 2.0}) {
testCosSim(heightX, heightY, width, scale);
}
}
}
}
}
void testParamReluForward(int height, int width, int w_height, int w_width) {
AutoCompare test(height, width);
CpuMatrix arg1(height, width);
...
...@@ -720,61 +720,6 @@ TEST(Matrix, sequenceAvgForward) {
}
}
void testCosSimDerivate(int heightX, int heightY, int width, real scale) {
MatrixPtr prevOutX = CpuMatrix::create(heightX, width, false, false);
MatrixPtr prevOutY = CpuMatrix::create(heightY, width, false, false);
MatrixPtr grad = CpuMatrix::create(heightX, 1, false, false);
MatrixPtr output = CpuMatrix::create(heightX, 1, false, false);
MatrixPtr prevGradX = CpuMatrix::create(heightX, width, false, false);
MatrixPtr prevGradY = CpuMatrix::create(heightY, width, false, false);
prevOutX->randomizeUniform();
prevOutY->randomizeUniform();
grad->randomizeUniform();
output->randomizeUniform();
prevGradX->randomizeUniform();
prevGradY->randomizeUniform();
MatrixPtr prevOutXGpu = GpuMatrix::create(heightX, width, false, true);
MatrixPtr prevOutYGpu = GpuMatrix::create(heightY, width, false, true);
MatrixPtr gradGpu = GpuMatrix::create(heightX, 1, false, true);
MatrixPtr outputGpu = GpuMatrix::create(heightX, 1, false, true);
MatrixPtr prevGradXGpu = GpuMatrix::create(heightX, width, false, true);
MatrixPtr prevGradYGpu = GpuMatrix::create(heightY, width, false, true);
prevOutXGpu->copyFrom(*prevOutX);
prevOutYGpu->copyFrom(*prevOutY);
gradGpu->copyFrom(*grad);
outputGpu->copyFrom(*output);
prevGradXGpu->copyFrom(*prevGradX);
prevGradYGpu->copyFrom(*prevGradY);
grad->cosSimDerivative(
*output, *prevOutX, *prevOutY, *prevGradX, *prevGradY, scale);
gradGpu->cosSimDerivative(*outputGpu,
*prevOutXGpu,
*prevOutYGpu,
*prevGradXGpu,
*prevGradYGpu,
scale);
TensorCheckErr(*prevGradX, *prevGradXGpu);
TensorCheckErr(*prevGradY, *prevGradYGpu);
}
TEST(Matrix, cosSimDerivate) {
for (auto heightX : {1, 10, 100}) {
for (auto heightY : {1, heightX}) {
for (auto width : {1, 10, 100}) {
for (auto scale : {1.0, 2.0}) {
testCosSimDerivate(heightX, heightY, width, scale);
}
}
}
}
}
void testParamReluBackwardDiff(int height,
int width,
int w_height,
...