diff --git a/paddle/fluid/lite/core/tensor.cc b/paddle/fluid/lite/core/tensor.cc index 4354bb6cb4422ab271058f956c25f2e3f76f402d..b56baf7d17c8eb20170e152d99e090cfd9d7ff71 100644 --- a/paddle/fluid/lite/core/tensor.cc +++ b/paddle/fluid/lite/core/tensor.cc @@ -13,3 +13,33 @@ // limitations under the License. #include "paddle/fluid/lite/core/tensor.h" + +namespace paddle { +namespace lite { + +std::ostream &operator<<(std::ostream &os, const DDim &dims) { + if (dims.empty()) { + os << "[]"; + return os; + } + + os << "["; + for (int i = 0; i < dims.size() - 1; i++) { + os << dims[i] << " "; + } + os << dims.back() << "]"; + return os; +} + +std::ostream &operator<<(std::ostream &os, const Tensor &tensor) { + os << "Tensor:" << std::endl; + os << "dim: " << tensor.dims(); + for (int i = 0; i < product(tensor.dims()); i++) { + os << tensor.data()[i] << " "; + } + os << "\n"; + return os; +} + +} // namespace lite +} // namespace paddle diff --git a/paddle/fluid/lite/core/tensor.h b/paddle/fluid/lite/core/tensor.h index 15a2ba3aee1b62cb8d1dd24827f45f5469769e0e..e308913a3dc923c5333894274e5256c968a7628d 100644 --- a/paddle/fluid/lite/core/tensor.h +++ b/paddle/fluid/lite/core/tensor.h @@ -14,8 +14,8 @@ #pragma once #include -#include #include +#include #include "memory.h" namespace paddle { @@ -91,5 +91,8 @@ class Tensor { LoD lod_; }; +std::ostream& operator<<(std::ostream& os, const DDim& dims); +std::ostream& operator<<(std::ostream& os, const Tensor& tensor); + } // namespace lite } // namespace paddle diff --git a/paddle/fluid/lite/kernels/host/fc_compute.cc b/paddle/fluid/lite/kernels/host/fc_compute.cc index f2f3863ca76fcca0392423622c0a0e1ad84f4713..7b0ae58b829110ffc7cf480e8169b1dd0bf073a0 100644 --- a/paddle/fluid/lite/kernels/host/fc_compute.cc +++ b/paddle/fluid/lite/kernels/host/fc_compute.cc @@ -30,25 +30,18 @@ void FcCompute::Run() { CHECK_GE(param.input->dims().size(), 2UL); CHECK_EQ(param.output->dims().size(), 2UL); - Eigen::Map input( - 
param.input->data(), - product(param.input->dims().begin(), - param.input->dims().begin() + param.in_num_col_dims), + + fc_compute_eigen( + param.input->data(), // x product(param.input->dims().begin() + param.in_num_col_dims, - param.input->dims().end())); - Eigen::Map weight(param.w->data(), param.w->dims()[0], - param.w->dims()[1]); - matrix_map_t output(param.output->mutable_data(), - param.output->dims()[0], param.output->dims()[1]); - - output = weight.transpose() * input; - - if (param.bias) { - Eigen::Map bias(param.bias->data(), - param.bias->dims()[0], - param.bias->dims()[1]); - output += bias; - } + param.input->dims().end()), // x_w + product(param.input->dims().begin(), + param.input->dims().begin() + param.in_num_col_dims), // x_h + param.w->data(), // w + param.w->dims()[1], // w_w + param.w->dims()[0], // w_h + param.bias ? param.bias->data() : nullptr, // b + param.output->mutable_data()); } TargetType FcCompute::target() const { return TARGET(kHost); } diff --git a/paddle/fluid/lite/kernels/host/fc_compute.h b/paddle/fluid/lite/kernels/host/fc_compute.h index 3b79c5142727f69832fe4bfd09e58c5cbfa697c5..355f1be8503615116c5de8f945d63adc7ac78de0 100644 --- a/paddle/fluid/lite/kernels/host/fc_compute.h +++ b/paddle/fluid/lite/kernels/host/fc_compute.h @@ -13,6 +13,8 @@ // limitations under the License.
#pragma once +#include +#include #include "paddle/fluid/lite/core/kernel.h" #include "paddle/fluid/lite/operators/fc_op.h" @@ -33,6 +35,52 @@ class FcCompute : public OpKernel { virtual ~FcCompute() = default; }; +template +void fc_compute_eigen(const T* x, int x_w, int x_h, // + const T* w, int w_w, int w_h, // + const T* b, // + T* out) { + using matrix_t = + Eigen::Matrix; + + Eigen::Map X(x, x_h, x_w); + Eigen::Map W(w, w_h, w_w); + Eigen::Map Out(out, x_h, w_h); + + Out = X * W.transpose(); + + if (b) { + Eigen::Map> B(b, w_h); + Out = Out.array().rowwise() + B.transpose().array(); + } +} + +template +__attribute__((optimize("unroll-loops"))) // +T dot(const T* x, const T* y, int dim) { + T out{}; + for (int i = 0; i < dim; i++) { + out += x[i] * y[i]; + } + return out; +} + +template +void fc_compute_naive(const T* x, int x_w, int x_h, // + const T* w, int w_w, int w_h, // + const T* b, // + T* out) { + CHECK_EQ(x_w, w_w); + // out shape: (x_h, w_h) + memset(out, 0, x_h * w_h * sizeof(T)); + + for (int r = 0; r < x_h; r++) { + for (int c = 0; c < w_h; c++) { + out[r * w_h + c] = dot(&x[r * x_w], &w[c * w_w], w_w) + (b ? b[c] : T{}); + } + } +} + } // namespace host } // namespace kernels } // namespace lite diff --git a/paddle/fluid/lite/kernels/host/fc_compute_test.cc b/paddle/fluid/lite/kernels/host/fc_compute_test.cc index bc412e80c81ebdaf34da60cf84c0b522197e6f7f..474965e27770b71349595488c7a3f760149fc003 100644 --- a/paddle/fluid/lite/kernels/host/fc_compute_test.cc +++ b/paddle/fluid/lite/kernels/host/fc_compute_test.cc @@ -22,6 +22,37 @@ namespace lite { namespace kernels { namespace host { +TEST(fc_compute_naive, test) { + Tensor x, w, b, out, out1; + const int batch_size = 2; + x.Resize({batch_size, 3}); + w.Resize({4, 3}); + b.Resize({1, 4}); + out.Resize({batch_size, 4}); + out1.Resize({batch_size, 4}); + + auto x_data = x.mutable_data(); + auto w_data = w.mutable_data(); + auto b_data = b.mutable_data(); + auto out_data = out.mutable_data(); + auto out_data1 = 
out1.mutable_data(); + + for (int i = 0; i < product(x.dims()); i++) x_data[i] = i; + for (int i = 0; i < product(w.dims()); i++) w_data[i] = i; + for (int i = 0; i < product(b.dims()); i++) b_data[i] = i; + + fc_compute_naive(x_data, 3, batch_size, // + w_data, 3, 4, // + b_data, out_data); + fc_compute_eigen(x_data, 3, batch_size, // + w_data, 3, 4, // + b_data, out_data1); + + for (int i = 0; i < product(out.dims()); i++) { + EXPECT_NEAR(out_data[i], out_data1[i], 1e-6); + } +} + TEST(fc_host, init) { FcCompute fc; ASSERT_EQ(fc.precision(), PRECISION(kFloat));