提交 8c7c13f1 编写于 作者: S superjomn

make fc math right

上级 dd2bd8e3
...@@ -13,3 +13,33 @@ ...@@ -13,3 +13,33 @@
// limitations under the License. // limitations under the License.
#include "paddle/fluid/lite/core/tensor.h" #include "paddle/fluid/lite/core/tensor.h"
namespace paddle {
namespace lite {
std::ostream &operator<<(std::ostream &os, const DDim &dims) {
if (dims.empty()) {
os << "[]";
return os;
}
os << "[";
for (int i = 0; i < dims.size() - 1; i++) {
os << dims[i] << " ";
}
os << dims.back() << "]";
return os;
}
// Dumps a Tensor: a header line, its dims, then every element (as float)
// separated by spaces, followed by a newline.
std::ostream &operator<<(std::ostream &os, const Tensor &tensor) {
  os << "Tensor:" << std::endl;
  os << "dim: " << tensor.dims();
  const float *raw = tensor.data<float>();
  const int numel = product(tensor.dims());
  for (int i = 0; i < numel; ++i) {
    os << raw[i] << " ";
  }
  os << "\n";
  return os;
}
} // namespace lite
} // namespace paddle
...@@ -14,8 +14,8 @@ ...@@ -14,8 +14,8 @@
#pragma once #pragma once
#include <algorithm> #include <algorithm>
#include <vector>
#include <numeric> #include <numeric>
#include <vector>
#include "memory.h" #include "memory.h"
namespace paddle { namespace paddle {
...@@ -91,5 +91,8 @@ class Tensor { ...@@ -91,5 +91,8 @@ class Tensor {
LoD lod_; LoD lod_;
}; };
std::ostream& operator<<(std::ostream& os, const DDim& dims);
std::ostream& operator<<(std::ostream& os, const Tensor& tensor);
} // namespace lite } // namespace lite
} // namespace paddle } // namespace paddle
...@@ -30,25 +30,18 @@ void FcCompute::Run() { ...@@ -30,25 +30,18 @@ void FcCompute::Run() {
CHECK_GE(param.input->dims().size(), 2UL); CHECK_GE(param.input->dims().size(), 2UL);
CHECK_EQ(param.output->dims().size(), 2UL); CHECK_EQ(param.output->dims().size(), 2UL);
Eigen::Map<const matrix_t> input(
param.input->data<float>(), fc_compute_eigen(
product(param.input->dims().begin(), param.input->data<float>(), // x
param.input->dims().begin() + param.in_num_col_dims),
product(param.input->dims().begin() + param.in_num_col_dims, product(param.input->dims().begin() + param.in_num_col_dims,
param.input->dims().end())); param.input->dims().end()), // x_w
Eigen::Map<const matrix_t> weight(param.w->data<float>(), param.w->dims()[0], product(param.input->dims().begin(),
param.w->dims()[1]); param.input->dims().begin() + param.in_num_col_dims), // x_h
matrix_map_t output(param.output->mutable_data<float>(), param.w->data<float>(), // w
param.output->dims()[0], param.output->dims()[1]); param.w->dims()[1], // w_w
param.w->dims()[0], // w_h
output = weight.transpose() * input; param.bias->data<float>(), // b
param.output->mutable_data<float>());
if (param.bias) {
Eigen::Map<const matrix_t> bias(param.bias->data<float>(),
param.bias->dims()[0],
param.bias->dims()[1]);
output += bias;
}
} }
TargetType FcCompute::target() const { return TARGET(kHost); } TargetType FcCompute::target() const { return TARGET(kHost); }
......
...@@ -13,6 +13,8 @@ ...@@ -13,6 +13,8 @@
// limitations under the License. // limitations under the License.
#pragma once #pragma once
#include <glog/logging.h>
#include <Eigen/Core>
#include "paddle/fluid/lite/core/kernel.h" #include "paddle/fluid/lite/core/kernel.h"
#include "paddle/fluid/lite/operators/fc_op.h" #include "paddle/fluid/lite/operators/fc_op.h"
...@@ -33,6 +35,52 @@ class FcCompute : public OpKernel<TARGET(kHost), PRECISION(kFloat)> { ...@@ -33,6 +35,52 @@ class FcCompute : public OpKernel<TARGET(kHost), PRECISION(kFloat)> {
virtual ~FcCompute() = default; virtual ~FcCompute() = default;
}; };
// Fully-connected layer computed with Eigen: Out = X * W^T (+ bias).
//
// x:   row-major input buffer, viewed as shape (x_h, x_w)
// w:   row-major weight buffer, viewed as shape (w_h, w_w); the product
//      X * W^T requires x_w == w_w (NOT checked here — cf. the CHECK_EQ
//      in fc_compute_naive)
// b:   optional bias of length w_h; skipped when null
// out: row-major output buffer of shape (x_h, w_h); fully overwritten
template <typename T>
void fc_compute_eigen(const T* x, int x_w, int x_h,  //
                      const T* w, int w_w, int w_h,  //
                      const T* b,                    //
                      T* out) {
  using matrix_t =
      Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>;
  // Map the raw buffers as row-major matrices — no copies are made.
  Eigen::Map<const matrix_t> X(x, x_h, x_w);
  Eigen::Map<const matrix_t> W(w, w_h, w_w);
  Eigen::Map<matrix_t> Out(out, x_h, w_h);
  // Weights are laid out (out_dim, in_dim), hence the transpose.
  Out = X * W.transpose();
  if (b) {
    Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, 1>> B(b, w_h);
    // Broadcast the bias as a row vector across every row of Out.
    Out = Out.array().rowwise() + B.transpose().array();
  }
}
// Inner product of two length-`dim` arrays. The unroll-loops hint asks
// GCC to unroll the accumulation loop for throughput.
template <typename T>
__attribute__((optimize("unroll-loops")))  //
T dot(const T* x, const T* y, int dim) {
  T acc{};
  const T* x_end = x + dim;
  while (x != x_end) {
    acc += (*x++) * (*y++);
  }
  return acc;
}
// Reference (naive) fully-connected layer: out = x * w^T (+ bias).
//
// x:   row-major input buffer, shape (x_h, x_w)
// w:   row-major weight buffer, shape (w_h, w_w); x_w must equal w_w
// b:   optional bias of length w_h; skipped when null (now consistent
//      with fc_compute_eigen, which guards `if (b)`)
// out: row-major output buffer of shape (x_h, w_h); fully overwritten
template <typename T>
void fc_compute_naive(const T* x, int x_w, int x_h,  //
                      const T* w, int w_w, int w_h,  //
                      const T* b,                    //
                      T* out) {
  CHECK_EQ(x_w, w_w);
  // out shape: (x_h, w_h)  -- fixed: previous comment said (x_h, w_w)
  memset(out, 0, x_h * w_h * sizeof(T));
  for (int r = 0; r < x_h; r++) {
    for (int c = 0; c < w_h; c++) {
      // Row r of x dotted with row c of w (i.e. column c of w^T).
      out[r * w_h + c] = dot(&x[r * x_w], &w[c * w_w], w_w);
      if (b) {
        out[r * w_h + c] += b[c];  // guard null bias like the Eigen path
      }
    }
  }
}
} // namespace host } // namespace host
} // namespace kernels } // namespace kernels
} // namespace lite } // namespace lite
......
...@@ -22,6 +22,37 @@ namespace lite { ...@@ -22,6 +22,37 @@ namespace lite {
namespace kernels { namespace kernels {
namespace host { namespace host {
// Cross-checks the naive FC kernel against the Eigen kernel on a small
// deterministic problem: x(2x3) * w(4x3)^T + b(1x4).
TEST(fc_compute_naive, test) {
  Tensor x, w, b, out, out1;
  const int batch_size = 2;
  x.Resize({batch_size, 3});
  w.Resize({4, 3});
  b.Resize({1, 4});
  out.Resize({batch_size, 4});
  out1.Resize({batch_size, 4});

  auto x_data = x.mutable_data<float>();
  auto w_data = w.mutable_data<float>();
  auto b_data = b.mutable_data<float>();
  auto out_data = out.mutable_data<float>();
  auto out_data1 = out1.mutable_data<float>();

  // Fill inputs with deterministic ramps 0, 1, 2, ...
  for (int i = 0; i < product(x.dims()); i++) x_data[i] = i;
  for (int i = 0; i < product(w.dims()); i++) w_data[i] = i;
  for (int i = 0; i < product(b.dims()); i++) b_data[i] = i;

  fc_compute_naive(x_data, 3, batch_size,  //
                   w_data, 3, 4,           //
                   b_data, out_data);
  fc_compute_eigen(x_data, 3, batch_size,  //
                   w_data, 3, 4,           //
                   b_data, out_data1);

  // Fixed: compare element i on every iteration; the original compared
  // out_data[0] vs out_data1[0] repeatedly, so only element 0 was tested.
  for (int i = 0; i < product(out.dims()); i++) {
    EXPECT_NEAR(out_data[i], out_data1[i], 1e-6);
  }
}
TEST(fc_host, init) { TEST(fc_host, init) {
FcCompute fc; FcCompute fc;
ASSERT_EQ(fc.precision(), PRECISION(kFloat)); ASSERT_EQ(fc.precision(), PRECISION(kFloat));
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册