Commit a30b9743 authored by dingweihao

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle-Lite into dwh_dev

......@@ -97,7 +97,7 @@ function(compile_flatbuffers_schema_to_cpp_opt TARGET SRC_FBS OPT)
OUTPUT ${GEN_HEADER}
COMMAND "${FLATBUFFERS_FLATC_EXECUTABLE}"
--cpp --gen-mutable --gen-object-api --reflect-names
--cpp-ptr-type flatbuffers::unique_ptr # Used to test with C++98 STLs
--force-empty --force-empty-vectors
${OPT}
-I "${CMAKE_CURRENT_SOURCE_DIR}/tests/include_test"
-o "${CMAKE_CURRENT_SOURCE_DIR}/${SRC_FBS_DIR}"
......
......@@ -59,9 +59,9 @@ void TestModel(const std::vector<Place>& valid_places) {
}
auto* image_tensor = predictor.GetInput(1);
image_tensor->Resize(DDim(std::vector<DDim::value_type>({1, 2})));
data = image_tensor->mutable_data<float>();
data[0] = FLAGS_im_height;
data[1] = FLAGS_im_width;
auto* data_1 = image_tensor->mutable_data<int>();
data_1[0] = FLAGS_im_height;
data_1[1] = FLAGS_im_width;
for (int i = 0; i < FLAGS_warmup; ++i) {
predictor.Run();
......
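The fix above matters because the height/width tensor is integer-typed; writing floats through it stores float bit patterns that later read back as garbage integers. A minimal standalone C++ sketch of the failure mode (no Paddle-Lite types involved):

// Sketch: storing sizes as float bytes and reading them back as int
// yields garbage, which is why the tensor above is filled through
// mutable_data<int>() once the model expects integer dimensions.
#include <cstdio>
#include <cstring>

int main() {
  unsigned char buf[8];              // stand-in for the tensor storage
  float wrong[2] = {224.f, 224.f};   // written with the wrong type
  std::memcpy(buf, wrong, sizeof(wrong));
  int as_int[2];
  std::memcpy(as_int, buf, sizeof(as_int));
  // Prints the bit pattern of 224.f (1130364928), not 224.
  std::printf("%d %d\n", as_int[0], as_int[1]);
  return 0;
}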
......@@ -763,24 +763,6 @@ void act_thresholded_relu<float>(
}
}
#ifdef LITE_WITH_TRAIN
template <>
void act_square_grad(const float* din,
const float* dout_grad,
float* din_grad,
int size,
int threads) {
const float* ptr_out_grad = dout_grad;
float* ptr_in_grad = din_grad;
for (int i = 0; i < size; ++i) {
ptr_in_grad[0] = ptr_out_grad[0] * 2.0 * din[0];
ptr_out_grad++;
ptr_in_grad++;
din++;
}
}
#endif
} // namespace math
} // namespace arm
} // namespace lite
......
......@@ -90,12 +90,6 @@ template <typename T>
void act_thresholded_relu(
const T* din, T* dout, int size, float threshold, int threads);
#ifdef LITE_WITH_TRAIN
template <typename T>
void act_square_grad(
const T* din, const T* dout_grad, T* din_grad, int size, int threads);
#endif
} // namespace math
} // namespace arm
} // namespace lite
......
......@@ -192,7 +192,8 @@ void ConvBNFuser::InsertNewNode(SSAGraph* graph, const key2nodes_t& matched) {
} else if (is_weight_quantization) {
std::string scale_name = conv_weight_name + "_quant_scale";
if (conv_op_desc->HasAttr(scale_name)) {
auto scale = conv_op_desc->GetAttr<std::vector<float>>(scale_name);
std::vector<float> scale =
conv_op_desc->GetAttr<std::vector<float>>(scale_name);
CHECK_EQ(scale.size(), alpha_tensor.numel());
for (size_t i = 0; i < scale.size(); i++) {
scale[i] *= alpha_data[i];
......
......@@ -84,11 +84,12 @@ cpp::OpDesc TransposeSoftmaxTransposeFuser::GenOpDesc(
op_desc.SetInput("X", {matched.at("x1")->arg()->name});
op_desc.SetOutput("Out", {matched.at("out")->arg()->name});
op_desc.SetAttr("axis",
matched.at("transpose1")
*(matched.at("transpose1")
->stmt()
->op_info()
->GetAttr<std::vector<int>>("axis")
.back());
.end() -
1));
return op_desc;
}
......
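The fuser now takes the last transpose axis as *( ... .end() - 1) instead of .back(), which makes sense if GetAttr can return a lightweight view exposing only iterators. A hedged sketch with a hypothetical iterator-only view:

#include <cstddef>
#include <cstdio>

// Minimal iterator-only view (hypothetical stand-in for VectorView):
// it has begin()/end() but deliberately no back(), so the last
// element has to be taken as *(v.end() - 1).
struct IntView {
  const int* data;
  std::size_t n;
  const int* begin() const { return data; }
  const int* end() const { return data + n; }
};

int main() {
  int axis[] = {0, 2, 3, 1};
  IntView v{axis, 4};
  std::printf("last axis: %d\n", *(v.end() - 1));  // prints 1
  return 0;
}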
......@@ -62,15 +62,17 @@ std::string Visualize(mir::SSAGraph* graph) {
<< string_trunc(op_info->GetAttr<std::string>(attr_name)) << "\"";
break;
case AttrType::FLOATS: {
auto vals = op_info->GetAttr<std::vector<float>>(attr_name);
std::vector<float> vals =
op_info->GetAttr<std::vector<float>>(attr_name);
os << ":floats: {" + Join(vals, ",") << "}";
} break;
case AttrType::INTS: {
auto vals = op_info->GetAttr<std::vector<int>>(attr_name);
std::vector<int> vals = op_info->GetAttr<std::vector<int>>(attr_name);
os << ":ints: {" + Join(vals, ",") + "}";
} break;
case AttrType::STRINGS: {
auto vals = op_info->GetAttr<std::vector<std::string>>(attr_name);
std::vector<std::string> vals =
op_info->GetAttr<std::vector<std::string>>(attr_name);
os << ":strings: {" + string_trunc(Join(vals, ",")) << "}";
} break;
default:
......
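Replacing auto with an explicit std::vector<T> on these lines forces a conversion if GetAttr returns a view type rather than a real vector. A sketch of that C++ mechanism, using a hypothetical FloatView with a conversion operator (the actual return type in the tree may differ):

#include <cstddef>
#include <cstdio>
#include <vector>

// Hypothetical view returned by a GetAttr-style accessor.
struct FloatView {
  const float* data;
  std::size_t n;
  operator std::vector<float>() const {  // conversion to an owning vector
    return std::vector<float>(data, data + n);
  }
};

FloatView GetAttr() {
  static const float vals[] = {1.f, 2.f, 3.f};
  return FloatView{vals, 3};
}

int main() {
  auto a = GetAttr();                // a stays a FloatView (no copy made)
  std::vector<float> b = GetAttr();  // conversion operator runs: owning copy
  std::printf("view size %zu, vector size %zu\n", a.n, b.size());
  return 0;
}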
......@@ -195,7 +195,7 @@ void Program::Build(const cpp::ProgramDesc& prog) {
CHECK(ops_.empty()) << "Executor duplicate Build found";
// Create operators.
auto program = prog;
auto& program = prog;
CHECK(program.BlocksSize());
auto& main_block = *program.GetBlock<cpp::BlockDesc>(0);
for (size_t i = 0; i < main_block.OpsSize(); ++i) {
......@@ -262,7 +262,7 @@ void Program::PrepareWorkspace(const cpp::ProgramDesc& prog,
}
};
auto program = prog;
auto& program = prog;
CHECK(program.BlocksSize());
for (size_t b = 0; b < program.BlocksSize(); ++b) {
auto& main_block = *program.GetBlock<cpp::BlockDesc>(b);
......
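Both hunks swap auto program = prog; for auto& program = prog;, which binds a reference instead of deep-copying the whole program descriptor. A copy-counting sketch of the difference:

#include <cstdio>

struct Desc {
  static int copies;
  Desc() {}
  Desc(const Desc&) { ++copies; }
};
int Desc::copies = 0;

int main() {
  const Desc prog;
  auto by_value = prog;  // invokes the copy constructor
  auto& by_ref = prog;   // no copy, just an alias
  (void)by_value;
  (void)by_ref;
  std::printf("copies made: %d\n", Desc::copies);  // prints 1
  return 0;
}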
......@@ -46,7 +46,8 @@ struct Program {
const std::shared_ptr<Scope>& root,
const std::vector<Place>& valid_places,
const std::vector<std::string>& var_names = {})
: scope_(root), valid_places_(valid_places), desc_(desc) {
: scope_(root), valid_places_(valid_places) {
desc_.CopyFrom(desc);
CHECK(scope_) << "scope should be init first";
VLOG(4) << "prepare work";
PrepareWorkspace(desc, var_names);
......
......@@ -103,7 +103,6 @@ add_kernel(deformable_conv_compute_arm ARM extra SRCS deformable_conv_compute.cc
add_kernel(mean_compute_arm ARM extra SRCS mean_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(mean_grad_compute_arm ARM train SRCS mean_grad_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(activation_grad_compute_arm ARM train SRCS activation_grad_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(elementwise_grad_compute_arm ARM train SRCS elementwise_grad_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(mul_grad_compute_arm ARM train SRCS mul_grad_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(sgd_compute_arm ARM train SRCS sgd_compute.cc DEPS ${lite_kernel_deps} math_arm)
......
......@@ -13,6 +13,7 @@
// limitations under the License.
#include <bmcompiler_if.h>
#include <math.h>
#include "lite/kernels/bm/bridges/graph.h"
#include "lite/kernels/bm/bridges/utility.h"
#include "lite/kernels/npu/bridges/registry.h"
......@@ -64,10 +65,16 @@ int BatchNormConverter(void* ctx, OpLite* op, KernelBase* kernel) {
auto* bias_data = bias->mutable_data<float>();
auto* mean_data = mean->mutable_data<float>();
auto* variance_data = variance->mutable_data<float>();
float* new_bias = static_cast<float*>(malloc(bias->memory_size()));
float* new_scale = static_cast<float*>(malloc(scale->memory_size()));
CHECK(new_bias != nullptr);
CHECK(new_scale != nullptr);
for (int c = 0; c < channel_size; c++) {
float inv_scale = 1.f / (std::sqrt(variance_data[c] + epsilon));
bias_data[c] = bias_data[c] - inv_scale * scale_data[c] * mean_data[c];
scale_data[c] = inv_scale * scale_data[c];
new_bias[c] = bias_data[c] - inv_scale * scale_data[c] * mean_data[c];
new_scale[c] = inv_scale * scale_data[c];
}
const int input_num = 1;
......@@ -86,11 +93,13 @@ int BatchNormConverter(void* ctx, OpLite* op, KernelBase* kernel) {
output_dims.size(),
static_cast<const char*>(output_var_name.c_str()),
static_cast<const char*>(unique_op_name.c_str()),
static_cast<const float*>(scale->mutable_data<float>()),
static_cast<const float*>(bias->mutable_data<float>()),
static_cast<const float*>(new_scale),
static_cast<const float*>(new_bias),
1,
1,
1);
free(new_scale);
free(new_bias);
delete[] shape;
delete[] name;
delete[] dim;
......
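The converter now folds the batch-norm statistics into freshly allocated buffers instead of mutating the model weights in place. The folding arithmetic is the standard one: with s' = s / sqrt(var + eps), the op reduces to y = s' * x + (b - s' * mean). A standalone sketch:

#include <cmath>
#include <cstdio>
#include <vector>

int main() {
  const float epsilon = 1e-5f;
  std::vector<float> scale = {1.0f, 0.5f};
  std::vector<float> bias = {0.1f, 0.2f};
  std::vector<float> mean = {0.3f, -0.3f};
  std::vector<float> variance = {1.0f, 4.0f};
  std::vector<float> new_scale(2), new_bias(2);
  for (int c = 0; c < 2; ++c) {
    float inv_std = 1.f / std::sqrt(variance[c] + epsilon);
    new_scale[c] = inv_std * scale[c];
    new_bias[c] = bias[c] - new_scale[c] * mean[c];
  }
  // y = new_scale[c] * x + new_bias[c] now equals the batch-norm output.
  std::printf("scale: %f %f  bias: %f %f\n",
              new_scale[0], new_scale[1], new_bias[0], new_bias[1]);
  return 0;
}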
......@@ -13,6 +13,7 @@
// limitations under the License.
#include <bmcompiler_if.h>
#include <math.h>
#include "lite/kernels/bm/bridges/graph.h"
#include "lite/kernels/bm/bridges/utility.h"
#include "lite/kernels/npu/bridges/registry.h"
......
......@@ -76,6 +76,8 @@ int InterpolateConverter(void* ctx, OpLite* op, KernelBase* kernel) {
static_cast<const char*>(output_var_name.c_str()),
0,
0,
0,
0,
type);
}
graph->AddNode(output_var_name);
......
......@@ -76,9 +76,10 @@ bool SubgraphEngine::BuildDeviceProgram() {
return false;
}
}
std::string net_name = "bmnetc_f32umodel";
std::string net_name = "bmnet_f32bmodel";
auto unique_net_name = lite::subgraph::bm::UniqueName(net_name);
__bmcompile_opt(
graph.GetCompilerHandle(), const_cast<char*>(net_name.c_str()), 1);
graph.GetCompilerHandle(), const_cast<char*>(unique_net_name.c_str()), 2);
void* bmodel_data = nullptr;
unsigned int data_size = 0;
bm_hd_ = static_cast<bm_handle_t>(ctx.GetHandle());
......
......@@ -19,6 +19,7 @@ add_kernel(read_from_array_compute_host Host extra SRCS read_from_array_compute.
add_kernel(assign_compute_host Host extra SRCS assign_compute.cc DEPS ${lite_kernel_deps})
add_kernel(retinanet_detection_output_compute_host Host extra SRCS retinanet_detection_output_compute.cc DEPS ${lite_kernel_deps})
add_kernel(where_index_compute_host Host extra SRCS where_index_compute.cc DEPS ${lite_kernel_deps})
add_kernel(activation_grad_compute_host Host train SRCS activation_grad_compute.cc DEPS ${lite_kernel_deps})
if(LITE_BUILD_EXTRA)
lite_cc_test(test_where_index_compute_host SRCS where_index_compute.cc DEPS where_index_compute_host)
......
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/host/activation_grad_compute.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace host {
void SquareGradCompute::Run() {
auto& param = this->Param<param_t>();
CHECK(param.X);
auto out_grad_dims = param.Out_grad->dims();
auto out_grad_data = param.Out_grad->data<float>();
auto x_data = param.X->data<float>();
auto x_grad_data = param.X_grad->mutable_data<float>();
for (int i = 0; i < out_grad_dims.production(); i++) {
x_grad_data[i] = out_grad_data[i] * 2.0 * x_data[i];
}
}
void ReluGradCompute::Run() {
auto& param = this->Param<param_t>();
CHECK(param.X);
auto out_grad_dims = param.Out_grad->dims();
auto out_grad_data = param.Out_grad->data<float>();
auto x_data = param.X->data<float>();
auto x_grad_data = param.X_grad->mutable_data<float>();
for (int i = 0; i < out_grad_dims.production(); i++) {
x_grad_data[i] = x_data[i] > 0 ? out_grad_data[i] : 0.0;
}
}
void TanhGradCompute::Run() {
auto& param = this->Param<param_t>();
CHECK(param.Out);
auto out_grad_dims = param.Out_grad->dims();
auto out_grad_data = param.Out_grad->data<float>();
auto out_data = param.Out->data<float>();
auto x_grad_data = param.X_grad->mutable_data<float>();
for (int i = 0; i < out_grad_dims.production(); i++) {
x_grad_data[i] = out_grad_data[i] *
(static_cast<float>(1.0) - out_data[i] * out_data[i]);
}
}
} // namespace host
} // namespace kernels
} // namespace lite
} // namespace paddle
REGISTER_LITE_KERNEL(square_grad,
kHost,
kFloat,
kNCHW,
paddle::lite::kernels::host::SquareGradCompute,
def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kHost))})
.BindInput("Out@GRAD", {LiteType::GetTensorTy(TARGET(kHost))})
.BindOutput("X@GRAD", {LiteType::GetTensorTy(TARGET(kHost))})
.Finalize();
REGISTER_LITE_KERNEL(relu_grad,
kHost,
kFloat,
kNCHW,
paddle::lite::kernels::host::ReluGradCompute,
def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kHost))})
.BindInput("Out@GRAD", {LiteType::GetTensorTy(TARGET(kHost))})
.BindOutput("X@GRAD", {LiteType::GetTensorTy(TARGET(kHost))})
.Finalize();
REGISTER_LITE_KERNEL(tanh_grad,
kHost,
kFloat,
kNCHW,
paddle::lite::kernels::host::TanhGradCompute,
def)
.BindInput("Out", {LiteType::GetTensorTy(TARGET(kHost))})
.BindInput("Out@GRAD", {LiteType::GetTensorTy(TARGET(kHost))})
.BindOutput("X@GRAD", {LiteType::GetTensorTy(TARGET(kHost))})
.Finalize();
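The three kernels above apply the usual elementwise gradient rules: d(x^2)/dx = 2x, relu'(x) = 1 for x > 0 and 0 otherwise, and tanh'(x) = 1 - tanh(x)^2 expressed through the forward output. A minimal sketch of the same loops on plain arrays:

#include <cstdio>

// out_grad is dL/dout; each function writes dL/dx, mirroring the
// SquareGradCompute / ReluGradCompute / TanhGradCompute loops above.
void square_grad(const float* x, const float* out_grad, float* x_grad, int n) {
  for (int i = 0; i < n; ++i) x_grad[i] = out_grad[i] * 2.0f * x[i];
}
void relu_grad(const float* x, const float* out_grad, float* x_grad, int n) {
  for (int i = 0; i < n; ++i) x_grad[i] = x[i] > 0 ? out_grad[i] : 0.0f;
}
void tanh_grad(const float* out, const float* out_grad, float* x_grad, int n) {
  for (int i = 0; i < n; ++i) x_grad[i] = out_grad[i] * (1.0f - out[i] * out[i]);
}

int main() {
  float x[3] = {-1.f, 0.5f, 2.f}, g[3] = {1.f, 1.f, 1.f}, dx[3];
  square_grad(x, g, dx, 3);
  std::printf("square grad: %f %f %f\n", dx[0], dx[1], dx[2]);  // -2 1 4
  return 0;
}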
......@@ -20,9 +20,9 @@
namespace paddle {
namespace lite {
namespace kernels {
namespace arm {
namespace host {
class SquareGradCompute : public KernelLite<TARGET(kARM), PRECISION(kFloat)> {
class SquareGradCompute : public KernelLite<TARGET(kHost), PRECISION(kFloat)> {
public:
using param_t = operators::ActivationGradParam;
......@@ -31,7 +31,25 @@ class SquareGradCompute : public KernelLite<TARGET(kARM), PRECISION(kFloat)> {
virtual ~SquareGradCompute() = default;
};
} // namespace arm
class ReluGradCompute : public KernelLite<TARGET(kHost), PRECISION(kFloat)> {
public:
using param_t = operators::ActivationGradParam;
void Run() override;
virtual ~ReluGradCompute() = default;
};
class TanhGradCompute : public KernelLite<TARGET(kHost), PRECISION(kFloat)> {
public:
using param_t = operators::ActivationGradParam;
void Run() override;
virtual ~TanhGradCompute() = default;
};
} // namespace host
} // namespace kernels
} // namespace lite
} // namespace paddle
......@@ -107,8 +107,7 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
CNML_FILTER,
CNML_NCHW,
graph->FPType());
const auto weight_scale =
op_info->GetAttr<std::vector<float>>("weight_scale");
const auto weight_scale = op_info->GetInputScale(filter_var_name);
if (filter->precision() == PrecisionType::kUnk ||
filter->precision() == PrecisionType::kInt8) {
......@@ -162,7 +161,7 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
graph->BindConstData(bias_var_name, bias);
}
const auto input_scale = op_info->GetAttr<float>("input_scale");
const auto input_scale = op_info->GetInputScale(input_var_name)[0];
bool use_first_conv = false;
if (lite::TargetWrapperMlu::UseFirstConv() && input_dims[1] == 3) {
......
......@@ -224,8 +224,10 @@ void test_conv(int bs,
opdesc_mlu.SetAttr("groups", groups);
opdesc_mlu.SetAttr("fuse_relu", static_cast<bool>(fuse_relu));
opdesc_mlu.SetAttr("weight_scale", std::vector<float>(oc, filter_scale));
opdesc_mlu.SetAttr("input_scale", input_scale);
OpInfo op_info(opdesc_mlu);
op_info.SetInputScale(filter_int_var_name,
std::vector<float>(oc, filter_scale));
op_info.SetInputScale(input_var_name, {input_scale});
if (has_bias) {
if (is_channel_bias) {
......@@ -234,7 +236,7 @@ void test_conv(int bs,
bias->Resize({output_shape});
}
FillTensor<float>(bias);
opdesc_mlu.SetInput("Bias", {bias_var_name});
op_info.SetInput("Bias", {bias_var_name});
}
for (int i = 0; i < bs; i++) {
......@@ -248,7 +250,7 @@ void test_conv(int bs,
}
// create and convert op to MLU model, then run it on MLU
auto op = CreateOp<operators::ConvOpLite>(opdesc_mlu, &scope);
auto op = CreateOp<operators::ConvOpLite>(op_info, &scope);
LaunchOp(op, {input_var_name}, {output_var_name});
// compare results
auto* output_data = output->mutable_data<float>();
......
......@@ -68,7 +68,7 @@ int FCConverter(void* ctx, OpLite* op, KernelBase* kernel) {
auto w_tensor = graph->AddNode(
w_var_name, cnml_w_shape, CNML_FILTER, CNML_NCHW, graph->FPType());
auto input_scale = op_info->GetAttr<float>("input_scale");
auto input_scale = op_info->GetInputScale(x_var_name)[0];
auto output_tensor = graph->AddNode(output_var_name,
output->dims().Vectorize(),
......@@ -101,7 +101,7 @@ int FCConverter(void* ctx, OpLite* op, KernelBase* kernel) {
bias_tensor ? bias_tensor->mlu_tensor() : nullptr));
graph->SetComputingDataType(
fc_op, graph->GetNode(x_var_name)->mlu_tensor(), 1 / input_scale);
auto weight_scale = op_info->GetAttr<std::vector<float>>("weight_scale");
auto weight_scale = op_info->GetInputScale(w_var_name);
// LOG(INFO) << "W precision " << int(w->precision());
if (w->precision() == PrecisionType::kUnk ||
......
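These converters stop reading input_scale/weight_scale attributes and instead ask op_info for scales keyed by the input variable name. A hedged sketch of that bookkeeping with a plain std::map; the names here are illustrative, not the real OpInfo API:

#include <cassert>
#include <map>
#include <string>
#include <utility>
#include <vector>

// Hypothetical scale registry: one scale vector per input variable name,
// mimicking the SetInputScale / GetInputScale pattern used above.
class ScaleRegistry {
 public:
  void SetInputScale(const std::string& var, std::vector<float> scale) {
    scales_[var] = std::move(scale);
  }
  const std::vector<float>& GetInputScale(const std::string& var) const {
    auto it = scales_.find(var);
    assert(it != scales_.end() && "no scale recorded for this input");
    return it->second;
  }

 private:
  std::map<std::string, std::vector<float>> scales_;
};

int main() {
  ScaleRegistry info;
  info.SetInputScale("x", {0.0123f});           // per-tensor scale
  info.SetInputScale("w", {0.1f, 0.2f, 0.3f});  // per-channel scales
  float input_scale = info.GetInputScale("x")[0];
  return input_scale > 0.f ? 0 : 1;
}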
......@@ -131,14 +131,15 @@ void test_fc(const std::vector<int64_t>& input_shape,
fc_op_desc_mlu.SetOutput("Out", {out_var_name});
fc_op_desc_mlu.SetAttr("in_num_col_dims", static_cast<int>(in_num_col_dims));
fc_op_desc_mlu.SetAttr("weight_scale",
OpInfo op_info(fc_op_desc_mlu);
op_info.SetInputScale(w_int_var_name,
std::vector<float>(w_shape[1], w_scale));
fc_op_desc_mlu.SetAttr("input_scale", input_scale);
op_info.SetInputScale(input_var_name, {input_scale});
if (has_bias) {
fc_op_desc_mlu.SetInput("Bias", {bias_var_name});
op_info.SetInput("Bias", {bias_var_name});
}
auto fc_op_mlu = CreateOp<operators::FcOpLite>(fc_op_desc_mlu, &scope);
auto fc_op_mlu = CreateOp<operators::FcOpLite>(op_info, &scope);
Tensor input_tmp, out_tmp;
input_tmp.Resize(input_shape);
......
......@@ -49,8 +49,7 @@ int LrnConverter(void* ctx, OpLite* op, KernelBase* kernel) {
<< "Unsuport WithinChannel";
}
auto local_size = op_info->GetAttr<int>("n");
CHECK(op_info->HasAttr("input_scale"));
auto input_scale = op_info->GetAttr<float>("input_scale");
auto input_scale = op_info->GetInputScale(x_var_name)[0];
VLOG(5) << "lrn input scale: " << input_scale;
cnmlLrnOpParam_t param;
......
......@@ -178,9 +178,10 @@ void test_lrn(float alpha,
opdesc.SetAttr("k", k);
opdesc.SetAttr("n", local_size);
opdesc.SetAttr("norm_region", norm_region);
opdesc.SetAttr<float>("input_scale", (*dmax - *dmin) / 255.f);
OpInfo op_info(opdesc);
op_info.SetInputScale(x_var_name, {(*dmax - *dmin) / 255.f});
auto op = CreateOp<operators::LrnOpLite>(opdesc, &scope);
auto op = CreateOp<operators::LrnOpLite>(op_info, &scope);
// baseline
lrn_compute_ref(op);
......@@ -213,7 +214,7 @@ void test_lrn(float alpha,
auto output_data = output_trans.mutable_data<float>();
auto* output_ref_data = out_ref->mutable_data<float>();
for (size_t i = 0; i < out->data_size(); i++) {
EXPECT_NEAR(output_data[i], output_ref_data[i], 1e-4);
EXPECT_NEAR(output_data[i], output_ref_data[i], 5e-4);
}
}
......
......@@ -54,10 +54,11 @@ class SubgraphEngine : public subgraph::Engine {
VLOG(4) << "[MLU] PADDLE_LITE_MLU_SAVE_OFFLINE_MODEL is "
<< GetBoolFromEnv("PADDLE_LITE_MLU_SAVE_OFFLINE_MODEL");
VLOG(4) << "[MLU] PADDLE_LITE_MLU_DISABLE_BATCH_SIZE_CHANGEABLE is "
<< GetBoolFromEnv("PADDLE_LITE_MLU_DISABLE_BATCH_SIZE_CHANGEABLE");
<< GetBoolFromEnv("PADDLE_LITE_MLU_DISABLE_BATCH_SIZE_CHANGEABLE",
true);
VLOG(4) << "[MLU] LITE_DISABLE_MLU_CAST is "
<< GetBoolFromEnv("LITE_DISABLE_MLU_CAST");
if (GetBoolFromEnv("PADDLE_LITE_MLU_DISABLE_BATCH_SIZE_CHANGEABLE")) {
if (GetBoolFromEnv("PADDLE_LITE_MLU_DISABLE_BATCH_SIZE_CHANGEABLE", true)) {
disable_batch_size_changeable_ = true;
}
}
......
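GetBoolFromEnv now takes an explicit default of true, so an unset PADDLE_LITE_MLU_DISABLE_BATCH_SIZE_CHANGEABLE behaves as if set. A sketch of the env-with-default pattern, assuming a hypothetical helper name:

#include <cstdio>
#include <cstdlib>
#include <cstring>

// Return the boolean value of an environment variable, falling back to
// a caller-supplied default when the variable is unset.
bool GetBoolFromEnvSketch(const char* name, bool default_value) {
  const char* v = std::getenv(name);
  if (v == nullptr) return default_value;
  return std::strcmp(v, "1") == 0 || std::strcmp(v, "true") == 0 ||
         std::strcmp(v, "ON") == 0;
}

int main() {
  // Unset variables now default to true instead of false.
  bool disable = GetBoolFromEnvSketch(
      "PADDLE_LITE_MLU_DISABLE_BATCH_SIZE_CHANGEABLE", true);
  std::printf("disable_batch_size_changeable: %d\n", disable);
  return 0;
}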
......@@ -54,10 +54,16 @@ class BlockDescWriteAPI {
virtual void SetForwardBlockIdx(int32_t idx) { NotImplemented(); }
template <typename T>
T* AddVar();
T* AddVar() {
NotImplemented();
return nullptr;
}
template <typename T>
T* AddOp();
T* AddOp() {
NotImplemented();
return nullptr;
}
virtual ~BlockDescWriteAPI() = default;
......
......@@ -73,7 +73,9 @@ class OpDescWriteAPI {
}
template <typename T>
void SetAttr(const std::string& name, const T& v);
void SetAttr(const std::string& name, const T& v) {
NotImplemented();
}
virtual ~OpDescWriteAPI() = default;
......
......@@ -40,7 +40,10 @@ class ProgramDescWriteAPI {
virtual void SetVersion(int64_t version) { NotImplemented(); }
template <typename T>
T* AddBlock();
T* AddBlock() {
NotImplemented();
return nullptr;
}
virtual ~ProgramDescWriteAPI() = default;
......
......@@ -57,6 +57,7 @@ class VectorView {
public:
typedef vector_view::VectorTraits<T, U> Traits;
explicit VectorView(typename Traits::vector_type const* cvec) {
CHECK(cvec);
cvec_ = cvec;
}
typename Traits::subscript_return_type operator[](size_t i) const {
......
......@@ -277,7 +277,7 @@ void OpAttrsCppToAny(const cpp::OpDesc &cpp_desc, OpDescType *any_desc) {
template <> \
void TransformProgramDescCppToAny<NT::T>(const cpp::T &cpp_desc, \
NT::T *any_desc) { \
auto desc = cpp_desc; \
auto &desc = cpp_desc; \
if (desc.HasVersion()) { \
any_desc->SetVersion(desc.Version()); \
} \
......
......@@ -8,9 +8,6 @@ endfunction()
lite_fbs_library(fbs_op_desc SRCS op_desc.cc FBS_DEPS framework_fbs_header)
lite_fbs_library(fbs_var_desc SRCS var_desc.cc FBS_DEPS framework_fbs_header)
lite_fbs_library(fbs_block_desc SRCS block_desc.cc FBS_DEPS framework_fbs_header)
lite_fbs_library(fbs_program_desc SRCS program_desc.cc FBS_DEPS framework_fbs_header)
lite_cc_test(test_vector_view SRCS vector_view_test.cc)
if (TARGET test_vector_view)
add_dependencies(test_vector_view framework_fbs_header)
endif()
lite_cc_library(fbs_program_desc SRCS program_desc.cc DEPS fbs_op_desc fbs_var_desc fbs_block_desc)
lite_cc_library(fbs_io SRCS io.cc DEPS fbs_program_desc)
lite_cc_test(test_vector_view SRCS vector_view_test.cc DEPS fbs_program_desc)
......@@ -19,15 +19,27 @@ namespace lite {
namespace fbs {
template <>
proto::VarDesc* BlockDesc::GetVar<proto::VarDesc>(int32_t idx) {
proto::VarDesc const* BlockDesc::GetVar<proto::VarDesc>(int32_t idx) const {
CHECK_LT(idx, VarsSize()) << "idx >= vars.size()";
return const_cast<proto::VarDesc*>(desc_->vars()->Get(idx));
return desc_->vars()->Get(idx);
}
template <>
proto::OpDesc* BlockDesc::GetOp<proto::OpDesc>(int32_t idx) {
proto::OpDesc const* BlockDesc::GetOp<proto::OpDesc>(int32_t idx) const {
CHECK_LT(idx, OpsSize()) << "idx >= ops.size()";
return const_cast<proto::OpDesc*>(desc_->ops()->Get(idx));
return desc_->ops()->Get(idx);
}
template <>
VarDesc const* BlockDesc::GetVar<VarDesc>(int32_t idx) const {
CHECK_LT(idx, VarsSize()) << "idx >= vars.size()";
return &vars_[idx];
}
template <>
OpDesc const* BlockDesc::GetOp<OpDesc>(int32_t idx) const {
CHECK_LT(idx, OpsSize()) << "idx >= ops.size()";
return &ops_[idx];
}
} // namespace fbs
......
......@@ -14,8 +14,11 @@
#pragma once
#include <vector>
#include "lite/model_parser/base/block_desc.h"
#include "lite/model_parser/flatbuffers/framework_generated.h"
#include "lite/model_parser/flatbuffers/op_desc.h"
#include "lite/model_parser/flatbuffers/var_desc.h"
#include "lite/utils/all.h"
namespace paddle {
......@@ -24,7 +27,17 @@ namespace fbs {
class BlockDesc : public BlockDescAPI {
public:
explicit BlockDesc(proto::BlockDesc* desc) : desc_(desc) { CHECK(desc_); }
explicit BlockDesc(proto::BlockDesc const* desc) : desc_(desc) {
CHECK(desc_);
vars_.reserve(VarsSize());
ops_.reserve(OpsSize());
for (size_t idx = 0; idx < VarsSize(); ++idx) {
vars_.push_back(VarDesc(desc_->vars()->Get(idx)));
}
for (size_t idx = 0; idx < OpsSize(); ++idx) {
ops_.push_back(OpDesc(desc_->ops()->Get(idx)));
}
}
int32_t Idx() const override { return desc_->idx(); }
......@@ -33,11 +46,12 @@ class BlockDesc : public BlockDescAPI {
size_t VarsSize() const override { return desc_->vars()->size(); }
template <typename T>
T* GetVar(int32_t idx);
T const* GetVar(int32_t idx) const;
template <typename T>
T const* GetVar(int32_t idx) const {
return GetVar<T>(idx);
T* GetVar(int32_t idx) {
NotImplemented();
return nullptr;
}
size_t OpsSize() const override {
......@@ -47,21 +61,32 @@ class BlockDesc : public BlockDescAPI {
}
template <typename T>
T* GetOp(int32_t idx);
T const* GetOp(int32_t idx) const;
template <typename T>
T const* GetOp(int32_t idx) const {
return GetOp<T>(idx);
T* GetOp(int32_t idx) {
NotImplemented();
return nullptr;
}
const std::vector<VarDesc>& GetVars() const { return vars_; }
int32_t ForwardBlockIdx() const override {
return desc_->forward_block_idx();
}
BlockDesc() = delete;
BlockDesc() { NotImplemented(); }
private:
proto::BlockDesc const* desc_; // not_own
std::vector<VarDesc> vars_;
std::vector<OpDesc> ops_;
private:
proto::BlockDesc* desc_; // not_own
void NotImplemented() const {
LOG(FATAL) << "The additional interfaces of BlockDesc is temporarily "
"unavailable in read-only mode.";
}
};
} // namespace fbs
......
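The flatbuffers BlockDesc is turned into a read-only view: it keeps a const raw pointer, eagerly caches child views, serves const getters from the cache, and aborts on any mutating call. A compact sketch of the pattern with stand-in types:

#include <cstdio>
#include <cstdlib>
#include <vector>

struct RawVar { int id; };  // stand-in for proto::VarDesc

struct VarView {            // stand-in for fbs::VarDesc
  explicit VarView(const RawVar* v) : raw(v) {}
  const RawVar* raw;
};

class BlockView {
 public:
  explicit BlockView(const std::vector<RawVar>* raw) : raw_(raw) {
    vars_.reserve(raw_->size());  // eager child-view cache
    for (const RawVar& v : *raw_) vars_.push_back(VarView(&v));
  }
  const VarView* GetVar(int idx) const { return &vars_[idx]; }
  VarView* GetVar(int idx) {  // mutating path is unavailable
    NotImplemented();
    return nullptr;
  }

 private:
  void NotImplemented() const {
    std::fprintf(stderr, "mutation unavailable in read-only mode\n");
    std::abort();
  }
  const std::vector<RawVar>* raw_;  // not owned
  std::vector<VarView> vars_;
};

int main() {
  std::vector<RawVar> raw = {{1}, {2}};
  const BlockView block(&raw);
  std::printf("var id: %d\n", block.GetVar(1)->raw->id);  // const path works
  return 0;
}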
......@@ -12,41 +12,26 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/arm/activation_grad_compute.h"
#include "lite/backends/arm/math/funcs.h"
#include "lite/model_parser/flatbuffers/io.h"
#include <memory>
#include <utility>
namespace paddle {
namespace lite {
namespace kernels {
namespace arm {
namespace fbs {
void SquareGradCompute::Run() {
auto& param = this->Param<param_t>();
auto& ctx = this->ctx_->template As<ARMContext>();
auto out_grad_dims = param.Out_grad->dims();
auto out_grad_data = param.Out_grad->data<float>();
auto x_data = param.X->data<float>();
auto x_grad_data = param.X_grad->mutable_data<float>();
lite::arm::math::act_square_grad<float>(x_data,
out_grad_data,
x_grad_data,
out_grad_dims.production(),
ctx.threads());
void LoadModel(const std::string& path, ProgramDesc* prog) {
FILE* file = fopen(path.c_str(), "rb");
fseek(file, 0, SEEK_END);
int64_t size = ftell(file);
rewind(file);
char* data = new char[size];
size = fread(data, 1, size, file);
fclose(file);
std::unique_ptr<char[]> buf(data);
prog->Init(std::move(buf));
}
} // namespace arm
} // namespace kernels
} // namespace fbs
} // namespace lite
} // namespace paddle
REGISTER_LITE_KERNEL(square_grad,
kARM,
kFloat,
kNCHW,
paddle::lite::kernels::arm::SquareGradCompute,
def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kARM))})
.BindInput("Out@GRAD", {LiteType::GetTensorTy(TARGET(kARM))})
.BindOutput("X@GRAD", {LiteType::GetTensorTy(TARGET(kARM))})
.Finalize();
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include "lite/model_parser/flatbuffers/program_desc.h"
namespace paddle {
namespace lite {
namespace fbs {
void LoadModel(const std::string& path, ProgramDesc* prog);
} // namespace fbs
} // namespace lite
} // namespace paddle
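LoadModel above slurps the model file with fopen/fseek/ftell/fread and hands the buffer to ProgramDesc::Init. A hedged standalone variant with the error handling made explicit (ifstream-based, not the Paddle-Lite implementation):

#include <cstddef>
#include <fstream>
#include <memory>
#include <stdexcept>
#include <string>
#include <utility>

// Read a whole binary file into a heap buffer, throwing on failure.
// A flatbuffer loader would then hand the buffer to something like
// ProgramDesc::Init.
std::pair<std::unique_ptr<char[]>, std::size_t> ReadFile(
    const std::string& path) {
  std::ifstream file(path, std::ios::binary | std::ios::ate);
  if (!file) throw std::runtime_error("cannot open: " + path);
  std::streamsize size = file.tellg();
  file.seekg(0, std::ios::beg);
  std::unique_ptr<char[]> buf(new char[size]);
  if (!file.read(buf.get(), size))
    throw std::runtime_error("short read: " + path);
  return {std::move(buf), static_cast<std::size_t>(size)};
}

int main(int argc, char** argv) {
  if (argc < 2) return 0;
  auto model = ReadFile(argv[1]);
  return model.second > 0 ? 0 : 1;
}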
......@@ -30,7 +30,7 @@ namespace fbs {
class OpDesc : public OpDescAPI {
public:
explicit OpDesc(proto::OpDesc* desc) : desc_(desc) { CHECK(desc_); }
explicit OpDesc(proto::OpDesc const* desc) : desc_(desc) { CHECK(desc_); }
std::string Type() const override { return desc_->type()->str(); }
......@@ -95,7 +95,7 @@ class OpDesc : public OpDescAPI {
OpDescAPI::AttrType GetAttrType(const std::string& name) const override {
const auto& attr = desc_->attrs()->LookupByKey(name.c_str());
CHECK(attr);
CHECK(attr) << "Can not find attr: " << name;
return static_cast<OpDescAPI::AttrType>(attr->type());
}
......@@ -124,10 +124,8 @@ class OpDesc : public OpDescAPI {
template <typename T>
typename lite::OpDataTypeTrait<T, Flatbuffers>::RT GetAttr(size_t idx) const;
OpDesc() = delete;
private:
proto::OpDesc* desc_;
proto::OpDesc const* desc_;
// To reduce overhead, we expect to use namespace aliasing to make cpp::Desc
// and flatbuffers::Desc replace each other. However, there is no direct
......@@ -138,6 +136,7 @@ class OpDesc : public OpDescAPI {
// caused by different building options.
public:
OpDesc() { NotImplemented(); }
bool HasInput(const std::string& param) const {
return desc_->inputs()->LookupByKey(param.c_str()) != nullptr;
}
......
......@@ -19,9 +19,16 @@ namespace lite {
namespace fbs {
template <>
proto::BlockDesc* ProgramDesc::GetBlock<proto::BlockDesc>(int32_t idx) {
proto::BlockDesc const* ProgramDesc::GetBlock<proto::BlockDesc>(
int32_t idx) const {
CHECK_LT(idx, BlocksSize()) << "idx >= blocks.size()";
return const_cast<proto::BlockDesc*>(desc_->blocks()->Get(idx));
return desc_->blocks()->Get(idx);
}
template <>
BlockDesc const* ProgramDesc::GetBlock<BlockDesc>(int32_t idx) const {
CHECK_LT(idx, BlocksSize()) << "idx >= blocks.size()";
return &blocks_[idx];
}
} // namespace fbs
......
......@@ -15,7 +15,10 @@
#pragma once
#include <memory>
#include <utility>
#include <vector>
#include "lite/model_parser/base/program_desc.h"
#include "lite/model_parser/flatbuffers/block_desc.h"
#include "lite/model_parser/flatbuffers/framework_generated.h"
#include "lite/utils/all.h"
......@@ -26,18 +29,40 @@ namespace fbs {
class ProgramDesc : public ProgramDescAPI {
public:
ProgramDesc() = default;
explicit ProgramDesc(proto::ProgramDesc *desc) : desc_(desc) { CHECK(desc); }
explicit ProgramDesc(std::unique_ptr<const char[]> buf) {
Init(std::move(buf));
}
size_t BlocksSize() const override { return desc_->blocks()->size(); }
void Init(std::unique_ptr<const char[]> buf) {
CHECK(buf.get() != nullptr);
buf_ = std::move(buf);
desc_ = proto::GetProgramDesc(buf_.get());
blocks_.reserve(BlocksSize());
for (size_t idx = 0; idx < BlocksSize(); ++idx) {
blocks_.push_back(BlockDesc(desc_->blocks()->Get(idx)));
}
}
void CopyFrom(const ProgramDesc& other) {
size_t length = strlen(static_cast<const char*>(other.raw_buf()));
std::unique_ptr<char[]> buf(new char[length]);
memcpy(buf.get(), other.raw_buf(), length);
Init(std::move(buf));
}
template <typename T>
T *GetBlock(int32_t idx);
T const* GetBlock(int32_t idx) const;
template <typename T>
T const *GetBlock(int32_t idx) const {
return GetBlock<T>(idx);
T* GetBlock(int32_t idx) {
NotImplemented();
return nullptr;
}
const std::vector<BlockDesc>& GetBlocks() const { return blocks_; }
bool HasVersion() const override { return desc_->version() != nullptr; }
int64_t Version() const override {
......@@ -45,8 +70,22 @@ class ProgramDesc : public ProgramDescAPI {
return desc_->version()->version();
}
proto::ProgramDesc const* raw_desc() const { return desc_; }
const void* raw_buf() const { return buf_.get(); }
private:
proto::ProgramDesc *desc_; // not_own
proto::ProgramDesc const* desc_;
std::unique_ptr<const char[]> buf_;
std::vector<BlockDesc> blocks_;
private:
ProgramDesc& operator=(const ProgramDesc&) = delete;
ProgramDesc(const ProgramDesc&) = delete;
void NotImplemented() const {
LOG(FATAL) << "The additional interfaces of ProgramDesc is temporarily "
"unavailable in read-only mode.";
}
};
} // namespace fbs
......
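One caveat with CopyFrom above: strlen measures up to the first zero byte, and a serialized flatbuffer generally contains zeros, so a robust copy has to carry the buffer length explicitly. A sketch of a size-tracking buffer; the class name is illustrative:

#include <cstddef>
#include <cstring>
#include <memory>

// Owns a binary buffer together with its length, so copies never rely
// on strlen over binary data.
class Buffer {
 public:
  Buffer() = default;
  Buffer(const void* data, std::size_t size)
      : data_(size ? new char[size] : nullptr), size_(size) {
    if (size) std::memcpy(data_.get(), data, size);
  }
  Buffer(const Buffer& other) : Buffer(other.data_.get(), other.size_) {}
  const char* data() const { return data_.get(); }
  std::size_t size() const { return size_; }

 private:
  std::unique_ptr<char[]> data_;
  std::size_t size_ = 0;
};

int main() {
  const char raw[] = {'\x08', '\0', '\0', '\x10'};  // embedded zero bytes
  Buffer a(raw, sizeof(raw));
  Buffer b = a;  // copies all 4 bytes, not just the first
  return b.size() == sizeof(raw) ? 0 : 1;
}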
......@@ -27,7 +27,7 @@ namespace fbs {
class VarDesc : public VarDescAPI {
public:
explicit VarDesc(proto::VarDesc* desc) : desc_(desc) {}
explicit VarDesc(proto::VarDesc const* desc) : desc_(desc) {}
std::string Name() const override { return desc_->name()->str(); }
......@@ -48,10 +48,14 @@ class VarDesc : public VarDescAPI {
return dims_vec;
}
VarDesc() = delete;
VarDescAPI::Type GetDataType() const {
CHECK(GetType() == VarDescAPI::Type::LOD_TENSOR);
return static_cast<VarDescAPI::Type>(
desc_->type()->lod_tensor()->tensor()->data_type());
}
private:
proto::VarDesc* desc_;
proto::VarDesc const* desc_;
// To reduce overhead, we expect to use namespace aliasing to make cpp::Desc
// and flatbuffers::Desc replace each other. However, there is no direct
......@@ -62,10 +66,7 @@ class VarDesc : public VarDescAPI {
// caused by different building options.
public:
VarDescAPI::Type GetDataType() const {
NotImplemented();
return data_type_;
}
VarDesc() { NotImplemented(); }
void SetDataType(Type data_type) { NotImplemented(); }
void SetShape(const std::vector<int64_t>& dims) { NotImplemented(); }
......@@ -74,7 +75,6 @@ class VarDesc : public VarDescAPI {
LOG(FATAL) << "The additional interfaces of VarDesc is temporarily "
"unavailable in read-only mode.";
}
Type data_type_;
std::vector<int64_t> shape_;
};
......
......@@ -104,21 +104,33 @@ class VectorView<std::string, Flatbuffers> {
explicit VectorView(typename Traits::vector_type const* cvec) {
cvec_ = cvec;
}
std::string operator[](size_t i) const { return cvec_->operator[](i)->str(); }
std::string operator[](size_t i) const {
CHECK(cvec_);
return cvec_->operator[](i)->str();
}
vector_view::FBSStrIterator begin() const {
CHECK(cvec_);
return vector_view::FBSStrIterator(cvec_->begin());
}
vector_view::FBSStrIterator end() const {
CHECK(cvec_);
return vector_view::FBSStrIterator(cvec_->end());
}
size_t size() const { return cvec_->size(); }
size_t size() const {
if (cvec_ == nullptr) {
return 0;
}
return cvec_->size();
}
operator std::vector<std::string>() const {
VLOG(5) << "Copying elements out of VectorView will damage performance.";
std::vector<std::string> tmp;
tmp.reserve(cvec_->size());
tmp.reserve(size());
if (cvec_ != nullptr) {
for (auto val : *cvec_) {
tmp.push_back(val->str());
}
}
return tmp;
}
~VectorView() = default;
......
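The added guards reflect that an absent flatbuffers vector field comes back as a null pointer, not an empty vector. A sketch of a view whose size() and copy-out tolerate null, with an ordinary std::vector standing in for the raw flatbuffers type:

#include <cassert>
#include <cstddef>
#include <string>
#include <vector>

// Hypothetical raw vector as a flatbuffers accessor might return it:
// nullptr when the field is absent.
using RawStrings = std::vector<std::string>;

class StringsView {
 public:
  explicit StringsView(const RawStrings* cvec) : cvec_(cvec) {}
  std::size_t size() const { return cvec_ ? cvec_->size() : 0; }  // null-safe
  std::string operator[](std::size_t i) const {
    assert(cvec_ != nullptr);  // indexing an absent field is a hard failure
    return (*cvec_)[i];
  }
  operator std::vector<std::string>() const {  // guarded copy-out
    std::vector<std::string> tmp;
    tmp.reserve(size());
    if (cvec_ != nullptr)
      for (const auto& s : *cvec_) tmp.push_back(s);
    return tmp;
  }

 private:
  const RawStrings* cvec_;
};

int main() {
  StringsView absent(nullptr);
  std::vector<std::string> copied = absent;  // empty, no crash
  return absent.size() == 0 && copied.empty() ? 0 : 1;
}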
......@@ -24,6 +24,12 @@ VarDesc* BlockDesc::GetVar<VarDesc>(int32_t idx) {
return &vars_[idx];
}
template <>
VarDesc const* BlockDesc::GetVar<VarDesc>(int32_t idx) const {
CHECK_LT(idx, VarsSize()) << "idx >= vars.size()";
return &vars_[idx];
}
template <>
VarDesc* BlockDesc::AddVar<VarDesc>() {
vars_.emplace_back();
......@@ -36,6 +42,12 @@ OpDesc* BlockDesc::GetOp<OpDesc>(int32_t idx) {
return &ops_[idx];
}
template <>
OpDesc const* BlockDesc::GetOp<OpDesc>(int32_t idx) const {
CHECK_LT(idx, OpsSize()) << "idx >= ops.size()";
return &ops_[idx];
}
template <>
OpDesc* BlockDesc::AddOp<OpDesc>() {
ops_.emplace_back();
......
......@@ -46,12 +46,10 @@ class BlockDesc : public BlockDescAPI {
template <typename T>
T* GetVar(int32_t idx);
std::vector<VarDesc>& GetVars() { return vars_; }
template <typename T>
T const* GetVar(int32_t idx) const {
return GetVar<T>(idx);
}
T const* GetVar(int32_t idx) const;
std::vector<VarDesc>& GetVars() { return vars_; }
template <typename T>
T* AddVar();
......@@ -64,9 +62,7 @@ class BlockDesc : public BlockDescAPI {
T* GetOp(int32_t idx);
template <typename T>
T const* GetOp(int32_t idx) const {
return GetOp<T>(idx);
}
T const* GetOp(int32_t idx) const;
template <typename T>
T* AddOp();
......
......@@ -24,6 +24,12 @@ BlockDesc* ProgramDesc::GetBlock<BlockDesc>(int32_t idx) {
return &blocks_[idx];
}
template <>
BlockDesc const* ProgramDesc::GetBlock<BlockDesc>(int32_t idx) const {
CHECK_LT(idx, BlocksSize()) << "idx >= blocks.size()";
return &blocks_[idx];
}
template <>
BlockDesc* ProgramDesc::AddBlock<BlockDesc>() {
blocks_.emplace_back();
......
......@@ -30,6 +30,13 @@ class ProgramDesc : public ProgramDescAPI {
public:
ProgramDesc() = default;
void CopyFrom(const ProgramDesc& other) {
version_ = other.Version();
blocks_ = other.blocks();
}
const std::vector<BlockDesc>& blocks() const { return blocks_; }
size_t BlocksSize() const override { return blocks_.size(); }
void ClearBlocks() override { blocks_.clear(); }
......@@ -37,12 +44,10 @@ class ProgramDesc : public ProgramDescAPI {
template <typename T>
T* GetBlock(int32_t idx);
std::vector<BlockDesc>& GetBlocks() { return blocks_; }
template <typename T>
T const* GetBlock(int32_t idx) const {
return GetBlock<T>(idx);
}
T const* GetBlock(int32_t idx) const;
std::vector<BlockDesc>& GetBlocks() { return blocks_; }
template <typename T>
T* AddBlock();
......
......@@ -176,7 +176,7 @@ void LoadCombinedParamsPb(const std::string &path,
const cpp::ProgramDesc &cpp_prog,
bool params_from_memory) {
CHECK(scope);
auto prog = cpp_prog;
auto &prog = cpp_prog;
auto &main_block_desc = *prog.GetBlock<cpp::BlockDesc>(0);
// Get vars
......@@ -310,7 +310,7 @@ void SaveModelPb(const std::string &model_dir,
void SaveCombinedParamsPb(const std::string &path,
const lite::Scope &exec_scope,
const cpp::ProgramDesc &cpp_prog) {
auto prog = cpp_prog;
auto &prog = cpp_prog;
auto &main_block_desc = *prog.GetBlock<cpp::BlockDesc>(0);
// Get vars
......@@ -526,7 +526,7 @@ void SaveCombinedParamsNaive(const std::string &path,
naive_buffer::proto::CombinedParamsDesc pt_desc(&table);
naive_buffer::CombinedParamsDesc desc(&pt_desc);
auto prog = cpp_prog;
auto &prog = cpp_prog;
auto &main_block_desc = *prog.GetBlock<cpp::BlockDesc>(0);
// set unique_var_names to avoid saving shared params repeatedly
std::set<std::string> unique_var_names;
......@@ -681,7 +681,7 @@ void LoadCombinedParamsNaive(const std::string &path,
}
// Check all params loaded
auto prog = cpp_prog;
auto &prog = cpp_prog;
auto &main_block_desc = *prog.GetBlock<cpp::BlockDesc>(0);
for (size_t i = 0; i < main_block_desc.VarsSize(); ++i) {
auto &var = *main_block_desc.GetVar<cpp::VarDesc>(i);
......
......@@ -55,11 +55,6 @@ class BlockDesc : public BlockDescAPI {
template <typename T>
T* GetVar(int32_t idx);
template <typename T>
T const* GetVar(int32_t idx) const {
return GetVar<T>(idx);
}
template <typename T>
T* AddVar();
......@@ -70,11 +65,6 @@ class BlockDesc : public BlockDescAPI {
template <typename T>
T* GetOp(int32_t idx);
template <typename T>
T const* GetOp(int32_t idx) const {
return GetOp<T>(idx);
}
template <typename T>
T* AddOp();
......
......@@ -45,11 +45,6 @@ class ProgramDesc : public ProgramDescAPI {
template <typename T>
T *GetBlock(int32_t idx);
template <typename T>
T const *GetBlock(int32_t idx) const {
return GetBlock<T>(idx);
}
template <typename T>
T *AddBlock();
......
......@@ -41,15 +41,11 @@ bool ActivationGradOp::AttachImpl(const cpp::OpDesc& opdesc,
if (opdesc.HasInput("X")) {
auto X_name = opdesc.Input("X").front();
param_.X = GetVar<lite::Tensor>(scope, X_name);
} else {
param_.X = param_.X_grad;
}
if (opdesc.HasInput("Out")) {
auto Out_name = opdesc.Input("Out").front();
param_.Out = GetVar<lite::Tensor>(scope, Out_name);
} else {
param_.Out = param_.Out_grad;
}
return true;
......@@ -60,3 +56,5 @@ bool ActivationGradOp::AttachImpl(const cpp::OpDesc& opdesc,
} // namespace paddle
REGISTER_LITE_OP(square_grad, paddle::lite::operators::ActivationGradOp);
REGISTER_LITE_OP(relu_grad, paddle::lite::operators::ActivationGradOp);
REGISTER_LITE_OP(tanh_grad, paddle::lite::operators::ActivationGradOp);
......@@ -83,7 +83,7 @@ class DeformableConvOpLite : public OpLite {
param_.conv_param.filter =
scope->FindVar(Filter)->GetMutable<lite::Tensor>();
param_.conv_param.strides = op_desc.GetAttr<std::vector<int>>("strides");
auto paddings = op_desc.GetAttr<std::vector<int>>("paddings");
std::vector<int> paddings = op_desc.GetAttr<std::vector<int>>("paddings");
auto dilations = op_desc.GetAttr<std::vector<int>>("dilations");
param_.conv_param.groups = op_desc.GetAttr<int>("groups");
param_.conv_param.dilations = std::make_shared<std::vector<int>>(dilations);
......
......@@ -54,7 +54,7 @@ class MaxPoolWithIndexOpLite : public OpLite {
param_.ksize = op_desc.GetAttr<std::vector<int>>("ksize");
param_.global_pooling = op_desc.GetAttr<bool>("global_pooling");
param_.strides = op_desc.GetAttr<std::vector<int>>("strides");
auto paddings = op_desc.GetAttr<std::vector<int>>("paddings");
std::vector<int> paddings = op_desc.GetAttr<std::vector<int>>("paddings");
if (op_desc.HasAttr("adaptive")) {
param_.adaptive = op_desc.GetAttr<bool>("adaptive");
}
......
......@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/arm/activation_grad_compute.h"
#include "lite/kernels/host/activation_grad_compute.h"
#include <gtest/gtest.h>
#include "lite/core/op_registry.h"
#include "lite/kernels/arm/activation_compute.h"
......@@ -20,13 +20,11 @@
namespace paddle {
namespace lite {
namespace kernels {
namespace arm {
using param_t = operators::ActivationParam;
using grad_param_t = operators::ActivationGradParam;
using kernel_t = SquareCompute;
using grad_kernel_t = SquareGradCompute;
template <class kernel_t, class grad_kernel_t>
class ActivationGradTester {
public:
explicit ActivationGradTester(DDim dims) : dims_(dims) {}
......@@ -71,22 +69,28 @@ class ActivationGradTester {
void run_backward(grad_param_t* param,
grad_kernel_t* kernel,
const std::vector<float>& in_vec,
const std::vector<float>& out_vec,
const std::vector<float>& out_grad_vec,
float* in_grad_vec) {
Tensor x;
Tensor out;
Tensor x_grad;
Tensor out_grad;
x.Resize(dims_);
out.Resize(dims_);
x_grad.Resize(dims_);
out_grad.Resize(dims_);
auto* x_data = x.mutable_data<float>();
auto* out_data = out.mutable_data<float>();
auto* out_grad_data = out_grad.mutable_data<float>();
for (int i = 0; i < dims_.production(); i++) {
x_data[i] = in_vec[i];
out_data[i] = out_vec[i];
out_grad_data[i] = out_grad_vec[i];
}
param->X = &x;
param->Out = &out;
param->X_grad = &x_grad;
param->Out_grad = &out_grad;
kernel->SetParam(*param);
......@@ -102,7 +106,9 @@ class ActivationGradTester {
std::vector<float> x(dims_.production());
std::vector<float> out(dims_.production());
for (int i = 0; i < dims_.production(); i++) {
x[i] = 1.0 * static_cast<float>(i % 128) * 0.3f - 1.1;
x[i] = static_cast<float>(i % 3 - 2.0) / 2.0 * 0.333 +
static_cast<float>(i % 19 - 10.0) / 10.0 * 0.333 +
static_cast<float>(i % 39 - 20.0) / 20.0 * 0.333 + 0.001213;
}
this->run_forward(&param_, &kernel_, x, out.data());
......@@ -120,7 +126,8 @@ class ActivationGradTester {
for (int i = 0; i < dims_.production(); i++) {
out_grad[i] = 1.0;
}
this->run_backward(&grad_param_, &grad_kernel_, x, out_grad, x_grad.data());
this->run_backward(
&grad_param_, &grad_kernel_, x, out, out_grad, x_grad.data());
for (int i = 0; i < dims_.production(); i++) {
EXPECT_NEAR(x_grad[i], (out_delta[i] - out[i]) / delta, max_grad_delta);
......@@ -137,31 +144,58 @@ class ActivationGradTester {
grad_param_t grad_param_;
};
void TestNormalCase(DDim dims) {
std::unique_ptr<ActivationGradTester> tester(new ActivationGradTester(dims));
void TestSquareGrad(DDim dims) {
LOG(INFO) << "Test Square grad";
std::unique_ptr<
ActivationGradTester<arm::SquareCompute, host::SquareGradCompute>>
tester(
new ActivationGradTester<arm::SquareCompute, host::SquareGradCompute>(
dims));
tester->prepare_kernel();
float delta = 0.001;
float max_grad_delta = 0.005;
tester->check_grad(delta, max_grad_delta);
}
TEST(activation_grad_arm, compute) {
LOG(INFO) << "Test Square grad";
void TestReluGrad(DDim dims) {
LOG(INFO) << "Test Relu grad";
std::unique_ptr<ActivationGradTester<arm::ReluCompute, host::ReluGradCompute>>
tester(new ActivationGradTester<arm::ReluCompute, host::ReluGradCompute>(
dims));
tester->prepare_kernel();
float delta = 0.001;
float max_grad_delta = 0.005;
tester->check_grad(delta, max_grad_delta);
}
void TestTanhGrad(DDim dims) {
LOG(INFO) << "Test Tanh grad";
std::unique_ptr<ActivationGradTester<arm::TanhCompute, host::TanhGradCompute>>
tester(new ActivationGradTester<arm::TanhCompute, host::TanhGradCompute>(
dims));
tester->prepare_kernel();
float delta = 0.001;
float max_grad_delta = 0.005;
tester->check_grad(delta, max_grad_delta);
}
TEST(activation_grad_host, compute) {
DeviceInfo::Init();
for (auto n : {2}) {
for (auto c : {2}) {
for (auto h : {2}) {
for (auto w : {2}) {
TestNormalCase(DDim(std::vector<int64_t>({n, c, h, w})));
for (auto n : {2, 1}) {
for (auto c : {2, 9}) {
for (auto h : {2, 1}) {
for (auto w : {2, 10}) {
TestSquareGrad(DDim(std::vector<int64_t>({n, c, h, w})));
TestReluGrad(DDim(std::vector<int64_t>({n, c, h, w})));
TestTanhGrad(DDim(std::vector<int64_t>({n, c, h, w})));
}
}
}
}
}
} // namespace arm
} // namespace kernels
} // namespace lite
} // namespace paddle
USE_LITE_KERNEL(square, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(square_grad, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(square_grad, kHost, kFloat, kNCHW, def);
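check_grad in the tester above compares the kernel's analytic gradient against the forward finite difference (f(x + delta) - f(x)) / delta. A standalone sketch of that check for the square activation:

#include <cmath>
#include <cstdio>

int main() {
  const float delta = 1e-3f, tol = 5e-3f;
  for (float x : {-1.1f, 0.25f, 2.f}) {
    float analytic = 2.f * x;  // d(x^2)/dx
    float numeric = ((x + delta) * (x + delta) - x * x) / delta;
    std::printf("x=%g analytic=%g numeric=%g\n", x, analytic, numeric);
    if (std::fabs(analytic - numeric) > tol) return 1;
  }
  return 0;
}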
......@@ -215,18 +215,6 @@ class ElementwiseAddGradTester {
fill_data_rand(y.data(), -1.f, 1.f, y_dims_.production());
this->run_forward(&param_, &kernel_, x, y, out.data());
for (int i = 0; i < x_dims_.production(); i++) {
LOG(INFO) << "x_" << i << ": " << x[i];
}
for (int i = 0; i < y_dims_.production(); i++) {
LOG(INFO) << "y_" << i << ": " << y[i];
}
for (int i = 0; i < out_dims_.production(); i++) {
LOG(INFO) << "out_" << i << ": " << out[i];
}
// backward
std::vector<float> out_grad(out_dims_.production());
std::vector<float> x_grad(x_dims_.production());
......@@ -242,14 +230,6 @@ class ElementwiseAddGradTester {
x_grad.data(),
y_grad.data());
for (int i = 0; i < x_grad.size(); i++) {
LOG(INFO) << "x_grad_" << i << ": " << x_grad[i];
}
for (int i = 0; i < y_grad.size(); i++) {
LOG(INFO) << "y_grad_" << i << ": " << y_grad[i];
}
// get numeric gradient
std::vector<float> x_delta(x_dims_.production());
std::vector<float> y_delta(y_dims_.production());
......@@ -443,18 +423,6 @@ class ElementwiseSubGradTester {
fill_data_rand(y.data(), -1.f, 1.f, y_dims_.production());
this->run_forward(&param_, &kernel_, x, y, out.data());
for (int i = 0; i < x_dims_.production(); i++) {
LOG(INFO) << "x_" << i << ": " << x[i];
}
for (int i = 0; i < y_dims_.production(); i++) {
LOG(INFO) << "y_" << i << ": " << y[i];
}
for (int i = 0; i < out_dims_.production(); i++) {
LOG(INFO) << "out_" << i << ": " << out[i];
}
// backward
std::vector<float> out_grad(out_dims_.production());
std::vector<float> x_grad(x_dims_.production());
......@@ -470,14 +438,6 @@ class ElementwiseSubGradTester {
x_grad.data(),
y_grad.data());
for (int i = 0; i < x_grad.size(); i++) {
LOG(INFO) << "x_grad_" << i << ": " << x_grad[i];
}
for (int i = 0; i < y_grad.size(); i++) {
LOG(INFO) << "y_grad_" << i << ": " << y_grad[i];
}
// get numeric gradient
std::vector<float> x_delta(x_dims_.production());
std::vector<float> y_delta(y_dims_.production());
......
......@@ -85,21 +85,31 @@ class SequenceConvComputeTester : public arena::TestCase {
auto output_dims = output->dims();
auto output_data = output->mutable_data<float>();
std::vector<std::vector<float>> res;
if (contextStart_ == -2) {
if (contextStart_ == -2 && lod_.size() == 1 &&
lod_[0] == std::vector<uint64_t>({0, 4})) {
res = {{-0.08867277f, -0.17257819f, -0.2564836f},
{0.194508f, 0.05720823f, -0.08009153f},
{0.73512584f, 0.5749428f, 0.41475973f},
{0.5635012f, 0.49485126f, 0.42620137f}};
} else if (contextStart_ == -1) {
} else if (contextStart_ == -1 && lod_.size() == 1 &&
lod_[0] == std::vector<uint64_t>({0, 4})) {
res = {{0.194508f, 0.05720823f, -0.08009153f},
{0.73512584f, 0.5749428f, 0.41475973f},
{0.5635012f, 0.49485126f, 0.42620137f},
{0.2517162f, 0.23646072f, 0.22120519f}};
} else if (contextStart_ == 0) {
} else if (contextStart_ == 0 && lod_.size() == 1 &&
lod_[0] == std::vector<uint64_t>({0, 4})) {
res = {{0.73512584f, 0.5749428f, 0.41475973f},
{0.5635012f, 0.49485126f, 0.42620137f},
{0.2517162f, 0.23646072f, 0.22120519f},
{0.02574372f, 0.03337148f, 0.04099924f}};
} else if (contextStart_ == -1 && lod_.size() == 1 &&
lod_[0] == std::vector<uint64_t>({0, 2, 4})) {
res = {{0.194508f, 0.05720823f, -0.08009153f},
{0.7093821f, 0.57208234f, 0.43478262f},
{0.19450802f, 0.17925248f, 0.16399695f},
{0.2517162f, 0.23646072f, 0.22120519f}};
} else {
fprintf(stderr, "not supported contextStart_\n");
exit(-1);
......@@ -136,12 +146,25 @@ void TestNormalCase(Place place, float abs_error = 2e-5) {
}
}
void TestBatchCase(Place place, float abs_error = 2e-5) {
std::vector<std::vector<uint64_t>> lod{{0, 2, 4}};
std::vector<int64_t> dims{4, 5};
std::vector<int> candidate_pad_idx{-1};
for (int pad_idx : candidate_pad_idx) {
std::unique_ptr<arena::TestCase> tester(new SequenceConvComputeTester(
place, "def", lod, DDim(dims), pad_idx, 1, 3, 3));
arena::Arena arena(std::move(tester), place, abs_error);
arena.TestPrecision();
}
}
TEST(sequence_conv, precision) {
#ifdef LITE_WITH_ARM
float abs_error = 2e-5;
Place place(TARGET(kARM));
TestNormalCase(place, abs_error);
TestBatchCase(place, abs_error);
#endif
}
......
......@@ -269,6 +269,7 @@ function main {
if [ -z "$1" ]; then
# compiling result contains light_api lib only, recommended.
make_tiny_publish_so $ARCH $TOOLCHAIN $ANDROID_STL
exit 0
fi
# Parse command line.
......@@ -358,6 +359,7 @@ function main {
done
# compiling result contains light_api lib only, recommended.
make_tiny_publish_so
exit 0
}
main $@
......@@ -70,6 +70,13 @@ function build_bm {
mkdir -p $build_dir
cd $build_dir
if [ $TARGET_NAME == "BM1684" ]; then
BM_SDK_ROOT="$workspace/third-party/bmlibs/bm_sc5_libs"
else
BM_SDK_ROOT="$workspace/third-party/bmlibs/bm_sc3_libs"
fi
echo $BM_SDK_ROOT
prepare_workspace
cmake .. \
${CMAKE_COMMON_OPTIONS} \
......@@ -95,17 +102,7 @@ function main {
case $i in
--target_name=*)
TARGET_NAME="${i#*=}"
shift
;;
#--bm_sdk_root=*)
# BM_SDK_ROOT="${i#*=}"
# shift
# ;;
bm)
build_bm
shift
;;
*)
# unknown option
print_usage
exit 1
......
......@@ -152,6 +152,7 @@ function main {
esac
done
make_ios $ARCH
exit 0
}
main $@
......@@ -71,7 +71,7 @@ function CheckLibSizeDiff() {
if [ $diff_size -gt 10485 ]; then
echo_line="Your PR has increased basic inference lib for $diff_size Byte, exceeding maximum requirement of 10485 Byte (0.01M). You need Superjomn's (Yunchunwei) approval or you can contact DannyIsFunny(HuZhiqiang).\n"
echo "****************"
echo -e "${echo_list[@]}"
echo -e "${echo_line[@]}"
echo "There is an approved errors."
echo "****************"
exit 1
......