Commit 4a948cfc authored by 石晓伟, committed by GitHub

add full_api_static target and fix building errors, test=develop (#2064)

* add full_api_static target and fix building errors, test=develop

* fix build errors, test=develop

* fix code style, test=develop

* fix lite/model_parser/pb/var_desc.cc, test=develop

* fix building errors, test=develop

* modify lite/tools/debug/CMakeLists.txt, test=develop
Parent 26925ab9
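For context, the new api_full_static target bundles the full (CXX) API together with the ops, kernels, and the protobuf model parser into a single static library. A minimal consumer of that full API might look like the sketch below; the model directory is hypothetical, while CxxConfig, CreatePaddlePredictor, and the tensor accessors come from lite/api/paddle_api.h as touched in this diff.

#include <iostream>
#include "lite/api/paddle_api.h"

int main() {
  paddle::lite_api::CxxConfig config;
  config.set_model_dir("./mobilenet_v1");  // hypothetical model directory
  config.set_valid_places(
      {paddle::lite_api::Place{TARGET(kX86), PRECISION(kFloat)}});

  auto predictor =
      paddle::lite_api::CreatePaddlePredictor<paddle::lite_api::CxxConfig>(
          config);

  auto input = predictor->GetInput(0);
  input->Resize({1, 3, 224, 224});
  auto* in_data = input->mutable_data<float>();
  for (int i = 0; i < 3 * 224 * 224; ++i) in_data[i] = 1.0f;

  predictor->Run();
  auto output = predictor->GetOutput(0);
  std::cout << "first output value: " << output->data<float>()[0] << "\n";
  return 0;
}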
......@@ -176,6 +176,7 @@ include(generic) # simplify cmake module
include(ccache) # set ccache for compilation
include(util) # set unittest and link libs
include(version) # set PADDLE_VERSION
include(flags)
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -g -DNDEBUG")
set(CMAKE_C_FLAGS_RELWITHDEBINFO "-O3 -g -DNDEBUG")
......
......@@ -105,8 +105,8 @@ set_property(GLOBAL PROPERTY FLUID_MODULES "")
function(find_fluid_modules TARGET_NAME)
get_filename_component(__target_path ${TARGET_NAME} ABSOLUTE)
string(REGEX REPLACE "^${PADDLE_SOURCE_DIR}/" "" __target_path ${__target_path})
string(FIND "${__target_path}" "fluid" pos)
if(pos GREATER 1)
string(FIND "${__target_path}" "lite" pos)
if((pos GREATER 0) OR (pos EQUAL 0))
get_property(fluid_modules GLOBAL PROPERTY FLUID_MODULES)
set(fluid_modules ${fluid_modules} ${TARGET_NAME})
set_property(GLOBAL PROPERTY FLUID_MODULES "${fluid_modules}")
......@@ -369,6 +369,7 @@ function(cc_binary TARGET_NAME)
endif()
get_property(os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES)
target_link_libraries(${TARGET_NAME} ${os_dependency_modules})
find_fluid_modules(${TARGET_NAME})
endfunction(cc_binary)
function(cc_test TARGET_NAME)
......
......@@ -126,12 +126,12 @@ function(lite_cc_library TARGET)
)
if (args_SHARED OR ARGS_shared)
cc_library(${TARGET} SRCS ${args_SRCS} DEPS ${deps} ${args_DEPS} SHARED)
cc_library(${TARGET} SRCS ${args_SRCS} DEPS ${deps} SHARED)
elseif (args_MODULE OR ARGS_module)
add_library(${TARGET} MODULE ${args_SRCS})
add_dependencies(${TARGET} ${deps} ${args_DEPS})
else()
cc_library(${TARGET} SRCS ${args_SRCS} DEPS ${deps} ${args_DEPS})
cc_library(${TARGET} SRCS ${args_SRCS} DEPS ${deps})
endif()
target_compile_options(${TARGET} BEFORE PRIVATE -Wno-ignored-qualifiers)
......@@ -163,7 +163,7 @@ function(lite_cc_binary TARGET)
LIGHT_DEPS ${args_LIGHT_DEPS}
HVY_DEPS ${args_HVY_DEPS}
)
cc_binary(${TARGET} SRCS ${args_SRCS} DEPS ${deps} ${args_DEPS})
cc_binary(${TARGET} SRCS ${args_SRCS} DEPS ${deps})
target_compile_options(${TARGET} BEFORE PRIVATE -Wno-ignored-qualifiers)
if (NOT APPLE)
# strip binary target to reduce size
......
......@@ -210,6 +210,8 @@ if (NOT LITE_ON_TINY_PUBLISH)
FPGA_DEPS ${fpga_kernels})
# The final inference library for just MobileConfig.
bundle_static_library(paddle_api_full paddle_api_full_bundled bundle_full_api)
get_property(fluid_modules GLOBAL PROPERTY FLUID_MODULES)
cc_library(api_full_static SRCS DEPS paddle_api_full cxx_api paddle_api light_api ${cxx_api_deps} ${ops} ${host_kernels} ${cuda_kernels} program tensor memory naive_buffer types ${fluid_modules} protobuf)
endif()
bundle_static_library(paddle_api_light paddle_api_light_bundled bundle_light_api)
#-----------------------------------------------------------------------------------------------------
......
......@@ -18,9 +18,6 @@
#include <string>
#include <vector>
#include "lite/api/paddle_api.h"
#include "lite/api/paddle_use_kernels.h"
#include "lite/api/paddle_use_ops.h"
#include "lite/api/paddle_use_passes.h"
#include "lite/api/test_helper.h"
#include "lite/core/device_info.h"
#include "lite/utils/cp_logging.h"
......
......@@ -15,6 +15,7 @@
#include "lite/api/cxx_api.h"
#include <string>
#include "lite/api/paddle_api.h"
#include "lite/core/device_info.h"
#include "lite/core/version.h"
namespace paddle {
......@@ -49,6 +50,9 @@ class CxxPaddleApiImpl : public lite_api::PaddlePredictor {
CxxPaddleApiImpl::CxxPaddleApiImpl() {}
void CxxPaddleApiImpl::Init(const lite_api::CxxConfig &config) {
#ifdef LITE_WITH_CUDA
Env<TARGET(kCUDA)>::Init();
#endif
auto places = config.valid_places();
places.emplace_back(TARGET(kHost), PRECISION(kAny), DATALAYOUT(kAny));
raw_predictor_.Build(config, places);
......
......@@ -16,9 +16,6 @@
#include <string>
#include <vector>
#include "lite/api/paddle_api.h"
#include "lite/api/paddle_use_kernels.h"
#include "lite/api/paddle_use_ops.h"
#include "lite/api/paddle_use_passes.h"
#include "lite/api/test_helper.h"
#include "lite/core/device_info.h"
#include "lite/utils/cp_logging.h"
......
......@@ -286,7 +286,6 @@ bool CudnnConv2DInt8<Ptype_out>::create(const operators::ConvParam& param,
}
}
this->scale_.Resize({oc});
auto* scale_data = this->scale_.template mutable_data<float>(TARGET(kCUDA));
this->scale_.template Assign<float, lite::DDim, TARGET(kCUDA)>(
weight_scale.data(), this->scale_.dims());
......
......@@ -32,17 +32,17 @@ class CudnnConv2DBase {
public:
CudnnConv2DBase()
: handle_(NULL),
workspace_data_(NULL),
workspace_(NULL),
conv_desc_(NULL),
fwd_algo_((cudnnConvolutionFwdAlgo_t)0),
input_desc_(NULL),
output_desc_(NULL),
bias_desc_(NULL),
filter_desc_(NULL),
conv_desc_(NULL),
act_desc_(NULL),
bias_desc_(NULL),
workspace_data_(NULL),
workspace_(NULL),
workspace_fwd_sizes_(0),
workspace_size_inbytes_(0),
fwd_algo_((cudnnConvolutionFwdAlgo_t)0) {}
workspace_size_inbytes_(0) {}
~CudnnConv2DBase() {
if (conv_desc_) {
......@@ -85,10 +85,10 @@ class CudnnConv2DBase {
cudnnActivationDescriptor_t act_desc_;
bool with_relu_act_{true};
void* workspace_data_; // underlying storage
void* workspace_; // aliases into _workspaceData
size_t workspace_fwd_sizes_;
size_t workspace_size_inbytes_; // size of underlying storage
void* workspace_data_; // underlying storage
void* workspace_; // aliases into _workspaceData
const bool use_tensor_core_ = true;
const size_t workspace_limit_bytes_ = 4 * 1024 * 1024;
......@@ -104,6 +104,7 @@ template <PrecisionType Ptype_out>
class CudnnConv2D : public CudnnConv2DBase<Ptype_out> {
public:
CudnnConv2D() : CudnnConv2DBase<Ptype_out>() {}
virtual ~CudnnConv2D() = default;
virtual bool init(const operators::ConvParam& param,
Context<TARGET(kCUDA)>* ctx);
......@@ -117,6 +118,7 @@ template <PrecisionType Ptype_out>
class CudnnConv2DInt8 : CudnnConv2DBase<Ptype_out> {
public:
CudnnConv2DInt8() : CudnnConv2DBase<Ptype_out>() {}
virtual ~CudnnConv2DInt8() = default;
virtual bool init(const operators::ConvParam& param,
Context<TARGET(kCUDA)>* ctx);
......
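The member reordering in the CudnnConv2DBase constructor above is not cosmetic: C++ initializes non-static members in declaration order regardless of how the mem-initializer list is written, so a mismatched list at best triggers -Wreorder and at worst hides a read of a not-yet-initialized member. A minimal illustration with hypothetical names:

#include <iostream>

struct Conv {
  // Members are initialized in *declaration* order, not in the order
  // they appear in the mem-initializer list.
  int workspace_size_;  // initialized first
  int* workspace_;      // initialized second

  // -Wreorder fires here: the list is written workspace_ first, but
  // workspace_size_ is still constructed before it.
  Conv() : workspace_(nullptr), workspace_size_(0) {}
};

struct Bad {
  int a_;  // constructed first...
  int b_;
  Bad() : b_(1), a_(b_) {}  // ...so a_ reads b_ before b_ exists: garbage
};

int main() {
  Bad x;
  std::cout << x.a_ << "\n";  // indeterminate, not necessarily 1
}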
......@@ -153,16 +153,18 @@ static inline void* GetDsoHandleFromSearchPath(const std::string& search_root,
dso_handle = GetDsoHandleFromDefaultPath(dlPath, dynload_flags);
}
}
auto error_msg =
"Failed to find dynamic library: %s ( %s ) \n Please specify "
"its path correctly using following ways: \n Method. set "
"environment variable LD_LIBRARY_PATH on Linux or "
"DYLD_LIBRARY_PATH on Mac OS. \n For instance, issue command: "
"export LD_LIBRARY_PATH=... \n Note: After Mac OS 10.11, "
"using the DYLD_LIBRARY_PATH is impossible unless System "
"Integrity Protection (SIP) is disabled.";
/*
auto error_msg =
"Failed to find dynamic library: %s ( %s ) \n Please specify "
"its path correctly using following ways: \n Method. set "
"environment variable LD_LIBRARY_PATH on Linux or "
"DYLD_LIBRARY_PATH on Mac OS. \n For instance, issue command: "
"export LD_LIBRARY_PATH=... \n Note: After Mac OS 10.11, "
"using the DYLD_LIBRARY_PATH is impossible unless System "
"Integrity Protection (SIP) is disabled.";
*/
#if !defined(_WIN32)
auto errorno = dlerror();
// auto errorno = dlerror();
#else
auto errorno = GetLastError();
#endif // !_WIN32
......
......@@ -49,6 +49,7 @@ class BeamSearchFunctor<TARGET(kX86), T> {
end_id,
is_accumulated);
auto selected_items = ToMap(items, high_level.back());
/*
if (FLAGS_v == 3) {
VLOG(3) << "selected_items:";
for (size_t i = 0; i < selected_items.size(); ++i) {
......@@ -58,6 +59,7 @@ class BeamSearchFunctor<TARGET(kX86), T> {
}
}
}
*/
PruneEndBeams(pre_ids, abs_lod, &selected_items, level, end_id);
// calculate the output tensor's height
......@@ -69,7 +71,8 @@ class BeamSearchFunctor<TARGET(kX86), T> {
// the output tensor shape should be [num_instances, 1]
// auto dims = framework::make_ddim(
// std::vector<int64_t>({static_cast<int>(num_instances), 1}));
lite::DDim dims(std::vector<int64_t>({num_instances, 1L}));
lite::DDim dims(
std::vector<int64_t>({static_cast<int>(num_instances), 1L}));
selected_ids->Resize(dims);
auto *selected_ids_data = selected_ids->mutable_data<int64_t>(TARGET(kX86));
......@@ -296,7 +299,7 @@ class BeamSearchFunctor<TARGET(kX86), T> {
result.emplace_back(top_beam);
}
/*
if (FLAGS_v == 3) {
VLOG(3) << "SelectTopBeamSizeItems result size " << result.size();
for (auto &items : result) {
......@@ -306,7 +309,7 @@ class BeamSearchFunctor<TARGET(kX86), T> {
}
}
}
*/
return result;
}
};
......
......@@ -48,6 +48,7 @@ inline ActivationType GetActivationType(const std::string &type) {
LOG(ERROR) << "Not support type " << type;
// PADDLE_ENFORCE(false, "Not support type %s", type);
// PADDLE_THROW("Not support type %s.", type);
return ActivationType();
}
namespace forward {
......
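Several hunks in this commit (here and in data_type.cc, op_desc.h, and var_desc.cc below) add a return after a LOG(FATAL) or PADDLE_THROW. Flowing off the end of a value-returning function is undefined behavior, and unless the error macro is annotated noreturn the compiler cannot prove the path is dead, so -Wreturn-type (an error under -Werror) fires. A sketch of the pattern, with stand-in names:

#include <cstdio>
#include <cstdlib>

// Stand-in for LOG(FATAL)/PADDLE_THROW; without a noreturn attribute the
// compiler assumes control can continue past the call.
void fatal(const char* msg) { std::fprintf(stderr, "%s\n", msg); std::abort(); }

enum class AttrType { INT, STRING };

AttrType Parse(int tag) {
  switch (tag) {
    case 0: return AttrType::INT;
    case 1: return AttrType::STRING;
    default:
      fatal("Unknown attribute type");
      // Unreachable at runtime, but it silences -Wreturn-type and keeps
      // behavior defined if fatal() were ever made non-terminating.
      return static_cast<AttrType>(-1);
  }
}

int main() { return Parse(0) == AttrType::INT ? 0 : 1; }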
......@@ -107,7 +107,8 @@ class Tree2ColFunctor<lite::TargetType::kX86, T> {
// patch->mutable_data<T>({static_cast<int64_t>(patch_size),
// static_cast<int64_t>(patch_elem_size)},
// cpu_place);
patch->Resize({static_cast<int64_t>(patch_size, patch_elem_size)});
patch->Resize({static_cast<int64_t>(patch_size),
static_cast<int64_t>(patch_elem_size)});
auto *patch_data = patch->mutable_data<T>(lite::TargetType::kX86);
constant(context, patch, 0);
const T *features = node_features.data<T>();
......
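The Tree2ColFunctor line fixed above is a classic comma-operator trap: static_cast<int64_t>(patch_size, patch_elem_size) compiles cleanly, but the comma expression discards patch_size and casts only patch_elem_size, so the tensor was resized to a one-element shape. A small demonstration:

#include <cassert>
#include <cstdint>
#include <vector>

int main() {
  std::size_t patch_size = 8, patch_elem_size = 3;

  // Bug: looks like a two-argument cast, is actually the comma operator.
  std::vector<int64_t> bad{static_cast<int64_t>(patch_size, patch_elem_size)};
  assert(bad.size() == 1 && bad[0] == 3);  // patch_size silently dropped

  // Fix: cast each value, producing the intended 2-D shape {8, 3}.
  std::vector<int64_t> good{static_cast<int64_t>(patch_size),
                            static_cast<int64_t>(patch_elem_size)};
  assert(good.size() == 2 && good[0] == 8 && good[1] == 3);
  return 0;
}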
......@@ -42,6 +42,7 @@ class TestCase {
: place_(place), scope_(new Scope), alias_(alias) {
ctx_ = ContextScheduler::Global().NewContext(place_.target);
}
virtual ~TestCase() {}
void Prepare() {
PrepareScopes();
......@@ -138,20 +139,18 @@ class TestCase {
}
private:
Place place_;
std::shared_ptr<Scope> scope_;
std::string alias_;
// The workspace for the Instruction.
Scope* inst_scope_{};
// The workspace for the baseline implementation.
Scope* base_scope_{};
std::unique_ptr<cpp::OpDesc> op_desc_;
std::unique_ptr<Instruction> instruction_;
Place place_;
std::string alias_;
};
class Arena {
float abs_error_{};
public:
Arena(std::unique_ptr<TestCase>&& tester,
const Place& place,
......@@ -203,12 +202,14 @@ class Arena {
default:
LOG(FATAL) << "not support type " << PrecisionToStr(type->precision());
return false;
}
}
private:
std::unique_ptr<TestCase> tester_;
Place place_;
float abs_error_;
};
template <typename T>
......
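Adding virtual ~TestCase() matters because concrete testers are handed to Arena as std::unique_ptr<TestCase> (see the constructor above), and deleting a derived object through a base pointer whose destructor is non-virtual is undefined behavior. A minimal sketch with hypothetical names:

#include <iostream>
#include <memory>

struct TestCaseBase {
  virtual ~TestCaseBase() = default;  // without this, the delete below is UB
};

struct ConcreteTester : TestCaseBase {
  ~ConcreteTester() override { std::cout << "tester cleaned up\n"; }
};

int main() {
  // unique_ptr<TestCaseBase> destroys through the base type; only a virtual
  // destructor dispatches down to ~ConcreteTester() as well.
  std::unique_ptr<TestCaseBase> t = std::make_unique<ConcreteTester>();
  return 0;
}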
......@@ -192,10 +192,10 @@ class Context<TargetType::kCUDA> {
ctx->cublas_fp32_ = cublas_fp32_;
}
const cudaStream_t exec_stream() { return exec_stream_; }
const cudaStream_t& exec_stream() const { return exec_stream_; }
void SetExecStream(cudaStream_t stream) { exec_stream_ = stream; }
const cudaStream_t io_stream() { return io_stream_; }
const cudaStream_t& io_stream() const { return io_stream_; }
void SetIoStream(cudaStream_t stream) { io_stream_ = stream; }
std::shared_ptr<cuda::Blas<float>> cublas_fp32() { return cublas_fp32_; }
......
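The getter change above is subtle: a top-level const on a by-value return is ignored for handle/scalar types like cudaStream_t (the same class of warning the -Wno-ignored-qualifiers flags added elsewhere in this commit suppress), and the unqualified method could not be called on a const Context. Returning a const reference from a const member function fixes both. Roughly, with an int standing in for the stream handle:

struct Context {
  int stream_ = 0;  // stand-in for the cudaStream_t handle

  // Before: the const on the returned copy is meaningless for a handle,
  // and the method cannot be called on a const Context.
  const int exec_stream_old() { return stream_; }

  // After: const-qualified, returning a const reference to the handle.
  const int& exec_stream() const { return stream_; }
};

int main() {
  const Context ctx;
  // ctx.exec_stream_old();  // error: non-const member on const object
  return ctx.exec_stream();  // OK
}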
......@@ -167,7 +167,7 @@ class Device<TARGET(kCUDA)> {
int id() { return idx_; }
int max_stream() { return max_stream_; }
int SetId(int idx) { idx_ = idx; }
void SetId(int idx) { idx_ = idx; }
std::string name() { return device_prop_.name; }
int core_num() { return device_prop_.multiProcessorCount; }
float max_memory() { return device_prop_.totalGlobalMem / 1048576.; }
......@@ -186,8 +186,8 @@ class Device<TARGET(kCUDA)> {
void GetInfo();
private:
int max_stream_;
int idx_{0};
int max_stream_;
cudaDeviceProp device_prop_;
std::string device_name_;
float max_memory_;
......
......@@ -46,7 +46,7 @@ std::set<T> Types<T>::ValidSet(const T& element) const {
return std::set<T>({element});
}
bool ExpandPlaces(std::set<Place>* places, const Place& place) {
void ExpandPlaces(std::set<Place>* places, const Place& place) {
static const Types<TargetType> target_set({TARGET(kHost),
TARGET(kX86),
TARGET(kCUDA),
......
......@@ -278,19 +278,21 @@ int SubgraphProgramPass::FuseSubgraphID(
const std::unique_ptr<SSAGraph>& graph) {
int sub_id = 1; // id start from 1 not 0
for (auto& item : graph->StmtTopologicalOrder()) {
bool inputvar = 0;
// bool inputvar = false;
if (!item->IsStmt()) continue;
auto& stmt = item->AsStmt();
/*
if (stmt.subgraph_id() == -1) {
for (auto& i : item->outlinks) {
for (auto& j : i->outlinks) {
if (j->IsStmt()) {
auto& jstmt = j->AsStmt();
if (jstmt.subgraph_id() == 0) inputvar = 1;
if (jstmt.subgraph_id() == 0) inputvar = true;
}
}
}
}
*/
if (stmt.subgraph_id() != 0) continue;
ChangeAllOutConnectedID(item, sub_id);
sub_id++;
......
......@@ -214,7 +214,6 @@ TEST(SubGraphTest, SimpleNet) {
auto* pass = new mir::subgraph::SubgraphProgramPass;
ASSERT_EQ(pass->FuseSubgraph(graph, supported_op_types), 1);
const int num_nodes = graph->nodes().size();
ASSERT_EQ(graph->nodes().size(), 9);
// LOG(INFO) << "After NPU Pass \n" << Visualize(graph.get());
}
......
......@@ -110,7 +110,6 @@ void TypeLayoutTransformPass::AddLayoutInst(
bool is_found = false;
for (auto& kernel : kernels) {
const Type* in_arg_ty = kernel->GetInputDeclType("Input");
const Type* out_arg_ty = kernel->GetOutputDeclType("Out");
if (TypeCompatible(*in_arg_ty, from)) {
is_found = true;
selected_kernels.emplace_back(std::move(kernel));
......
......@@ -114,7 +114,6 @@ void TypeTargetTransformPass::AddIoCopyInst(
std::vector<std::unique_ptr<KernelBase>> selected_kernels;
for (auto& kernel : kernels) {
const Type* in_arg_ty = kernel->GetInputDeclType("Input");
const Type* out_arg_ty = kernel->GetOutputDeclType("Out");
if (TypeCompatible(*in_arg_ty, from)) {
is_found = true;
selected_kernels.emplace_back(std::move(kernel));
......
......@@ -209,7 +209,7 @@ class KernelRegistry final {
ss << "Count of kernel kinds: ";
int count = 0;
for (auto &item : kernel_info_map_) {
for (auto &kernel : item.second) ++count;
count += item.second.size();
}
ss << count << "\n";
......
......@@ -68,6 +68,7 @@ framework::proto::VarType::Type ToDataType(std::type_index type) {
return it->second;
}
PADDLE_THROW("Not support %s as tensor type", type.name());
return static_cast<framework::proto::VarType::Type>(-1);
}
std::type_index ToTypeIndex(framework::proto::VarType::Type type) {
......@@ -77,6 +78,7 @@ std::type_index ToTypeIndex(framework::proto::VarType::Type type) {
}
PADDLE_THROW("Not support framework::proto::VarType::Type(%d) as tensor type",
static_cast<int>(type));
return std::type_index(typeid(void));
}
std::string DataTypeToString(const framework::proto::VarType::Type type) {
......@@ -86,6 +88,7 @@ std::string DataTypeToString(const framework::proto::VarType::Type type) {
}
PADDLE_THROW("Not support framework::proto::VarType::Type(%d) as tensor type",
static_cast<int>(type));
return std::string();
}
size_t SizeOfType(framework::proto::VarType::Type type) {
......@@ -93,7 +96,8 @@ size_t SizeOfType(framework::proto::VarType::Type type) {
if (it != gDataTypeMap().proto_to_size_.end()) {
return it->second;
}
PADDLE_THROW("Not support %s as tensor type", DataTypeToString(type));
PADDLE_THROW("Not support %s as tensor type", DataTypeToString(type).c_str());
return 0;
}
} // namespace fluid
......
......@@ -163,7 +163,7 @@ int64_t SelectedRows::AutoGrownIndex(int64_t key,
if (iter == id_to_index_.end()) {
rwlock_->UNLock();
if (!auto_grown) {
PADDLE_THROW("key %d not found", key);
PADDLE_THROW("key %ld not found", key);
}
rwlock_->WRLock();
auto map_size = id_to_index_.size();
......@@ -171,7 +171,7 @@ int64_t SelectedRows::AutoGrownIndex(int64_t key,
if (map_size != vector_size) {
rwlock_->UNLock();
PADDLE_THROW(
"id_to_index_ size %d should have the same size with rows_ %d",
"id_to_index_ size %lu should have the same size with rows_ %lu",
map_size,
vector_size);
}
......
......@@ -82,7 +82,7 @@ class SelectedRows {
int64_t Index(int64_t key) const {
auto it = std::find(rows_.begin(), rows_.end(), key);
if (it == rows_.end()) {
PADDLE_THROW("id %s not in table", key);
PADDLE_THROW("id %ld not in table", key);
}
return static_cast<int64_t>(std::distance(rows_.begin(), it));
}
......
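PADDLE_THROW forwards its arguments to a printf-style formatter, so the specifiers must match the promoted argument types: %d with an int64_t or a size_t is undefined, as is passing a std::string through C varargs, hence the %ld/%lu and .c_str() fixes above. Compilers catch these when the formatter carries a format attribute, as in this sketch:

#include <cstdarg>
#include <cstdint>
#include <cstdio>
#include <string>

// printf-style helper; the attribute lets GCC/Clang type-check call sites
// via -Wformat, which is how mismatches like these are flagged.
__attribute__((format(printf, 1, 2)))
void Throw(const char* fmt, ...) {
  va_list args;
  va_start(args, fmt);
  std::vfprintf(stderr, fmt, args);
  va_end(args);
}

int main() {
  int64_t key = 42;
  std::string name = "rows_";
  // Throw("key %d not found\n", key);      // -Wformat: %d vs int64_t
  Throw("key %ld not found\n", (long)key);  // matched specifier
  // Throw("in table %s\n", name);          // UB: std::string through varargs
  Throw("in table %s\n", name.c_str());     // pass the C string
  return 0;
}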
......@@ -75,7 +75,6 @@ TEST(calib_cuda, int8_to_fp32) {
output.Resize({n, c, h, w});
output_cpu.Resize({n, c, h, w});
// initialize the data of input tensors
auto* x_data = x.mutable_data<int8_t>(TARGET(kCUDA));
auto* x_cpu_data = x_cpu.mutable_data<int8_t>();
for (int i = 0; i < x.dims().production(); i++) {
float sign = i % 3 == 0 ? -1.0f : 1.0f;
......@@ -131,7 +130,6 @@ TEST(calib_cuda, fp32_to_int8) {
output.Resize({n, c, h, w});
output_cpu.Resize({n, c, h, w});
// initialize the data of input tensors
auto* x_data = x.mutable_data<float>(TARGET(kCUDA));
auto* x_cpu_data = x_cpu.mutable_data<float>();
for (int i = 0; i < x.dims().production(); i++) {
float sign = i % 3 == 0 ? -1.0f : 1.0f;
......
......@@ -53,7 +53,6 @@ TEST(conv_compute, fp32) {
y_cpu.Resize({n, c_o, h_o, w_o});
bias_cpu.Resize({c_o});
auto* x_data = x.mutable_data<float>(TARGET(kCUDA));
auto* y_data = y.mutable_data<float>(TARGET(kCUDA));
float* x_cpu_data = x_cpu.mutable_data<float>();
float* filter_cpu_data = filter_cpu.mutable_data<float>();
......@@ -127,7 +126,6 @@ TEST(conv_compute, int8) {
y_cpu.Resize({1, 1, 1, c});
bias_cpu.Resize({c});
auto* x_data = x.mutable_data<int8_t>(TARGET(kCUDA));
auto* y_data = y.mutable_data<float>(TARGET(kCUDA));
auto* x_cpu_data = x_cpu.mutable_data<int8_t>();
auto* filter_cpu_data = filter_cpu.mutable_data<int8_t>();
......@@ -194,7 +192,6 @@ TEST(conv_compute, int8_int8_out) {
y_cpu.Resize({1, 1, 1, c});
bias_cpu.Resize({c});
auto* x_data = x.mutable_data<int8_t>(TARGET(kCUDA));
auto* y_data = y.mutable_data<int8_t>(TARGET(kCUDA));
auto* x_cpu_data = x_cpu.mutable_data<int8_t>();
auto* filter_cpu_data = filter_cpu.mutable_data<int8_t>();
......
......@@ -56,8 +56,6 @@ TEST(elementwise_add, normal) {
y_ref.Resize({n, c, h, w});
out_ref.Resize({n, c, h, w});
auto* x_data = x.mutable_data<float>(TARGET(kCUDA));
auto* y_data = y.mutable_data<float>(TARGET(kCUDA));
auto* out_data = out.mutable_data<float>(TARGET(kCUDA));
auto* x_cpu_data = x_cpu.mutable_data<float>();
......
......@@ -35,7 +35,6 @@ TEST(leaky_relu, normal) {
x_cpu.Resize({h, w});
y_cpu.Resize({h, w});
auto* x_data = x.mutable_data<float>(TARGET(kCUDA));
auto* y_data = y.mutable_data<float>(TARGET(kCUDA));
float* x_cpu_data = x_cpu.mutable_data<float>();
float* y_cpu_data = x_cpu.mutable_data<float>();
......
......@@ -80,7 +80,6 @@ TEST(nearest_interp, normal) {
Tensor x_ref, osz_ref, out_ref;
int n = 1, c = 3, in_h = 40, in_w = 40;
int in_chw = c * in_h * in_w;
int out_h = 80, out_w = 80;
float scale = 2.0;
......@@ -101,8 +100,6 @@ TEST(nearest_interp, normal) {
osz_ref.Resize({2});
out_ref.Resize({n, c, out_h, out_w});
auto* x_data = x.mutable_data<float>(TARGET(kCUDA));
auto* osz_data = osz.mutable_data<float>(TARGET(kCUDA));
auto* out_data = out.mutable_data<float>(TARGET(kCUDA));
float* x_cpu_data = x_cpu.mutable_data<float>();
......
......@@ -41,7 +41,6 @@ void nchw2nhwc_ref(lite::Tensor* input,
int input_c = input->dims()[1];
int input_h = input->dims()[2];
int input_w = input->dims()[3];
int output_n = output->dims()[0];
int output_c = output->dims()[1];
int output_h = output->dims()[2];
int output_w = output->dims()[3];
......@@ -75,7 +74,6 @@ void nhwc2nchw_ref(lite::Tensor* input,
int input_h = input->dims()[1];
int input_w = input->dims()[2];
int input_c = input->dims()[3];
int output_n = output->dims()[0];
int output_h = output->dims()[1];
int output_w = output->dims()[2];
int output_c = output->dims()[3];
......@@ -145,7 +143,6 @@ TEST(transpose_nchw, normal) {
x_ref.Resize({N, C, H, W});
out_ref.Resize({N, H, W, C});
auto* x_data = x.mutable_data<float>(TARGET(kCUDA));
auto* x_cpu_data = x_cpu.mutable_data<float>();
auto* out_cpu_data = out_cpu.mutable_data<float>();
auto* x_ref_data = x_ref.mutable_data<float>();
......@@ -200,7 +197,6 @@ TEST(transpose_nhwc, normal) {
x_ref.Resize({N, H, W, C});
out_ref.Resize({N, C, H, W});
auto* x_data = x.mutable_data<float>(TARGET(kCUDA));
auto* x_cpu_data = x_cpu.mutable_data<float>();
auto* out_cpu_data = out_cpu.mutable_data<float>();
auto* x_ref_data = x_ref.mutable_data<float>();
......@@ -253,7 +249,6 @@ TEST(transpose, normal) {
x_ref.Resize({C, H, W});
out_ref.Resize({W, C, H});
auto* x_data = x.mutable_data<float>(TARGET(kCUDA));
auto* x_cpu_data = x_cpu.mutable_data<float>();
auto* out_cpu_data = out_cpu.mutable_data<float>();
auto* x_ref_data = x_ref.mutable_data<float>();
......
......@@ -180,8 +180,6 @@ TEST(yolo_box, normal) {
boxes_ref.Resize({n, m, 4});
scores_ref.Resize({n, cls, m});
auto* x_data = x.mutable_data<float>(TARGET(kCUDA));
auto* sz_data = sz.mutable_data<float>(TARGET(kCUDA));
auto* boxes_data = boxes.mutable_data<float>(TARGET(kCUDA));
auto* scores_data = scores.mutable_data<float>(TARGET(kCUDA));
......
......@@ -64,6 +64,7 @@ bool Activate(const lite::Tensor* X, lite::Tensor* Out) {
auto x = lite::fluid::EigenVector<T>::Flatten(*X);
auto out = lite::fluid::EigenVector<T>::Flatten(*Out);
Functor()(place, x, out);
return true;
}
// square(x) = x^2
......
......@@ -293,7 +293,7 @@ TEST(NaiveBufferWrapper, ProgramDesc) {
// Set ProgramDesc
nb_desc0.SetVersion(1);
for (int i = 0; i < 3; ++i) {
auto* item = nb_desc0.AddBlock<proto::BlockDesc>();
nb_desc0.AddBlock<proto::BlockDesc>();
}
// Save model
......
......@@ -130,6 +130,7 @@ class OpDesc : public OpDescAPI {
DEF_ONE(LONGS);
default:
LOG(FATAL) << "Unknown attribute type";
return static_cast<AttrType>(-1);
}
#undef DEF_ONE
}
......
......@@ -97,6 +97,7 @@ VarDescAPI::VarDataType ParamDesc::GetDataType() const {
default:
LOG(FATAL) << "Unknown var data type";
}
return VarDescAPI::VarDataType();
#undef GET_DATA_TYPE_CASE_ITEM
}
......
......@@ -51,6 +51,7 @@ VarDescAPI::Type VarDesc::GetType() const {
GET_TYPE_CASE_ITEM(READER);
default:
LOG(FATAL) << "Unknown var type";
return VarDescAPI::Type();
}
#undef GET_TYPE_CASE_ITEM
}
......
......@@ -121,6 +121,7 @@ class OpDesc : public OpDescAPI {
DEF_ONE(LONGS);
default:
LOG(FATAL) << "Unknown attribute type";
return static_cast<AttrType>(-1);
}
#undef DEF_ONE
}
......
......@@ -39,6 +39,7 @@ VarDescAPI::Type VarDesc::GetType() const {
GET_TYPE_CASE_ITEM(READER);
default:
LOG(FATAL) << "Unknown var type";
return VarDescAPI::Type();
}
#undef GET_TYPE_CASE_ITEM
}
......
......@@ -32,7 +32,6 @@ bool GRUUnitOpLite::CheckShape() const {
auto hidden_prev_dims = param_.hidden_prev->dims();
auto weight_dims = param_.weight->dims();
int batch_size = input_dims[0];
int input_size = input_dims[1];
int frame_size = hidden_prev_dims[1];
int weight_height = weight_dims[0];
......
......@@ -29,7 +29,6 @@ bool Im2SequenceOp::CheckShape() const { return true; }
bool Im2SequenceOp::InferShape() const {
CHECK_OR_FALSE(param_.Out);
// TODO(Superjomn) Enable data sharing.
auto inputs = param_.X;
auto input_dims = param_.X->dims();
int img_num = input_dims[0];
int img_channels = input_dims[1];
......
......@@ -21,7 +21,7 @@ namespace operators {
bool IsEmptyOp::CheckShape() const { return true; }
bool IsEmptyOp::InferShape() const {}
bool IsEmptyOp::InferShape() const { return true; }
bool IsEmptyOp::AttachImpl(const cpp::OpDesc &opdesc, lite::Scope *scope) {
param_.X =
......
......@@ -33,7 +33,7 @@ template <typename T>
void GetSize(T start, T end, T step, int64_t* size) {
CHECK(!std::equal_to<T>()(step, 0))
<< "The step of range op should not be 0.";
CHECK(((start < end) && (step > 0)) || (start > end) && (step < 0))
CHECK(((start < end) && (step > 0)) || ((start > end) && (step < 0)))
<< "The step should be greater than 0 while start < end. And the "
"step should be less than 0 while start > end.";
*size = std::is_integral<T>::value
......
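The extra parentheses in GetSize do not change the meaning: && binds tighter than ||, so the old expression already parsed as ((start < end) && (step > 0)) || ((start > end) && (step < 0)). GCC and Clang nevertheless emit -Wparentheses on mixed &&/|| without explicit grouping, which becomes an error under -Werror. For instance:

#include <cassert>

int main() {
  int start = 5, end = 0, step = -1;
  // && binds tighter than ||, so both forms evaluate identically...
  bool implicit_form =
      (start < end) && (step > 0) || (start > end) && (step < 0);
  bool grouped_form =
      ((start < end) && (step > 0)) || ((start > end) && (step < 0));
  assert(implicit_form == grouped_form);
  // ...but only the grouped form compiles cleanly under -Wall -Werror,
  // since -Wparentheses asks for parentheses around && within ||.
  return grouped_form ? 0 : 1;
}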
......@@ -48,7 +48,6 @@ bool YoloBoxOp::CheckShape() const {
bool YoloBoxOp::InferShape() const {
auto* X = param_.X;
auto* ImgSize = param_.ImgSize;
auto anchors = param_.anchors;
int anchor_num = anchors.size() / 2;
auto class_num = param_.class_num;
......
......@@ -64,8 +64,6 @@ class AffineChannelComputeTester : public arena::TestCase {
if (data_layout_ == "NCHW") {
int channel = x_dims_[1];
int height = x_dims_[2];
int width = x_dims_[3];
int size = x_dims_[2] * x_dims_[3];
int in_channel = channel * size;
for (int n = 0; n < num; n++) {
......
......@@ -121,16 +121,10 @@ class BoxCoderComputeTester : public arena::TestCase {
auto* output_box = scope->NewTensor(output_box_);
CHECK(output_box);
output_box->Resize(target_box_dims_);
auto* output_box_data = output_box->mutable_data<float>();
auto* prior_box = scope->FindTensor(prior_box_);
const auto* prior_box_data = prior_box->data<float>();
auto* prior_box_var = scope->FindTensor(prior_box_var_);
const auto* prior_box_var_data = prior_box_var->data<float>();
auto* target_box = scope->FindTensor(target_box_);
const auto* target_box_data = target_box->data<float>();
box_coder_ref(output_box,
prior_box,
......
......@@ -45,10 +45,6 @@ class CastComputeTester : public arena::TestCase {
auto* output_data = out->mutable_data<float>();
auto* x = scope->FindTensor(input_);
auto* x_data = x->data<char>();
int num = x_dims_[0];
int channel = x_dims_[1];
int size = x_dims_[2] * x_dims_[3];
int in_channel = channel * size;
auto* output_data_tmp = output_data;
auto* x_data_tmp = x_data;
for (int i = 0; i < x_dims_.production(); i++) {
......@@ -60,10 +56,6 @@ class CastComputeTester : public arena::TestCase {
auto* output_data = out->mutable_data<float>();
auto* x = scope->FindTensor(input_);
auto* x_data = x->data<int32_t>();
int num = x_dims_[0];
int channel = x_dims_[1];
int size = x_dims_[2] * x_dims_[3];
int in_channel = channel * size;
auto* output_data_tmp = output_data;
auto* x_data_tmp = x_data;
for (int i = 0; i < x_dims_.production(); i++) {
......
......@@ -190,7 +190,6 @@ bool deconv_basic(const Dtype1* din,
auto* workspace_ptr = workspace_tensor.mutable_data<Dtype2>();
int group_size_in = win * hin * chin / group;
int group_size_out = wout * hout * chout / group;
int group_size_coldata = m * n;
int group_size_weights = chin * chout * kernel_w * kernel_h / (group * group);
bool flag_1x1s1p1 = (kernel_w == 1) && (kernel_h == 1) && (stride_h == 1) &&
......
......@@ -43,7 +43,6 @@ class ElementwiseComputeTester : public arena::TestCase {
auto* x = scope->FindTensor(inputx_);
const auto* x_data = x->data<float>();
auto* y = scope->FindTensor(inputy_);
const auto* y_data = x->data<float>();
for (int i = 0; i < dims_.production(); i++) {
......@@ -94,7 +93,6 @@ class ElementwiseSubComputeTester : public arena::TestCase {
auto* x = scope->FindTensor(inputx_);
const auto* x_data = x->data<float>();
auto* y = scope->FindTensor(inputy_);
const auto* y_data = x->data<float>();
for (int i = 0; i < dims_.production(); i++) {
......@@ -145,7 +143,6 @@ class ElementwiseMulComputeTester : public arena::TestCase {
auto* x = scope->FindTensor(inputx_);
const auto* x_data = x->data<float>();
auto* y = scope->FindTensor(inputy_);
const auto* y_data = x->data<float>();
for (int i = 0; i < dims_.production(); i++) {
......@@ -196,7 +193,6 @@ class ElementwiseMaxComputeTester : public arena::TestCase {
auto* x = scope->FindTensor(inputx_);
const auto* x_data = x->data<float>();
auto* y = scope->FindTensor(inputy_);
const auto* y_data = x->data<float>();
for (int i = 0; i < dims_.production(); i++) {
......@@ -249,7 +245,6 @@ class FusionElementwiseAddActivationComputeTester : public arena::TestCase {
auto* x = scope->FindTensor(inputx_);
const auto* x_data = x->data<float>();
auto* y = scope->FindTensor(inputy_);
const auto* y_data = x->data<float>();
for (int i = 0; i < dims_.production(); i++) {
......@@ -308,7 +303,6 @@ class FusionElementwiseSubActivationComputeTester : public arena::TestCase {
auto* x = scope->FindTensor(inputx_);
const auto* x_data = x->data<float>();
auto* y = scope->FindTensor(inputy_);
const auto* y_data = x->data<float>();
for (int i = 0; i < dims_.production(); i++) {
......@@ -367,7 +361,6 @@ class FusionElementwiseMulActivationComputeTester : public arena::TestCase {
auto* x = scope->FindTensor(inputx_);
const auto* x_data = x->data<float>();
auto* y = scope->FindTensor(inputy_);
const auto* y_data = x->data<float>();
for (int i = 0; i < dims_.production(); i++) {
......@@ -426,7 +419,6 @@ class FusionElementwiseMaxActivationComputeTester : public arena::TestCase {
auto* x = scope->FindTensor(inputx_);
const auto* x_data = x->data<float>();
auto* y = scope->FindTensor(inputy_);
const auto* y_data = x->data<float>();
for (int i = 0; i < dims_.production(); i++) {
......
......@@ -51,10 +51,10 @@ class FcOPTest : public arena::TestCase {
std::string weight_ = "w";
std::string bias_ = "b";
std::string out_ = "out";
int in_num_col_dims_{1};
DDim dims_{{1, 128}};
DDim wdims_{{128, 4}};
DDim bdims_{{4}};
int in_num_col_dims_{1};
public:
FcOPTest(const Place& place,
......
......@@ -243,11 +243,11 @@ class GRUUnitTester : public arena::TestCase {
std::string reset_hidden_prev_ = "reset_hidden_prev";
std::string hidden_ = "hidden";
DDim dims_{{16, 256 * 3}};
// 0: indentity; 1: sigmoid; 2: tanh; 3: relu
int gate_activation_{1};
int activation_{2};
bool origin_mode_{false};
DDim dims_{{16, 256 * 3}};
public:
GRUUnitTester(const Place& place,
......
......@@ -123,7 +123,6 @@ class LrnComputeTester : public arena::TestCase {
int H = dims_[2];
int W = dims_[3];
int pre_pad = (local_size_ - 1) / 2;
int offset_num = 0;
int offset_within_channel = 0;
int dst_id;
......
......@@ -120,12 +120,12 @@ class MatMulComputeTester : public arena::TestCase {
// common attributes for this op.
std::string x_ = "X";
std::string y_ = "Y";
std::string out_ = "Out";
DDim x_dims_;
DDim y_dims_;
bool x_transpose_;
bool y_transpose_;
float alpha_;
std::string out_ = "Out";
DDim x_dims_;
DDim y_dims_;
public:
MatMulComputeTester(const Place& place,
......
......@@ -26,8 +26,8 @@ class Pad2dComputeTester : public arena::TestCase {
std::string input_ = "X";
std::string output_ = "Out";
DDim dims_{{1, 1, 14, 14}};
std::vector<int> paddings_;
std::string mode_{"constant"};
std::vector<int> paddings_;
float pad_value_ = 0.f;
std::string data_format_{"NCHW"};
......
......@@ -125,7 +125,6 @@ void prior_box_compute_ref(const lite::Tensor* input,
if (fixed_size_.size() > 0) {
for (int s = 0; s < fixed_size_.size(); ++s) {
int fixed_size = fixed_size_[s];
int com_idx = 0;
box_width = fixed_size;
box_height = fixed_size;
......
......@@ -28,7 +28,7 @@ void reduce_n(const float* src,
int width_in) {
int hw_size = height_in * width_in;
int chw_size = channel_in * hw_size;
int data_index, src_index, src_index0;
int data_index, src_index;
for (int c = 0; c < channel_in; ++c) {
for (int h = 0; h < height_in; ++h) {
for (int w = 0; w < width_in; ++w) {
......@@ -196,9 +196,9 @@ class ReduceMaxComputeTester : public arena::TestCase {
std::string input_ = "x";
std::string output_ = "out";
std::vector<int> dim_{0};
DDim x_dims_{{3, 2, 3, 4}};
bool keep_dim_ = false;
bool reduce_all_ = false;
DDim x_dims_{{3, 2, 3, 4}};
public:
ReduceMaxComputeTester(const Place& place,
......
......@@ -28,7 +28,7 @@ void reduce_mean_n(const float* src,
int width_in) {
int hw_size = height_in * width_in;
int chw_size = channel_in * hw_size;
int data_index, src_index, src_index0;
int data_index, src_index;
for (int c = 0; c < channel_in; ++c) {
for (int h = 0; h < height_in; ++h) {
for (int w = 0; w < width_in; ++w) {
......@@ -195,8 +195,8 @@ class ReduceMeanComputeTester : public arena::TestCase {
std::string input_ = "x";
std::string output_ = "out";
std::vector<int> dim_{0};
DDim x_dims_{{3, 2, 3, 4}};
bool keep_dim_ = false;
DDim x_dims_{{3, 2, 3, 4}};
bool reduce_all_ = false;
public:
......
......@@ -25,10 +25,10 @@ class SequenceExpandComputeTester : public arena::TestCase {
const std::string input_x_ = "x";
const std::string input_y_ = "y";
const std::string output_ = "out";
int ref_level_ = -1;
DDim dims_{{4, 1}};
LoD lod_x_{{0, 2, 4}};
LoD lod_y_{{0, 1, 4}};
int ref_level_ = -1;
DDim dims_{{4, 1}};
public:
SequenceExpandComputeTester(const Place& place,
......@@ -50,7 +50,6 @@ class SequenceExpandComputeTester : public arena::TestCase {
const auto* x_data = x->data<float>();
(x->mutable_lod())->clear();
(x->mutable_lod())->push_back(lod_x_[0]);
int x_rank = dims_.size();
auto width = x->numel() / dims_[0];
auto lod_x = x->lod();
......@@ -59,7 +58,6 @@ class SequenceExpandComputeTester : public arena::TestCase {
for (int i = 0; i < lod_y_.size(); i++) {
(y->mutable_lod())->push_back(lod_y_[i]);
}
const auto* y_data = y->data<float>();
if (ref_level_ == -1) {
ref_level_ = lod_y_.size() - 1;
}
......
......@@ -25,9 +25,9 @@ class SequencePoolComputeTester : public arena::TestCase {
// common attributes for this op.
std::string input_ = "x";
std::string output_ = "out";
DDim dims_{{5, 1}};
LoD lod_{{0, 2, 5}};
std::string pool_type_ = "SUM";
DDim dims_{{5, 1}};
public:
SequencePoolComputeTester(const Place& place,
......
......@@ -60,10 +60,6 @@ bool test_gemm_int8(bool tra,
Tensor tc_basic_fp32;
Tensor tbias;
int lda = tra ? m : k;
int ldb = trb ? k : n;
int ldc = n;
ta.Resize({m, k});
tb.Resize({k, n});
tc_int8.Resize({m, n});
......@@ -94,6 +90,16 @@ bool test_gemm_int8(bool tra,
scale_merge_int8[j] = scale_merge_fp32[j] / scale_c[0];
}
LOG(INFO) << "gemm_int8 M: " << m << ", N: " << n << ", K: " << k
<< ", transA: " << (tra ? "true" : "false")
<< ", transB: " << (trb ? "true" : "false")
<< ", relu: " << (has_relu ? "true" : "false")
<< ", bias: " << (has_bias ? "true" : "false");
#ifdef LITE_WITH_ARM
int lda = tra ? m : k;
int ldb = trb ? k : n;
int ldc = n;
auto da = ta.mutable_data<int8_t>();
auto db = tb.mutable_data<int8_t>();
auto dc_int8 = tc_int8.mutable_data<int8_t>();
......@@ -102,12 +108,6 @@ bool test_gemm_int8(bool tra,
auto dc_basic_fp32 = tc_basic_fp32.mutable_data<float>();
auto dbias = tbias.mutable_data<float>();
LOG(INFO) << "gemm_int8 M: " << m << ", N: " << n << ", K: " << k
<< ", transA: " << (tra ? "true" : "false")
<< ", transB: " << (trb ? "true" : "false")
<< ", relu: " << (has_relu ? "true" : "false")
<< ", bias: " << (has_bias ? "true" : "false");
#ifdef LITE_WITH_ARM
if (FLAGS_check_result) {
Tensor ta_fp32;
Tensor tb_fp32;
......
......@@ -42,7 +42,7 @@ function prepare_workspace {
cp ../${DEBUG_TOOL_PATH_PREFIX}/analysis_tool.py ./${DEBUG_TOOL_PATH_PREFIX}/
# clone submodule
#git submodule update --init --recursive
# git submodule update --init --recursive
prepare_thirdparty
}
......
lite_cc_library(debug_utils SRCS debug_utils.cc DEPS op_params model_parser)
lite_cc_binary(lite_model_debug_tool SRCS model_debug_tool.cc
if(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK OR LITE_ON_MODEL_OPTIMIZE_TOOL)
lite_cc_binary(lite_model_debug_tool SRCS model_debug_tool.cc
DEPS
cxx_api
debug_utils
target_wrapper_host
mir_passes
gflags
logging
${ops} ${host_kernels}
X86_DEPS ${x86_kernels}
ARM_DEPS ${arm_kernels}
NPU_DEPS ${npu_kernels}
FPGA_DEPS ${fpga_kernels}
CL_DEPS ${opencl_kernels})
endif()
......@@ -16,9 +16,6 @@
#include <string>
#include <vector>
#include "lite/api/cxx_api.h"
#include "lite/api/paddle_use_kernels.h"
#include "lite/api/paddle_use_ops.h"
#include "lite/api/paddle_use_passes.h"
#include "lite/core/op_registry.h"
#include "lite/model_parser/model_parser.h"
#include "lite/model_parser/pb/program_desc.h"
......@@ -47,6 +44,9 @@ void Run(DebugConfig* conf) {
#endif
#ifdef LITE_WITH_FPGA
Place{TARGET(kFPGA), PRECISION(kFloat)},
#endif
#ifdef LITE_WITH_CUDA
Place{TARGET(kCUDA), PRECISION(kFloat)},
#endif
});
......@@ -68,6 +68,12 @@ void Run(DebugConfig* conf) {
#endif
#ifdef LITE_WITH_X86
Place{TARGET(kX86), PRECISION(kFloat)},
#endif
#ifdef LITE_WITH_FPGA
Place{TARGET(kFPGA), PRECISION(kFloat)},
#endif
#ifdef LITE_WITH_CUDA
Place{TARGET(kCUDA), PRECISION(kFloat)},
#endif
valid_places,
passes);
......
......@@ -18,6 +18,9 @@
*/
#pragma once
#ifndef _LOGGING_H_
#define _LOGGING_H_
#include <assert.h>
#include <sys/time.h>
#include <sys/types.h>
......@@ -183,3 +186,4 @@ class VoidifyFatal : public Voidify {
} // namespace lite
} // namespace paddle
#endif
......@@ -35,5 +35,5 @@
CHECK_GT((a), (b)) << paddle::lite::string_format("" __VA_ARGS__);
#ifndef PADDLE_THROW
#define PADDLE_THROW
#define PADDLE_THROW(...) printf("" __VA_ARGS__);
#endif
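The old object-like #define PADDLE_THROW made a call such as PADDLE_THROW("key %ld not found", key); expand to the bare comma expression ("key %ld not found", key); which compiles and silently discards the message. The variadic replacement actually prints, and the "" __VA_ARGS__ idiom concatenates an empty literal with the format string, so the first argument must be a string literal while printf("") stays valid with an empty list. Roughly:

#include <cstdio>

// Old form: `#define PADDLE_THROW` left the argument list behind as a bare
// comma expression, so the message was silently discarded.
// New form: "" __VA_ARGS__ concatenates string literals, which both forces
// the format to be a literal and keeps printf("") valid with no arguments.
#define PADDLE_THROW(...) printf("" __VA_ARGS__);

int main() {
  long key = 42;
  PADDLE_THROW("key %ld not found\n", key)  // prints: key 42 not found
  return 0;
}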