提交 4a948cfc 编写于 作者: 石晓伟 提交者: GitHub

add full_api_static target and fix building errors, test=develop (#2064)

* add full_api_static target and fix building errors, test=develop

* fix build errors, test=develop

* fix code style, test=develop

* fix lite/model_parser/pb/var_desc.cc, test=develop

* fix building errors, test=develop

* modify lite/tools/debug/CMakeLists.txt, test=develop
上级 26925ab9
...@@ -176,6 +176,7 @@ include(generic) # simplify cmake module ...@@ -176,6 +176,7 @@ include(generic) # simplify cmake module
include(ccache) # set ccache for compilation include(ccache) # set ccache for compilation
include(util) # set unittest and link libs include(util) # set unittest and link libs
include(version) # set PADDLE_VERSION include(version) # set PADDLE_VERSION
include(flags)
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -g -DNDEBUG") set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -g -DNDEBUG")
set(CMAKE_C_FLAGS_RELWITHDEBINFO "-O3 -g -DNDEBUG") set(CMAKE_C_FLAGS_RELWITHDEBINFO "-O3 -g -DNDEBUG")
......
...@@ -105,8 +105,8 @@ set_property(GLOBAL PROPERTY FLUID_MODULES "") ...@@ -105,8 +105,8 @@ set_property(GLOBAL PROPERTY FLUID_MODULES "")
function(find_fluid_modules TARGET_NAME) function(find_fluid_modules TARGET_NAME)
get_filename_component(__target_path ${TARGET_NAME} ABSOLUTE) get_filename_component(__target_path ${TARGET_NAME} ABSOLUTE)
string(REGEX REPLACE "^${PADDLE_SOURCE_DIR}/" "" __target_path ${__target_path}) string(REGEX REPLACE "^${PADDLE_SOURCE_DIR}/" "" __target_path ${__target_path})
string(FIND "${__target_path}" "fluid" pos) string(FIND "${__target_path}" "lite" pos)
if(pos GREATER 1) if((pos GREATER 0) OR (pos EQUAL 0))
get_property(fluid_modules GLOBAL PROPERTY FLUID_MODULES) get_property(fluid_modules GLOBAL PROPERTY FLUID_MODULES)
set(fluid_modules ${fluid_modules} ${TARGET_NAME}) set(fluid_modules ${fluid_modules} ${TARGET_NAME})
set_property(GLOBAL PROPERTY FLUID_MODULES "${fluid_modules}") set_property(GLOBAL PROPERTY FLUID_MODULES "${fluid_modules}")
...@@ -369,6 +369,7 @@ function(cc_binary TARGET_NAME) ...@@ -369,6 +369,7 @@ function(cc_binary TARGET_NAME)
endif() endif()
get_property(os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES) get_property(os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES)
target_link_libraries(${TARGET_NAME} ${os_dependency_modules}) target_link_libraries(${TARGET_NAME} ${os_dependency_modules})
find_fluid_modules(${TARGET_NAME})
endfunction(cc_binary) endfunction(cc_binary)
function(cc_test TARGET_NAME) function(cc_test TARGET_NAME)
......
...@@ -126,12 +126,12 @@ function(lite_cc_library TARGET) ...@@ -126,12 +126,12 @@ function(lite_cc_library TARGET)
) )
if (args_SHARED OR ARGS_shared) if (args_SHARED OR ARGS_shared)
cc_library(${TARGET} SRCS ${args_SRCS} DEPS ${deps} ${args_DEPS} SHARED) cc_library(${TARGET} SRCS ${args_SRCS} DEPS ${deps} SHARED)
elseif (args_MODULE OR ARGS_module) elseif (args_MODULE OR ARGS_module)
add_library(${TARGET} MODULE ${args_SRCS}) add_library(${TARGET} MODULE ${args_SRCS})
add_dependencies(${TARGET} ${deps} ${args_DEPS}) add_dependencies(${TARGET} ${deps} ${args_DEPS})
else() else()
cc_library(${TARGET} SRCS ${args_SRCS} DEPS ${deps} ${args_DEPS}) cc_library(${TARGET} SRCS ${args_SRCS} DEPS ${deps})
endif() endif()
target_compile_options(${TARGET} BEFORE PRIVATE -Wno-ignored-qualifiers) target_compile_options(${TARGET} BEFORE PRIVATE -Wno-ignored-qualifiers)
...@@ -163,7 +163,7 @@ function(lite_cc_binary TARGET) ...@@ -163,7 +163,7 @@ function(lite_cc_binary TARGET)
LIGHT_DEPS ${args_LIGHT_DEPS} LIGHT_DEPS ${args_LIGHT_DEPS}
HVY_DEPS ${args_HVY_DEPS} HVY_DEPS ${args_HVY_DEPS}
) )
cc_binary(${TARGET} SRCS ${args_SRCS} DEPS ${deps} ${args_DEPS}) cc_binary(${TARGET} SRCS ${args_SRCS} DEPS ${deps})
target_compile_options(${TARGET} BEFORE PRIVATE -Wno-ignored-qualifiers) target_compile_options(${TARGET} BEFORE PRIVATE -Wno-ignored-qualifiers)
if (NOT APPLE) if (NOT APPLE)
# strip binary target to reduce size # strip binary target to reduce size
......
...@@ -210,6 +210,8 @@ if (NOT LITE_ON_TINY_PUBLISH) ...@@ -210,6 +210,8 @@ if (NOT LITE_ON_TINY_PUBLISH)
FPGA_DEPS ${fpga_kernels}) FPGA_DEPS ${fpga_kernels})
# The final inference library for just MobileConfig. # The final inference library for just MobileConfig.
bundle_static_library(paddle_api_full paddle_api_full_bundled bundle_full_api) bundle_static_library(paddle_api_full paddle_api_full_bundled bundle_full_api)
get_property(fluid_modules GLOBAL PROPERTY FLUID_MODULES)
cc_library(api_full_static SRCS DEPS paddle_api_full cxx_api paddle_api light_api ${cxx_api_deps} ${ops} ${host_kernels} ${cuda_kernels} program tensor memory naive_buffer types ${fluid_modules} protobuf)
endif() endif()
bundle_static_library(paddle_api_light paddle_api_light_bundled bundle_light_api) bundle_static_library(paddle_api_light paddle_api_light_bundled bundle_light_api)
#----------------------------------------------------------------------------------------------------- #-----------------------------------------------------------------------------------------------------
......
...@@ -18,9 +18,6 @@ ...@@ -18,9 +18,6 @@
#include <string> #include <string>
#include <vector> #include <vector>
#include "lite/api/paddle_api.h" #include "lite/api/paddle_api.h"
#include "lite/api/paddle_use_kernels.h"
#include "lite/api/paddle_use_ops.h"
#include "lite/api/paddle_use_passes.h"
#include "lite/api/test_helper.h" #include "lite/api/test_helper.h"
#include "lite/core/device_info.h" #include "lite/core/device_info.h"
#include "lite/utils/cp_logging.h" #include "lite/utils/cp_logging.h"
......
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
#include "lite/api/cxx_api.h" #include "lite/api/cxx_api.h"
#include <string> #include <string>
#include "lite/api/paddle_api.h" #include "lite/api/paddle_api.h"
#include "lite/core/device_info.h"
#include "lite/core/version.h" #include "lite/core/version.h"
namespace paddle { namespace paddle {
...@@ -49,6 +50,9 @@ class CxxPaddleApiImpl : public lite_api::PaddlePredictor { ...@@ -49,6 +50,9 @@ class CxxPaddleApiImpl : public lite_api::PaddlePredictor {
CxxPaddleApiImpl::CxxPaddleApiImpl() {} CxxPaddleApiImpl::CxxPaddleApiImpl() {}
void CxxPaddleApiImpl::Init(const lite_api::CxxConfig &config) { void CxxPaddleApiImpl::Init(const lite_api::CxxConfig &config) {
#ifdef LITE_WITH_CUDA
Env<TARGET(kCUDA)>::Init();
#endif
auto places = config.valid_places(); auto places = config.valid_places();
places.emplace_back(TARGET(kHost), PRECISION(kAny), DATALAYOUT(kAny)); places.emplace_back(TARGET(kHost), PRECISION(kAny), DATALAYOUT(kAny));
raw_predictor_.Build(config, places); raw_predictor_.Build(config, places);
......
...@@ -16,9 +16,6 @@ ...@@ -16,9 +16,6 @@
#include <string> #include <string>
#include <vector> #include <vector>
#include "lite/api/paddle_api.h" #include "lite/api/paddle_api.h"
#include "lite/api/paddle_use_kernels.h"
#include "lite/api/paddle_use_ops.h"
#include "lite/api/paddle_use_passes.h"
#include "lite/api/test_helper.h" #include "lite/api/test_helper.h"
#include "lite/core/device_info.h" #include "lite/core/device_info.h"
#include "lite/utils/cp_logging.h" #include "lite/utils/cp_logging.h"
......
...@@ -286,7 +286,6 @@ bool CudnnConv2DInt8<Ptype_out>::create(const operators::ConvParam& param, ...@@ -286,7 +286,6 @@ bool CudnnConv2DInt8<Ptype_out>::create(const operators::ConvParam& param,
} }
} }
this->scale_.Resize({oc}); this->scale_.Resize({oc});
auto* scale_data = this->scale_.template mutable_data<float>(TARGET(kCUDA));
this->scale_.template Assign<float, lite::DDim, TARGET(kCUDA)>( this->scale_.template Assign<float, lite::DDim, TARGET(kCUDA)>(
weight_scale.data(), this->scale_.dims()); weight_scale.data(), this->scale_.dims());
......
...@@ -32,17 +32,17 @@ class CudnnConv2DBase { ...@@ -32,17 +32,17 @@ class CudnnConv2DBase {
public: public:
CudnnConv2DBase() CudnnConv2DBase()
: handle_(NULL), : handle_(NULL),
workspace_data_(NULL), fwd_algo_((cudnnConvolutionFwdAlgo_t)0),
workspace_(NULL),
conv_desc_(NULL),
input_desc_(NULL), input_desc_(NULL),
output_desc_(NULL), output_desc_(NULL),
bias_desc_(NULL),
filter_desc_(NULL), filter_desc_(NULL),
conv_desc_(NULL),
act_desc_(NULL), act_desc_(NULL),
bias_desc_(NULL), workspace_data_(NULL),
workspace_(NULL),
workspace_fwd_sizes_(0), workspace_fwd_sizes_(0),
workspace_size_inbytes_(0), workspace_size_inbytes_(0) {}
fwd_algo_((cudnnConvolutionFwdAlgo_t)0) {}
~CudnnConv2DBase() { ~CudnnConv2DBase() {
if (conv_desc_) { if (conv_desc_) {
...@@ -85,10 +85,10 @@ class CudnnConv2DBase { ...@@ -85,10 +85,10 @@ class CudnnConv2DBase {
cudnnActivationDescriptor_t act_desc_; cudnnActivationDescriptor_t act_desc_;
bool with_relu_act_{true}; bool with_relu_act_{true};
size_t workspace_fwd_sizes_;
size_t workspace_size_inbytes_; // size of underlying storage
void* workspace_data_; // underlying storage void* workspace_data_; // underlying storage
void* workspace_; // aliases into _workspaceData void* workspace_; // aliases into _workspaceData
size_t workspace_fwd_sizes_;
size_t workspace_size_inbytes_; // size of underlying storage
const bool use_tensor_core_ = true; const bool use_tensor_core_ = true;
const size_t workspace_limit_bytes_ = 4 * 1024 * 1024; const size_t workspace_limit_bytes_ = 4 * 1024 * 1024;
...@@ -104,6 +104,7 @@ template <PrecisionType Ptype_out> ...@@ -104,6 +104,7 @@ template <PrecisionType Ptype_out>
class CudnnConv2D : public CudnnConv2DBase<Ptype_out> { class CudnnConv2D : public CudnnConv2DBase<Ptype_out> {
public: public:
CudnnConv2D() : CudnnConv2DBase<Ptype_out>() {} CudnnConv2D() : CudnnConv2DBase<Ptype_out>() {}
virtual ~CudnnConv2D() = default;
virtual bool init(const operators::ConvParam& param, virtual bool init(const operators::ConvParam& param,
Context<TARGET(kCUDA)>* ctx); Context<TARGET(kCUDA)>* ctx);
...@@ -117,6 +118,7 @@ template <PrecisionType Ptype_out> ...@@ -117,6 +118,7 @@ template <PrecisionType Ptype_out>
class CudnnConv2DInt8 : CudnnConv2DBase<Ptype_out> { class CudnnConv2DInt8 : CudnnConv2DBase<Ptype_out> {
public: public:
CudnnConv2DInt8() : CudnnConv2DBase<Ptype_out>() {} CudnnConv2DInt8() : CudnnConv2DBase<Ptype_out>() {}
virtual ~CudnnConv2DInt8() = default;
virtual bool init(const operators::ConvParam& param, virtual bool init(const operators::ConvParam& param,
Context<TARGET(kCUDA)>* ctx); Context<TARGET(kCUDA)>* ctx);
......
...@@ -153,7 +153,8 @@ static inline void* GetDsoHandleFromSearchPath(const std::string& search_root, ...@@ -153,7 +153,8 @@ static inline void* GetDsoHandleFromSearchPath(const std::string& search_root,
dso_handle = GetDsoHandleFromDefaultPath(dlPath, dynload_flags); dso_handle = GetDsoHandleFromDefaultPath(dlPath, dynload_flags);
} }
} }
auto error_msg = /*
auto error_msg =
"Failed to find dynamic library: %s ( %s ) \n Please specify " "Failed to find dynamic library: %s ( %s ) \n Please specify "
"its path correctly using following ways: \n Method. set " "its path correctly using following ways: \n Method. set "
"environment variable LD_LIBRARY_PATH on Linux or " "environment variable LD_LIBRARY_PATH on Linux or "
...@@ -161,8 +162,9 @@ static inline void* GetDsoHandleFromSearchPath(const std::string& search_root, ...@@ -161,8 +162,9 @@ static inline void* GetDsoHandleFromSearchPath(const std::string& search_root,
"export LD_LIBRARY_PATH=... \n Note: After Mac OS 10.11, " "export LD_LIBRARY_PATH=... \n Note: After Mac OS 10.11, "
"using the DYLD_LIBRARY_PATH is impossible unless System " "using the DYLD_LIBRARY_PATH is impossible unless System "
"Integrity Protection (SIP) is disabled."; "Integrity Protection (SIP) is disabled.";
*/
#if !defined(_WIN32) #if !defined(_WIN32)
auto errorno = dlerror(); // auto errorno = dlerror();
#else #else
auto errorno = GetLastError(); auto errorno = GetLastError();
#endif // !_WIN32 #endif // !_WIN32
......
...@@ -49,6 +49,7 @@ class BeamSearchFunctor<TARGET(kX86), T> { ...@@ -49,6 +49,7 @@ class BeamSearchFunctor<TARGET(kX86), T> {
end_id, end_id,
is_accumulated); is_accumulated);
auto selected_items = ToMap(items, high_level.back()); auto selected_items = ToMap(items, high_level.back());
/*
if (FLAGS_v == 3) { if (FLAGS_v == 3) {
VLOG(3) << "selected_items:"; VLOG(3) << "selected_items:";
for (size_t i = 0; i < selected_items.size(); ++i) { for (size_t i = 0; i < selected_items.size(); ++i) {
...@@ -58,6 +59,7 @@ class BeamSearchFunctor<TARGET(kX86), T> { ...@@ -58,6 +59,7 @@ class BeamSearchFunctor<TARGET(kX86), T> {
} }
} }
} }
*/
PruneEndBeams(pre_ids, abs_lod, &selected_items, level, end_id); PruneEndBeams(pre_ids, abs_lod, &selected_items, level, end_id);
// calculate the output tensor's height // calculate the output tensor's height
...@@ -69,7 +71,8 @@ class BeamSearchFunctor<TARGET(kX86), T> { ...@@ -69,7 +71,8 @@ class BeamSearchFunctor<TARGET(kX86), T> {
// the output tensor shape should be [num_instances, 1] // the output tensor shape should be [num_instances, 1]
// auto dims = framework::make_ddim( // auto dims = framework::make_ddim(
// std::vector<int64_t>({static_cast<int>(num_instances), 1})); // std::vector<int64_t>({static_cast<int>(num_instances), 1}));
lite::DDim dims(std::vector<int64_t>({num_instances, 1L})); lite::DDim dims(
std::vector<int64_t>({static_cast<int>(num_instances), 1L}));
selected_ids->Resize(dims); selected_ids->Resize(dims);
auto *selected_ids_data = selected_ids->mutable_data<int64_t>(TARGET(kX86)); auto *selected_ids_data = selected_ids->mutable_data<int64_t>(TARGET(kX86));
...@@ -296,7 +299,7 @@ class BeamSearchFunctor<TARGET(kX86), T> { ...@@ -296,7 +299,7 @@ class BeamSearchFunctor<TARGET(kX86), T> {
result.emplace_back(top_beam); result.emplace_back(top_beam);
} }
/*
if (FLAGS_v == 3) { if (FLAGS_v == 3) {
VLOG(3) << "SelectTopBeamSizeItems result size " << result.size(); VLOG(3) << "SelectTopBeamSizeItems result size " << result.size();
for (auto &items : result) { for (auto &items : result) {
...@@ -306,7 +309,7 @@ class BeamSearchFunctor<TARGET(kX86), T> { ...@@ -306,7 +309,7 @@ class BeamSearchFunctor<TARGET(kX86), T> {
} }
} }
} }
*/
return result; return result;
} }
}; };
......
...@@ -48,6 +48,7 @@ inline ActivationType GetActivationType(const std::string &type) { ...@@ -48,6 +48,7 @@ inline ActivationType GetActivationType(const std::string &type) {
LOG(ERROR) << "Not support type " << type; LOG(ERROR) << "Not support type " << type;
// PADDLE_ENFORCE(false, "Not support type %s", type); // PADDLE_ENFORCE(false, "Not support type %s", type);
// PADDLE_THROW("Not support type %s.", type); // PADDLE_THROW("Not support type %s.", type);
return ActivationType();
} }
namespace forward { namespace forward {
......
...@@ -107,7 +107,8 @@ class Tree2ColFunctor<lite::TargetType::kX86, T> { ...@@ -107,7 +107,8 @@ class Tree2ColFunctor<lite::TargetType::kX86, T> {
// patch->mutable_data<T>({static_cast<int64_t>(patch_size), // patch->mutable_data<T>({static_cast<int64_t>(patch_size),
// static_cast<int64_t>(patch_elem_size)}, // static_cast<int64_t>(patch_elem_size)},
// cpu_place); // cpu_place);
patch->Resize({static_cast<int64_t>(patch_size, patch_elem_size)}); patch->Resize({static_cast<int64_t>(patch_size),
static_cast<int64_t>(patch_elem_size)});
auto *patch_data = patch->mutable_data<T>(lite::TargetType::kX86); auto *patch_data = patch->mutable_data<T>(lite::TargetType::kX86);
constant(context, patch, 0); constant(context, patch, 0);
const T *features = node_features.data<T>(); const T *features = node_features.data<T>();
......
...@@ -42,6 +42,7 @@ class TestCase { ...@@ -42,6 +42,7 @@ class TestCase {
: place_(place), scope_(new Scope), alias_(alias) { : place_(place), scope_(new Scope), alias_(alias) {
ctx_ = ContextScheduler::Global().NewContext(place_.target); ctx_ = ContextScheduler::Global().NewContext(place_.target);
} }
virtual ~TestCase() {}
void Prepare() { void Prepare() {
PrepareScopes(); PrepareScopes();
...@@ -138,20 +139,18 @@ class TestCase { ...@@ -138,20 +139,18 @@ class TestCase {
} }
private: private:
Place place_;
std::shared_ptr<Scope> scope_; std::shared_ptr<Scope> scope_;
std::string alias_;
// The workspace for the Instruction. // The workspace for the Instruction.
Scope* inst_scope_{}; Scope* inst_scope_{};
// The workspace for the baseline implementation. // The workspace for the baseline implementation.
Scope* base_scope_{}; Scope* base_scope_{};
std::unique_ptr<cpp::OpDesc> op_desc_; std::unique_ptr<cpp::OpDesc> op_desc_;
std::unique_ptr<Instruction> instruction_; std::unique_ptr<Instruction> instruction_;
Place place_;
std::string alias_;
}; };
class Arena { class Arena {
float abs_error_{};
public: public:
Arena(std::unique_ptr<TestCase>&& tester, Arena(std::unique_ptr<TestCase>&& tester,
const Place& place, const Place& place,
...@@ -203,12 +202,14 @@ class Arena { ...@@ -203,12 +202,14 @@ class Arena {
default: default:
LOG(FATAL) << "not support type " << PrecisionToStr(type->precision()); LOG(FATAL) << "not support type " << PrecisionToStr(type->precision());
return false;
} }
} }
private: private:
std::unique_ptr<TestCase> tester_; std::unique_ptr<TestCase> tester_;
Place place_; Place place_;
float abs_error_;
}; };
template <typename T> template <typename T>
......
...@@ -192,10 +192,10 @@ class Context<TargetType::kCUDA> { ...@@ -192,10 +192,10 @@ class Context<TargetType::kCUDA> {
ctx->cublas_fp32_ = cublas_fp32_; ctx->cublas_fp32_ = cublas_fp32_;
} }
const cudaStream_t exec_stream() { return exec_stream_; } const cudaStream_t& exec_stream() const { return exec_stream_; }
void SetExecStream(cudaStream_t stream) { exec_stream_ = stream; } void SetExecStream(cudaStream_t stream) { exec_stream_ = stream; }
const cudaStream_t io_stream() { return io_stream_; } const cudaStream_t& io_stream() const { return io_stream_; }
void SetIoStream(cudaStream_t stream) { io_stream_ = stream; } void SetIoStream(cudaStream_t stream) { io_stream_ = stream; }
std::shared_ptr<cuda::Blas<float>> cublas_fp32() { return cublas_fp32_; } std::shared_ptr<cuda::Blas<float>> cublas_fp32() { return cublas_fp32_; }
......
...@@ -167,7 +167,7 @@ class Device<TARGET(kCUDA)> { ...@@ -167,7 +167,7 @@ class Device<TARGET(kCUDA)> {
int id() { return idx_; } int id() { return idx_; }
int max_stream() { return max_stream_; } int max_stream() { return max_stream_; }
int SetId(int idx) { idx_ = idx; } void SetId(int idx) { idx_ = idx; }
std::string name() { return device_prop_.name; } std::string name() { return device_prop_.name; }
int core_num() { return device_prop_.multiProcessorCount; } int core_num() { return device_prop_.multiProcessorCount; }
float max_memory() { return device_prop_.totalGlobalMem / 1048576.; } float max_memory() { return device_prop_.totalGlobalMem / 1048576.; }
...@@ -186,8 +186,8 @@ class Device<TARGET(kCUDA)> { ...@@ -186,8 +186,8 @@ class Device<TARGET(kCUDA)> {
void GetInfo(); void GetInfo();
private: private:
int max_stream_;
int idx_{0}; int idx_{0};
int max_stream_;
cudaDeviceProp device_prop_; cudaDeviceProp device_prop_;
std::string device_name_; std::string device_name_;
float max_memory_; float max_memory_;
......
...@@ -46,7 +46,7 @@ std::set<T> Types<T>::ValidSet(const T& element) const { ...@@ -46,7 +46,7 @@ std::set<T> Types<T>::ValidSet(const T& element) const {
return std::set<T>({element}); return std::set<T>({element});
} }
bool ExpandPlaces(std::set<Place>* places, const Place& place) { void ExpandPlaces(std::set<Place>* places, const Place& place) {
static const Types<TargetType> target_set({TARGET(kHost), static const Types<TargetType> target_set({TARGET(kHost),
TARGET(kX86), TARGET(kX86),
TARGET(kCUDA), TARGET(kCUDA),
......
...@@ -278,19 +278,21 @@ int SubgraphProgramPass::FuseSubgraphID( ...@@ -278,19 +278,21 @@ int SubgraphProgramPass::FuseSubgraphID(
const std::unique_ptr<SSAGraph>& graph) { const std::unique_ptr<SSAGraph>& graph) {
int sub_id = 1; // id start from 1 not 0 int sub_id = 1; // id start from 1 not 0
for (auto& item : graph->StmtTopologicalOrder()) { for (auto& item : graph->StmtTopologicalOrder()) {
bool inputvar = 0; // bool inputvar = false;
if (!item->IsStmt()) continue; if (!item->IsStmt()) continue;
auto& stmt = item->AsStmt(); auto& stmt = item->AsStmt();
/*
if (stmt.subgraph_id() == -1) { if (stmt.subgraph_id() == -1) {
for (auto& i : item->outlinks) { for (auto& i : item->outlinks) {
for (auto& j : i->outlinks) { for (auto& j : i->outlinks) {
if (j->IsStmt()) { if (j->IsStmt()) {
auto& jstmt = j->AsStmt(); auto& jstmt = j->AsStmt();
if (jstmt.subgraph_id() == 0) inputvar = 1; if (jstmt.subgraph_id() == 0) inputvar = true;
} }
} }
} }
} }
*/
if (stmt.subgraph_id() != 0) continue; if (stmt.subgraph_id() != 0) continue;
ChangeAllOutConnectedID(item, sub_id); ChangeAllOutConnectedID(item, sub_id);
sub_id++; sub_id++;
......
...@@ -214,7 +214,6 @@ TEST(SubGraphTest, SimpleNet) { ...@@ -214,7 +214,6 @@ TEST(SubGraphTest, SimpleNet) {
auto* pass = new mir::subgraph::SubgraphProgramPass; auto* pass = new mir::subgraph::SubgraphProgramPass;
ASSERT_EQ(pass->FuseSubgraph(graph, supported_op_types), 1); ASSERT_EQ(pass->FuseSubgraph(graph, supported_op_types), 1);
const int num_nodes = graph->nodes().size();
ASSERT_EQ(graph->nodes().size(), 9); ASSERT_EQ(graph->nodes().size(), 9);
// LOG(INFO) << "After NPU Pass \n" << Visualize(graph.get()); // LOG(INFO) << "After NPU Pass \n" << Visualize(graph.get());
} }
......
...@@ -110,7 +110,6 @@ void TypeLayoutTransformPass::AddLayoutInst( ...@@ -110,7 +110,6 @@ void TypeLayoutTransformPass::AddLayoutInst(
bool is_found = false; bool is_found = false;
for (auto& kernel : kernels) { for (auto& kernel : kernels) {
const Type* in_arg_ty = kernel->GetInputDeclType("Input"); const Type* in_arg_ty = kernel->GetInputDeclType("Input");
const Type* out_arg_ty = kernel->GetOutputDeclType("Out");
if (TypeCompatible(*in_arg_ty, from)) { if (TypeCompatible(*in_arg_ty, from)) {
is_found = true; is_found = true;
selected_kernels.emplace_back(std::move(kernel)); selected_kernels.emplace_back(std::move(kernel));
......
...@@ -114,7 +114,6 @@ void TypeTargetTransformPass::AddIoCopyInst( ...@@ -114,7 +114,6 @@ void TypeTargetTransformPass::AddIoCopyInst(
std::vector<std::unique_ptr<KernelBase>> selected_kernels; std::vector<std::unique_ptr<KernelBase>> selected_kernels;
for (auto& kernel : kernels) { for (auto& kernel : kernels) {
const Type* in_arg_ty = kernel->GetInputDeclType("Input"); const Type* in_arg_ty = kernel->GetInputDeclType("Input");
const Type* out_arg_ty = kernel->GetOutputDeclType("Out");
if (TypeCompatible(*in_arg_ty, from)) { if (TypeCompatible(*in_arg_ty, from)) {
is_found = true; is_found = true;
selected_kernels.emplace_back(std::move(kernel)); selected_kernels.emplace_back(std::move(kernel));
......
...@@ -209,7 +209,7 @@ class KernelRegistry final { ...@@ -209,7 +209,7 @@ class KernelRegistry final {
ss << "Count of kernel kinds: "; ss << "Count of kernel kinds: ";
int count = 0; int count = 0;
for (auto &item : kernel_info_map_) { for (auto &item : kernel_info_map_) {
for (auto &kernel : item.second) ++count; count += item.second.size();
} }
ss << count << "\n"; ss << count << "\n";
......
...@@ -68,6 +68,7 @@ framework::proto::VarType::Type ToDataType(std::type_index type) { ...@@ -68,6 +68,7 @@ framework::proto::VarType::Type ToDataType(std::type_index type) {
return it->second; return it->second;
} }
PADDLE_THROW("Not support %s as tensor type", type.name()); PADDLE_THROW("Not support %s as tensor type", type.name());
return static_cast<framework::proto::VarType::Type>(-1);
} }
std::type_index ToTypeIndex(framework::proto::VarType::Type type) { std::type_index ToTypeIndex(framework::proto::VarType::Type type) {
...@@ -77,6 +78,7 @@ std::type_index ToTypeIndex(framework::proto::VarType::Type type) { ...@@ -77,6 +78,7 @@ std::type_index ToTypeIndex(framework::proto::VarType::Type type) {
} }
PADDLE_THROW("Not support framework::proto::VarType::Type(%d) as tensor type", PADDLE_THROW("Not support framework::proto::VarType::Type(%d) as tensor type",
static_cast<int>(type)); static_cast<int>(type));
return std::type_index(typeid(void));
} }
std::string DataTypeToString(const framework::proto::VarType::Type type) { std::string DataTypeToString(const framework::proto::VarType::Type type) {
...@@ -86,6 +88,7 @@ std::string DataTypeToString(const framework::proto::VarType::Type type) { ...@@ -86,6 +88,7 @@ std::string DataTypeToString(const framework::proto::VarType::Type type) {
} }
PADDLE_THROW("Not support framework::proto::VarType::Type(%d) as tensor type", PADDLE_THROW("Not support framework::proto::VarType::Type(%d) as tensor type",
static_cast<int>(type)); static_cast<int>(type));
return std::string();
} }
size_t SizeOfType(framework::proto::VarType::Type type) { size_t SizeOfType(framework::proto::VarType::Type type) {
...@@ -93,7 +96,8 @@ size_t SizeOfType(framework::proto::VarType::Type type) { ...@@ -93,7 +96,8 @@ size_t SizeOfType(framework::proto::VarType::Type type) {
if (it != gDataTypeMap().proto_to_size_.end()) { if (it != gDataTypeMap().proto_to_size_.end()) {
return it->second; return it->second;
} }
PADDLE_THROW("Not support %s as tensor type", DataTypeToString(type)); PADDLE_THROW("Not support %s as tensor type", DataTypeToString(type).c_str());
return 0;
} }
} // namespace fluid } // namespace fluid
......
...@@ -163,7 +163,7 @@ int64_t SelectedRows::AutoGrownIndex(int64_t key, ...@@ -163,7 +163,7 @@ int64_t SelectedRows::AutoGrownIndex(int64_t key,
if (iter == id_to_index_.end()) { if (iter == id_to_index_.end()) {
rwlock_->UNLock(); rwlock_->UNLock();
if (!auto_grown) { if (!auto_grown) {
PADDLE_THROW("key %d not found", key); PADDLE_THROW("key %ld not found", key);
} }
rwlock_->WRLock(); rwlock_->WRLock();
auto map_size = id_to_index_.size(); auto map_size = id_to_index_.size();
...@@ -171,7 +171,7 @@ int64_t SelectedRows::AutoGrownIndex(int64_t key, ...@@ -171,7 +171,7 @@ int64_t SelectedRows::AutoGrownIndex(int64_t key,
if (map_size != vector_size) { if (map_size != vector_size) {
rwlock_->UNLock(); rwlock_->UNLock();
PADDLE_THROW( PADDLE_THROW(
"id_to_index_ size %d should have the same size with rows_ %d", "id_to_index_ size %lu should have the same size with rows_ %lu",
map_size, map_size,
vector_size); vector_size);
} }
......
...@@ -82,7 +82,7 @@ class SelectedRows { ...@@ -82,7 +82,7 @@ class SelectedRows {
int64_t Index(int64_t key) const { int64_t Index(int64_t key) const {
auto it = std::find(rows_.begin(), rows_.end(), key); auto it = std::find(rows_.begin(), rows_.end(), key);
if (it == rows_.end()) { if (it == rows_.end()) {
PADDLE_THROW("id %s not in table", key); PADDLE_THROW("id %ld not in table", key);
} }
return static_cast<int64_t>(std::distance(rows_.begin(), it)); return static_cast<int64_t>(std::distance(rows_.begin(), it));
} }
......
...@@ -75,7 +75,6 @@ TEST(calib_cuda, int8_to_fp32) { ...@@ -75,7 +75,6 @@ TEST(calib_cuda, int8_to_fp32) {
output.Resize({n, c, h, w}); output.Resize({n, c, h, w});
output_cpu.Resize({n, c, h, w}); output_cpu.Resize({n, c, h, w});
// initialize the data of input tensors // initialize the data of input tensors
auto* x_data = x.mutable_data<int8_t>(TARGET(kCUDA));
auto* x_cpu_data = x_cpu.mutable_data<int8_t>(); auto* x_cpu_data = x_cpu.mutable_data<int8_t>();
for (int i = 0; i < x.dims().production(); i++) { for (int i = 0; i < x.dims().production(); i++) {
float sign = i % 3 == 0 ? -1.0f : 1.0f; float sign = i % 3 == 0 ? -1.0f : 1.0f;
...@@ -131,7 +130,6 @@ TEST(calib_cuda, fp32_to_int8) { ...@@ -131,7 +130,6 @@ TEST(calib_cuda, fp32_to_int8) {
output.Resize({n, c, h, w}); output.Resize({n, c, h, w});
output_cpu.Resize({n, c, h, w}); output_cpu.Resize({n, c, h, w});
// initialize the data of input tensors // initialize the data of input tensors
auto* x_data = x.mutable_data<float>(TARGET(kCUDA));
auto* x_cpu_data = x_cpu.mutable_data<float>(); auto* x_cpu_data = x_cpu.mutable_data<float>();
for (int i = 0; i < x.dims().production(); i++) { for (int i = 0; i < x.dims().production(); i++) {
float sign = i % 3 == 0 ? -1.0f : 1.0f; float sign = i % 3 == 0 ? -1.0f : 1.0f;
......
...@@ -53,7 +53,6 @@ TEST(conv_compute, fp32) { ...@@ -53,7 +53,6 @@ TEST(conv_compute, fp32) {
y_cpu.Resize({n, c_o, h_o, w_o}); y_cpu.Resize({n, c_o, h_o, w_o});
bias_cpu.Resize({c_o}); bias_cpu.Resize({c_o});
auto* x_data = x.mutable_data<float>(TARGET(kCUDA));
auto* y_data = y.mutable_data<float>(TARGET(kCUDA)); auto* y_data = y.mutable_data<float>(TARGET(kCUDA));
float* x_cpu_data = x_cpu.mutable_data<float>(); float* x_cpu_data = x_cpu.mutable_data<float>();
float* filter_cpu_data = filter_cpu.mutable_data<float>(); float* filter_cpu_data = filter_cpu.mutable_data<float>();
...@@ -127,7 +126,6 @@ TEST(conv_compute, int8) { ...@@ -127,7 +126,6 @@ TEST(conv_compute, int8) {
y_cpu.Resize({1, 1, 1, c}); y_cpu.Resize({1, 1, 1, c});
bias_cpu.Resize({c}); bias_cpu.Resize({c});
auto* x_data = x.mutable_data<int8_t>(TARGET(kCUDA));
auto* y_data = y.mutable_data<float>(TARGET(kCUDA)); auto* y_data = y.mutable_data<float>(TARGET(kCUDA));
auto* x_cpu_data = x_cpu.mutable_data<int8_t>(); auto* x_cpu_data = x_cpu.mutable_data<int8_t>();
auto* filter_cpu_data = filter_cpu.mutable_data<int8_t>(); auto* filter_cpu_data = filter_cpu.mutable_data<int8_t>();
...@@ -194,7 +192,6 @@ TEST(conv_compute, int8_int8_out) { ...@@ -194,7 +192,6 @@ TEST(conv_compute, int8_int8_out) {
y_cpu.Resize({1, 1, 1, c}); y_cpu.Resize({1, 1, 1, c});
bias_cpu.Resize({c}); bias_cpu.Resize({c});
auto* x_data = x.mutable_data<int8_t>(TARGET(kCUDA));
auto* y_data = y.mutable_data<int8_t>(TARGET(kCUDA)); auto* y_data = y.mutable_data<int8_t>(TARGET(kCUDA));
auto* x_cpu_data = x_cpu.mutable_data<int8_t>(); auto* x_cpu_data = x_cpu.mutable_data<int8_t>();
auto* filter_cpu_data = filter_cpu.mutable_data<int8_t>(); auto* filter_cpu_data = filter_cpu.mutable_data<int8_t>();
......
...@@ -56,8 +56,6 @@ TEST(elementwise_add, normal) { ...@@ -56,8 +56,6 @@ TEST(elementwise_add, normal) {
y_ref.Resize({n, c, h, w}); y_ref.Resize({n, c, h, w});
out_ref.Resize({n, c, h, w}); out_ref.Resize({n, c, h, w});
auto* x_data = x.mutable_data<float>(TARGET(kCUDA));
auto* y_data = y.mutable_data<float>(TARGET(kCUDA));
auto* out_data = out.mutable_data<float>(TARGET(kCUDA)); auto* out_data = out.mutable_data<float>(TARGET(kCUDA));
auto* x_cpu_data = x_cpu.mutable_data<float>(); auto* x_cpu_data = x_cpu.mutable_data<float>();
......
...@@ -35,7 +35,6 @@ TEST(leaky_relu, normal) { ...@@ -35,7 +35,6 @@ TEST(leaky_relu, normal) {
x_cpu.Resize({h, w}); x_cpu.Resize({h, w});
y_cpu.Resize({h, w}); y_cpu.Resize({h, w});
auto* x_data = x.mutable_data<float>(TARGET(kCUDA));
auto* y_data = y.mutable_data<float>(TARGET(kCUDA)); auto* y_data = y.mutable_data<float>(TARGET(kCUDA));
float* x_cpu_data = x_cpu.mutable_data<float>(); float* x_cpu_data = x_cpu.mutable_data<float>();
float* y_cpu_data = x_cpu.mutable_data<float>(); float* y_cpu_data = x_cpu.mutable_data<float>();
......
...@@ -80,7 +80,6 @@ TEST(nearest_interp, normal) { ...@@ -80,7 +80,6 @@ TEST(nearest_interp, normal) {
Tensor x_ref, osz_ref, out_ref; Tensor x_ref, osz_ref, out_ref;
int n = 1, c = 3, in_h = 40, in_w = 40; int n = 1, c = 3, in_h = 40, in_w = 40;
int in_chw = c * in_h * in_w;
int out_h = 80, out_w = 80; int out_h = 80, out_w = 80;
float scale = 2.0; float scale = 2.0;
...@@ -101,8 +100,6 @@ TEST(nearest_interp, normal) { ...@@ -101,8 +100,6 @@ TEST(nearest_interp, normal) {
osz_ref.Resize({2}); osz_ref.Resize({2});
out_ref.Resize({n, c, out_h, out_w}); out_ref.Resize({n, c, out_h, out_w});
auto* x_data = x.mutable_data<float>(TARGET(kCUDA));
auto* osz_data = osz.mutable_data<float>(TARGET(kCUDA));
auto* out_data = out.mutable_data<float>(TARGET(kCUDA)); auto* out_data = out.mutable_data<float>(TARGET(kCUDA));
float* x_cpu_data = x_cpu.mutable_data<float>(); float* x_cpu_data = x_cpu.mutable_data<float>();
......
...@@ -41,7 +41,6 @@ void nchw2nhwc_ref(lite::Tensor* input, ...@@ -41,7 +41,6 @@ void nchw2nhwc_ref(lite::Tensor* input,
int input_c = input->dims()[1]; int input_c = input->dims()[1];
int input_h = input->dims()[2]; int input_h = input->dims()[2];
int input_w = input->dims()[3]; int input_w = input->dims()[3];
int output_n = output->dims()[0];
int output_c = output->dims()[1]; int output_c = output->dims()[1];
int output_h = output->dims()[2]; int output_h = output->dims()[2];
int output_w = output->dims()[3]; int output_w = output->dims()[3];
...@@ -75,7 +74,6 @@ void nhwc2nchw_ref(lite::Tensor* input, ...@@ -75,7 +74,6 @@ void nhwc2nchw_ref(lite::Tensor* input,
int input_h = input->dims()[1]; int input_h = input->dims()[1];
int input_w = input->dims()[2]; int input_w = input->dims()[2];
int input_c = input->dims()[3]; int input_c = input->dims()[3];
int output_n = output->dims()[0];
int output_h = output->dims()[1]; int output_h = output->dims()[1];
int output_w = output->dims()[2]; int output_w = output->dims()[2];
int output_c = output->dims()[3]; int output_c = output->dims()[3];
...@@ -145,7 +143,6 @@ TEST(transpose_nchw, normal) { ...@@ -145,7 +143,6 @@ TEST(transpose_nchw, normal) {
x_ref.Resize({N, C, H, W}); x_ref.Resize({N, C, H, W});
out_ref.Resize({N, H, W, C}); out_ref.Resize({N, H, W, C});
auto* x_data = x.mutable_data<float>(TARGET(kCUDA));
auto* x_cpu_data = x_cpu.mutable_data<float>(); auto* x_cpu_data = x_cpu.mutable_data<float>();
auto* out_cpu_data = out_cpu.mutable_data<float>(); auto* out_cpu_data = out_cpu.mutable_data<float>();
auto* x_ref_data = x_ref.mutable_data<float>(); auto* x_ref_data = x_ref.mutable_data<float>();
...@@ -200,7 +197,6 @@ TEST(transpose_nhwc, normal) { ...@@ -200,7 +197,6 @@ TEST(transpose_nhwc, normal) {
x_ref.Resize({N, H, W, C}); x_ref.Resize({N, H, W, C});
out_ref.Resize({N, C, H, W}); out_ref.Resize({N, C, H, W});
auto* x_data = x.mutable_data<float>(TARGET(kCUDA));
auto* x_cpu_data = x_cpu.mutable_data<float>(); auto* x_cpu_data = x_cpu.mutable_data<float>();
auto* out_cpu_data = out_cpu.mutable_data<float>(); auto* out_cpu_data = out_cpu.mutable_data<float>();
auto* x_ref_data = x_ref.mutable_data<float>(); auto* x_ref_data = x_ref.mutable_data<float>();
...@@ -253,7 +249,6 @@ TEST(transpose, normal) { ...@@ -253,7 +249,6 @@ TEST(transpose, normal) {
x_ref.Resize({C, H, W}); x_ref.Resize({C, H, W});
out_ref.Resize({W, C, H}); out_ref.Resize({W, C, H});
auto* x_data = x.mutable_data<float>(TARGET(kCUDA));
auto* x_cpu_data = x_cpu.mutable_data<float>(); auto* x_cpu_data = x_cpu.mutable_data<float>();
auto* out_cpu_data = out_cpu.mutable_data<float>(); auto* out_cpu_data = out_cpu.mutable_data<float>();
auto* x_ref_data = x_ref.mutable_data<float>(); auto* x_ref_data = x_ref.mutable_data<float>();
......
...@@ -180,8 +180,6 @@ TEST(yolo_box, normal) { ...@@ -180,8 +180,6 @@ TEST(yolo_box, normal) {
boxes_ref.Resize({n, m, 4}); boxes_ref.Resize({n, m, 4});
scores_ref.Resize({n, cls, m}); scores_ref.Resize({n, cls, m});
auto* x_data = x.mutable_data<float>(TARGET(kCUDA));
auto* sz_data = sz.mutable_data<float>(TARGET(kCUDA));
auto* boxes_data = boxes.mutable_data<float>(TARGET(kCUDA)); auto* boxes_data = boxes.mutable_data<float>(TARGET(kCUDA));
auto* scores_data = scores.mutable_data<float>(TARGET(kCUDA)); auto* scores_data = scores.mutable_data<float>(TARGET(kCUDA));
......
...@@ -64,6 +64,7 @@ bool Activate(const lite::Tensor* X, lite::Tensor* Out) { ...@@ -64,6 +64,7 @@ bool Activate(const lite::Tensor* X, lite::Tensor* Out) {
auto x = lite::fluid::EigenVector<T>::Flatten(*X); auto x = lite::fluid::EigenVector<T>::Flatten(*X);
auto out = lite::fluid::EigenVector<T>::Flatten(*Out); auto out = lite::fluid::EigenVector<T>::Flatten(*Out);
Functor()(place, x, out); Functor()(place, x, out);
return true;
} }
// square(x) = x^2 // square(x) = x^2
......
...@@ -293,7 +293,7 @@ TEST(NaiveBufferWrapper, ProgramDesc) { ...@@ -293,7 +293,7 @@ TEST(NaiveBufferWrapper, ProgramDesc) {
// Set ProgramDesc // Set ProgramDesc
nb_desc0.SetVersion(1); nb_desc0.SetVersion(1);
for (int i = 0; i < 3; ++i) { for (int i = 0; i < 3; ++i) {
auto* item = nb_desc0.AddBlock<proto::BlockDesc>(); nb_desc0.AddBlock<proto::BlockDesc>();
} }
// Save model // Save model
......
...@@ -130,6 +130,7 @@ class OpDesc : public OpDescAPI { ...@@ -130,6 +130,7 @@ class OpDesc : public OpDescAPI {
DEF_ONE(LONGS); DEF_ONE(LONGS);
default: default:
LOG(FATAL) << "Unknown attribute type"; LOG(FATAL) << "Unknown attribute type";
return static_cast<AttrType>(-1);
} }
#undef DEF_ONE #undef DEF_ONE
} }
......
...@@ -97,6 +97,7 @@ VarDescAPI::VarDataType ParamDesc::GetDataType() const { ...@@ -97,6 +97,7 @@ VarDescAPI::VarDataType ParamDesc::GetDataType() const {
default: default:
LOG(FATAL) << "Unknown var data type"; LOG(FATAL) << "Unknown var data type";
} }
return VarDescAPI::VarDataType();
#undef GET_DATA_TYPE_CASE_ITEM #undef GET_DATA_TYPE_CASE_ITEM
} }
......
...@@ -51,6 +51,7 @@ VarDescAPI::Type VarDesc::GetType() const { ...@@ -51,6 +51,7 @@ VarDescAPI::Type VarDesc::GetType() const {
GET_TYPE_CASE_ITEM(READER); GET_TYPE_CASE_ITEM(READER);
default: default:
LOG(FATAL) << "Unknown var type"; LOG(FATAL) << "Unknown var type";
return VarDescAPI::Type();
} }
#undef GET_TYPE_CASE_ITEM #undef GET_TYPE_CASE_ITEM
} }
......
...@@ -121,6 +121,7 @@ class OpDesc : public OpDescAPI { ...@@ -121,6 +121,7 @@ class OpDesc : public OpDescAPI {
DEF_ONE(LONGS); DEF_ONE(LONGS);
default: default:
LOG(FATAL) << "Unknown attribute type"; LOG(FATAL) << "Unknown attribute type";
return static_cast<AttrType>(-1);
} }
#undef DEF_ONE #undef DEF_ONE
} }
......
...@@ -39,6 +39,7 @@ VarDescAPI::Type VarDesc::GetType() const { ...@@ -39,6 +39,7 @@ VarDescAPI::Type VarDesc::GetType() const {
GET_TYPE_CASE_ITEM(READER); GET_TYPE_CASE_ITEM(READER);
default: default:
LOG(FATAL) << "Unknown var type"; LOG(FATAL) << "Unknown var type";
return VarDescAPI::Type();
} }
#undef GET_TYPE_CASE_ITEM #undef GET_TYPE_CASE_ITEM
} }
......
...@@ -32,7 +32,6 @@ bool GRUUnitOpLite::CheckShape() const { ...@@ -32,7 +32,6 @@ bool GRUUnitOpLite::CheckShape() const {
auto hidden_prev_dims = param_.hidden_prev->dims(); auto hidden_prev_dims = param_.hidden_prev->dims();
auto weight_dims = param_.weight->dims(); auto weight_dims = param_.weight->dims();
int batch_size = input_dims[0];
int input_size = input_dims[1]; int input_size = input_dims[1];
int frame_size = hidden_prev_dims[1]; int frame_size = hidden_prev_dims[1];
int weight_height = weight_dims[0]; int weight_height = weight_dims[0];
......
...@@ -29,7 +29,6 @@ bool Im2SequenceOp::CheckShape() const { return true; } ...@@ -29,7 +29,6 @@ bool Im2SequenceOp::CheckShape() const { return true; }
bool Im2SequenceOp::InferShape() const { bool Im2SequenceOp::InferShape() const {
CHECK_OR_FALSE(param_.Out); CHECK_OR_FALSE(param_.Out);
// TODO(Superjomn) Enable data sharing. // TODO(Superjomn) Enable data sharing.
auto inputs = param_.X;
auto input_dims = param_.X->dims(); auto input_dims = param_.X->dims();
int img_num = input_dims[0]; int img_num = input_dims[0];
int img_channels = input_dims[1]; int img_channels = input_dims[1];
......
...@@ -21,7 +21,7 @@ namespace operators { ...@@ -21,7 +21,7 @@ namespace operators {
bool IsEmptyOp::CheckShape() const { return true; } bool IsEmptyOp::CheckShape() const { return true; }
bool IsEmptyOp::InferShape() const {} bool IsEmptyOp::InferShape() const { return true; }
bool IsEmptyOp::AttachImpl(const cpp::OpDesc &opdesc, lite::Scope *scope) { bool IsEmptyOp::AttachImpl(const cpp::OpDesc &opdesc, lite::Scope *scope) {
param_.X = param_.X =
......
...@@ -33,7 +33,7 @@ template <typename T> ...@@ -33,7 +33,7 @@ template <typename T>
void GetSize(T start, T end, T step, int64_t* size) { void GetSize(T start, T end, T step, int64_t* size) {
CHECK(!std::equal_to<T>()(step, 0)) CHECK(!std::equal_to<T>()(step, 0))
<< "The step of range op should not be 0."; << "The step of range op should not be 0.";
CHECK(((start < end) && (step > 0)) || (start > end) && (step < 0)) CHECK(((start < end) && (step > 0)) || ((start > end) && (step < 0)))
<< "The step should be greater than 0 while start < end. And the " << "The step should be greater than 0 while start < end. And the "
"step should be less than 0 while start > end."; "step should be less than 0 while start > end.";
*size = std::is_integral<T>::value *size = std::is_integral<T>::value
......
...@@ -48,7 +48,6 @@ bool YoloBoxOp::CheckShape() const { ...@@ -48,7 +48,6 @@ bool YoloBoxOp::CheckShape() const {
bool YoloBoxOp::InferShape() const { bool YoloBoxOp::InferShape() const {
auto* X = param_.X; auto* X = param_.X;
auto* ImgSize = param_.ImgSize;
auto anchors = param_.anchors; auto anchors = param_.anchors;
int anchor_num = anchors.size() / 2; int anchor_num = anchors.size() / 2;
auto class_num = param_.class_num; auto class_num = param_.class_num;
......
...@@ -64,8 +64,6 @@ class AffineChannelComputeTester : public arena::TestCase { ...@@ -64,8 +64,6 @@ class AffineChannelComputeTester : public arena::TestCase {
if (data_layout_ == "NCHW") { if (data_layout_ == "NCHW") {
int channel = x_dims_[1]; int channel = x_dims_[1];
int height = x_dims_[2];
int width = x_dims_[3];
int size = x_dims_[2] * x_dims_[3]; int size = x_dims_[2] * x_dims_[3];
int in_channel = channel * size; int in_channel = channel * size;
for (int n = 0; n < num; n++) { for (int n = 0; n < num; n++) {
......
...@@ -121,16 +121,10 @@ class BoxCoderComputeTester : public arena::TestCase { ...@@ -121,16 +121,10 @@ class BoxCoderComputeTester : public arena::TestCase {
auto* output_box = scope->NewTensor(output_box_); auto* output_box = scope->NewTensor(output_box_);
CHECK(output_box); CHECK(output_box);
output_box->Resize(target_box_dims_); output_box->Resize(target_box_dims_);
auto* output_box_data = output_box->mutable_data<float>();
auto* prior_box = scope->FindTensor(prior_box_); auto* prior_box = scope->FindTensor(prior_box_);
const auto* prior_box_data = prior_box->data<float>();
auto* prior_box_var = scope->FindTensor(prior_box_var_); auto* prior_box_var = scope->FindTensor(prior_box_var_);
const auto* prior_box_var_data = prior_box_var->data<float>();
auto* target_box = scope->FindTensor(target_box_); auto* target_box = scope->FindTensor(target_box_);
const auto* target_box_data = target_box->data<float>();
box_coder_ref(output_box, box_coder_ref(output_box,
prior_box, prior_box,
......
...@@ -45,10 +45,6 @@ class CastComputeTester : public arena::TestCase { ...@@ -45,10 +45,6 @@ class CastComputeTester : public arena::TestCase {
auto* output_data = out->mutable_data<float>(); auto* output_data = out->mutable_data<float>();
auto* x = scope->FindTensor(input_); auto* x = scope->FindTensor(input_);
auto* x_data = x->data<char>(); auto* x_data = x->data<char>();
int num = x_dims_[0];
int channel = x_dims_[1];
int size = x_dims_[2] * x_dims_[3];
int in_channel = channel * size;
auto* output_data_tmp = output_data; auto* output_data_tmp = output_data;
auto* x_data_tmp = x_data; auto* x_data_tmp = x_data;
for (int i = 0; i < x_dims_.production(); i++) { for (int i = 0; i < x_dims_.production(); i++) {
...@@ -60,10 +56,6 @@ class CastComputeTester : public arena::TestCase { ...@@ -60,10 +56,6 @@ class CastComputeTester : public arena::TestCase {
auto* output_data = out->mutable_data<float>(); auto* output_data = out->mutable_data<float>();
auto* x = scope->FindTensor(input_); auto* x = scope->FindTensor(input_);
auto* x_data = x->data<int32_t>(); auto* x_data = x->data<int32_t>();
int num = x_dims_[0];
int channel = x_dims_[1];
int size = x_dims_[2] * x_dims_[3];
int in_channel = channel * size;
auto* output_data_tmp = output_data; auto* output_data_tmp = output_data;
auto* x_data_tmp = x_data; auto* x_data_tmp = x_data;
for (int i = 0; i < x_dims_.production(); i++) { for (int i = 0; i < x_dims_.production(); i++) {
......
...@@ -190,7 +190,6 @@ bool deconv_basic(const Dtype1* din, ...@@ -190,7 +190,6 @@ bool deconv_basic(const Dtype1* din,
auto* workspace_ptr = workspace_tensor.mutable_data<Dtype2>(); auto* workspace_ptr = workspace_tensor.mutable_data<Dtype2>();
int group_size_in = win * hin * chin / group; int group_size_in = win * hin * chin / group;
int group_size_out = wout * hout * chout / group;
int group_size_coldata = m * n; int group_size_coldata = m * n;
int group_size_weights = chin * chout * kernel_w * kernel_h / (group * group); int group_size_weights = chin * chout * kernel_w * kernel_h / (group * group);
bool flag_1x1s1p1 = (kernel_w == 1) && (kernel_h == 1) && (stride_h == 1) && bool flag_1x1s1p1 = (kernel_w == 1) && (kernel_h == 1) && (stride_h == 1) &&
......
...@@ -43,7 +43,6 @@ class ElementwiseComputeTester : public arena::TestCase { ...@@ -43,7 +43,6 @@ class ElementwiseComputeTester : public arena::TestCase {
auto* x = scope->FindTensor(inputx_); auto* x = scope->FindTensor(inputx_);
const auto* x_data = x->data<float>(); const auto* x_data = x->data<float>();
auto* y = scope->FindTensor(inputy_);
const auto* y_data = x->data<float>(); const auto* y_data = x->data<float>();
for (int i = 0; i < dims_.production(); i++) { for (int i = 0; i < dims_.production(); i++) {
...@@ -94,7 +93,6 @@ class ElementwiseSubComputeTester : public arena::TestCase { ...@@ -94,7 +93,6 @@ class ElementwiseSubComputeTester : public arena::TestCase {
auto* x = scope->FindTensor(inputx_); auto* x = scope->FindTensor(inputx_);
const auto* x_data = x->data<float>(); const auto* x_data = x->data<float>();
auto* y = scope->FindTensor(inputy_);
const auto* y_data = x->data<float>(); const auto* y_data = x->data<float>();
for (int i = 0; i < dims_.production(); i++) { for (int i = 0; i < dims_.production(); i++) {
...@@ -145,7 +143,6 @@ class ElementwiseMulComputeTester : public arena::TestCase { ...@@ -145,7 +143,6 @@ class ElementwiseMulComputeTester : public arena::TestCase {
auto* x = scope->FindTensor(inputx_); auto* x = scope->FindTensor(inputx_);
const auto* x_data = x->data<float>(); const auto* x_data = x->data<float>();
auto* y = scope->FindTensor(inputy_);
const auto* y_data = x->data<float>(); const auto* y_data = x->data<float>();
for (int i = 0; i < dims_.production(); i++) { for (int i = 0; i < dims_.production(); i++) {
...@@ -196,7 +193,6 @@ class ElementwiseMaxComputeTester : public arena::TestCase { ...@@ -196,7 +193,6 @@ class ElementwiseMaxComputeTester : public arena::TestCase {
auto* x = scope->FindTensor(inputx_); auto* x = scope->FindTensor(inputx_);
const auto* x_data = x->data<float>(); const auto* x_data = x->data<float>();
auto* y = scope->FindTensor(inputy_);
const auto* y_data = x->data<float>(); const auto* y_data = x->data<float>();
for (int i = 0; i < dims_.production(); i++) { for (int i = 0; i < dims_.production(); i++) {
...@@ -249,7 +245,6 @@ class FusionElementwiseAddActivationComputeTester : public arena::TestCase { ...@@ -249,7 +245,6 @@ class FusionElementwiseAddActivationComputeTester : public arena::TestCase {
auto* x = scope->FindTensor(inputx_); auto* x = scope->FindTensor(inputx_);
const auto* x_data = x->data<float>(); const auto* x_data = x->data<float>();
auto* y = scope->FindTensor(inputy_);
const auto* y_data = x->data<float>(); const auto* y_data = x->data<float>();
for (int i = 0; i < dims_.production(); i++) { for (int i = 0; i < dims_.production(); i++) {
...@@ -308,7 +303,6 @@ class FusionElementwiseSubActivationComputeTester : public arena::TestCase { ...@@ -308,7 +303,6 @@ class FusionElementwiseSubActivationComputeTester : public arena::TestCase {
auto* x = scope->FindTensor(inputx_); auto* x = scope->FindTensor(inputx_);
const auto* x_data = x->data<float>(); const auto* x_data = x->data<float>();
auto* y = scope->FindTensor(inputy_);
const auto* y_data = x->data<float>(); const auto* y_data = x->data<float>();
for (int i = 0; i < dims_.production(); i++) { for (int i = 0; i < dims_.production(); i++) {
...@@ -367,7 +361,6 @@ class FusionElementwiseMulActivationComputeTester : public arena::TestCase { ...@@ -367,7 +361,6 @@ class FusionElementwiseMulActivationComputeTester : public arena::TestCase {
auto* x = scope->FindTensor(inputx_); auto* x = scope->FindTensor(inputx_);
const auto* x_data = x->data<float>(); const auto* x_data = x->data<float>();
auto* y = scope->FindTensor(inputy_);
const auto* y_data = x->data<float>(); const auto* y_data = x->data<float>();
for (int i = 0; i < dims_.production(); i++) { for (int i = 0; i < dims_.production(); i++) {
...@@ -426,7 +419,6 @@ class FusionElementwiseMaxActivationComputeTester : public arena::TestCase { ...@@ -426,7 +419,6 @@ class FusionElementwiseMaxActivationComputeTester : public arena::TestCase {
auto* x = scope->FindTensor(inputx_); auto* x = scope->FindTensor(inputx_);
const auto* x_data = x->data<float>(); const auto* x_data = x->data<float>();
auto* y = scope->FindTensor(inputy_);
const auto* y_data = x->data<float>(); const auto* y_data = x->data<float>();
for (int i = 0; i < dims_.production(); i++) { for (int i = 0; i < dims_.production(); i++) {
......
...@@ -51,10 +51,10 @@ class FcOPTest : public arena::TestCase { ...@@ -51,10 +51,10 @@ class FcOPTest : public arena::TestCase {
std::string weight_ = "w"; std::string weight_ = "w";
std::string bias_ = "b"; std::string bias_ = "b";
std::string out_ = "out"; std::string out_ = "out";
int in_num_col_dims_{1};
DDim dims_{{1, 128}}; DDim dims_{{1, 128}};
DDim wdims_{{128, 4}}; DDim wdims_{{128, 4}};
DDim bdims_{{4}}; DDim bdims_{{4}};
int in_num_col_dims_{1};
public: public:
FcOPTest(const Place& place, FcOPTest(const Place& place,
......
...@@ -243,11 +243,11 @@ class GRUUnitTester : public arena::TestCase { ...@@ -243,11 +243,11 @@ class GRUUnitTester : public arena::TestCase {
std::string reset_hidden_prev_ = "reset_hidden_prev"; std::string reset_hidden_prev_ = "reset_hidden_prev";
std::string hidden_ = "hidden"; std::string hidden_ = "hidden";
DDim dims_{{16, 256 * 3}};
// 0: identity; 1: sigmoid; 2: tanh; 3: relu // 0: identity; 1: sigmoid; 2: tanh; 3: relu
int gate_activation_{1}; int gate_activation_{1};
int activation_{2}; int activation_{2};
bool origin_mode_{false}; bool origin_mode_{false};
DDim dims_{{16, 256 * 3}};
public: public:
GRUUnitTester(const Place& place, GRUUnitTester(const Place& place,
......
...@@ -123,7 +123,6 @@ class LrnComputeTester : public arena::TestCase { ...@@ -123,7 +123,6 @@ class LrnComputeTester : public arena::TestCase {
int H = dims_[2]; int H = dims_[2];
int W = dims_[3]; int W = dims_[3];
int pre_pad = (local_size_ - 1) / 2;
int offset_num = 0; int offset_num = 0;
int offset_within_channel = 0; int offset_within_channel = 0;
int dst_id; int dst_id;
......
...@@ -120,12 +120,12 @@ class MatMulComputeTester : public arena::TestCase { ...@@ -120,12 +120,12 @@ class MatMulComputeTester : public arena::TestCase {
// common attributes for this op. // common attributes for this op.
std::string x_ = "X"; std::string x_ = "X";
std::string y_ = "Y"; std::string y_ = "Y";
std::string out_ = "Out";
DDim x_dims_;
DDim y_dims_;
bool x_transpose_; bool x_transpose_;
bool y_transpose_; bool y_transpose_;
float alpha_; float alpha_;
std::string out_ = "Out";
DDim x_dims_;
DDim y_dims_;
public: public:
MatMulComputeTester(const Place& place, MatMulComputeTester(const Place& place,
......
...@@ -26,8 +26,8 @@ class Pad2dComputeTester : public arena::TestCase { ...@@ -26,8 +26,8 @@ class Pad2dComputeTester : public arena::TestCase {
std::string input_ = "X"; std::string input_ = "X";
std::string output_ = "Out"; std::string output_ = "Out";
DDim dims_{{1, 1, 14, 14}}; DDim dims_{{1, 1, 14, 14}};
std::vector<int> paddings_;
std::string mode_{"constant"}; std::string mode_{"constant"};
std::vector<int> paddings_;
float pad_value_ = 0.f; float pad_value_ = 0.f;
std::string data_format_{"NCHW"}; std::string data_format_{"NCHW"};
......
...@@ -125,7 +125,6 @@ void prior_box_compute_ref(const lite::Tensor* input, ...@@ -125,7 +125,6 @@ void prior_box_compute_ref(const lite::Tensor* input,
if (fixed_size_.size() > 0) { if (fixed_size_.size() > 0) {
for (int s = 0; s < fixed_size_.size(); ++s) { for (int s = 0; s < fixed_size_.size(); ++s) {
int fixed_size = fixed_size_[s]; int fixed_size = fixed_size_[s];
int com_idx = 0;
box_width = fixed_size; box_width = fixed_size;
box_height = fixed_size; box_height = fixed_size;
......
...@@ -28,7 +28,7 @@ void reduce_n(const float* src, ...@@ -28,7 +28,7 @@ void reduce_n(const float* src,
int width_in) { int width_in) {
int hw_size = height_in * width_in; int hw_size = height_in * width_in;
int chw_size = channel_in * hw_size; int chw_size = channel_in * hw_size;
int data_index, src_index, src_index0; int data_index, src_index;
for (int c = 0; c < channel_in; ++c) { for (int c = 0; c < channel_in; ++c) {
for (int h = 0; h < height_in; ++h) { for (int h = 0; h < height_in; ++h) {
for (int w = 0; w < width_in; ++w) { for (int w = 0; w < width_in; ++w) {
...@@ -196,9 +196,9 @@ class ReduceMaxComputeTester : public arena::TestCase { ...@@ -196,9 +196,9 @@ class ReduceMaxComputeTester : public arena::TestCase {
std::string input_ = "x"; std::string input_ = "x";
std::string output_ = "out"; std::string output_ = "out";
std::vector<int> dim_{0}; std::vector<int> dim_{0};
DDim x_dims_{{3, 2, 3, 4}};
bool keep_dim_ = false; bool keep_dim_ = false;
bool reduce_all_ = false; bool reduce_all_ = false;
DDim x_dims_{{3, 2, 3, 4}};
public: public:
ReduceMaxComputeTester(const Place& place, ReduceMaxComputeTester(const Place& place,
......
...@@ -28,7 +28,7 @@ void reduce_mean_n(const float* src, ...@@ -28,7 +28,7 @@ void reduce_mean_n(const float* src,
int width_in) { int width_in) {
int hw_size = height_in * width_in; int hw_size = height_in * width_in;
int chw_size = channel_in * hw_size; int chw_size = channel_in * hw_size;
int data_index, src_index, src_index0; int data_index, src_index;
for (int c = 0; c < channel_in; ++c) { for (int c = 0; c < channel_in; ++c) {
for (int h = 0; h < height_in; ++h) { for (int h = 0; h < height_in; ++h) {
for (int w = 0; w < width_in; ++w) { for (int w = 0; w < width_in; ++w) {
...@@ -195,8 +195,8 @@ class ReduceMeanComputeTester : public arena::TestCase { ...@@ -195,8 +195,8 @@ class ReduceMeanComputeTester : public arena::TestCase {
std::string input_ = "x"; std::string input_ = "x";
std::string output_ = "out"; std::string output_ = "out";
std::vector<int> dim_{0}; std::vector<int> dim_{0};
DDim x_dims_{{3, 2, 3, 4}};
bool keep_dim_ = false; bool keep_dim_ = false;
DDim x_dims_{{3, 2, 3, 4}};
bool reduce_all_ = false; bool reduce_all_ = false;
public: public:
......
...@@ -25,10 +25,10 @@ class SequenceExpandComputeTester : public arena::TestCase { ...@@ -25,10 +25,10 @@ class SequenceExpandComputeTester : public arena::TestCase {
const std::string input_x_ = "x"; const std::string input_x_ = "x";
const std::string input_y_ = "y"; const std::string input_y_ = "y";
const std::string output_ = "out"; const std::string output_ = "out";
int ref_level_ = -1;
DDim dims_{{4, 1}};
LoD lod_x_{{0, 2, 4}}; LoD lod_x_{{0, 2, 4}};
LoD lod_y_{{0, 1, 4}}; LoD lod_y_{{0, 1, 4}};
int ref_level_ = -1;
DDim dims_{{4, 1}};
public: public:
SequenceExpandComputeTester(const Place& place, SequenceExpandComputeTester(const Place& place,
...@@ -50,7 +50,6 @@ class SequenceExpandComputeTester : public arena::TestCase { ...@@ -50,7 +50,6 @@ class SequenceExpandComputeTester : public arena::TestCase {
const auto* x_data = x->data<float>(); const auto* x_data = x->data<float>();
(x->mutable_lod())->clear(); (x->mutable_lod())->clear();
(x->mutable_lod())->push_back(lod_x_[0]); (x->mutable_lod())->push_back(lod_x_[0]);
int x_rank = dims_.size();
auto width = x->numel() / dims_[0]; auto width = x->numel() / dims_[0];
auto lod_x = x->lod(); auto lod_x = x->lod();
...@@ -59,7 +58,6 @@ class SequenceExpandComputeTester : public arena::TestCase { ...@@ -59,7 +58,6 @@ class SequenceExpandComputeTester : public arena::TestCase {
for (int i = 0; i < lod_y_.size(); i++) { for (int i = 0; i < lod_y_.size(); i++) {
(y->mutable_lod())->push_back(lod_y_[i]); (y->mutable_lod())->push_back(lod_y_[i]);
} }
const auto* y_data = y->data<float>();
if (ref_level_ == -1) { if (ref_level_ == -1) {
ref_level_ = lod_y_.size() - 1; ref_level_ = lod_y_.size() - 1;
} }
......
...@@ -25,9 +25,9 @@ class SequencePoolComputeTester : public arena::TestCase { ...@@ -25,9 +25,9 @@ class SequencePoolComputeTester : public arena::TestCase {
// common attributes for this op. // common attributes for this op.
std::string input_ = "x"; std::string input_ = "x";
std::string output_ = "out"; std::string output_ = "out";
DDim dims_{{5, 1}};
LoD lod_{{0, 2, 5}}; LoD lod_{{0, 2, 5}};
std::string pool_type_ = "SUM"; std::string pool_type_ = "SUM";
DDim dims_{{5, 1}};
public: public:
SequencePoolComputeTester(const Place& place, SequencePoolComputeTester(const Place& place,
......
...@@ -60,10 +60,6 @@ bool test_gemm_int8(bool tra, ...@@ -60,10 +60,6 @@ bool test_gemm_int8(bool tra,
Tensor tc_basic_fp32; Tensor tc_basic_fp32;
Tensor tbias; Tensor tbias;
int lda = tra ? m : k;
int ldb = trb ? k : n;
int ldc = n;
ta.Resize({m, k}); ta.Resize({m, k});
tb.Resize({k, n}); tb.Resize({k, n});
tc_int8.Resize({m, n}); tc_int8.Resize({m, n});
...@@ -94,6 +90,16 @@ bool test_gemm_int8(bool tra, ...@@ -94,6 +90,16 @@ bool test_gemm_int8(bool tra,
scale_merge_int8[j] = scale_merge_fp32[j] / scale_c[0]; scale_merge_int8[j] = scale_merge_fp32[j] / scale_c[0];
} }
LOG(INFO) << "gemm_int8 M: " << m << ", N: " << n << ", K: " << k
<< ", transA: " << (tra ? "true" : "false")
<< ", transB: " << (trb ? "true" : "false")
<< ", relu: " << (has_relu ? "true" : "false")
<< ", bias: " << (has_bias ? "true" : "false");
#ifdef LITE_WITH_ARM
int lda = tra ? m : k;
int ldb = trb ? k : n;
int ldc = n;
auto da = ta.mutable_data<int8_t>(); auto da = ta.mutable_data<int8_t>();
auto db = tb.mutable_data<int8_t>(); auto db = tb.mutable_data<int8_t>();
auto dc_int8 = tc_int8.mutable_data<int8_t>(); auto dc_int8 = tc_int8.mutable_data<int8_t>();
...@@ -102,12 +108,6 @@ bool test_gemm_int8(bool tra, ...@@ -102,12 +108,6 @@ bool test_gemm_int8(bool tra,
auto dc_basic_fp32 = tc_basic_fp32.mutable_data<float>(); auto dc_basic_fp32 = tc_basic_fp32.mutable_data<float>();
auto dbias = tbias.mutable_data<float>(); auto dbias = tbias.mutable_data<float>();
LOG(INFO) << "gemm_int8 M: " << m << ", N: " << n << ", K: " << k
<< ", transA: " << (tra ? "true" : "false")
<< ", transB: " << (trb ? "true" : "false")
<< ", relu: " << (has_relu ? "true" : "false")
<< ", bias: " << (has_bias ? "true" : "false");
#ifdef LITE_WITH_ARM
if (FLAGS_check_result) { if (FLAGS_check_result) {
Tensor ta_fp32; Tensor ta_fp32;
Tensor tb_fp32; Tensor tb_fp32;
......
...@@ -42,7 +42,7 @@ function prepare_workspace { ...@@ -42,7 +42,7 @@ function prepare_workspace {
cp ../${DEBUG_TOOL_PATH_PREFIX}/analysis_tool.py ./${DEBUG_TOOL_PATH_PREFIX}/ cp ../${DEBUG_TOOL_PATH_PREFIX}/analysis_tool.py ./${DEBUG_TOOL_PATH_PREFIX}/
# clone submodule # clone submodule
#git submodule update --init --recursive # git submodule update --init --recursive
prepare_thirdparty prepare_thirdparty
} }
......
lite_cc_library(debug_utils SRCS debug_utils.cc DEPS op_params model_parser) lite_cc_library(debug_utils SRCS debug_utils.cc DEPS op_params model_parser)
lite_cc_binary(lite_model_debug_tool SRCS model_debug_tool.cc if(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK OR LITE_ON_MODEL_OPTIMIZE_TOOL)
lite_cc_binary(lite_model_debug_tool SRCS model_debug_tool.cc
DEPS DEPS
cxx_api cxx_api
debug_utils debug_utils
target_wrapper_host target_wrapper_host
mir_passes mir_passes
gflags gflags
logging
${ops} ${host_kernels} ${ops} ${host_kernels}
X86_DEPS ${x86_kernels} X86_DEPS ${x86_kernels}
ARM_DEPS ${arm_kernels} ARM_DEPS ${arm_kernels}
NPU_DEPS ${npu_kernels} NPU_DEPS ${npu_kernels}
FPGA_DEPS ${fpga_kernels} FPGA_DEPS ${fpga_kernels}
CL_DEPS ${opencl_kernels}) CL_DEPS ${opencl_kernels})
endif()
...@@ -16,9 +16,6 @@ ...@@ -16,9 +16,6 @@
#include <string> #include <string>
#include <vector> #include <vector>
#include "lite/api/cxx_api.h" #include "lite/api/cxx_api.h"
#include "lite/api/paddle_use_kernels.h"
#include "lite/api/paddle_use_ops.h"
#include "lite/api/paddle_use_passes.h"
#include "lite/core/op_registry.h" #include "lite/core/op_registry.h"
#include "lite/model_parser/model_parser.h" #include "lite/model_parser/model_parser.h"
#include "lite/model_parser/pb/program_desc.h" #include "lite/model_parser/pb/program_desc.h"
...@@ -47,6 +44,9 @@ void Run(DebugConfig* conf) { ...@@ -47,6 +44,9 @@ void Run(DebugConfig* conf) {
#endif #endif
#ifdef LITE_WITH_FPGA #ifdef LITE_WITH_FPGA
Place{TARGET(kFPGA), PRECISION(kFloat)}, Place{TARGET(kFPGA), PRECISION(kFloat)},
#endif
#ifdef LITE_WITH_CUDA
Place{TARGET(kCUDA), PRECISION(kFloat)},
#endif #endif
}); });
...@@ -68,6 +68,12 @@ void Run(DebugConfig* conf) { ...@@ -68,6 +68,12 @@ void Run(DebugConfig* conf) {
#endif #endif
#ifdef LITE_WITH_X86 #ifdef LITE_WITH_X86
Place{TARGET(kX86), PRECISION(kFloat)}, Place{TARGET(kX86), PRECISION(kFloat)},
#endif
#ifdef LITE_WITH_FPGA
Place{TARGET(kFPGA), PRECISION(kFloat)},
#endif
#ifdef LITE_WITH_CUDA
Place{TARGET(kCUDA), PRECISION(kFloat)},
#endif #endif
valid_places, valid_places,
passes); passes);
......
...@@ -18,6 +18,9 @@ ...@@ -18,6 +18,9 @@
*/ */
#pragma once #pragma once
#ifndef _LOGGING_H_
#define _LOGGING_H_
#include <assert.h> #include <assert.h>
#include <sys/time.h> #include <sys/time.h>
#include <sys/types.h> #include <sys/types.h>
...@@ -183,3 +186,4 @@ class VoidifyFatal : public Voidify { ...@@ -183,3 +186,4 @@ class VoidifyFatal : public Voidify {
} // namespace lite } // namespace lite
} // namespace paddle } // namespace paddle
#endif
...@@ -35,5 +35,5 @@ ...@@ -35,5 +35,5 @@
CHECK_GT((a), (b)) << paddle::lite::string_format("" __VA_ARGS__); CHECK_GT((a), (b)) << paddle::lite::string_format("" __VA_ARGS__);
#ifndef PADDLE_THROW #ifndef PADDLE_THROW
#define PADDLE_THROW #define PADDLE_THROW(...) printf("" __VA_ARGS__);
#endif #endif
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册