未验证 提交 c74a68b7 编写于 作者: myq406450149's avatar myq406450149 提交者: GitHub

Merge branch 'develop' into gpu

...@@ -18,6 +18,7 @@ endif() ...@@ -18,6 +18,7 @@ endif()
set(ANDROID TRUE) set(ANDROID TRUE)
add_definitions(-DLITE_WITH_LINUX) add_definitions(-DLITE_WITH_LINUX)
add_definitions(-DLITE_WITH_ANDROID)
if(NOT DEFINED ANDROID_NDK) if(NOT DEFINED ANDROID_NDK)
set(ANDROID_NDK $ENV{NDK_ROOT}) set(ANDROID_NDK $ENV{NDK_ROOT})
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
// limitations under the License. // limitations under the License.
#include "lite/api/cxx_api.h" #include "lite/api/cxx_api.h"
#include <algorithm>
#include <memory> #include <memory>
#include <string> #include <string>
#include <utility> #include <utility>
...@@ -52,35 +53,36 @@ lite::Tensor *Predictor::GetInput(size_t offset) { ...@@ -52,35 +53,36 @@ lite::Tensor *Predictor::GetInput(size_t offset) {
} }
// get inputs names // get inputs names
std::vector<std::string> Predictor::GetInputNames() { const std::vector<std::string> &Predictor::GetInputNames() {
std::vector<std::string> input_names; return input_names_;
for (auto &item : input_names_) {
input_names.push_back(item.second);
}
return input_names;
} }
// get outputnames // get outputnames
std::vector<std::string> Predictor::GetOutputNames() { const std::vector<std::string> &Predictor::GetOutputNames() {
std::vector<std::string> output_names; return output_names_;
for (auto &item : output_names_) {
output_names.push_back(item.second);
}
return output_names;
} }
// append the names of inputs and outputs into input_names_ and output_names_ // append the names of inputs and outputs into input_names_ and output_names_
void Predictor::PrepareFeedFetch() { void Predictor::PrepareFeedFetch() {
auto current_block = program_desc_.GetBlock<cpp::BlockDesc>(0); auto current_block = program_desc_.GetBlock<cpp::BlockDesc>(0);
std::vector<cpp::OpDesc *> feeds;
std::vector<cpp::OpDesc *> fetchs;
for (int i = 0; i < current_block->OpsSize(); i++) { for (int i = 0; i < current_block->OpsSize(); i++) {
auto op = current_block->GetOp<cpp::OpDesc>(i); auto op = current_block->GetOp<cpp::OpDesc>(i);
if (op->Type() == "feed") { if (op->Type() == "feed") {
int idx = op->GetAttr<int>("col"); feeds.push_back(op);
input_names_[idx] = op->Output("Out").front();
idx2feeds_[op->Output("Out").front()] = idx;
} else if (op->Type() == "fetch") { } else if (op->Type() == "fetch") {
int idx = op->GetAttr<int>("col"); fetchs.push_back(op);
output_names_[idx] = op->Input("X").front();
} }
} }
input_names_.resize(feeds.size());
output_names_.resize(fetchs.size());
for (int i = 0; i < feeds.size(); i++) {
input_names_[feeds[i]->GetAttr<int>("col")] =
feeds[i]->Output("Out").front();
}
for (int i = 0; i < fetchs.size(); i++) {
output_names_[fetchs[i]->GetAttr<int>("col")] =
fetchs[i]->Input("X").front();
}
} }
const lite::Tensor *Predictor::GetOutput(size_t offset) const { const lite::Tensor *Predictor::GetOutput(size_t offset) const {
...@@ -189,16 +191,17 @@ const lite::Tensor *Predictor::GetTensor(const std::string &name) const { ...@@ -189,16 +191,17 @@ const lite::Tensor *Predictor::GetTensor(const std::string &name) const {
} }
// get input by name // get input by name
lite::Tensor *Predictor::GetInputByName(const std::string &name) { lite::Tensor *Predictor::GetInputByName(const std::string &name) {
if (idx2feeds_.find(name) == idx2feeds_.end()) { auto element = std::find(input_names_.begin(), input_names_.end(), name);
if (element == input_names_.end()) {
LOG(ERROR) << "Model do not have input named with: [" << name LOG(ERROR) << "Model do not have input named with: [" << name
<< "], model's inputs include:"; << "], model's inputs include:";
for (int i = 0; i < input_names_.size(); i++) { for (int i = 0; i < input_names_.size(); i++) {
LOG(ERROR) << "[" << input_names_[i] << "]"; LOG(ERROR) << "[" << input_names_[i] << "]";
} }
return NULL; return nullptr;
} else { } else {
int idx = idx2feeds_[name]; int position = std::distance(input_names_.begin(), element);
return GetInput(idx); return GetInput(position);
} }
} }
......
...@@ -74,8 +74,8 @@ class LITE_API Predictor { ...@@ -74,8 +74,8 @@ class LITE_API Predictor {
// get input by name. // get input by name.
lite::Tensor* GetInputByName(const std::string& name); lite::Tensor* GetInputByName(const std::string& name);
// get inputnames and get outputnames. // get inputnames and get outputnames.
std::vector<std::string> GetInputNames(); const std::vector<std::string>& GetInputNames();
std::vector<std::string> GetOutputNames(); const std::vector<std::string>& GetOutputNames();
void PrepareFeedFetch(); void PrepareFeedFetch();
// Get offset-th col of fetch results. // Get offset-th col of fetch results.
...@@ -107,9 +107,8 @@ class LITE_API Predictor { ...@@ -107,9 +107,8 @@ class LITE_API Predictor {
const Scope* exec_scope_; const Scope* exec_scope_;
std::unique_ptr<RuntimeProgram> program_; std::unique_ptr<RuntimeProgram> program_;
bool program_generated_{false}; bool program_generated_{false};
std::map<size_t, std::string> input_names_; std::vector<std::string> input_names_;
std::map<std::string, size_t> idx2feeds_; std::vector<std::string> output_names_;
std::map<size_t, std::string> output_names_;
}; };
/* /*
......
...@@ -37,8 +37,8 @@ class CxxPaddleApiImpl : public lite_api::PaddlePredictor { ...@@ -37,8 +37,8 @@ class CxxPaddleApiImpl : public lite_api::PaddlePredictor {
std::string GetVersion() const override; std::string GetVersion() const override;
// get inputs names and get outputs names // get inputs names and get outputs names
std::vector<std::string> GetInputNames() override; const std::vector<std::string> &GetInputNames() override;
std::vector<std::string> GetOutputNames() override; const std::vector<std::string> &GetOutputNames() override;
std::unique_ptr<const lite_api::Tensor> GetTensor( std::unique_ptr<const lite_api::Tensor> GetTensor(
const std::string &name) const override; const std::string &name) const override;
...@@ -76,11 +76,11 @@ std::unique_ptr<const lite_api::Tensor> CxxPaddleApiImpl::GetOutput( ...@@ -76,11 +76,11 @@ std::unique_ptr<const lite_api::Tensor> CxxPaddleApiImpl::GetOutput(
return std::unique_ptr<lite_api::Tensor>(new lite_api::Tensor(x)); return std::unique_ptr<lite_api::Tensor>(new lite_api::Tensor(x));
} }
std::vector<std::string> CxxPaddleApiImpl::GetInputNames() { const std::vector<std::string> &CxxPaddleApiImpl::GetInputNames() {
return raw_predictor_.GetInputNames(); return raw_predictor_.GetInputNames();
} }
std::vector<std::string> CxxPaddleApiImpl::GetOutputNames() { const std::vector<std::string> &CxxPaddleApiImpl::GetOutputNames() {
return raw_predictor_.GetOutputNames(); return raw_predictor_.GetOutputNames();
} }
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
// limitations under the License. // limitations under the License.
#include "lite/api/light_api.h" #include "lite/api/light_api.h"
#include <algorithm>
namespace paddle { namespace paddle {
namespace lite { namespace lite {
...@@ -56,16 +57,17 @@ Tensor* LightPredictor::GetInput(size_t offset) { ...@@ -56,16 +57,17 @@ Tensor* LightPredictor::GetInput(size_t offset) {
// get input by name // get input by name
Tensor* LightPredictor::GetInputByName(const std::string& name) { Tensor* LightPredictor::GetInputByName(const std::string& name) {
if (idx2feeds_.find(name) == idx2feeds_.end()) { auto element = std::find(input_names_.begin(), input_names_.end(), name);
if (element == input_names_.end()) {
LOG(ERROR) << "Model do not have input named with: [" << name LOG(ERROR) << "Model do not have input named with: [" << name
<< "], model's inputs include:"; << "], model's inputs include:";
for (int i = 0; i < input_names_.size(); i++) { for (int i = 0; i < input_names_.size(); i++) {
LOG(ERROR) << "[" << input_names_[i] << "]"; LOG(ERROR) << "[" << input_names_[i] << "]";
} }
return NULL; return nullptr;
} else { } else {
int idx = idx2feeds_[name]; int position = std::distance(input_names_.begin(), element);
return GetInput(idx); return GetInput(position);
} }
} }
...@@ -79,35 +81,36 @@ const Tensor* LightPredictor::GetOutput(size_t offset) { ...@@ -79,35 +81,36 @@ const Tensor* LightPredictor::GetOutput(size_t offset) {
return out_var->GetMutable<lite::Tensor>(); return out_var->GetMutable<lite::Tensor>();
} }
// get inputs names // get inputs names
std::vector<std::string> LightPredictor::GetInputNames() { const std::vector<std::string>& LightPredictor::GetInputNames() {
std::vector<std::string> input_names; return input_names_;
for (auto& item : input_names_) {
input_names.push_back(item.second);
}
return input_names;
} }
// get outputnames // get outputnames
std::vector<std::string> LightPredictor::GetOutputNames() { const std::vector<std::string>& LightPredictor::GetOutputNames() {
std::vector<std::string> output_names; return output_names_;
for (auto& item : output_names_) {
output_names.push_back(item.second);
}
return output_names;
} }
// append the names of inputs and outputs into input_names_ and output_names_ // append the names of inputs and outputs into input_names_ and output_names_
void LightPredictor::PrepareFeedFetch() { void LightPredictor::PrepareFeedFetch() {
auto current_block = cpp_program_desc_.GetBlock<cpp::BlockDesc>(0); auto current_block = cpp_program_desc_.GetBlock<cpp::BlockDesc>(0);
std::vector<cpp::OpDesc*> feeds;
std::vector<cpp::OpDesc*> fetchs;
for (int i = 0; i < current_block->OpsSize(); i++) { for (int i = 0; i < current_block->OpsSize(); i++) {
auto op = current_block->GetOp<cpp::OpDesc>(i); auto op = current_block->GetOp<cpp::OpDesc>(i);
if (op->Type() == "feed") { if (op->Type() == "feed") {
int idx = op->GetAttr<int>("col"); feeds.push_back(op);
input_names_[idx] = op->Output("Out").front();
idx2feeds_[op->Output("Out").front()] = idx;
} else if (op->Type() == "fetch") { } else if (op->Type() == "fetch") {
int idx = op->GetAttr<int>("col"); fetchs.push_back(op);
output_names_[idx] = op->Input("X").front();
} }
} }
input_names_.resize(feeds.size());
output_names_.resize(fetchs.size());
for (int i = 0; i < feeds.size(); i++) {
input_names_[feeds[i]->GetAttr<int>("col")] =
feeds[i]->Output("Out").front();
}
for (int i = 0; i < fetchs.size(); i++) {
output_names_[fetchs[i]->GetAttr<int>("col")] =
fetchs[i]->Input("X").front();
}
} }
void LightPredictor::BuildRuntimeProgram(const cpp::ProgramDesc& prog) { void LightPredictor::BuildRuntimeProgram(const cpp::ProgramDesc& prog) {
......
...@@ -64,8 +64,8 @@ class LITE_API LightPredictor { ...@@ -64,8 +64,8 @@ class LITE_API LightPredictor {
} }
// get inputnames and get outputnames. // get inputnames and get outputnames.
std::vector<std::string> GetInputNames(); const std::vector<std::string>& GetInputNames();
std::vector<std::string> GetOutputNames(); const std::vector<std::string>& GetOutputNames();
void PrepareFeedFetch(); void PrepareFeedFetch();
private: private:
...@@ -82,9 +82,8 @@ class LITE_API LightPredictor { ...@@ -82,9 +82,8 @@ class LITE_API LightPredictor {
std::shared_ptr<Scope> scope_; std::shared_ptr<Scope> scope_;
std::unique_ptr<RuntimeProgram> program_; std::unique_ptr<RuntimeProgram> program_;
cpp::ProgramDesc cpp_program_desc_; cpp::ProgramDesc cpp_program_desc_;
std::map<size_t, std::string> input_names_; std::vector<std::string> input_names_;
std::map<std::string, size_t> idx2feeds_; std::vector<std::string> output_names_;
std::map<size_t, std::string> output_names_;
}; };
} // namespace lite } // namespace lite
......
...@@ -32,8 +32,8 @@ class LightPredictorImpl : public PaddlePredictor { ...@@ -32,8 +32,8 @@ class LightPredictorImpl : public PaddlePredictor {
void Run() override; void Run() override;
std::string GetVersion() const override; std::string GetVersion() const override;
std::vector<std::string> GetInputNames() override; const std::vector<std::string>& GetInputNames() override;
std::vector<std::string> GetOutputNames() override; const std::vector<std::string>& GetOutputNames() override;
std::unique_ptr<const Tensor> GetTensor( std::unique_ptr<const Tensor> GetTensor(
const std::string& name) const override; const std::string& name) const override;
...@@ -78,11 +78,11 @@ std::unique_ptr<Tensor> LightPredictorImpl::GetInputByName( ...@@ -78,11 +78,11 @@ std::unique_ptr<Tensor> LightPredictorImpl::GetInputByName(
new Tensor(raw_predictor_->GetInputByName(name))); new Tensor(raw_predictor_->GetInputByName(name)));
} }
std::vector<std::string> LightPredictorImpl::GetInputNames() { const std::vector<std::string>& LightPredictorImpl::GetInputNames() {
return raw_predictor_->GetInputNames(); return raw_predictor_->GetInputNames();
} }
std::vector<std::string> LightPredictorImpl::GetOutputNames() { const std::vector<std::string>& LightPredictorImpl::GetOutputNames() {
return raw_predictor_->GetOutputNames(); return raw_predictor_->GetOutputNames();
} }
......
...@@ -36,12 +36,14 @@ TEST(LightAPI, load) { ...@@ -36,12 +36,14 @@ TEST(LightAPI, load) {
data[i] = i; data[i] = i;
} }
std::vector<std::string> inputs = predictor.GetInputNames(); predictor.PrepareFeedFetch();
const std::vector<std::string>& inputs = predictor.GetInputNames();
LOG(INFO) << "input size: " << inputs.size(); LOG(INFO) << "input size: " << inputs.size();
for (int i = 0; i < inputs.size(); i++) { for (int i = 0; i < inputs.size(); i++) {
LOG(INFO) << "inputnames: " << inputs[i]; LOG(INFO) << "inputnames: " << inputs[i];
} }
std::vector<std::string> outputs = predictor.GetOutputNames(); const std::vector<std::string>& outputs = predictor.GetOutputNames();
for (int i = 0; i < outputs.size(); i++) { for (int i = 0; i < outputs.size(); i++) {
LOG(INFO) << "outputnames: " << outputs[i]; LOG(INFO) << "outputnames: " << outputs[i];
} }
......
...@@ -75,9 +75,9 @@ class LITE_API PaddlePredictor { ...@@ -75,9 +75,9 @@ class LITE_API PaddlePredictor {
virtual std::string GetVersion() const = 0; virtual std::string GetVersion() const = 0;
// Get input names // Get input names
virtual std::vector<std::string> GetInputNames() = 0; virtual const std::vector<std::string>& GetInputNames() = 0;
// Get output names // Get output names
virtual std::vector<std::string> GetOutputNames() = 0; virtual const std::vector<std::string>& GetOutputNames() = 0;
// Get Input by name // Get Input by name
virtual std::unique_ptr<Tensor> GetInputByName(const std::string& name) = 0; virtual std::unique_ptr<Tensor> GetInputByName(const std::string& name) = 0;
......
...@@ -37,12 +37,12 @@ TEST(CxxApi, run) { ...@@ -37,12 +37,12 @@ TEST(CxxApi, run) {
LOG(INFO) << "Version: " << predictor->GetVersion(); LOG(INFO) << "Version: " << predictor->GetVersion();
std::vector<std::string> inputs = predictor->GetInputNames(); auto& inputs = predictor->GetInputNames();
LOG(INFO) << "input size: " << inputs.size(); LOG(INFO) << "input size: " << inputs.size();
for (int i = 0; i < inputs.size(); i++) { for (int i = 0; i < inputs.size(); i++) {
LOG(INFO) << "inputnames: " << inputs[i]; LOG(INFO) << "inputnames: " << inputs[i];
} }
std::vector<std::string> outputs = predictor->GetOutputNames(); auto& outputs = predictor->GetOutputNames();
for (int i = 0; i < outputs.size(); i++) { for (int i = 0; i < outputs.size(); i++) {
LOG(INFO) << "outputnames: " << outputs[i]; LOG(INFO) << "outputnames: " << outputs[i];
} }
...@@ -76,14 +76,14 @@ TEST(LightApi, run) { ...@@ -76,14 +76,14 @@ TEST(LightApi, run) {
auto predictor = lite_api::CreatePaddlePredictor(config); auto predictor = lite_api::CreatePaddlePredictor(config);
std::vector<std::string> inputs = predictor->GetInputNames(); auto& inputs = predictor->GetInputNames();
LOG(INFO) << "input size: " << inputs.size(); LOG(INFO) << "input size: " << inputs.size();
for (int i = 0; i < inputs.size(); i++) { for (int i = 0; i < inputs.size(); i++) {
LOG(INFO) << "inputnames: " << inputs[i]; LOG(INFO) << "inputnames: " << inputs.at(i);
} }
std::vector<std::string> outputs = predictor->GetOutputNames(); auto& outputs = predictor->GetOutputNames();
for (int i = 0; i < outputs.size(); i++) { for (int i = 0; i < outputs.size(); i++) {
LOG(INFO) << "outputnames: " << outputs[i]; LOG(INFO) << "outputnames: " << outputs.at(i);
} }
LOG(INFO) << "Version: " << predictor->GetVersion(); LOG(INFO) << "Version: " << predictor->GetVersion();
......
...@@ -35,6 +35,9 @@ ...@@ -35,6 +35,9 @@
#include <sys/syscall.h> #include <sys/syscall.h>
#include <unistd.h> #include <unistd.h>
#endif #endif
#ifdef LITE_WITH_ANDROID
#include <sys/system_properties.h>
#endif
#if __APPLE__ #if __APPLE__
#include "TargetConditionals.h" #include "TargetConditionals.h"
#if LITE_WITH_IPHONE #if LITE_WITH_IPHONE
...@@ -218,6 +221,7 @@ void get_cpu_arch(std::vector<ARMArch>* archs, const int cpu_num) { ...@@ -218,6 +221,7 @@ void get_cpu_arch(std::vector<ARMArch>* archs, const int cpu_num) {
#ifdef LITE_WITH_LINUX #ifdef LITE_WITH_LINUX
std::string get_cpu_name() { std::string get_cpu_name() {
std::string cpu_name;
FILE* fp = fopen("/proc/cpuinfo", "rb"); FILE* fp = fopen("/proc/cpuinfo", "rb");
if (!fp) { if (!fp) {
return ""; return "";
...@@ -229,12 +233,23 @@ std::string get_cpu_name() { ...@@ -229,12 +233,23 @@ std::string get_cpu_name() {
break; break;
} }
if (strstr(line, "Hardware") != NULL) { if (strstr(line, "Hardware") != NULL) {
fclose(fp); cpu_name = std::string(line);
return std::string(line);
} }
} }
#ifdef LITE_WITH_ANDROID
// cpu name concat board name, platform name and chip name
char board_name[128];
char platform_name[128];
char chip_name[128];
__system_property_get("ro.product.board", board_name);
__system_property_get("ro.board.platform", platform_name);
__system_property_get("ro.chipname", chip_name);
cpu_name =
cpu_name + "_" + board_name + "_" + platform_name + "_" + chip_name;
#endif
std::transform(cpu_name.begin(), cpu_name.end(), cpu_name.begin(), ::toupper);
fclose(fp); fclose(fp);
return ""; return cpu_name;
} }
int get_min_freq_khz(int cpuid) { int get_min_freq_khz(int cpuid) {
...@@ -780,7 +795,9 @@ bool DeviceInfo::SetCPUInfoByName() { ...@@ -780,7 +795,9 @@ bool DeviceInfo::SetCPUInfoByName() {
cluster_ids_ = {0, 0, 0, 0}; cluster_ids_ = {0, 0, 0, 0};
SetArchInfo(1, kA53); SetArchInfo(1, kA53);
return true; return true;
} else if (dev_name_.find("KIRIN980") != std::string::npos) { // Kirin 980 } else if (dev_name_.find("KIRIN980") != std::string::npos ||
dev_name_.find("KIRIN990") !=
std::string::npos) { // Kirin 980, Kirin 990
core_num_ = 8; core_num_ = 8;
core_ids_ = {0, 1, 2, 3, 4, 5, 6, 7}; core_ids_ = {0, 1, 2, 3, 4, 5, 6, 7};
big_core_ids_ = {4, 5, 6, 7}; big_core_ids_ = {4, 5, 6, 7};
...@@ -1109,7 +1126,8 @@ void DeviceInfo::SetCache(int l1size, int l2size, int l3size) { ...@@ -1109,7 +1126,8 @@ void DeviceInfo::SetCache(int l1size, int l2size, int l3size) {
} }
bool DeviceInfo::ExtendWorkspace(size_t size) { bool DeviceInfo::ExtendWorkspace(size_t size) {
workspace_.Resize({size + llc_size()}); workspace_.Resize(
{static_cast<int64_t>(size + static_cast<size_t>(llc_size()))});
return workspace_.mutable_data<int8_t>() != nullptr; return workspace_.mutable_data<int8_t>() != nullptr;
} }
......
...@@ -82,6 +82,10 @@ Type StdTypeToRepr<double>() { ...@@ -82,6 +82,10 @@ Type StdTypeToRepr<double>() {
return Type::_float64; return Type::_float64;
} }
template <> template <>
Type StdTypeToRepr<std::vector<char>>() {
return Type::_char_list;
}
template <>
Type StdTypeToRepr<std::string>() { Type StdTypeToRepr<std::string>() {
return Type::_string; return Type::_string;
} }
......
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#include <stack> #include <stack>
#include <string> #include <string>
#include <vector>
#include "lite/api/paddle_place.h" #include "lite/api/paddle_place.h"
#include "lite/utils/all.h" #include "lite/utils/all.h"
...@@ -36,7 +37,9 @@ enum class Type { ...@@ -36,7 +37,9 @@ enum class Type {
_float64, _float64,
_bool, _bool,
_string, _string,
// primary list types // primary list type
_char_list,
// list types
_list, _list,
// enum type // enum type
_enum, _enum,
...@@ -89,6 +92,8 @@ Type StdTypeToRepr<float>(); ...@@ -89,6 +92,8 @@ Type StdTypeToRepr<float>();
template <> template <>
Type StdTypeToRepr<bool>(); Type StdTypeToRepr<bool>();
template <> template <>
Type StdTypeToRepr<std::vector<char>>();
template <>
Type StdTypeToRepr<std::string>(); Type StdTypeToRepr<std::string>();
// Factors that impact the kernel picking strategy. Multiple factors can be // Factors that impact the kernel picking strategy. Multiple factors can be
......
...@@ -39,6 +39,13 @@ void ConvCompute<PRECISION(kFloat), PRECISION(kFloat)>::PrepareForRun() { ...@@ -39,6 +39,13 @@ void ConvCompute<PRECISION(kFloat), PRECISION(kFloat)>::PrepareForRun() {
int pad = param.paddings[0]; int pad = param.paddings[0];
int stride = param.strides[0]; int stride = param.strides[0];
int chin = param.x->dims()[1];
int hin = param.x->dims()[2];
int win = param.x->dims()[3];
int chout = param.output->dims()[1];
int hout = param.output->dims()[2];
int wout = param.output->dims()[3];
bool kps_equal = (param.paddings[0] == param.paddings[1]) && bool kps_equal = (param.paddings[0] == param.paddings[1]) &&
(param.strides[0] == param.strides[1]) && (kw == kh); (param.strides[0] == param.strides[1]) && (kw == kh);
bool no_dilation = (param.dilations[0] == 1) && (param.dilations[1] == 1); bool no_dilation = (param.dilations[0] == 1) && (param.dilations[1] == 1);
...@@ -54,7 +61,7 @@ void ConvCompute<PRECISION(kFloat), PRECISION(kFloat)>::PrepareForRun() { ...@@ -54,7 +61,7 @@ void ConvCompute<PRECISION(kFloat), PRECISION(kFloat)>::PrepareForRun() {
VLOG(3) << "invoking dw conv"; VLOG(3) << "invoking dw conv";
} else if (param.groups == 1 && kw == 3 && stride == 1 && kps_equal && } else if (param.groups == 1 && kw == 3 && stride == 1 && kps_equal &&
no_dilation) { no_dilation) {
if (ic >= 32 && oc >= 32) { if (ic >= 32 && oc >= 32 && hout > 16 && wout > 16) {
/// winograd conv impl /// winograd conv impl
impl_ = new WinogradConv<PRECISION(kFloat), PRECISION(kFloat)>; impl_ = new WinogradConv<PRECISION(kFloat), PRECISION(kFloat)>;
VLOG(3) << "invoking winograd conv"; VLOG(3) << "invoking winograd conv";
...@@ -63,8 +70,8 @@ void ConvCompute<PRECISION(kFloat), PRECISION(kFloat)>::PrepareForRun() { ...@@ -63,8 +70,8 @@ void ConvCompute<PRECISION(kFloat), PRECISION(kFloat)>::PrepareForRun() {
impl_ = new DirectConv<PRECISION(kFloat), PRECISION(kFloat)>; impl_ = new DirectConv<PRECISION(kFloat), PRECISION(kFloat)>;
VLOG(3) << "invoking direct conv"; VLOG(3) << "invoking direct conv";
} }
} else if (param.groups == 1 && kw == 3 && stride == 2 && kps_equal && } else if (param.groups == 1 && kw == 3 && stride == 2 &&
no_dilation) { chin * chout < 4 * hin * win && kps_equal && no_dilation) {
/// direct conv impl /// direct conv impl
impl_ = new DirectConv<PRECISION(kFloat), PRECISION(kFloat)>; impl_ = new DirectConv<PRECISION(kFloat), PRECISION(kFloat)>;
VLOG(3) << "invoking direct conv"; VLOG(3) << "invoking direct conv";
......
...@@ -32,6 +32,8 @@ nv_test(yolo_box_compute_cuda_test SRCS yolo_box_compute_test.cc DEPS yolo_box_c ...@@ -32,6 +32,8 @@ nv_test(yolo_box_compute_cuda_test SRCS yolo_box_compute_test.cc DEPS yolo_box_c
nv_test(transpose_compute_cuda_test SRCS transpose_compute_test.cc DEPS transpose_compute_cuda) nv_test(transpose_compute_cuda_test SRCS transpose_compute_test.cc DEPS transpose_compute_cuda)
nv_test(concat_compute_cuda_test SRCS concat_compute_test.cc DEPS concat_compute_cuda) nv_test(concat_compute_cuda_test SRCS concat_compute_test.cc DEPS concat_compute_cuda)
nv_test(elementwise_add_compute_cuda_test SRCS elementwise_add_compute_test.cc DEPS elementwise_add_compute_cuda) nv_test(elementwise_add_compute_cuda_test SRCS elementwise_add_compute_test.cc DEPS elementwise_add_compute_cuda)
nv_test(softmax_compute_cuda_test SRCS softmax_compute_test.cc DEPS softmax_compute_cuda)
nv_test(pool_compute_cuda_test SRCS pool_compute_test.cc DEPS pool_compute_cuda)
#nv_test(layout_cuda_test SRCS layout_compute_test.cc DEPS layout_compute_cuda) #nv_test(layout_cuda_test SRCS layout_compute_test.cc DEPS layout_compute_cuda)
nv_test(mul_compute_cuda_test SRCS mul_compute_test.cc DEPS mul_compute_cuda) nv_test(mul_compute_cuda_test SRCS mul_compute_test.cc DEPS mul_compute_cuda)
nv_test(dropout_compute_cuda_test SRCS dropout_compute_test.cc DEPS dropout_compute_cuda ) nv_test(dropout_compute_cuda_test SRCS dropout_compute_test.cc DEPS dropout_compute_cuda )
......
...@@ -194,9 +194,9 @@ TEST(pool_cuda, compute) { ...@@ -194,9 +194,9 @@ TEST(pool_cuda, compute) {
for (auto stride : {1, 2}) { for (auto stride : {1, 2}) {
for (auto pad : {0, 1}) { for (auto pad : {0, 1}) {
for (auto n : {1, 2}) { for (auto n : {1, 2}) {
for (auto c : {1, 3, 256}) { for (auto c : {1, 3}) {
for (auto h : {2, 3, 4, 6, 13}) { for (auto h : {2, 3, 4, 11}) {
for (auto w : {2, 3, 4, 6, 13}) { for (auto w : {2, 3, 4, 11}) {
VLOG(3) << "n:" << n << " c:" << c << " h:" << h VLOG(3) << "n:" << n << " c:" << c << " h:" << h
<< " w:" << w << " ksize:" << ksize << " w:" << w << " ksize:" << ksize
<< " stride:" << stride << " pad:" << pad << " stride:" << stride << " pad:" << pad
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/cuda/softmax_compute.h"
#include <gtest/gtest.h>
#include <limits>
#include <memory>
#include <string>
#include <utility>
#include <vector>
namespace paddle {
namespace lite {
namespace kernels {
namespace cuda {
using Tensor = lite::Tensor;
using DDim = lite::DDim;
// Host-side reference softmax used to check the CUDA kernel's result.
// Reads param.x, writes param.output, and normalises along param.axis
// (a negative axis is shifted by the tensor rank before use).
template <typename dtype>
static void softmax_compute_ref(const operators::SoftmaxParam& param) {
  const dtype* in = param.x->mutable_data<const dtype>();
  dtype* out = param.output->mutable_data<dtype>();
  DDim x_dims = param.x->dims();
  ASSERT_EQ(x_dims.data(), param.output->dims().data());

  auto x_rank = x_dims.size();
  int axis = param.axis;
  if (axis < 0) {
    axis += x_rank;
  }

  // The tensor is viewed as [outer_num, axis_size, inner_num]; every
  // (outer, inner) pair selects one strided vector to normalise.
  int axis_size = x_dims[axis];
  int outer_num = x_dims.Slice(0, axis).production();
  int inner_num = x_dims.Slice(axis + 1, x_rank).production();
  int compute_size = outer_num * inner_num;

  for (int vec = 0; vec < compute_size; vec++) {
    // Flat index of the first element of this vector.
    const int start = (vec / inner_num) * axis_size * inner_num + vec % inner_num;

    // Pass 1: maximum along the axis, subtracted below before exponentiating.
    dtype max_val = std::numeric_limits<dtype>::lowest();
    for (int j = 0; j < axis_size; j++) {
      const int pos = start + j * inner_num;
      if (in[pos] > max_val) {
        max_val = in[pos];
      }
    }

    // Pass 2: exponentiate the shifted values and accumulate their sum.
    dtype total = static_cast<dtype>(0);
    for (int j = 0; j < axis_size; j++) {
      const int pos = start + j * inner_num;
      out[pos] = exp(in[pos] - max_val);
      total += out[pos];
    }

    // Pass 3: divide by the sum so the vector adds up to one.
    for (int j = 0; j < axis_size; j++) {
      out[start + j * inner_num] /= total;
    }
  }
}
// Runs the CUDA softmax kernel over a grid of NCHW shapes and axes and
// compares every element against the host reference implementation.
TEST(softmax_cuda, compute) {
  std::unique_ptr<KernelContext> ctx(new KernelContext);
  auto& context = ctx->As<CUDAContext>();
  cudaStream_t stream;
  cudaStreamCreate(&stream);
  context.SetExecStream(stream);

  SoftmaxCompute softmax;
  operators::SoftmaxParam param;
  softmax.SetContext(std::move(ctx));

  lite::Tensor x;           // kernel input (filled through the kCUDA Assign)
  lite::Tensor x_cpu;       // host copy of the input
  lite::Tensor output;      // kernel output (kCUDA buffer)
  lite::Tensor output_cpu;  // host buffer receiving the kernel output
  lite::Tensor output_ref;  // host buffer holding the reference result

  for (auto n : {1, 3}) {
    for (auto c : {1, 4}) {
      for (auto h : {5, 1, 112}) {
        for (auto w : {1, 6, 112}) {
          for (auto axis : {-2, -1, 0, 1, 2}) {
            x.Resize({n, c, h, w});
            x_cpu.Resize({n, c, h, w});
            output.Resize({n, c, h, w});
            output_cpu.Resize({n, c, h, w});
            output_ref.Resize({n, c, h, w});

            auto* x_cpu_data = x_cpu.mutable_data<float>();
            auto* output_data = output.mutable_data<float>(TARGET(kCUDA));
            // The kernel result must land in its own host buffer. Taking this
            // pointer from output_ref would alias the reference data, and the
            // comparison below would then check a buffer against itself.
            auto* output_cpu_data = output_cpu.mutable_data<float>();
            auto* output_ref_data = output_ref.mutable_data<float>();

            for (int i = 0; i < x.dims().production(); i++) {
              x_cpu_data[i] = i;
            }
            x.Assign<float, lite::DDim, TARGET(kCUDA)>(x_cpu_data,
                                                       x_cpu.dims());

            // Kernel under test.
            param.x = &x;
            param.axis = axis;
            param.output = &output;
            softmax.SetParam(param);
            softmax.Launch();

            // Host reference on the same input.
            param.x = &x_cpu;
            param.output = &output_ref;
            softmax_compute_ref<float>(param);

            cudaDeviceSynchronize();
            CopySync<TARGET(kCUDA)>(output_cpu_data,
                                    output_data,
                                    sizeof(float) * output.numel(),
                                    IoDirection::DtoH);
            for (int i = 0; i < output.dims().production(); i++) {
              EXPECT_NEAR(output_cpu_data[i], output_ref_data[i], 1e-5);
            }
          }
        }
      }
    }
  }
}
} // namespace cuda
} // namespace kernels
} // namespace lite
} // namespace paddle
...@@ -89,7 +89,7 @@ inline static void calc_label_score(float* scores, ...@@ -89,7 +89,7 @@ inline static void calc_label_score(float* scores,
template <typename T> template <typename T>
static void YoloBoxRef(const T* input, static void YoloBoxRef(const T* input,
const T* imgsize, const int* imgsize,
T* boxes, T* boxes,
T* scores, T* scores,
const float conf_thresh, const float conf_thresh,
...@@ -106,8 +106,8 @@ static void YoloBoxRef(const T* input, ...@@ -106,8 +106,8 @@ static void YoloBoxRef(const T* input,
float box[4]; float box[4];
for (int i = 0; i < n; i++) { for (int i = 0; i < n; i++) {
int img_height = static_cast<int>(imgsize[2 * i]); int img_height = imgsize[2 * i];
int img_width = static_cast<int>(imgsize[2 * i + 1]); int img_width = imgsize[2 * i + 1];
for (int j = 0; j < an_num; j++) { for (int j = 0; j < an_num; j++) {
for (int k = 0; k < h; k++) { for (int k = 0; k < h; k++) {
...@@ -184,12 +184,12 @@ TEST(yolo_box, normal) { ...@@ -184,12 +184,12 @@ TEST(yolo_box, normal) {
auto* scores_data = scores.mutable_data<float>(TARGET(kCUDA)); auto* scores_data = scores.mutable_data<float>(TARGET(kCUDA));
float* x_cpu_data = x_cpu.mutable_data<float>(); float* x_cpu_data = x_cpu.mutable_data<float>();
float* sz_cpu_data = sz_cpu.mutable_data<float>(); int* sz_cpu_data = sz_cpu.mutable_data<int>();
float* boxes_cpu_data = boxes_cpu.mutable_data<float>(); float* boxes_cpu_data = boxes_cpu.mutable_data<float>();
float* scores_cpu_data = scores_cpu.mutable_data<float>(); float* scores_cpu_data = scores_cpu.mutable_data<float>();
float* x_ref_data = x_ref.mutable_data<float>(); float* x_ref_data = x_ref.mutable_data<float>();
float* sz_ref_data = sz_ref.mutable_data<float>(); int* sz_ref_data = sz_ref.mutable_data<int>();
float* boxes_ref_data = boxes_ref.mutable_data<float>(); float* boxes_ref_data = boxes_ref.mutable_data<float>();
float* scores_ref_data = scores_ref.mutable_data<float>(); float* scores_ref_data = scores_ref.mutable_data<float>();
...@@ -203,7 +203,7 @@ TEST(yolo_box, normal) { ...@@ -203,7 +203,7 @@ TEST(yolo_box, normal) {
sz_ref_data[1] = 32; sz_ref_data[1] = 32;
x.Assign<float, lite::DDim, TARGET(kCUDA)>(x_cpu_data, x_cpu.dims()); x.Assign<float, lite::DDim, TARGET(kCUDA)>(x_cpu_data, x_cpu.dims());
sz.Assign<float, lite::DDim, TARGET(kCUDA)>(sz_cpu_data, sz_cpu.dims()); sz.Assign<int, lite::DDim, TARGET(kCUDA)>(sz_cpu_data, sz_cpu.dims());
param.X = &x; param.X = &x;
param.ImgSize = &sz; param.ImgSize = &sz;
......
...@@ -727,10 +727,8 @@ void LoadModelNaiveFromMemory(const std::string &model_buffer, ...@@ -727,10 +727,8 @@ void LoadModelNaiveFromMemory(const std::string &model_buffer,
// Load model // Load model
std::string prog_path = model_buffer;
naive_buffer::BinaryTable table; naive_buffer::BinaryTable table;
table.LoadFromMemory(prog_path.c_str(), prog_path.length()); table.LoadFromMemory(model_buffer.c_str(), model_buffer.length());
naive_buffer::proto::ProgramDesc nb_proto_prog(&table); naive_buffer::proto::ProgramDesc nb_proto_prog(&table);
nb_proto_prog.Load(); nb_proto_prog.Load();
...@@ -742,8 +740,7 @@ void LoadModelNaiveFromMemory(const std::string &model_buffer, ...@@ -742,8 +740,7 @@ void LoadModelNaiveFromMemory(const std::string &model_buffer,
// Load Params // Load Params
// NOTE: Only main block be used now. // NOTE: Only main block be used now.
// only combined Params are supported in Loading Model from memory // only combined Params are supported in Loading Model from memory
std::string combined_params_path = param_buffer; LoadCombinedParamsNaive(param_buffer, scope, *cpp_prog, true);
LoadCombinedParamsNaive(combined_params_path, scope, *cpp_prog, true);
VLOG(4) << "Load model from naive buffer memory successfully"; VLOG(4) << "Load model from naive buffer memory successfully";
} }
......
...@@ -126,6 +126,41 @@ using UInt64Builder = PrimaryBuilder<uint64_t>; ...@@ -126,6 +126,41 @@ using UInt64Builder = PrimaryBuilder<uint64_t>;
using Float32Builder = PrimaryBuilder<float>; using Float32Builder = PrimaryBuilder<float>;
using Float64Builder = PrimaryBuilder<double>; using Float64Builder = PrimaryBuilder<double>;
/*
 * Builder holding a list of primary-typed values (the element type is the
 * template parameter `Primary`) in a single std::vector.
 */
template <typename Primary>
class PrimaryListBuilder : public FieldBuilder {
 public:
  using value_type = Primary;

  /// Create an empty list bound to `table`.
  explicit PrimaryListBuilder(BinaryTable* table) : FieldBuilder(table) {}

  /// Create a list bound to `table`, initialised with a copy of `val`.
  PrimaryListBuilder(BinaryTable* table, const std::vector<Primary>& val)
      : FieldBuilder(table), data_(val) {}

  ~PrimaryListBuilder() = default;

  /// Replace the stored elements with a copy of `x`.
  void set(const std::vector<Primary>& x) { data_ = x; }

  /// Read-only access to the stored elements.
  const std::vector<Primary>& data() const { return data_; }

  /// Number of elements currently stored.
  size_t size() const { return data_.size(); }

  /// Drop every stored element.
  void Clear() { data_.clear(); }

  /// Save information to the corresponding BinaryTable.
  void Save() override;

  /// Load information from the corresponding BinaryTable.
  void Load() override;

  /// Type tag of this field, derived from std::vector<Primary>.
  Type type() const override {
    return core::StdTypeToRepr<std::vector<Primary>>();
  }

 private:
  std::vector<Primary> data_;
};
/* /*
* Builder for all the primary types. int32, float, bool and so on. * Builder for all the primary types. int32, float, bool and so on.
*/ */
...@@ -344,6 +379,36 @@ void PrimaryBuilder<Primary>::Load() { ...@@ -344,6 +379,36 @@ void PrimaryBuilder<Primary>::Load() {
table()->Consume(sizeof(value_type)); table()->Consume(sizeof(value_type));
} }
/// Read the list back from the table cursor: first a uint64_t element count,
/// then that many `value_type` elements, consuming the bytes as they are read.
template <typename Primary>
void PrimaryListBuilder<Primary>::Load() {
  CHECK(data_.empty()) << "Duplicate load";

  // The element count sits at the head of the record.
  uint64_t count{};
  memcpy(&count, table()->cursor(), sizeof(count));
  table()->Consume(sizeof(count));

  // Size the storage up front, then fill it element by element, advancing the
  // table cursor after each read.
  data_.resize(count);
  for (auto& elem : data_) {
    memcpy(&elem, table()->cursor(), sizeof(value_type));
    table()->Consume(sizeof(value_type));
  }
}
/// Write the list to the table cursor: a uint64_t element count followed by
/// the raw bytes of all elements in one contiguous copy.
template <typename Primary>
void PrimaryListBuilder<Primary>::Save() {
  // Store the number of elements in the head.
  const uint64_t num_elems = size();
  table()->Require(sizeof(uint64_t));
  memcpy(table()->cursor(), &num_elems, sizeof(uint64_t));
  table()->Consume(sizeof(uint64_t));

  // Then the payload. An empty list must not touch element 0: indexing an
  // empty vector is undefined behaviour, and data() may be null in that case,
  // so the copy is skipped while Require/Consume are still issued with a
  // zero size exactly as before.
  const size_t num_bytes = num_elems * sizeof(value_type);
  table()->Require(num_bytes);
  if (num_bytes > 0) {
    memcpy(table()->cursor(), data_.data(), num_bytes);
  }
  table()->Consume(num_bytes);
}
template <typename EnumType> template <typename EnumType>
void EnumBuilder<EnumType>::Save() { void EnumBuilder<EnumType>::Save() {
value_type holder = static_cast<value_type>(data_); value_type holder = static_cast<value_type>(data_);
......
...@@ -149,15 +149,16 @@ void ParamDesc::SetDim(const std::vector<int64_t>& dim) { ...@@ -149,15 +149,16 @@ void ParamDesc::SetDim(const std::vector<int64_t>& dim) {
CHECK(GetDataType() == VarDescAPI::VarDataType::type__) \ CHECK(GetDataType() == VarDescAPI::VarDataType::type__) \
<< "Data Type mismatch"; \ << "Data Type mismatch"; \
std::vector<T> res; \ std::vector<T> res; \
auto& data_builder = desc_->GetField<ListBuilder<CharBuilder>>("data"); \ auto& data_builder = desc_->GetField<PrimaryListBuilder<char>>("data"); \
auto data = RepeatedToVector<char, CharBuilder>(data_builder); \ auto& data = data_builder.data(); \
size_t size = data.size() / sizeof(T); \ size_t size = data.size() / sizeof(T); \
auto* data_ptr = reinterpret_cast<T*>(&data[0]); \ auto* data_ptr = reinterpret_cast<const T*>(&data[0]); \
for (size_t i = 0; i < size; ++i) { \ for (size_t i = 0; i < size; ++i) { \
res.push_back(data_ptr[i]); \ res.push_back(data_ptr[i]); \
} \ } \
return res; \ return res; \
} }
GET_DATA_IMPL(uint8_t, UINT8); GET_DATA_IMPL(uint8_t, UINT8);
GET_DATA_IMPL(int8_t, INT8); GET_DATA_IMPL(int8_t, INT8);
GET_DATA_IMPL(int16_t, INT16); GET_DATA_IMPL(int16_t, INT16);
...@@ -172,14 +173,13 @@ GET_DATA_IMPL(double, FP64); ...@@ -172,14 +173,13 @@ GET_DATA_IMPL(double, FP64);
CHECK(GetDataType() == VarDescAPI::VarDataType::type__) \ CHECK(GetDataType() == VarDescAPI::VarDataType::type__) \
<< "Data Type mismatch, call SetDataType first."; \ << "Data Type mismatch, call SetDataType first."; \
auto* data_builder = \ auto* data_builder = \
desc_->GetMutableField<ListBuilder<CharBuilder>>("data"); \ desc_->GetMutableField<PrimaryListBuilder<char>>("data"); \
CHECK(data_builder); \ CHECK(data_builder); \
data_builder->Clear(); \ data_builder->Clear(); \
size_t size = size__ * sizeof(T); \ size_t size = size__ * sizeof(T); \
auto* data_ptr = reinterpret_cast<const char*>(data_ptr__); \ auto* data_ptr = reinterpret_cast<const char*>(data_ptr__); \
for (size_t i = 0; i < size; ++i) { \ std::vector<char> data_vec(data_ptr, data_ptr + size); \
data_builder->New()->set(data_ptr[i]); \ data_builder->set(data_vec);
}
#define SET_DATA_IMPL(T, type__) \ #define SET_DATA_IMPL(T, type__) \
template <> \ template <> \
......
...@@ -191,7 +191,7 @@ class ParamDesc : public StructBuilder { ...@@ -191,7 +191,7 @@ class ParamDesc : public StructBuilder {
New<lod_type>("lod"); New<lod_type>("lod");
NewUInt32("tensor_version"); NewUInt32("tensor_version");
New<TensorDesc>("tensor_desc"); New<TensorDesc>("tensor_desc");
New<ListBuilder<CharBuilder>>("data"); New<PrimaryListBuilder<char>>("data");
} }
}; };
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
// limitations under the License. // limitations under the License.
#include "lite/operators/conv_op.h" #include "lite/operators/conv_op.h"
#include <algorithm>
#include <vector> #include <vector>
#include "lite/core/op_registry.h" #include "lite/core/op_registry.h"
...@@ -51,10 +52,41 @@ inline int ConvOutputSize( ...@@ -51,10 +52,41 @@ inline int ConvOutputSize(
return output_size; return output_size;
} }
inline void UpdatePaddingAndDilation(std::vector<int>* paddings,
std::vector<int>* dilations,
const std::vector<int>& strides,
const std::string padding_algorithm,
const lite::DDim data_dims,
const lite::DDim& ksize) {
// when padding_desc is "VALID" or "SAME"
if (padding_algorithm == "SAME") {
for (size_t i = 0; i < strides.size(); ++i) {
int out_size = (data_dims[i + 2] + strides[i] - 1) / strides[i];
int pad_sum =
std::max((out_size - 1) * strides[i] + ksize[i] - data_dims[i + 2],
(int64_t)0);
// pad
*(paddings->begin() + i) = pad_sum / 2;
// dilation
*(dilations->begin() + i) = 1;
}
} else if (padding_algorithm == "VALID") {
for (auto& it : *paddings) {
it = 0;
}
}
}
bool ConvOpLite::InferShape() const { bool ConvOpLite::InferShape() const {
const auto in_dims = param_.x->dims(); const auto in_dims = param_.x->dims();
const auto filter_dims = param_.filter->dims(); const auto filter_dims = param_.filter->dims();
UpdatePaddingAndDilation(&param_.paddings,
&param_.dilations,
param_.strides,
padding_algorithm_,
in_dims,
filter_dims);
std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]}); std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]});
for (size_t i = 0; i < param_.strides.size(); ++i) { for (size_t i = 0; i < param_.strides.size(); ++i) {
output_shape.push_back(ConvOutputSize(in_dims[i + 2], output_shape.push_back(ConvOutputSize(in_dims[i + 2],
......
...@@ -93,6 +93,10 @@ class ConvOpLite : public OpLite { ...@@ -93,6 +93,10 @@ class ConvOpLite : public OpLite {
<< "The fused conv only supports fuse with relu and leaky relu"; << "The fused conv only supports fuse with relu and leaky relu";
} }
} }
if (op_desc.HasAttr("padding_algorithm")) {
padding_algorithm_ = op_desc.GetAttr<std::string>("padding_algorithm");
}
// For Int8 // For Int8
if (op_desc.HasAttr("enable_int8")) { if (op_desc.HasAttr("enable_int8")) {
param_.enable_int8 = op_desc.GetAttr<bool>("enable_int8"); param_.enable_int8 = op_desc.GetAttr<bool>("enable_int8");
...@@ -114,6 +118,7 @@ class ConvOpLite : public OpLite { ...@@ -114,6 +118,7 @@ class ConvOpLite : public OpLite {
private: private:
mutable ConvParam param_; mutable ConvParam param_;
std::string padding_algorithm_{""};
}; };
} // namespace operators } // namespace operators
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册