Commit 28eec65f authored by Chunwei

lite/enhance model

Parent d1b0af43
@@ -5,7 +5,7 @@ cc_library(ir_params_sync_among_devices_pass SRCS ir_params_sync_among_devices_p
 cc_library(ir_graph_to_program_pass SRCS ir_graph_to_program_pass.cc DEPS analysis_pass graph_to_program_pass)
 cc_library(adjust_cudnn_workspace_size_pass SRCS adjust_cudnn_workspace_size_pass.cc DEPS analysis_pass graph_to_program_pass)
-cc_library(analysis_passes SRCS passes.cc DEPS
+cc_library(analysis_passes SRCS use_passes.cc DEPS
   ir_graph_build_pass
   ir_analysis_pass
   ir_params_sync_among_devices_pass
......
@@ -12,13 +12,13 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-#include "paddle/fluid/inference/analysis/passes/passes.h"
 #include "paddle/fluid/inference/analysis/passes/adjust_cudnn_workspace_size_pass.h"
 #include "paddle/fluid/inference/analysis/passes/ir_analysis_pass.h"
 #include "paddle/fluid/inference/analysis/passes/ir_graph_build_pass.h"
 #include "paddle/fluid/inference/analysis/passes/ir_graph_to_program_pass.h"
 #include "paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.h"
 #include "paddle/fluid/inference/analysis/passes/memory_optimize_pass.h"
+#include "paddle/fluid/inference/analysis/passes/passes.h"

 namespace paddle {
 namespace inference {
......
@@ -10,6 +10,9 @@ message(STATUS "LITE_WITH_ARM:\t${LITE_WITH_ARM}")
 message(STATUS "LITE_WITH_PROFILE:\t${LITE_WITH_PROFILE}")

 set(LITE_MODEL_DIR "${THIRD_PARTY_PATH}/install")
+set(LITE_ON_MOBILE LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
+
 set(LITE_URL "http://paddle-inference-dist.bj.bcebos.com" CACHE STRING "inference download url")

 function(lite_download_and_uncompress INSTALL_DIR URL FILENAME)
@@ -182,3 +185,8 @@ add_subdirectory(model_parser)
 add_subdirectory(utils)
 add_subdirectory(api)
 add_subdirectory(gen_code)
+
+if (WITH_TESTING)
+  lite_download_and_uncompress(${LITE_MODEL_DIR} ${LITE_URL} "lite_naive_model.tar.gz")
+endif()
-set(cxx_api_lite_deps scope_lite optimizer_lite target_wrapper_host model_parser_lite)
+set(cxx_api_lite_deps
+    scope_lite optimizer_lite target_wrapper_host model_parser_lite program_lite)
 if(LITE_WITH_CUDA)
     set(cxx_api_lite_deps ${cxx_api_lite_deps} kernels_cuda)
     cc_library(cxx_api_lite_cuda SRCS cxx_api.cc DEPS ${cxx_api_lite_deps} target_wrapper_cuda)
     nv_test(test_cxx_api_lite_cuda SRCS cxx_api_test.cc DEPS cxx_api_lite_cuda)
 endif()
-cc_library(cxx_api_lite SRCS cxx_api.cc DEPS ${cxx_api_lite_deps} ${ops_lite} program_lite)
+lite_cc_library(lite_api_test_helper SRCS lite_api_test_helper.cc
+    DEPS scope_lite optimizer_lite target_wrapper_host model_parser_lite program_lite
+    ${ops_lite} ${host_kernels}
+    CUDA_DEPS kernels_cuda
+    X86_DEPS ${x86_kernels}
+    )
+lite_cc_library(cxx_api_lite SRCS cxx_api.cc DEPS lite_api_test_helper)

 set(light_api_deps
-    scope_lite target_wrapper_host model_parser_lite)
+    scope_lite target_wrapper_host model_parser_lite program_lite)
 if(LITE_WITH_CUDA)
     set(light_api_deps ${light_api_deps} target_wrapper_cuda)
 endif()
-#cc_library(light_api_lite SRCS light_api.cc DEPS ${light_api_deps} ${ops_lite} ${host_kernels})
+lite_cc_library(light_api_lite SRCS light_api.cc
+    DEPS ${light_api_deps} ${ops_lite} ${host_kernels}
+    )

 message(STATUS "get ops ${ops_lite}")
 message(STATUS "get Host kernels ${host_kernels}")
@@ -30,18 +39,25 @@ if((NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK) AND WITH_TESTING)
       ${ops_lite} ${host_kernels} ${x86_kernels}
       ARGS --model_dir=${LITE_MODEL_DIR}/lite_naive_model
       --optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt SERIAL)
-  lite_download_and_uncompress(${LITE_MODEL_DIR} ${LITE_URL} "lite_naive_model.tar.gz")
   add_dependencies(test_cxx_api_lite extern_lite_download_lite_naive_model_tar_gz)
 endif()

-if(NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND WITH_TESTING)
-  add_dependencies(test_cxx_api_lite extern_lite_download_lite_naive_model_tar_gz)
-endif()
-
-# if(NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
-#     lite_cc_test(test_light_api SRCS light_api_test.cc DEPS light_api_lite ARGS --optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt SERIAL)
-# endif()
+# These tests need CLI arguments and are not supported in ARM CI.
+# TODO(Superjomn) support them later.
+if(NOT LITE_ON_MOBILE)
+    lite_cc_test(test_light_api SRCS light_api_test.cc
+        DEPS light_api_lite mir_passes
+        X86_DEPS ${x86_kernels}
+        ARGS --optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt
+        SERIAL)
+
+    lite_cc_test(test_apis_lite SRCS apis_test.cc
+        DEPS cxx_api_lite light_api_lite ${ops_lite} mir_passes
+        X86_DEPS ${x86_kernels}
+        ARGS --model_dir=${LITE_MODEL_DIR}/lite_naive_model
+        --optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt SERIAL)
+endif()

 lite_cc_binary(cxx_api_lite_bin SRCS cxx_api_bin.cc
     DEPS
@@ -51,4 +67,3 @@ lite_cc_binary(cxx_api_lite_bin SRCS cxx_api_bin.cc
     mir_passes
     ${ops_lite} ${host_kernels}
     ARM_DEPS ${arm_kernels})
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
/*
* We test multiple APIs here.
*/
#include <gtest/gtest.h>
#include <sstream>
#include <vector>
#include "paddle/fluid/lite/api/cxx_api.h"
#include "paddle/fluid/lite/api/light_api.h"
#include "paddle/fluid/lite/core/mir/pass_registry.h"
#include "paddle/fluid/lite/core/mir/use_passes.h"
#include "paddle/fluid/lite/kernels/use_kernels.h"
#include "paddle/fluid/lite/operators/use_ops.h"
DEFINE_string(model_dir, "", "");
DEFINE_string(optimized_model, "", "");
namespace paddle {
namespace lite {
void SetConstInput(lite::Tensor* x) {
x->Resize(DDim(std::vector<DDim::value_type>({100, 100})));
auto* data = x->mutable_data<float>();
for (int i = 0; i < 100 * 100; i++) {
data[i] = i;
}
}
bool CompareTensors(const std::string& name, const ExecutorLite& cxx_api,
const LightPredictor& light_api) {
const auto* a = cxx_api.GetTensor(name);
const auto* b = light_api.GetTensor(name);
return TensorCompareWith(*a, *b);
}
#ifndef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
TEST(CXXApi_LightApi, save_and_load_model) {
lite::ExecutorLite cxx_api;
lite::LightPredictor light_api;
// CXXApi
{
std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kX86), PRECISION(kFloat)}});
cxx_api.Build(FLAGS_model_dir, Place{TARGET(kCUDA), PRECISION(kFloat)},
valid_places);
auto* x = cxx_api.GetInput(0);
SetConstInput(x);
cxx_api.Run();
LOG(INFO) << "Save optimized model to " << FLAGS_optimized_model;
cxx_api.SaveModel(FLAGS_optimized_model);
}
// LightApi
{
light_api.Build(FLAGS_optimized_model);
auto* x = light_api.GetInput(0);
SetConstInput(x);
light_api.Run();
}
const auto* cxx_out = cxx_api.GetOutput(0);
const auto* light_out = light_api.GetOutput(0);
ASSERT_TRUE(TensorCompareWith(*cxx_out, *light_out));
std::vector<std::string> tensors_with_order({
"a", "fc_0.w_0", "fc_0.tmp_0", "scale_0.tmp_0",
});
for (const auto& tensor_name : tensors_with_order) {
ASSERT_TRUE(CompareTensors(tensor_name, cxx_api, light_api));
}
}
#endif // LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
} // namespace lite
} // namespace paddle
@@ -78,6 +78,11 @@ class ExecutorLite {
     return &fetch_list.at(offset);
   }

+  const lite::Tensor* GetTensor(const std::string& name) const {
+    auto* var = program_->exec_scope()->FindVar(name);
+    return &var->Get<lite::Tensor>();
+  }
+
   void Run() { program_->Run(); }

   const framework::proto::ProgramDesc& program_desc() const {
......
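The new GetTensor accessor exposes any named variable of the exec scope for inspection after a run. A minimal usage sketch (assuming a predictor that has already been built and run, as in the tests further below; the variable name "fc_0.tmp_0" is a placeholder whose real value depends on the model):

#include "paddle/fluid/lite/api/cxx_api.h"

namespace paddle {
namespace lite {

// Log the shape and first element of one intermediate tensor, looked up by name.
void DumpIntermediate(const ExecutorLite& predictor) {
  const lite::Tensor* t = predictor.GetTensor("fc_0.tmp_0");  // placeholder name
  LOG(INFO) << "dims " << t->dims() << " first value " << t->data<float>()[0];
}

}  // namespace lite
}  // namespace paddle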
@@ -13,9 +13,10 @@
 // limitations under the License.

 #include "paddle/fluid/lite/api/cxx_api.h"
-#include <chrono>  // NOLINT
-#include "paddle/fluid/lite/core/mir/passes.h"
+#include <chrono>
+#include "paddle/fluid/lite/core/mir/use_passes.h"
 #include "paddle/fluid/lite/core/op_registry.h"

 namespace paddle {
 namespace lite {
......
@@ -16,11 +16,13 @@
 #include <gflags/gflags.h>
 #include <gtest/gtest.h>
 #include <vector>
-#include "paddle/fluid/lite/core/mir/passes.h"
+#include "paddle/fluid/lite/api/lite_api_test_helper.h"
+#include "paddle/fluid/lite/core/compatible_tensor.h"
+#include "paddle/fluid/lite/core/mir/use_passes.h"
 #include "paddle/fluid/lite/core/op_registry.h"
+#include "paddle/fluid/lite/kernels/use_kernels.h"
+#include "paddle/fluid/lite/operators/use_ops.h"

-DEFINE_string(model_dir, "", "");
-DEFINE_string(optimized_model, "", "");
 // For training.
 DEFINE_string(startup_program_path, "", "");
@@ -30,40 +32,11 @@ namespace paddle {
 namespace lite {

 TEST(CXXApi, test) {
-  lite::ExecutorLite predictor;
-#ifndef LITE_WITH_CUDA
-  std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)},
-                                   Place{TARGET(kX86), PRECISION(kFloat)}});
-#else
-  std::vector<Place> valid_places({
-      Place{TARGET(kHost), PRECISION(kFloat), DATALAYOUT(kNCHW)},
-      Place{TARGET(kCUDA), PRECISION(kFloat), DATALAYOUT(kNCHW)},
-      Place{TARGET(kCUDA), PRECISION(kAny), DATALAYOUT(kNCHW)},
-      Place{TARGET(kHost), PRECISION(kAny), DATALAYOUT(kNCHW)},
-      Place{TARGET(kCUDA), PRECISION(kAny), DATALAYOUT(kAny)},
-      Place{TARGET(kHost), PRECISION(kAny), DATALAYOUT(kAny)},
-  });
-#endif
-
-  predictor.Build(FLAGS_model_dir,
-                  Place{TARGET(kX86), PRECISION(kFloat)},  // origin cuda
-                  valid_places);
-
-  auto* input_tensor = predictor.GetInput(0);
-  input_tensor->Resize(DDim(std::vector<DDim::value_type>({100, 100})));
-  auto* data = input_tensor->mutable_data<float>();
-  for (int i = 0; i < 100 * 100; i++) {
-    data[i] = i;
-  }
-
-  // LOG(INFO) << "input " << *input_tensor;
-
-  predictor.Run();
-
-  auto* out = predictor.GetOutput(0);
+  const lite::Tensor* out = RunHvyModel();
   LOG(INFO) << out << " memory size " << out->data_size();
-  LOG(INFO) << "out " << out->data<float>()[0];
-  LOG(INFO) << "out " << out->data<float>()[1];
+  for (int i = 0; i < 10; i++) {
+    LOG(INFO) << "out " << out->data<float>()[i];
+  }
   LOG(INFO) << "dims " << out->dims();
   // LOG(INFO) << "out " << *out;
 }
@@ -117,44 +90,3 @@ TEST(CXXApi, save_model) {

 }  // namespace lite
 }  // namespace paddle
-
-USE_LITE_OP(mul);
-USE_LITE_OP(fc);
-USE_LITE_OP(relu);
-USE_LITE_OP(scale);
-USE_LITE_OP(feed);
-USE_LITE_OP(fetch);
-USE_LITE_OP(io_copy);
-USE_LITE_OP(elementwise_add)
-USE_LITE_OP(elementwise_sub)
-USE_LITE_OP(square)
-USE_LITE_OP(softmax)
-USE_LITE_OP(dropout)
-USE_LITE_OP(concat)
-USE_LITE_OP(conv2d)
-USE_LITE_OP(depthwise_conv2d)
-USE_LITE_OP(pool2d)
-
-USE_LITE_KERNEL(feed, kHost, kAny, kAny, def);
-USE_LITE_KERNEL(fetch, kHost, kAny, kAny, def);
-
-#ifdef LITE_WITH_X86
-USE_LITE_KERNEL(relu, kX86, kFloat, kNCHW, def);
-USE_LITE_KERNEL(mul, kX86, kFloat, kNCHW, def);
-USE_LITE_KERNEL(fc, kX86, kFloat, kNCHW, def);
-USE_LITE_KERNEL(scale, kX86, kFloat, kNCHW, def);
-USE_LITE_KERNEL(square, kX86, kFloat, kNCHW, def);
-USE_LITE_KERNEL(elementwise_sub, kX86, kFloat, kNCHW, def);
-USE_LITE_KERNEL(elementwise_add, kX86, kFloat, kNCHW, def);
-USE_LITE_KERNEL(softmax, kX86, kFloat, kNCHW, def);
-USE_LITE_KERNEL(dropout, kX86, kFloat, kNCHW, def);
-USE_LITE_KERNEL(concat, kX86, kFloat, kNCHW, def);
-USE_LITE_KERNEL(conv2d, kX86, kFloat, kNCHW, def);
-USE_LITE_KERNEL(depthwise_conv2d, kX86, kFloat, kNCHW, def);
-USE_LITE_KERNEL(pool2d, kX86, kFloat, kNCHW, def);
-#endif
-
-#ifdef LITE_WITH_CUDA
-USE_LITE_KERNEL(mul, kCUDA, kFloat, kNCHW, def);
-USE_LITE_KERNEL(io_copy, kCUDA, kAny, kAny, host_to_device);
-USE_LITE_KERNEL(io_copy, kCUDA, kAny, kAny, device_to_host);
-#endif
@@ -22,6 +22,7 @@
 #include <string>
 #include <utility>
 #include <vector>
+#include "paddle/fluid/lite/core/compatible_tensor.h"
 #include "paddle/fluid/lite/core/context.h"
 #include "paddle/fluid/lite/core/program.h"
 #include "paddle/fluid/lite/core/types.h"
@@ -62,6 +63,11 @@ class LightPredictor {
     return &fetch_list.at(offset);
   }

+  const lite::Tensor* GetTensor(const std::string& name) const {
+    auto* var = program_->exec_scope()->FindVar(name);
+    return &var->Get<lite::Tensor>();
+  }
+
  private:
  void BuildRuntimeProgram(const framework::proto::ProgramDesc& prog) {
     std::vector<Instruction> insts;
@@ -72,9 +78,8 @@ class LightPredictor {
     // Create the kernels of the target places, and filter out the specific
     // kernel with the target alias.
-    for (auto& op : program.ops_) {
-      lite::pb::OpDesc desc(op->op_info()->desc());
-      auto kernel_type = desc.GetAttr(kKernelTypeAttr).get<std::string>();
+    for (auto& op : program.ops()) {
+      auto kernel_type = op->op_info()->GetAttr<std::string>(kKernelTypeAttr);
       std::string op_type, alias;
       Place place;
       KernelBase::ParseKernelType(kernel_type, &op_type, &alias, &place);
@@ -89,8 +94,8 @@ class LightPredictor {
       insts.emplace_back(op, std::move(*it));
     }
     program_.reset(new RuntimeProgram(std::move(insts)));
-    CHECK(program.exec_scope_);
-    program_->set_exec_scope(program.exec_scope_);
+    CHECK(program.exec_scope());
+    program_->set_exec_scope(program.exec_scope());
   }

  private:
......
@@ -15,6 +15,9 @@
 #include "paddle/fluid/lite/api/light_api.h"
 #include <gflags/gflags.h>
 #include <gtest/gtest.h>
+#include "paddle/fluid/lite/core/mir/use_passes.h"
+#include "paddle/fluid/lite/kernels/use_kernels.h"
+#include "paddle/fluid/lite/operators/use_ops.h"

 DEFINE_string(optimized_model, "", "");
@@ -33,29 +36,14 @@ TEST(LightAPI, load) {
   }

   predictor.Run();
+
+  const auto* output = predictor.GetOutput(0);
+  const float* raw_output = output->data<float>();
+
+  for (int i = 0; i < 10; i++) {
+    LOG(INFO) << "out " << raw_output[i];
+  }
 }

 }  // namespace lite
 }  // namespace paddle
-
-USE_LITE_OP(mul);
-USE_LITE_OP(fc);
-USE_LITE_OP(scale);
-USE_LITE_OP(feed);
-USE_LITE_OP(fetch);
-USE_LITE_OP(io_copy);
-
-USE_LITE_KERNEL(feed, kHost, kAny, kAny, def);
-USE_LITE_KERNEL(fetch, kHost, kAny, kAny, def);
-
-#ifdef LITE_WITH_X86
-USE_LITE_KERNEL(relu, kX86, kFloat, kNCHW, def);
-USE_LITE_KERNEL(mul, kX86, kFloat, kNCHW, def);
-USE_LITE_KERNEL(fc, kX86, kFloat, kNCHW, def);
-USE_LITE_KERNEL(scale, kX86, kFloat, kNCHW, def);
-USE_LITE_KERNEL(square, kX86, kFloat, kNCHW, def);
-USE_LITE_KERNEL(elementwise_sub, kX86, kFloat, kNCHW, def);
-USE_LITE_KERNEL(elementwise_add, kX86, kFloat, kNCHW, def);
-USE_LITE_KERNEL(softmax, kX86, kFloat, kNCHW, def);
-USE_LITE_KERNEL(dropout, kX86, kFloat, kNCHW, def);
-#endif
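For reference, the load-and-run flow that this Light API test exercises is roughly the following (a sketch only: the model path is caller-supplied, gflags/gtest setup is omitted, and the 100x100 constant input mirrors the tests above):

#include "paddle/fluid/lite/api/light_api.h"
#include "paddle/fluid/lite/core/mir/use_passes.h"
#include "paddle/fluid/lite/kernels/use_kernels.h"
#include "paddle/fluid/lite/operators/use_ops.h"

namespace paddle {
namespace lite {

void RunOptimizedModel(const std::string& optimized_model_dir) {
  LightPredictor predictor;
  predictor.Build(optimized_model_dir);  // loads an already-optimized model

  auto* input = predictor.GetInput(0);
  input->Resize(DDim(std::vector<DDim::value_type>({100, 100})));
  auto* data = input->mutable_data<float>();
  for (int i = 0; i < 100 * 100; i++) data[i] = i;

  predictor.Run();

  const auto* output = predictor.GetOutput(0);
  LOG(INFO) << "out[0] " << output->data<float>()[0];
}

}  // namespace lite
}  // namespace paddle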
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/lite/api/lite_api_test_helper.h"
DEFINE_string(model_dir, "", "");
DEFINE_string(optimized_model, "", "");
namespace paddle {
namespace lite {
const lite::Tensor* RunHvyModel() {
lite::ExecutorLite predictor;
#ifndef LITE_WITH_CUDA
std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kX86), PRECISION(kFloat)}});
#else
std::vector<Place> valid_places({
Place{TARGET(kHost), PRECISION(kFloat), DATALAYOUT(kNCHW)},
Place{TARGET(kCUDA), PRECISION(kFloat), DATALAYOUT(kNCHW)},
Place{TARGET(kCUDA), PRECISION(kAny), DATALAYOUT(kNCHW)},
Place{TARGET(kHost), PRECISION(kAny), DATALAYOUT(kNCHW)},
Place{TARGET(kCUDA), PRECISION(kAny), DATALAYOUT(kAny)},
Place{TARGET(kHost), PRECISION(kAny), DATALAYOUT(kAny)},
});
#endif
predictor.Build(FLAGS_model_dir,
Place{TARGET(kX86), PRECISION(kFloat)}, // origin cuda
valid_places);
auto* input_tensor = predictor.GetInput(0);
input_tensor->Resize(DDim(std::vector<DDim::value_type>({100, 100})));
auto* data = input_tensor->mutable_data<float>();
for (int i = 0; i < 100 * 100; i++) {
data[i] = i;
}
// LOG(INFO) << "input " << *input_tensor;
predictor.Run();
const auto* out = predictor.GetOutput(0);
return out;
}
} // namespace lite
} // namespace paddle
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <gflags/gflags.h>
#include "paddle/fluid/lite/api/cxx_api.h"
#include "paddle/fluid/lite/core/compatible_tensor.h"
#include "paddle/fluid/lite/core/op_registry.h"
DECLARE_string(model_dir);
DECLARE_string(optimized_model);
namespace paddle {
namespace lite {
const lite::Tensor* RunHvyModel();
} // namespace lite
} // namespace paddle
@@ -86,6 +86,7 @@ class TensorHvy : public TensorBase<TensorHvy> {

   template <typename T>
   T* mutable_data() {
+    memory_size_ = framework::product(data_.dims()) * sizeof(T);
     return data_.mutable_data<T>(data_.dims(), platform::CPUPlace());
   }
   template <typename T>
@@ -128,8 +129,11 @@ class TensorHvy : public TensorBase<TensorHvy> {
   const framework::LoDTensor& raw_tensor() const { return data_; }
   framework::LoDTensor& raw_tensor() { return data_; }

+  size_t memory_size() const { return memory_size_; }
+
  private:
   framework::LoDTensor data_;
+  size_t memory_size_{};
 };

 }  // namespace lite
......
@@ -90,6 +90,8 @@ class TensorLite : public TensorBase<TensorLite> {
   void *mutable_data(size_t memory_size);
   void *mutable_data(TargetType target, size_t memory_size);

+  const void *raw_data() const { return buffer_->data(); }
+
   size_t memory_size() const { return memory_size_; }

   bool IsInitialized() const { return buffer_->data(); }
......
@@ -20,7 +20,7 @@
 #include "paddle/fluid/lite/api/cxx_api.h"
 #include "paddle/fluid/lite/core/compatible_tensor.h"
 #include "paddle/fluid/lite/core/mir/graph_visualize_pass.h"
-#include "paddle/fluid/lite/core/mir/passes.h"
+#include "paddle/fluid/lite/core/mir/use_passes.h"
 #include "paddle/fluid/lite/core/op_registry.h"
 #include "paddle/fluid/lite/core/program.h"
......
@@ -20,7 +20,7 @@
 #include "paddle/fluid/lite/api/cxx_api.h"
 #include "paddle/fluid/lite/core/compatible_tensor.h"
 #include "paddle/fluid/lite/core/mir/graph_visualize_pass.h"
-#include "paddle/fluid/lite/core/mir/passes.h"
+#include "paddle/fluid/lite/core/mir/use_passes.h"
 #include "paddle/fluid/lite/core/op_registry.h"
 #include "paddle/fluid/lite/core/program.h"
......
@@ -17,7 +17,7 @@
 #include <gtest/gtest.h>
 #include <vector>
 #include "paddle/fluid/lite/api/cxx_api.h"
-#include "paddle/fluid/lite/core/mir/passes.h"
+#include "paddle/fluid/lite/core/mir/use_passes.h"
 #include "paddle/fluid/lite/core/op_registry.h"

 DEFINE_string(model_dir, "", "");
......
@@ -84,7 +84,7 @@ void ConvBNFuser::InsertNewNode(SSAGraph* graph, const key2nodes_t& matched) {
                        ->GetMutable<lite::Tensor>();
   size_t bias_size = bn_scale_t->data_size();
   auto bn_scale_d = bn_scale_t->mutable_data<float>();
-  CHECK(bias_size == conv_weight_dims[0])
+  CHECK_EQ(bias_size, static_cast<size_t>(conv_weight_dims[0]))
       << "The BN bias's size should be equal to the size of the first "
       << "dim size of the conv weights";
......
@@ -24,7 +24,7 @@ namespace lite {
 namespace mir {

 void GenerateProgramPass::Apply(const std::unique_ptr<SSAGraph>& graph) {
-  LOG(INFO) << "final program \n" << Visualize(graph.get());
+  VLOG(4) << "final program \n" << Visualize(graph.get());
   for (auto& item : graph->StmtTopologicalOrder()) {
     if (item->IsStmt()) {
       auto& stmt = item->AsStmt();
......
@@ -24,8 +24,10 @@ namespace lite {
 namespace mir {

 bool SSAGraph::CheckBidirectionalConnection() {
-  LOG(INFO) << "node count " << node_storage_.size();
+  VLOG(4) << "node count " << node_storage_.size();
   for (auto &node : node_storage_) {
+    if (node.IsStmt()) VLOG(4) << node.AsStmt().op_info()->Type();
+    if (node.IsArg()) VLOG(4) << node.AsArg().name << " " << node.AsArg().id;
     for (auto *in : node.inlinks) {
       CHECK(in->outlinks.end() !=
             std::find(in->outlinks.begin(), in->outlinks.end(), &node));
@@ -121,6 +123,7 @@ void SSAGraph::Build(const Program &program,
   std::unordered_map<std::string, mir::Node *> arg_update_node_map_;
   for (auto &op : program.ops()) {
+    VLOG(3) << op->op_info()->Type();
     auto *op_node = GraphCreateInstructNode(op, valid_places);
     for (const std::string &name : op->op_info()->input_names()) {
       mir::Node *arg_node = nullptr;
......
@@ -17,7 +17,7 @@
 #include <memory>
 #include "paddle/fluid/framework/program_desc.h"
 #include "paddle/fluid/lite/core/mir/graph_visualize_pass.h"
-#include "paddle/fluid/lite/core/mir/passes.h"
+#include "paddle/fluid/lite/core/mir/use_passes.h"
 #include "paddle/fluid/lite/core/op_registry.h"
 #include "paddle/fluid/lite/core/program_fake_utils.h"
......
@@ -15,12 +15,6 @@
 #pragma once
 #include "paddle/fluid/lite/core/mir/pass_registry.h"

-namespace paddle {
-namespace lite {
-namespace mir {}  // namespace mir
-}  // namespace lite
-}  // namespace paddle
-
 #ifndef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
 USE_MIR_PASS(demo);
 USE_MIR_PASS(static_kernel_pick_pass);
......
@@ -13,7 +13,7 @@
 // limitations under the License.

 #include <gtest/gtest.h>
-#include "paddle/fluid/lite/core/mir/passes.h"
+#include "paddle/fluid/lite/core/mir/use_passes.h"
 #include "paddle/fluid/lite/core/optimizer.h"
 #include "paddle/fluid/lite/core/program_fake_utils.h"
 #include "paddle/fluid/lite/kernels/cuda/use_kernels.h"
......
@@ -54,9 +54,7 @@ class OpLite : public Registry {
   OpLite() = default;
   explicit OpLite(const std::string &type) : op_type_(type) {}
   explicit OpLite(const std::vector<Place> &valid_places)
-      : valid_places_(valid_places) {
-    LOG(INFO) << "valid places " << valid_places.size();
-  }
+      : valid_places_(valid_places) {}

   void SetValidPlaces(const std::vector<Place> &places) {
     VLOG(3) << "valid places " << valid_places_.size();
......
@@ -18,8 +18,8 @@
 #include <utility>
 #include "paddle/fluid/lite/core/mir/generate_program_pass.h"
 #include "paddle/fluid/lite/core/mir/pass_manager.h"
-#include "paddle/fluid/lite/core/mir/passes.h"
 #include "paddle/fluid/lite/core/mir/static_kernel_pick_pass.h"
+#include "paddle/fluid/lite/core/mir/use_passes.h"
 #include "paddle/fluid/lite/core/program_fake_utils.h"

 namespace paddle {
......
@@ -19,7 +19,7 @@ namespace lite {
 namespace profile {

 const int BasicTimer::data_w = 10;
-const int BasicTimer::name_w = 10;
+const int BasicTimer::name_w = 15;

 }  // namespace profile
 }  // namespace lite
......
@@ -91,6 +91,18 @@ class DDimBase {
     return os;
   }

+  friend bool operator==(const DDimBase &a, const DDimBase &b) {
+    if (a.size() != b.size()) return false;
+    for (size_t i = 0; i < a.size(); i++) {
+      if (a[i] != b[i]) return false;
+    }
+    return true;
+  }
+
+  friend bool operator!=(const DDimBase &a, const DDimBase &b) {
+    return !(a == b);
+  }
+
  private:
   DDimT *self() { return static_cast<DDimT *>(this); }
   const DDimT *const_self() const { return static_cast<const DDimT *>(this); }
@@ -154,6 +166,7 @@ class TensorBase {
   const void *raw_data() const { return const_self()->data(); }
   size_t data_size() const { return const_self()->dims().production(); }
+  size_t memory_size() const { return const_self()->memory_size(); }

   void ShareDataWith(const TensorBase &other) { self()->ShareDataWith(other); }
   void CopyDataFrom(const TensorBase &other) { self()->CopyDataFrom(other); }
@@ -175,5 +188,13 @@
   }
 };

+template <typename TensorT>
+bool TensorCompareWith(const TensorT &a, const TensorT &b) {
+  if (a.dims() != b.dims()) return false;
+  LOG(INFO) << "data_size: " << a.data_size();
+  if (memcmp(a.raw_data(), b.raw_data(), a.data_size()) != 0) return false;
+  return true;
+}
+
 }  // namespace lite
 }  // namespace paddle
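The new helper compares shape first (via the DDim operator== added above) and then the payload bytes with memcmp. A standalone illustration of the same two-step idea, using plain std::vector stand-ins rather than the lite tensor types:

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <vector>

// Simplified stand-in for the dims-then-bytes check that TensorCompareWith performs.
bool SameShapeAndBytes(const std::vector<int64_t>& dims_a, const float* a,
                       const std::vector<int64_t>& dims_b, const float* b) {
  if (dims_a != dims_b) return false;  // shape check, like DDim operator==
  std::size_t n = 1;
  for (int64_t d : dims_a) n *= static_cast<std::size_t>(d);
  return std::memcmp(a, b, n * sizeof(float)) == 0;  // byte-wise payload check
}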
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
/*
* ATTENTION: this header file may only be included from .cc files.
*/
USE_LITE_KERNEL(feed, kHost, kAny, kAny, def);
USE_LITE_KERNEL(fetch, kHost, kAny, kAny, def);
#ifdef LITE_WITH_X86
USE_LITE_KERNEL(relu, kX86, kFloat, kNCHW, def);
USE_LITE_KERNEL(mul, kX86, kFloat, kNCHW, def);
USE_LITE_KERNEL(fc, kX86, kFloat, kNCHW, def);
USE_LITE_KERNEL(scale, kX86, kFloat, kNCHW, def);
USE_LITE_KERNEL(square, kX86, kFloat, kNCHW, def);
USE_LITE_KERNEL(elementwise_sub, kX86, kFloat, kNCHW, def);
USE_LITE_KERNEL(elementwise_add, kX86, kFloat, kNCHW, def);
USE_LITE_KERNEL(softmax, kX86, kFloat, kNCHW, def);
USE_LITE_KERNEL(dropout, kX86, kFloat, kNCHW, def);
USE_LITE_KERNEL(concat, kX86, kFloat, kNCHW, def);
USE_LITE_KERNEL(conv2d, kX86, kFloat, kNCHW, def);
USE_LITE_KERNEL(depthwise_conv2d, kX86, kFloat, kNCHW, def);
USE_LITE_KERNEL(pool2d, kX86, kFloat, kNCHW, def);
#endif
#ifdef LITE_WITH_CUDA
USE_LITE_KERNEL(mul, kCUDA, kFloat, kNCHW, def);
USE_LITE_KERNEL(io_copy, kCUDA, kAny, kAny, host_to_device);
USE_LITE_KERNEL(io_copy, kCUDA, kAny, kAny, device_to_host);
#endif
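These USE_LITE_OP / USE_LITE_KERNEL lines exist so that the linker keeps the translation units that register the ops and kernels, which static-library linking would typically otherwise drop. A simplified, self-contained sketch of the "touch" idiom such registration macros are commonly built on (illustrative only, not the actual Paddle macro definitions):

// The registering .cc exposes a touch function next to its static registration.
int touch_demo_kernel() { return 0; }

// A use_*-style header then references that symbol from the consuming .cc,
// forcing the linker to pull in the registering object file.
#define USE_DEMO_KERNEL(name__)                                        \
  extern int touch_##name__();                                         \
  static int use_##name__ __attribute__((unused)) = touch_##name__();

USE_DEMO_KERNEL(demo_kernel)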
 #cc_library(runtime_lite SRCS runtime.cc)
 #TODO(Superjomn) enable it again.
-if(NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
+if(NOT LITE_ON_MOBILE)
     lite_cc_test(test_model_parser_lite SRCS model_parser_test.cc
         DEPS model_parser_lite framework_proto_lite
         ARGS --model_dir=${LITE_MODEL_DIR}/lite_naive_model)
@@ -13,18 +13,15 @@ endif()
 cc_library(compatible_pb_lite SRCS compatible_pb.cc DEPS op_desc_lite framework_proto_lite var_desc_lite)

-set(model_parser_deps variable_lite scope_lite ${tensor_lite} scope_lite
-    target_wrapper_host
-    compatible_pb_lite
-    memory_lite
-    )
-if (LITE_WITH_CUDA)
-  set(model_parser_deps ${model_parser_deps} target_wrapper_cuda)
-endif()
-cc_library(model_parser_lite SRCS model_parser.cc DEPS ${model_parser_deps})
+lite_cc_library(model_parser_lite SRCS model_parser.cc DEPS
+    variable_lite scope_lite ${tensor_lite} scope_lite
+    target_wrapper_host
+    compatible_pb_lite
+    memory_lite
+    CUDA_DEPS target_wrapper_cuda)

 lite_cc_test(test_op_desc_lite SRCS op_desc_test.cc DEPS cpp_op_desc_lite op_desc_lite compatible_pb_lite)

 add_subdirectory(pb)
 add_subdirectory(cpp)
@@ -209,7 +209,7 @@ void TensorToStream(std::ostream &os, const lite::Tensor &tensor) {
     os.write(out.data(), size);
   }
   {  // the 3rd field, tensor data
-    uint64_t size = tensor.data_size();
+    uint64_t size = tensor.memory_size();
     CHECK_LT(size, std::numeric_limits<std::streamsize>::max())
         << "Index overflow when writing tensor";
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
/*
* ATTENTION: this header file may only be included from .cc files.
*/
USE_LITE_OP(mul);
USE_LITE_OP(fc);
USE_LITE_OP(relu);
USE_LITE_OP(scale);
USE_LITE_OP(feed);
USE_LITE_OP(fetch);
USE_LITE_OP(io_copy);
USE_LITE_OP(elementwise_add)
USE_LITE_OP(elementwise_sub)
USE_LITE_OP(square)
USE_LITE_OP(softmax)
USE_LITE_OP(dropout)
USE_LITE_OP(concat)
USE_LITE_OP(conv2d)
USE_LITE_OP(depthwise_conv2d)
USE_LITE_OP(pool2d)
@@ -3,5 +3,3 @@ if (NOT LITE_WITH_X86)
 endif()

 cc_library(target_wrapper_x86 SRCS target_wrapper.cc)