Unverified  Commit a3e4c8de  Authored by: Ray Liu  Committed by: GitHub

Merge pull request #1084 from codeWorm2015/opencl

Opencl
......@@ -4,7 +4,7 @@ option(USE_OPENMP "openmp support" OFF)
project(paddle-mobile)
option(DEBUGING "enable debug mode" ON)
option(USE_EXCEPTION "use std exception" OFF)
option(USE_EXCEPTION "use std exception" ON)
option(LOG_PROFILE "log profile" OFF)
# select the platform to build
option(CPU "armv7 with neon" OFF)
......
......@@ -52,7 +52,7 @@ class CLEngine {
cl_context context, std::string file_name) {
FILE *file = fopen(file_name.c_str(), "rb");
PADDLE_MOBILE_ENFORCE(file != nullptr, "can't open file: %s ",
filename.c_str());
file_name.c_str());
fseek(file, 0, SEEK_END);
int64_t size = ftell(file);
PADDLE_MOBILE_ENFORCE(size > 0, "size is too small");
......
......@@ -488,7 +488,7 @@ static const uint8_t shifttable[512] = {
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x0d};
half_t float2half(float f) {
uint32_t v = *reinterpret_cast<uint32_t*>(&f);
uint32_t v = *reinterpret_cast<uint32_t *>(&f);
return basetable[(v >> 23) & 0x1ff] +
((v & 0x007fffff) >> shifttable[(v >> 23) & 0x1ff]);
}
......@@ -496,5 +496,17 @@ half_t float2half(float f) {
float half2float(half_t h) {
uint32_t v = mantissatable[offsettable[h >> 10] + (h & 0x3ff)] +
exponenttable[h >> 10];
return *reinterpret_cast<float*>(&v);
return *reinterpret_cast<float *>(&v);
}
void FloatArray2HalfArray(float *f_array, half_t *h_array, int count) {
for (int i = 0; i < count; ++i) {
h_array[i] = float2half(f_array[i]);
}
}
void HalfArray2FloatArray(half_t *h_array, float *f_array, int count) {
for (int i = 0; i < count; ++i) {
f_array[i] = half2float(h_array[i]);
}
}
......@@ -18,4 +18,9 @@ limitations under the License. */
typedef uint16_t half_t;
half_t float2half(float f);
float half2float(half_t h);
void FloatArray2HalfArray(float *f_array, half_t *h_array, int count);
void HalfArray2FloatArray(half_t *h_array, float *f_array, int count);
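A minimal round-trip through these helpers, as a usage sketch (the buffer contents are illustrative, and `cl_half.h` is assumed to be on the include path):

```cpp
#include <cstdio>
#include <vector>
#include "framework/cl/cl_half.h"

int main() {
  // Convert floats to half precision (the form uploaded into
  // CL_HALF_FLOAT images), then back to float for inspection.
  std::vector<float> src = {0.5f, -1.25f, 3.0f};
  std::vector<half_t> half(src.size());
  std::vector<float> back(src.size());

  FloatArray2HalfArray(src.data(), half.data(), static_cast<int>(src.size()));
  HalfArray2FloatArray(half.data(), back.data(), static_cast<int>(src.size()));

  for (size_t i = 0; i < src.size(); ++i) {
    // half keeps ~11 significand bits, so these values survive exactly
    std::printf("%g -> 0x%04x -> %g\n", src[i],
                static_cast<unsigned>(half[i]), back[i]);
  }
  return 0;
}
```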
......@@ -18,6 +18,7 @@ limitations under the License. */
#include <type_traits>
#include <vector>
#include "common/log.h"
#include "framework/cl/cl_deleter.h"
#include "framework/cl/cl_image.h"
#include "framework/cl/cl_scope.h"
......@@ -32,11 +33,16 @@ class CLHelper {
explicit CLHelper(CLScope *scope) : scope_(scope) {}
void AddKernel(const std::string &kernel_name, const std::string &file_name) {
DLOG << " begin add kernel ";
auto kernel = scope_->GetKernel(kernel_name, file_name);
DLOG << " add kernel ing ";
kernels.emplace_back(std::move(kernel));
}
cl_kernel KernelAt(const int index) { return kernels[index].get(); }
cl_kernel KernelAt(const int index) {
DLOG << " kernel count: " << kernels.size();
return kernels[index].get();
}
cl_command_queue CLCommandQueue() { return scope_->CommandQueue(); }
......
......@@ -17,7 +17,9 @@ limitations under the License. */
#include <vector>
#include "CL/cl.h"
#include "framework/cl/cl_half.h"
#include "framework/cl/cl_tool.h"
#include "framework/ddim.h"
#include "framework/tensor.h"
......@@ -59,6 +61,7 @@ class CLImage {
PADDLE_MOBILE_THROW_EXCEPTION(
" empty image tensor data shouldn't have value");
}
DLOG << " init empty image ";
InitCLImage(context, nullptr, dim);
initialized_ = true;
}
......@@ -98,7 +101,8 @@ class CLImage {
T *data() const {
if (initialized_) {
PADDLE_MOBILE_THROW_EXCEPTION(
" cl image has initialized, tensor data has been deleted ");
" cl image has initialized, tensor data has been deleted, can't use "
"tensor data");
}
return reinterpret_cast<T *>(tensor_data_);
}
......@@ -115,6 +119,7 @@ class CLImage {
private:
void InitCLImage(cl_context context, float *tensor_data, const DDim &dim) {
DLOG << " tensor dim: " << dim;
cl_image_format cf = {.image_channel_order = CL_RGBA,
.image_channel_data_type = CL_HALF_FLOAT};
// NCHW -> [W * (C+3)/4, H * N]
......@@ -132,29 +137,23 @@ class CLImage {
tensor_data_[i] = 0;
}
}
size_t N, C, H, W;
if (tensor_dims_.size() == 4) {
N = tensor_dims_[0];
if (N < 0) {
N = 1;
}
C = tensor_dims_[1];
H = tensor_dims_[2];
W = tensor_dims_[3];
width_of_one_block_ = W;
height_of_one_block_ = H;
size_t new_dims[] = {1, 1, 1, 1};
} else if (tensor_dims_.size() == 1) {
N = 1;
C = tensor_dims_[0];
H = 1;
W = 1;
width_of_one_block_ = W;
height_of_one_block_ = H;
for (int j = 0; j < dim.size(); ++j) {
new_dims[4 - dim.size() + j] = dim[j];
}
size_t N, C, H, W;
N = new_dims[0];
C = new_dims[1];
H = new_dims[2];
W = new_dims[3];
width_of_one_block_ = W;
height_of_one_block_ = H;
size_t width = W * ((C + 3) / 4);
size_t height = H * N;
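To make the NCHW -> [W * (C+3)/4, H * N] folding above concrete: shapes shorter than four dimensions are right-aligned into `{1, 1, 1, 1}`, then every four channels pack into one RGBA texel. A small sketch with illustrative dimensions:

```cpp
#include <cstddef>
#include <cstdio>

// Compute the 2-D image extent for an NCHW tensor, mirroring the
// folding above: C is rounded up to a multiple of 4 (RGBA packing).
int main() {
  size_t N = 1, C = 24, H = 224, W = 224;
  size_t width = W * ((C + 3) / 4);  // 224 * 6 = 1344 texels per row
  size_t height = H * N;             // 224 * 1 = 224 rows
  std::printf("image extent: %zu x %zu\n", width, height);
  return 0;
}
```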
......@@ -193,9 +192,12 @@ class CLImage {
}
}
cl_int err;
DLOG << " image width: " << width;
DLOG << " image height: " << height;
cl_image_ = clCreateImage2D(
context, // cl_context context
CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, // cl_mem_flags flags
context, // cl_context context
CL_MEM_READ_WRITE |
(imageData ? CL_MEM_COPY_HOST_PTR : 0), // cl_mem_flags flags
&cf, // const cl_image_format *image_format
width, // size_t image_width
height, // size_t image_height
......@@ -205,6 +207,7 @@ class CLImage {
if (err != CL_SUCCESS) {
// TODO(HaiPeng): error handling
CL_CHECK_ERRORS(err);
PADDLE_MOBILE_THROW_EXCEPTION(" create image 2d error ");
}
}
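The flags change above matters because OpenCL rejects `CL_MEM_COPY_HOST_PTR` when the host pointer is null (`clCreateImage2D` returns `CL_INVALID_HOST_PTR`), and the empty-image path calls `InitCLImage` with `nullptr`. The rule, as a small sketch:

```cpp
#include "CL/cl.h"

// CL_MEM_COPY_HOST_PTR is only legal with a non-null host pointer, so
// derive the creation flags from whether initial data is supplied.
static cl_mem_flags ImageFlags(const void *host_data) {
  return CL_MEM_READ_WRITE | (host_data ? CL_MEM_COPY_HOST_PTR : 0);
}
```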
......@@ -222,9 +225,15 @@ class CLImage {
cl_context context_;
};
void TensorToCLImage(Tensor *tensor, CLImage *image,cl_command_queue commandQueue);
void TensorToCLImage(Tensor *tensor, CLImage *image,
cl_command_queue commandQueue);
void CLImageToTensor(CLImage *image, Tensor *tensor,cl_command_queue commandQueue);
void CLImageToTensor(CLImage *image, Tensor *tensor,
cl_command_queue commandQueue);
#ifdef PADDLE_MOBILE_DEBUG
Print &operator<<(Print &printer, const CLImage &image);
#endif
} // namespace framework
} // namespace paddle_mobile
......@@ -40,8 +40,11 @@ class CLScope {
std::unique_ptr<_cl_kernel, CLKernelDeleter> GetKernel(
const std::string &kernel_name, const std::string &file_name) {
auto program = Program(file_name);
DLOG << " get program ~ ";
std::unique_ptr<_cl_kernel, CLKernelDeleter> kernel(
clCreateKernel(program, kernel_name.c_str(), NULL));
clCreateKernel(program, kernel_name.c_str(), &status_));
CL_CHECK_ERRORS(status_);
DLOG << " create kernel ~ ";
return std::move(kernel);
}
......@@ -58,11 +61,12 @@ class CLScope {
status_ =
clBuildProgram(program.get(), 0, 0, "-cl-fast-relaxed-math", 0, 0);
CL_CHECK_ERRORS(status_);
programs_[file_name] = std::move(program);
return program.get();
return programs_[file_name].get();
}
private:
......
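The `Program` change above fixes a use-after-move: the old `return program.get()` ran after `programs_[file_name] = std::move(program)` had emptied the local handle, so callers received a null program. The corrected pattern, as a standalone sketch:

```cpp
#include <map>
#include <memory>
#include <string>

struct Program {};

// After std::move(p) into the cache, the local unique_ptr is empty;
// the raw pointer must come from the cache entry that now owns it.
std::map<std::string, std::unique_ptr<Program>> cache;

Program *GetOrBuild(const std::string &key) {
  std::unique_ptr<Program> p(new Program());
  cache[key] = std::move(p);
  // return p.get();        // bug: p was moved from, this is nullptr
  return cache[key].get();  // correct: owned by the cache
}
```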
......@@ -37,6 +37,8 @@ limitations under the License. */
#include "framework/cl/cl_image.h"
#endif
int debug_to = 2;
namespace paddle_mobile {
namespace framework {
......@@ -85,7 +87,7 @@ Executor<Dtype, P>::Executor(const framework::Program<Dtype> p, int batch_size,
for (int i = 0; i < blocks.size(); ++i) {
std::shared_ptr<framework::BlockDesc> block_desc = blocks[i];
std::vector<std::shared_ptr<framework::OpDesc>> ops = block_desc->Ops();
for (int j = 0; j < ops.size(); ++j) {
for (int j = 0; j < debug_to; ++j) {
std::shared_ptr<framework::OpDesc> op = ops[j];
DLOG << "create op: " << j << " " << op->Type();
auto op_base = framework::OpRegistry<Dtype>::CreateOp(
......@@ -414,7 +416,7 @@ std::shared_ptr<framework::Tensor> Executor<Dtype, P>::Predict(
}
}
#else
for (int i = 0; i < ops.size(); i++) {
for (int i = 0; i < debug_to; i++) {
#ifdef PADDLE_MOBILE_PROFILE
struct timespec ts;
clock_gettime(CLOCK_MONOTONIC, &ts);
......@@ -428,6 +430,11 @@ std::shared_ptr<framework::Tensor> Executor<Dtype, P>::Predict(
#endif
}
#endif
DLOG << " predict return nullptr";
return nullptr;
auto last_op = ops.rbegin();
auto output_map = (*last_op)->Outputs();
std::vector<std::string> out_keys = (*last_op)->GetOutKeys();
......@@ -647,13 +654,18 @@ std::vector<typename Executor<Dtype, P>::Ptype> Executor<Dtype, P>::Predict(
const std::vector<Ptype> &input, const std::vector<int64_t> &dims) {
framework::Tensor tensor(input, framework::make_ddim(dims));
std::shared_ptr<framework::Tensor> output_tensor = Predict(tensor, 0);
Executor<Dtype, P>::Ptype *output_ptr =
output_tensor->data<typename Executor<Dtype, P>::Ptype>();
std::vector<typename Executor<Dtype, P>::Ptype> result_vector;
for (int j = 0; j < output_tensor->numel(); ++j) {
result_vector.push_back(output_ptr[j]);
if (output_tensor != nullptr) {
Executor<Dtype, P>::Ptype *output_ptr =
output_tensor->data<typename Executor<Dtype, P>::Ptype>();
std::vector<typename Executor<Dtype, P>::Ptype> result_vector;
for (int j = 0; j < output_tensor->numel(); ++j) {
result_vector.push_back(output_ptr[j]);
}
return result_vector;
} else {
DLOG << "return empty vector";
return {};
}
return result_vector;
}
#ifdef PADDLE_MOBILE_FPGA
......
......@@ -57,7 +57,10 @@ void OperatorBase<Dtype>::CheckAllInputOutputSet() const {}
template <typename Dtype>
void OperatorBase<Dtype>::Run() {
DLOG << " begin run " << type_;
RunImpl();
DLOG << " end run " << type_;
return;
#ifdef PADDLE_MOBILE_DEBUG
DLOG << "-------------" << type_ << "----------------------------";
vector<string> input_keys = GetInputKeys();
......@@ -100,8 +103,9 @@ void OperatorBase<Dtype>::Run() {
#ifdef PADDLE_MOBILE_CL
if (type_ == "fetch") {
Tensor *tensor = vari->template GetMutable<framework::LoDTensor>();
if (tensor)
if (tensor) {
DLOG << type_ << " output- " << key << "=" << tensor->dims();
}
} else {
CLImage *cl_image = vari->template GetMutable<framework::CLImage>();
// cl_command_queue commandQueue =
......
......@@ -12,4 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "conv_kernel.inc.cl"
//#include "conv_kernel.inc.cl"
__kernel void conv_3x3() {}
\ No newline at end of file
......@@ -24,9 +24,16 @@ namespace operators {
template <>
bool ConvAddBNReluKernel<GPU_CL, float>::Init(
FusionConvAddBNReluParam<GPU_CL> *param) {
PADDLE_MOBILE_ENFORCE(
param->Filter()->dims()[2] == param->Filter()->dims()[3] &&
param->Paddings()[0] == param->Paddings()[1],
"need equal");
param->Filter()->InitCLImage(cl_helper_.CLContext());
param->Bias()->InitCLImage(cl_helper_.CLContext());
// const CL *mean = param->InputMean();
const framework::CLImage *mean = param->InputMean();
const framework::CLImage *variance = param->InputVariance();
const framework::CLImage *scale = param->InputScale();
const framework::CLImage *bias = param->InputBias();
......@@ -52,9 +59,6 @@ bool ConvAddBNReluKernel<GPU_CL, float>::Init(
new_bias_ptr[i] = bias_ptr[i] - mean_ptr[i] * inv_std_ptr[i] * scale_ptr[i];
}
delete[](new_scale_ptr);
delete[](new_bias_ptr);
framework::CLImage *new_scale = new framework::CLImage();
new_scale->SetTensorData(new_scale_ptr, variance->dims());
......@@ -68,6 +72,9 @@ bool ConvAddBNReluKernel<GPU_CL, float>::Init(
param->SetNewScale(new_scale);
param->SetNewBias(new_bias);
delete[](new_scale_ptr);
delete[](new_bias_ptr);
PADDLE_MOBILE_ENFORCE(
param->Filter()->dims()[2] == param->Filter()->dims()[3] &&
param->Paddings()[0] == param->Paddings()[1],
......
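The relocated `delete[]` calls above fix a use-after-free: `new_scale_ptr` and `new_bias_ptr` were freed before `SetTensorData` and the subsequent CL image upload had consumed them. The general ordering rule, as a self-contained sketch (the `Consumer` type is hypothetical):

```cpp
#include <cstring>

// Hypothetical consumer that reads the host buffer it is given, as the
// CLImage upload path eventually does with new_scale_ptr / new_bias_ptr.
struct Consumer {
  float copy[4];
  void SetTensorData(const float *src, int n) {
    std::memcpy(copy, src, n * sizeof(float));  // reads src here
  }
};

int main() {
  float *buf = new float[4]{1.f, 2.f, 3.f, 4.f};
  Consumer c;
  c.SetTensorData(buf, 4);  // consume the buffer first...
  delete[] buf;             // ...then free it; the old order read freed memory
  return 0;
}
```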
......@@ -25,6 +25,9 @@ bool ConvAddKernel<GPU_CL, float>::Init(FusionConvAddParam<GPU_CL> *param) {
param->Filter()->dims()[2] == param->Filter()->dims()[3] &&
param->Paddings()[0] == param->Paddings()[1],
"need equal");
param->Filter()->InitCLImage(cl_helper_.CLContext());
param->Bias()->InitCLImage(cl_helper_.CLContext());
int offset = static_cast<int>(param->Filter()->dims()[2]) / 2 -
static_cast<int>(param->Paddings()[1]);
param->SetOffset(offset);
......
......@@ -26,18 +26,32 @@ bool ConvKernel<GPU_CL, float>::Init(ConvParam<GPU_CL> *param) {
param->Paddings()[0] == param->Paddings()[1],
"need equal");
param->Filter()->InitCLImage(cl_helper_.CLContext());
int offset = static_cast<int>(param->Filter()->dims()[2]) / 2 -
static_cast<int>(param->Paddings()[1]);
param->SetOffset(offset);
DLOG << " init helper: " << &cl_helper_;
DLOG << " conv kernel add kernel ~ ";
DLOG << " width of one block: " << param->Filter()->WidthOfOneBlock();
DLOG << " height of one block: " << param->Filter()->HeightOfOneBlock();
DLOG << " filter dims: " << param->Filter()->dims();
if (param->Filter()->WidthOfOneBlock() == 1 &&
param->Filter()->HeightOfOneBlock() == 1) {
this->cl_helper_.AddKernel("conv_1x1", "conv_add_bn_relu_kernel.cl");
DLOG << " here1 ";
this->cl_helper_.AddKernel("conv_1x1", "conv_kernel.cl");
} else if (param->Filter()->dims()[1] == 1) {
this->cl_helper_.AddKernel("depth_conv_3x3", "conv_add_bn_relu_kernel.cl");
DLOG << " here2 ";
this->cl_helper_.AddKernel("depth_conv_3x3", "conv_kernel.cl");
} else if (param->Filter()->WidthOfOneBlock() == 3 &&
param->Filter()->HeightOfOneBlock() == 3) {
this->cl_helper_.AddKernel("conv_3x3", "conv_add_bn_relu_kernel.cl");
DLOG << " here3 ";
this->cl_helper_.AddKernel("conv_3x3", "conv_kernel.cl");
} else {
PADDLE_MOBILE_THROW_EXCEPTION(" not support ");
}
......@@ -47,14 +61,27 @@ bool ConvKernel<GPU_CL, float>::Init(ConvParam<GPU_CL> *param) {
template <>
void ConvKernel<GPU_CL, float>::Compute(const ConvParam<GPU_CL> &param) {
DLOG << " Compute helper: " << &cl_helper_;
DLOG << " begin compute ";
auto kernel = this->cl_helper_.KernelAt(0);
DLOG << " get work size ";
auto default_work_size = this->cl_helper_.DefaultWorkSize(*param.Output());
DLOG << " end work size ";
int c_block = default_work_size[0];
int w = default_work_size[1];
int nh = default_work_size[2];
auto input = param.Input()->GetCLImage();
DLOG << " get Input ";
auto filter = param.Filter()->GetCLImage();
DLOG << " get Filter ";
auto output = param.Output();
DLOG << " get Output ";
int stride = param.Strides()[0];
int offset = param.Offset();
int input_c = param.Input()->CBlock();
......@@ -64,6 +91,8 @@ void ConvKernel<GPU_CL, float>::Compute(const ConvParam<GPU_CL> &param) {
cl_int status;
DLOG << " begin set kernel arg ";
status = clSetKernelArg(kernel, 0, sizeof(int), &c_block);
status = clSetKernelArg(kernel, 1, sizeof(int), &w);
status = clSetKernelArg(kernel, 2, sizeof(int), &nh);
......@@ -77,12 +106,18 @@ void ConvKernel<GPU_CL, float>::Compute(const ConvParam<GPU_CL> &param) {
status = clSetKernelArg(kernel, 10, sizeof(int), &input_width);
status = clSetKernelArg(kernel, 11, sizeof(int), &input_height);
DLOG << " end set kernel arg ";
CL_CHECK_ERRORS(status);
DLOG << " begin enqueue ";
status =
clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL,
default_work_size.data(), NULL, 0, NULL, NULL);
DLOG << " end enqueue ";
CL_CHECK_ERRORS(status);
}
......
......@@ -27,6 +27,7 @@ bool DepthwiseConvKernel<GPU_CL, float>::Init(ConvParam<GPU_CL> *param) {
param->Filter()->dims()[2] == param->Filter()->dims()[3] &&
param->Paddings()[0] == param->Paddings()[1],
"need equal");
param->Filter()->InitCLImage(cl_helper_.CLContext());
int offset = static_cast<int>(param->Filter()->dims()[2]) / 2 -
static_cast<int>(param->Paddings()[1]);
param->SetOffset(offset);
......
......@@ -948,6 +948,7 @@ class FetchParam : public OpParam {
input_x_ = InputXFrom<GType>(inputs, scope);
out_ = OutFrom(outputs, scope);
}
const RType *InputX() const { return input_x_; }
Tensor *Out() const { return out_; }
......
......@@ -34,23 +34,24 @@ int main() {
GetInput<float>(g_test_image_1x3x224x224_banana, &input, dims);
auto vec_result = paddle_mobile.Predict(input, dims);
std::vector<float>::iterator biggest =
std::max_element(std::begin(vec_result), std::end(vec_result));
std::cout << " Max element is " << *biggest << " at position "
<< std::distance(std::begin(vec_result), biggest) << std::endl;
// warm up ten times
for (int i = 0; i < 10; ++i) {
auto vec_result = paddle_mobile.Predict(input, dims);
}
auto time3 = paddle_mobile::time();
for (int i = 0; i < 10; ++i) {
auto vec_result = paddle_mobile.Predict(input, dims);
}
DLOG << vec_result;
auto time4 = paddle_mobile::time();
std::cout << "predict cost :" << paddle_mobile::time_diff(time3, time4) / 10
<< "ms" << std::endl;
// std::vector<float>::iterator biggest =
// std::max_element(std::begin(vec_result), std::end(vec_result));
// std::cout << " Max element is " << *biggest << " at position "
// << std::distance(std::begin(vec_result), biggest) <<
// std::endl;
// for (int i = 0; i < 10; ++i) {
// auto vec_result = paddle_mobile.Predict(input, dims);
// }
// auto time3 = paddle_mobile::time();
// for (int i = 0; i < 10; ++i) {
// auto vec_result = paddle_mobile.Predict(input, dims);
// }
// DLOG << vec_result;
// auto time4 = paddle_mobile::time();
// std::cout << "predict cost :" << paddle_mobile::time_diff(time3,
// time4) / 10 << "ms"
// << std::endl;
}
std::cout << "如果结果Nan请查看: test/images/g_test_image_1x3x224x224_banana "
......
cmake_minimum_required(VERSION 3.6)
project(web-exporter)
set(CMAKE_CXX_STANDARD 11)
file(GLOB PADDLE_MOBILE_CPP_FILES
"../../src/common/*.c"
"../../src/common/*.cpp"
"../../src/memory/*.cpp"
"../../src/framework/*.c"
"../../src/framework/*.cpp"
"../../src/framework/program/*.cpp"
"../../src/framework/program/program-optimize/*.cpp"
)
file(GLOB EXPORT_CPP_FILES "*.cpp")
add_executable(web-exporter ${PADDLE_MOBILE_CPP_FILES} ${EXPORT_CPP_FILES})
target_include_directories(web-exporter PRIVATE "../../src")
target_link_libraries(web-exporter)
\ No newline at end of file
#include "export.h"
inline std::string indent(int i) {
return std::string(i, ' ');
}
void export_nodejs(ProgramPtr program, ScopePtr scope, std::ostream & os) {
os << "module.exports.program = {\n";
os << indent(2) << var2str("blocks") << ": [\n";
for (const auto& block: program->Blocks()) {
os << indent(4) << "{\n";
os << indent(6) << var2str("vars") << ": {\n";
for (const auto& var: block->Vars()) {
const auto& dim = var->Tensor_desc().Dims();
os << indent(8) << var2str(var->Name()) << ": {\n";
os << indent(10) << var2str("dim") << ": " << var2str(dim) << ",\n";
os << indent(10) << var2str("persistable") << ": " << var2str(var->Persistable()) << "\n";
os << indent(8) << "},\n";
}
os << indent(6) << "},\n";
os << indent(6) << var2str("ops") << ": [\n";
for (const auto& op: block->Ops()) {
os << indent(8) << "{\n";
os << indent(10) << var2str("type") << ": " << var2str(op->Type()) << ",\n";
os << indent(10) << var2str("inputs") << ": {\n";
for (const auto& kv: op->GetInputs()) {
os << indent(12) << var2str(kv.first) << ": " << var2str(kv.second) << ",\n";
}
os << indent(10) << "},\n";
os << indent(10) << var2str("outputs") << ": {\n";
for (const auto& kv: op->GetInputs()) {
os << indent(12) << var2str(kv.first) << ": " << var2str(kv.second) << ",\n";
}
os << indent(10) << "},\n";
os << indent(10) << var2str("attrs") << ": {\n";
for (const auto& kv: op->GetAttrMap()) {
os << indent(12) << var2str(kv.first) << ": ";
os << decltype(kv.second)::ApplyVistor(VarVisitor(), kv.second) << ",\n";
}
os << indent(10) << "},\n";
os << indent(8) << "},\n";
}
os << indent(6) << "],\n";
os << indent(4) << "},\n";
}
os << indent(2) << "]\n";
os << "}\n";
}
#include <cstdio>
#include "export.h"
void export_scope(ProgramPtr program, ScopePtr scope, const std::string & dirname) {
for (const auto& block: program->Blocks()) {
for (const auto& var: block->Vars()) {
if (var->Name() == "feed" || var->Name() == "fetch") {
continue;
}
if (var->Persistable()) {
auto* v = scope->FindVar(var->Name());
assert(v != nullptr);
int count = 1;
for (auto n: var->Tensor_desc().Dims()) {
count *= n;
}
auto* tensor = v->GetMutable<paddle_mobile::framework::LoDTensor>();
const float * p = tensor->mutable_data<float>();
std::string para_file_name = dirname + '/' + var->Name();
FILE *para_file = fopen(para_file_name.c_str(), "wb");  // binary mode: raw float payload
assert(p != nullptr);
fwrite(p, sizeof(float), count, para_file);
fclose(para_file);
// std::cout << "==> " << var->Name() << " " << count << "\n";
// for (int i = 0; i < count; i++) {
// std::cout << p[i] << ", ";
// }
// std::cout << "\n";
}
}
}
}
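Each exported file is a bare array of floats with no header, so a verification read is just the file size divided by `sizeof(float)`. A sketch of reading one parameter back (`load_param` is a hypothetical helper, not part of the exporter):

```cpp
#include <cstdio>
#include <vector>

// Read back one raw-float parameter file written by export_scope.
std::vector<float> load_param(const char *path) {
  FILE *f = std::fopen(path, "rb");
  if (f == nullptr) return {};
  std::fseek(f, 0, SEEK_END);
  long bytes = std::ftell(f);
  std::fseek(f, 0, SEEK_SET);
  std::vector<float> data(bytes / sizeof(float));
  std::fread(data.data(), sizeof(float), data.size(), f);
  std::fclose(f);
  return data;
}
```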
#include "export.h"
#include <sys/stat.h>
#include <sys/types.h>
class FakeExecutor : public paddle_mobile::framework::Executor<paddle_mobile::CPU, paddle_mobile::Precision::FP32> {
public:
FakeExecutor(const paddle_mobile::framework::Program<paddle_mobile::CPU> p) {
program_ = p;
batch_size_ = 1;
use_optimize_ = true;
loddable_ = false;
if (use_optimize_) {
to_predict_program_ = program_.optimizeProgram;
} else {
to_predict_program_ = program_.originProgram;
}
auto *variable_ptr = program_.scope->Var("batch_size");
variable_ptr[0].SetValue<int>(1);
if (program_.combined) {
InitCombineMemory();
} else {
InitMemory();
}
}
};
int main(int argc, char** argv) {
if (argc != 3) {
std::cout << "Usage: " << argv[0] << " <combined-modle-dir> <output-dir>\n";
return -1;
}
std::string model_dir = argv[1];
std::string model_path = model_dir + "/model";
std::string para_path = model_dir + "/params";
std::string out_dir = argv[2];
std::string out_model_js = out_dir + "/model.js";
std::string out_para_dir = out_dir + "/paras";
mkdir(out_dir.c_str(), S_IRWXU|S_IRWXG|S_IRWXO);
mkdir(out_para_dir.c_str(), S_IRWXU|S_IRWXG|S_IRWXO);
std::cout << "loading " << model_path << " & " << para_path << "\n";
paddle_mobile::framework::Loader<> loader;
auto program = loader.Load(model_path, para_path, true);
FakeExecutor executor(program);
auto optimizedProgram = program.optimizeProgram;
export_scope(optimizedProgram, program.scope, out_para_dir);
std::ofstream fs(out_model_js.c_str());
export_nodejs(optimizedProgram, program.scope, fs);
fs.close();
return 0;
}
#pragma once
#include <iostream>
#include <vector>
#include <memory>
#include <string>
#include <ostream>
#include <fstream>
#include "framework/loader.h"
#include "framework/executor.h"
#include "framework/scope.h"
#include "framework/program/program_desc.h"
// using paddle_mobile::framework::ProgramDesc;
// using paddle_mobile::framework::Scope;
using ProgramPtr = std::shared_ptr<paddle_mobile::framework::ProgramDesc>;
using ScopePtr = std::shared_ptr<paddle_mobile::framework::Scope>;
void export_nodejs(ProgramPtr program, ScopePtr scope, std::ostream & os = std::cout);
void export_scope(ProgramPtr program, ScopePtr scope, const std::string & dirname = ".");
template <typename T>
inline std::string var2str(const T & v) {
return std::to_string(v);
}
template <>
inline std::string var2str(const std::string & v) {
return "\"" + v + "\"";
}
inline std::string var2str(const char* v) {
return var2str<std::string>(v);
}
inline std::string var2str(const bool v) {
return v ? "true" : "false";
}
template <typename T>
std::string var2str(const std::vector<T> & v) {
std::string r = "[";
auto s = v.size();
for (int i = 0; i < s; i++) {
if (i) r += ", ";
r += var2str(v[i]);
}
return r + "]";
}
struct VarVisitor {
using type_t = decltype(var2str(0));
template <typename T>
type_t operator()(const T & v) {
return var2str(v);
}
};
\ No newline at end of file
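For reference, the `var2str` overloads above render numbers, strings, bools, and vectors as JavaScript literals. A quick usage sketch (expected output in comments; the include path is an assumption):

```cpp
#include <iostream>
#include <string>
#include <vector>
#include "export.h"  // assumed path to the overloads above

int main() {
  std::cout << var2str(3) << "\n";                        // 3
  std::cout << var2str(true) << "\n";                     // true
  std::cout << var2str(std::string("conv2d")) << "\n";    // "conv2d"
  std::cout << var2str(std::vector<int>{1, 3, 224, 224})  // [1, 3, 224, 224]
            << "\n";
  return 0;
}
```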