提交 c6dcffc6 编写于 作者: D dzhwinter

lb. add debug output

上级 607080e8
......@@ -333,9 +333,49 @@ std::vector<std::shared_ptr<ExecutorPrepareContext>> Executor::Prepare(
return result;
}
// void CheckResult(const std::string op_type, ExecutorPrepareContext* ctx, Scope* local_scope) {
// VLOG(3) << "before checking result";
// auto& dev_ctx = *platform::DeviceContextPool::Instance().Get(place_);
// std::vector<std::string> outputs;
// auto& block = ctx->prog_.Block(0);
// bool found = false;
// framework::OpDesc* myop = nullptr;
// for(auto& op : block.AllOps()) {
// if(op->Type() == "load_combine" || op->Type() == "fetch" || op->Type() == "feed") return;
// if (op->Type() == op_type) {
// found = true;
// myop = op;
// break;
// }
// }
// }
// if(!found) {
// VLOG(3) << "not found op!";
// return;
// }
// auto* op = myop;
// VLOG(3) << "start op output" << op->Type();
// for(auto var_name: op->OutputArgumentNames()) {
// auto* var = local_scope->Var(var_name);
// auto* var_desc = block.FindVar(var_name);
// if (var_desc->Persistable()) continue;
// auto* tensor = var->GetMutable<framework::LoDTensor>();
// framework::Tensor check;
// VLOG(3) << "before tensor copy";
// framework::TensorCopy(*tensor, platform::CPUPlace(), dev_ctx, &check);
// VLOG(3) << "after tensor copy";
// float sum = .0;
// for(size_t i=0; i < check.numel(); ++i) {
// sum += check.data<float>()[i];
// }
// VLOG(3) << "op " << op->Type() << " output var " << var_name << " sum " << sum;
// VLOG(3) << "after checking result";
// }
void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
bool create_local_scope, bool create_vars,
bool keep_kids) {
VLOG(3) << "RunPreparedContext inside";
Scope* local_scope = scope;
if (create_vars) {
if (create_local_scope) {
......@@ -346,13 +386,73 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
for (auto& op : ctx->ops_) {
op->Run(*local_scope, place_);
// CheckResult(op->Type(), ctx, local_scope);
if (FLAGS_benchmark) {
VLOG(2) << "Memory used after operator " + op->Type() + " running: "
<< memory::memory_usage(place_);
}
}
platform::DeviceContextPool::Instance().Get(place_)->Wait();
VLOG(3) << "start checking";
auto& dev_ctx = *platform::DeviceContextPool::Instance().Get(place_);
std::vector<std::string> outputs;
auto& block = ctx->prog_.Block(0);
for(auto& op : block.AllOps()) {
if(op->Type() == "load_combine" || op->Type() == "fetch" || op->Type() == "feed") continue;
// for(auto& real_op : ctx->ops_) {
// if(real_op->Type() == op->Type()) {
// VLOG(3) << real_op->Type() << " " <<place_ << " " << real_op->DebugStringEx(local_scope);
// }
// }
//VLOG(3) << "start op output" << op->Type();
for(auto var_name: op->InputArgumentNames()) {
auto* var = local_scope->Var(var_name);
auto* var_desc = block.FindVar(var_name);
if (var_desc->Persistable()) continue;
auto* tensor = var->GetMutable<framework::LoDTensor>();
framework::Tensor check;
VLOG(3) << "before tensor copy";
framework::TensorCopy(*tensor, platform::CPUPlace(), dev_ctx, &check);
VLOG(3) << "after tensor copy";
float sum = .0;
for(size_t i=0; i < check.numel(); ++i) {
sum += check.data<float>()[i];
}
VLOG(3) << "op " << op->Type() << " input var " << var_name << " sum " << sum;
}
VLOG(3) << "op " << op->Type() << "input finished";
for(auto var_name: op->OutputArgumentNames()) {
auto* var = local_scope->Var(var_name);
auto* var_desc = block.FindVar(var_name);
if (var_desc->Persistable()) continue;
auto* tensor = var->GetMutable<framework::LoDTensor>();
framework::Tensor check;
VLOG(3) << "before tensor copy";
if(op->Type() == "batch_norm" && platform::is_gpu_place(place_)) {
VLOG(3) << "op " << op->Type() << " output var " << var_name << " " << tensor->numel();
tensor->mutable_data<float>(place_);
framework::TensorCopy(*tensor, platform::CPUPlace(), dev_ctx, &check);
} else {
framework::TensorCopy(*tensor, platform::CPUPlace(), dev_ctx, &check);
}
VLOG(3) << "after tensor copy";
float sum = .0;
for(size_t i=0; i < check.numel(); ++i) {
sum += check.data<float>()[i];
}
VLOG(3) << "op " << op->Type() << " output var " << var_name << " sum " << sum;
}
}
VLOG(3) << "after checking result";
if (local_scope != scope) {
scope->DeleteScope(local_scope);
} else {
......
......@@ -46,7 +46,7 @@ if(WITH_GPU)
endif(NOT WIN32)
endif()
include_directories("D:/Paddle/")
include_directories("E:/Paddle/")
include_directories("${PADDLE_LIB}")
include_directories("${PADDLE_LIB}/third_party/install/protobuf/include")
include_directories("${PADDLE_LIB}/third_party/install/glog/include")
......@@ -72,7 +72,12 @@ link_directories("${PADDLE_LIB}/third_party/install/gflags/lib")
link_directories("${PADDLE_LIB}/paddle/fluid/inference")
# add_executable(${DEMO_NAME} ${DEMO_NAME}.cc)
add_library(${DEMO_NAME} ${DEMO_NAME}.cc)
# add_library(${DEMO_NAME} ${DEMO_NAME}.cc)
add_library(${DEMO_NAME} SHARED ${DEMO_NAME}.cc)
add_executable(real_data_icnet_tester real_data_icnet_tester.cc)
add_executable(test test.cc)
add_executable(thread_icnet_test thread_icnet_test.cc)
if(WITH_MKL)
include_directories("${PADDLE_LIB}/third_party/install/mklml/include")
set(MATH_LIB ${PADDLE_LIB}/third_party/install/mklml/lib/libmklml_intel${CMAKE_SHARED_LIBRARY_SUFFIX}
......@@ -89,7 +94,11 @@ endif()
# Note: libpaddle_inference_api.so/a must put before libpaddle_fluid.so/a
if(WITH_STATIC_LIB)
set(DEPS
${PADDLE_LIB}/paddle/fluid/inference/libpaddle_fluid${CMAKE_STATIC_LIBRARY_SUFFIX})
# ${PADDLE_LIB}/paddle/fluid/inference/libpaddle_fluid${CMAKE_STATIC_LIBRARY_SUFFIX}
D:/Paddle/bazel-dll/fluid_install_dir/paddle/fluid/inference/libpaddle_fluid${CMAKE_STATIC_LIBRARY_SUFFIX}
# E:/Paddle/build/paddle/fluid/inference/api/Release/libpaddle_inference_api${CMAKE_STATIC_LIBRARY_SUFFIX}
D:/Paddle/bazel-dll/paddle/fluid/inference/api/Release/libpaddle_inference_api${CMAKE_STATIC_LIBRARY_SUFFIX}
)
else()
set(DEPS
${PADDLE_LIB}/paddle/fluid/inference/libpaddle_fluid${CMAKE_SHARED_LIBRARY_SUFFIX})
......@@ -121,3 +130,9 @@ if(WITH_GPU)
endif()
target_link_libraries(${DEMO_NAME} ${DEPS})
target_link_libraries(test ${DEMO_NAME} )
target_link_libraries(thread_icnet_test ${DEPS})
target_link_libraries(real_data_icnet_tester ${DEPS})
target_compile_definitions(${DEMO_NAME} PRIVATE "API_DEFINITION")
......@@ -19,49 +19,42 @@
#include <algorithm>
#include <vector>
#include <string>
#include <memory>
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "inference_icnet.h"
namespace paddle {
std::string DIRNAME = "./infer_model";
std::string DATA = "./test-image.txt";
const int C = 3; // image channel
const int H = 449; // image height
const int W = 581; // image width
// 数据格式
// "<space splitted floats as data>\t<space splitted ints as shape"
// 1. 存储为float32格式。
// 2. 必须减去均值。 CHW三个通道为 mean = 112.15, 109.41, 185.42
using namespace paddle;
struct Record
{
class Predictor {
private:
std::unique_ptr<PaddlePredictor> predictor;
struct Record
{
std::vector<float> data;
std::vector<int32_t> shape;
};
};
NativeConfig GetConfig() {
NativeConfig config;
config.prog_file=DIRNAME + "/__model__";
config.param_file=DIRNAME + "/__params__";
config.fraction_of_gpu_memory = 0.0;
config.use_gpu = true;
config.device = 0;
return config;
}
const int C = 3; // image channel
const int H = 449; // image height
const int W = 581; // image width
using Time = decltype(std::chrono::high_resolution_clock::now());
using Time = decltype(std::chrono::high_resolution_clock::now());
Time time() { return std::chrono::high_resolution_clock::now(); };
Time time() { return std::chrono::high_resolution_clock::now(); };
double time_diff(Time t1, Time t2) {
double time_diff(Time t1, Time t2) {
typedef std::chrono::microseconds ms;
auto diff = t2 - t1;
ms counter = std::chrono::duration_cast<ms>(diff);
return counter.count() / 1000.0;
}
}
static void split(const std::string& str, char sep,
static void split(const std::string& str, char sep,
std::vector<std::string>* pieces) {
pieces->clear();
if (str.empty()) {
......@@ -77,9 +70,9 @@ static void split(const std::string& str, char sep,
if (!str.substr(pos).empty()) {
pieces->push_back(str.substr(pos));
}
}
}
Record ProcessALine(const std::string& line) {
Record ProcessALine(const std::string& line) {
std::vector<std::string> columns;
split(line, '\t', &columns);
......@@ -96,62 +89,74 @@ Record ProcessALine(const std::string& line) {
record.shape.push_back(std::stoi(s));
}
return record;
}
}
public:
Predictor (const char* prog_file,
const char* param_file, const float fraction_of_gpu_memory,
const bool use_gpu, const int device) {
void test_naive(int batch_size){
NativeConfig config = GetConfig();
auto predictor = CreatePaddlePredictor<NativeConfig>(config);
int height = H;
int width = W;
int channel = C;
int num_sum = height * width * channel * batch_size;
NativeConfig config;
config.prog_file = prog_file;
config.param_file = param_file;
config.fraction_of_gpu_memory = fraction_of_gpu_memory;
config.use_gpu = use_gpu;
config.device = device;
predictor = CreatePaddlePredictor<NativeConfig>(config);
}
// 1. use fake data
void predict(float* input, const int channel, const int height, const int width,
int64_t** output, int* output_length, int batch_size) {
std::vector<float> data;
for(int i = 0; i < num_sum; i++) {
data.push_back(0.0);
int intput_length = channel * height * width * batch_size;
for (int i = 0; i < intput_length; i++) {
data.push_back(*((float*)input + i));
}
// initialize the input data
PaddleTensor tensor;
tensor.shape = std::vector<int>({batch_size, channel, height, width});
tensor.shape = std::vector<int>({ batch_size, channel, height, width });
tensor.data.Resize(sizeof(float) * batch_size * channel * height * width);
std::copy(data.begin(), data.end(), static_cast<float*>(tensor.data.data()));
tensor.dtype = PaddleDType::FLOAT32;
// 2. read data from file
// std::string line;
// std::ifstream file(DATA);
// std::getline(file, line);
// auto record = ProcessALine(line);
// file.close();
// PaddleTensor tensor;
// tensor.shape = record.shape;
// tensor.data =
// PaddleBuf(record.data.data(), record.data.size() * sizeof(float));
tensor.dtype = PaddleDType::FLOAT32;
std::vector<PaddleTensor> paddle_tensor_feeds(1, tensor);
PaddleTensor tensor_out;
// initialize the output data
PaddleTensor tensor_out;
std::vector<PaddleTensor> outputs(1, tensor_out);
predictor->Run(paddle_tensor_feeds, &outputs, batch_size);
auto time1 = time();
for(size_t i = 0; i < 2; i++) {
std::cout << "Pass " << i << "predict";
predictor->Run(paddle_tensor_feeds, &outputs, batch_size);
*output_length = (int)outputs[0].data.length();
std::memcpy(static_cast<void *>(*output), outputs[0].data.data(), outputs[0].data.length());
int64_t sum_out = 0;
for(int i=0; i < outputs[0].data.length()/sizeof(int64_t); ++i) {
int64_t item = static_cast<int64_t*>(outputs[0].data.data())[i];
sum_out += item;
if (item != 0) {
std::cout << item << std::endl;
}
}
auto time2 = time();
std::ofstream ofresult("naive_test_result.txt", std::ios::app);
std::cout << "sum_out" << sum_out << std::endl;
}
};
std::cout <<"batch: " << batch_size << " predict cost: " << time_diff(time1, time2) / 100.0 << "ms" << std::endl;
std::cout << outputs.size() << std::endl;
// Creates a Predictor from the given program/parameter files and device
// settings and returns it as an opaque handle. The handle is heap-allocated
// here; destory_predictor() is the matching release call.
API_REFERENCE void * init_predictor(const char* prog_file,
                                    const char* param_file,
                                    const float fraction_of_gpu_memory,
                                    const bool use_gpu,
                                    const int device) {
  Predictor* created = new Predictor(prog_file, param_file,
                                     fraction_of_gpu_memory, use_gpu, device);
  return created;
}
// C-style entry point that forwards one inference request to the Predictor
// behind `handle` (a value previously returned by init_predictor()).
// All remaining arguments are passed through unchanged to Predictor::predict.
API_REFERENCE void predict(void* handle, float* input, const int channel, const int height, const int width,
                           int64_t** output, int* output_length, int batch_size) {
  assert(handle != nullptr);
  Predictor* self = static_cast<Predictor*>(handle);
  self->predict(input, channel, height, width, output, output_length, batch_size);
}
} // namespace paddle
int main(int argc, char** argv) {
paddle::test_naive(1 << 0);
return 0;
// Releases a predictor previously created by init_predictor().
//
// `handle` is the opaque pointer returned by init_predictor(); passing a null
// handle is a no-op. The pointer must be converted back to Predictor* before
// deletion: deleting through void* never runs ~Predictor, so the predictor
// owned by the object would be leaked.
API_REFERENCE void destory_predictor(void *handle) {
  // `delete` on a null pointer is well-defined and does nothing, so no
  // explicit null check is needed. Resetting the by-value parameter afterwards
  // would have no effect on the caller's copy, so it is omitted.
  delete static_cast<Predictor*>(handle);
}
#ifdef _WIN32
#ifdef inference_icnet_EXPORTS
#define API_REFERENCE extern "C" __declspec(dllexport)
#else
#define API_REFERENCE extern "C" __declspec(dllimport)
#endif
#else
#define API_REFERENCE
#endif
//API_REFERENCE void * init_predictor();
//API_REFERENCE void destory_predictor(void *handle);
//API_REFERENCE void predict(void *handle, int n);
API_REFERENCE void * init_predictor(const char* prog_file,
const char* param_file, const float fraction_of_gpu_memory,
const bool use_gpu, const int device);
API_REFERENCE void predict(void* handle, float* input, const int channel, const int height,
const int width, int64_t** output, int* output_length, int batch_size);
API_REFERENCE void destory_predictor(void *handle);
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#define GOOGLE_GLOG_DLL_DECL
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <chrono>
#include <fstream>
#include <iostream>
#include "paddle/fluid/inference/api/paddle_inference_api.h"
namespace paddle {
// DEFINE_string(dirname, "./lb",
// "Directory of the inference model.");
// Builds the NativeConfig used by this test: model and parameters are read
// from the "lb" directory, GPU execution is enabled on device 0, and the
// GPU memory fraction is set to 0.8.
NativeConfig GetConfig() {
  NativeConfig cfg;
  // cfg.model_dir = FLAGS_dirname;

  // Model files.
  cfg.prog_file = "lb/__model__";
  cfg.param_file = "lb/__params__";

  // Device selection.
  cfg.use_gpu = true;
  cfg.device = 0;
  cfg.fraction_of_gpu_memory = 0.8;

  return cfg;
}
// Timestamp type produced by the high-resolution clock.
using Time = decltype(std::chrono::high_resolution_clock::now());

// Returns the current high-resolution timestamp.
Time time() {
  return std::chrono::high_resolution_clock::now();
}

// Returns the interval from t1 to t2 in milliseconds. The interval is first
// truncated to whole microseconds, then divided by 1000.
double time_diff(Time t1, Time t2) {
  const auto elapsed_us =
      std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1);
  return elapsed_us.count() / 1000.0;
}
void test_naive(int batch_size){
NativeConfig config = GetConfig();
auto predictor = CreatePaddlePredictor<NativeConfig>(config);
int height = 449;
int width = 581;
// =============read file list =============
std::ifstream infile("new_file.list");
std::string temp_s;
std::vector<std::string> all_files;
while (!infile.eof()) {
infile >> temp_s;
all_files.push_back(temp_s);
}
// size_t file_num = all_files.size();
infile.close();
// =============read file list =============
for (size_t f_k = 0; f_k < 1; f_k ++) {
std::ifstream in_img(all_files[f_k]);
std::cout << all_files[f_k] << std::endl;
float temp_v;
float sum_n = 0.0;
std::vector<float> data;
while (!in_img.eof()) {
in_img >> temp_v;
data.push_back(float(temp_v));
// std::cout << temp_v << " ";
sum_n += temp_v;
}
in_img.close();
std::cout << "sum: " << sum_n << std::endl;
PaddleTensor tensor;
tensor.shape = std::vector<int>({batch_size, 3, height, width});
tensor.data.Resize(sizeof(float) * batch_size * 3 * height * width);
std::copy(data.begin(), data.end(), static_cast<float*>(tensor.data.data()));
tensor.dtype = PaddleDType::FLOAT32;
std::vector<PaddleTensor> paddle_tensor_feeds(1, tensor);
PaddleTensor tensor_out;
std::vector<PaddleTensor> outputs(1, tensor_out);
predictor->Run(paddle_tensor_feeds, &outputs, batch_size);
std::cout << "start predict123:" << std::endl;
auto time1 = time();
for(size_t i = 0; i < 1; i++) {
predictor->Run(paddle_tensor_feeds, &outputs, batch_size);
}
auto time2 = time();
std::ofstream ofresult("naive_test_result.txt", std::ios::app);
std::cout <<"batch: " << batch_size << " predict cost: " << time_diff(time1, time2) / 1000.0 << "ms" << std::endl;
std::cout << outputs.size() << std::endl;
int64_t * data_o = static_cast<int64_t*>(outputs[0].data.data());
int64_t sum_out = 0;
for (size_t j = 0; j < outputs[0].data.length() / sizeof(int64_t); ++j) {
ofresult << std::to_string(data_o[j]) << " ";
sum_out += data_o[j];
}
std::cout << "sum_out " << sum_out << std::endl;
ofresult << std::endl;
ofresult.close();
}
}
} // namespace paddle
// Entry point: runs the naive predictor test once with a batch size of 1.
int main(int argc, char** argv) {
  // google::ParseCommandLineFlags(&argc, &argv, true);
  const int batch_size = 1;
  paddle::test_naive(batch_size);
  return 0;
}
#include<windows.h>
#include <fstream>
#include "inference_icnet.h"
#include <thread>
#include <vector>
#include <string>
#include <iostream>
#include <sstream>
using namespace std;
// Parses the first whitespace-delimited token of `str` as a value of `Type`
// using stream extraction.
//
// Returns the parsed value, or a value-initialised Type (0 for arithmetic
// types) when nothing can be parsed. The result is value-initialised up front
// because for empty or whitespace-only input the extraction fails before
// anything is stored, which would otherwise leave the return value
// indeterminate.
template <class Type>
Type stringToNum(const std::string& str)
{
    std::istringstream iss(str);
    Type num{};
    iss >> num;
    return num;
}
// Runs one prediction through the exported C API on a single text-encoded
// image (one value per line) and writes the int64 output values to
// "./1.png.output.txt".
//
// Input and output buffers are std::vector-owned, so they are released on
// every path, including the early exit when the image cannot be opened.
void test_imgs() {
    void *h = init_predictor("./lb/__model__", "./lb/__params__", 0.3f, true, 0);

    // ============= read file list =============
    std::ifstream infile("new_file.list");
    std::ofstream ofs("./1.png.output.txt");
    std::string temp_s;
    std::vector<std::string> all_files;
    // Extract-then-test: terminates when the list file is missing (eof() would
    // never become true on a stream that failed to open) and does not append
    // a duplicate last entry.
    while (infile >> temp_s) {
        all_files.push_back(temp_s);
    }
    // size_t file_num = all_files.size();
    infile.close();
    // ============= read file list =============

    for (size_t f_k = 0; f_k < 1; f_k++) {
        // std::string path = "D:\\Paddle\\paddle\\fluid\\inference\\api\\demo_ci\\build\\Release\\";
        // std::ifstream in_img(path + all_files[f_k]);
        std::string mypath = "D:\\Paddle\\paddle\\fluid\\inference\\api\\demo_ci\\build\\Release\\1.png.txt";
        std::cout << "file" << mypath << std::endl;
        std::ifstream in_img(mypath);
        //std::cout << path + all_files[f_k] << std::endl;
        if (!in_img.is_open()) {
            std::cout << "open failed" << std::endl;
            // Nothing to predict on; skip instead of feeding meaningless data.
            continue;
        }

        const int size = 3 * 449 * 581 * 1;
        std::vector<float> data(size);  // zero-filled
        std::string value;
        double sum_input = .0;
        for (int i = 0; i < size; i++) {
            // Stop at the first missing line; the remaining samples stay 0
            // rather than silently repeating the previous line.
            if (!std::getline(in_img, value, '\n')) {
                break;
            }
            const double v = stringToNum<double>(value);
            data[i] = static_cast<float>(v);
            sum_input += v;
        }
        std::cout << "sum_input" << sum_input << std::endl;
        in_img.close();

        const int SIZE = 449 * 581 * 1;
        std::vector<int64_t> out(SIZE);  // zero-filled output buffer
        int64_t * p = out.data();
        int out_size = 0;
        predict(h, data.data(), 3, 449, 581, &p, &out_size, 1);
        std::cout << "out_size = " << out_size << std::endl;

        // out_size is in bytes; never read more elements than the buffer holds.
        size_t out_count = out_size > 0 ? static_cast<size_t>(out_size) / sizeof(int64_t) : 0;
        if (out_count > out.size()) {
            out_count = out.size();
        }
        double out_sum = .0;
        for (size_t i = 0; i < out_count; i++) {
            out_sum += p[i];
            ofs << p[i] << " ";
        }
        ofs.close();
        std::cout << "inferece out sum" << out_sum << std::endl;
    }
    destory_predictor(h);
}
// Entry point: runs the single-image prediction test.
int main(int argc, char** argv) {
    // Disabled multi-threaded experiment, kept for reference:
    //if (true) {
    //    std::thread t1(func, init_predictor("./infer_model/__model__", "./infer_model/__params__", 0.1f, true, 0));
    //    std::thread t2(func, init_predictor("./infer_model/__model__", "./infer_model/__params__", 0.1f, true, 0));
    //    //std::thread t3(func, init_predictor("./infer_model/__model__", "./infer_model/__params__", 0.1f, true, 0));
    //    //std::thread t4(func, init_predictor("./infer_model/__model__", "./infer_model/__params__", 0.1f, true, 0));
    //    t1.join();
    //    t2.join();
    //    //t3.join();
    //    //t4.join();
    //    //Sleep(1);
    //}

    test_imgs();

    return 0;
}
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#define GOOGLE_GLOG_DLL_DECL
#include <gflags/gflags.h>
#include <glog/logging.h>
//#include <gtest/gtest.h>
#include <chrono>
#include <fstream>
#include <iostream>
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include <thread> // NOLINT
#define ASSERT_TRUE(x) x
#define ASSERT_EQ(x, y) assert(x == y)
namespace paddle {
// DEFINE_string(dirname, "./LB_icnet_model",
// "Directory of the inference model.");
// Builds the NativeConfig shared by every predictor in this test: model and
// parameters come from "./dzh_lb", GPU execution is enabled on device 0, and
// the GPU memory fraction is set to 0.08.
NativeConfig GetConfig() {
  NativeConfig cfg;

  // Model files.
  cfg.prog_file = "./dzh_lb/__model__";
  cfg.param_file = "./dzh_lb/__params__";

  // Device selection.
  cfg.use_gpu = true;
  cfg.device = 0;
  cfg.fraction_of_gpu_memory = 0.08;

  return cfg;
}
// Time point type of std::chrono::high_resolution_clock.
using Time = decltype(std::chrono::high_resolution_clock::now());

// Samples the high-resolution clock.
Time time() {
  return std::chrono::high_resolution_clock::now();
}

// Milliseconds elapsed between t1 and t2 (negative when t2 precedes t1),
// computed from the interval truncated to whole microseconds.
double time_diff(Time t1, Time t2) {
  using Micros = std::chrono::microseconds;
  const Micros span = std::chrono::duration_cast<Micros>(t2 - t1);
  return span.count() / 1000.0;
}
void test_naive(int batch_size, std::string model_path){
PaddlePredictor* pres[2];
NativeConfig config = GetConfig();
// config.model_dir = model_path;
auto predictor0 = CreatePaddlePredictor<NativeConfig>(config);
auto predictor1 = CreatePaddlePredictor<NativeConfig>(config);
pres[0] = predictor0.get();
pres[1] = predictor1.get();
int height = 449;
int width = 581;
std::vector<float> data;
for (int i = 0; i < 3 * height * width; i++) {
data.push_back(0);
}
PaddleTensor tensor;
tensor.shape = std::vector<int>({batch_size, 3, height, width});
tensor.data.Resize(sizeof(float) * batch_size * 3 * height * width);
std::copy(data.begin(), data.end(), static_cast<float*>(tensor.data.data()));
tensor.dtype = PaddleDType::FLOAT32;
std::vector<PaddleTensor> paddle_tensor_feeds(1, tensor);
constexpr int num_jobs = 5; // each job run 1 batch
std::vector<std::thread> threads;
for (int tid = 0; tid < num_jobs; ++tid) {
threads.emplace_back([&, tid]() {
auto predictor = pres[tid];
std::vector<PaddleTensor> local_outputs;
for(size_t i = 0; i < 1000; i++) {
ASSERT_TRUE(predictor->Run(paddle_tensor_feeds, &local_outputs));
std::cout << "run: " << tid << std::endl;
}
ASSERT_EQ(local_outputs.size(), 1UL);
});
}
for (int i = 0; i < num_jobs; ++i) {
threads[i].join();
}
}
//TEST(alexnet, naive) {
// test_naive(1 << 0, "./trt_models/vgg19");
//}
} // namespace paddle
// Entry point: runs the multi-threaded predictor test with a batch size of 1
// and an empty model path.
int main(int argc, char** argv) {
  const int batch_size = 1;
  paddle::test_naive(batch_size, "");
  return 0;
}
......@@ -141,6 +141,27 @@ class BatchNormKernel<platform::CUDADeviceContext, T>
bias->template data<BatchNormParamType<T>>(),
est_mean->template data<BatchNormParamType<T>>(),
est_var->template data<BatchNormParamType<T>>(), epsilon));
VLOG(3) << "before tensor copy";
Tensor mean_, var_, x_, y_;
framework::TensorCopy(*est_mean, platform::CPUPlace(), dev_ctx, &mean_);
framework::TensorCopy(*est_var, platform::CPUPlace(), dev_ctx, &var_);
framework::TensorCopy(*x, platform::CPUPlace(), dev_ctx, &x_);
framework::TensorCopy(*y, platform::CPUPlace(), dev_ctx, &y_);
VLOG(3) << "after tensor copy";
auto check_tensor = [&](const Tensor& check) {
float sum = .0;
for(size_t i=0; i < check.numel(); ++i) {
sum += check.data<float>()[i];
}
return sum;
};
VLOG(3) << "BatchNormKernel";
VLOG(3) << "mean" << check_tensor(mean_);
VLOG(3) << "var" << check_tensor(var_);
VLOG(3) << "x" << check_tensor(x_);
VLOG(3) << "y" << check_tensor(y_);
} else {
// Run training mode.
// obtain running mean and running inv var, and see if we need to
......
......@@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <fstream>
#include <vector>
#include "paddle/fluid/framework/data_type_transform.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/platform/device_context.h"
......@@ -34,6 +35,7 @@ class LoadCombineOp : public framework::OperatorBase {
auto load_as_fp16 = Attr<bool>("load_as_fp16");
std::ifstream fin(filename, std::ios_base::in | std::ios_base::binary);
//std::ifstream fin(filename, std::ios_base::in);
PADDLE_ENFORCE(!fin.bad(),
"Cannot open file %s for load_combine op", filename);
......@@ -46,7 +48,7 @@ class LoadCombineOp : public framework::OperatorBase {
auto &dev_ctx = *pool.Get(place);
for (size_t i = 0; i < out_var_names.size(); i++) {
VLOG(3) << "load " << out_var_names[i];
VLOG(3) << "load variable " << out_var_names[i];
auto *out_var = scope.FindVar(out_var_names[i]);
PADDLE_ENFORCE(out_var != nullptr, "Output variable %s cannot be found",
......@@ -61,6 +63,13 @@ class LoadCombineOp : public framework::OperatorBase {
// Get data from fin to tensor
DeserializeFromStream(fin, tensor, dev_ctx);
VLOG(3) << "after deserialization";
framework::Tensor check;
framework::TensorCopy(*tensor, platform::CPUPlace(), dev_ctx, &check);
float sum = .0;
for(size_t i=0; i < check.numel(); ++i) {
sum += check.data<float>()[i];
}
VLOG(3) << "sum result" << sum;
auto in_dtype = framework::ToDataType(tensor->type());
auto out_dtype =
load_as_fp16 ? framework::proto::VarType::FP16 : in_dtype;
......@@ -80,6 +89,7 @@ class LoadCombineOp : public framework::OperatorBase {
tensor = out_var->GetMutable<framework::LoDTensor>();
tensor->set_lod(fp16_tensor.lod());
tensor->ShareDataWith(fp16_tensor);
}
VLOG(3) << "load " << out_var_names[i] << " finished";
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册