diff --git a/CMakeLists.txt b/CMakeLists.txt
index e37afa3ec717eba6dbcdf6972ce5b90f95172eb3..c1003f32a837c3f438eb3c219e9e9859b9170df2 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -212,6 +212,7 @@ endif()
 
 include(external/threadpool)
+include(flags)              # set paddle compile flags
 include(cudnn)              # set cudnn libraries, must before configure
 include(configure)          # add paddle env configuration
@@ -225,9 +226,6 @@ elseif()
     set(WITH_ANAKIN OFF CACHE STRING "Anakin is used in MKL only now." FORCE)
 endif()
 
-include(flags)              # set paddle compile flags
-include(cudnn)              # set cudnn libraries, must before configure
-include(configure)          # add paddle env configuration
 include(generic)            # simplify cmake module
 include(package)            # set paddle packages
 include(ccache)             # set ccache for compilation
diff --git a/paddle/fluid/inference/api/demo_ci/simple_on_word2vec.cc b/paddle/fluid/inference/api/demo_ci/simple_on_word2vec.cc
index 82f0ecaee130e60e36cf06eecf65db50fec73d81..487fc7b14e2c04af1e17efff91de0bfeed15c8a7 100644
--- a/paddle/fluid/inference/api/demo_ci/simple_on_word2vec.cc
+++ b/paddle/fluid/inference/api/demo_ci/simple_on_word2vec.cc
@@ -135,7 +135,6 @@ void MainThreads(int num_threads, bool use_gpu) {
 }  // namespace paddle
 
 int main(int argc, char** argv) {
-  FLAGS_dirname = "./word2vec.inference.model";
   google::ParseCommandLineFlags(&argc, &argv, true);
   paddle::demo::Main(false /* use_gpu*/);
   paddle::demo::MainThreads(1, false /* use_gpu*/);
diff --git a/paddle/fluid/inference/api/demo_ci/test.cc b/paddle/fluid/inference/api/demo_ci/test.cc
deleted file mode 100644
index 41f05a9b501ad1c182e308ffada4dc759cfa0209..0000000000000000000000000000000000000000
--- a/paddle/fluid/inference/api/demo_ci/test.cc
+++ /dev/null
@@ -1,99 +0,0 @@
-
-#include <fstream>
-#include <iostream>
-#include "inference_icnet.h"
-#include <sstream>
-#include <string>
-#include <thread>
-#include <vector>
-
-#include <chrono>
-using namespace std;
-
-
-template <typename Type>
-Type stringToNum(const string& str)
-{
-    istringstream iss(str);
-    Type num;
-    iss >> num;
-    return num;
-}
-
-void test_imgs() {
-  void *h = init_predictor("./lb/__model__", "./lb/__params__", 0.3f, true, 0);
-
-  std::ifstream infile("new_file.list");
-  std::ofstream ofs("./1.png.output.txt");
-
-  std::string temp_s;
-  std::vector<std::string> all_files;
-  while (!infile.eof()) {
-    infile >> temp_s;
-    all_files.push_back(temp_s);
-  }
-  // size_t file_num = all_files.size();
-  infile.close();
-  // =============read file list =============
-  for (size_t f_k = 0; f_k < 1; f_k++) {
-    // std::string path = "D:\\Paddle\\paddle\\fluid\\inference\\api\\demo_ci\\build\\Release\\";
-    // std::ifstream in_img(path + all_files[f_k]);
-    std::string mypath = "D:\\Paddle\\paddle\\fluid\\inference\\api\\demo_ci\\build\\Release\\1.png.txt";
-    std::cout << "file" << mypath << std::endl;
-    std::ifstream in_img(mypath);
-    //std::cout << path + all_files[f_k] << std::endl;
-    double temp_v;
-    const int size = 3 * 449 * 581 * 1;
-    float * data = new float[size];
-    std::string value;
-
-    if (!in_img.is_open()) {
-      cout << "open failed" << endl;
-    }
-    double sum_input = .0;
-    for (auto i = 0; i < size; i++) {
-      getline(in_img, value, '\n');
-      double v = stringToNum<double>(value);
-      data[i] = static_cast<float>(v);
-      sum_input += v;
-    }
-    std::cout << "sum_input" << sum_input << std::endl;
-
-    in_img.close();
-    const int SIZE = 449 * 581 * 1;
-    int64_t * p = new int64_t[SIZE]();
-    int out_size = 0;
-    //memset(p, 0, size);
-    predict(h, data, 3, 449, 581, &p, &out_size, 1);
-    std::cout << "out_size = " << out_size << std::endl;
-
-    double out_sum = .0;
-    for (auto i = 0; i < out_size / sizeof(int64_t); i++) {
-      out_sum += p[i];
-      ofs << p[i] << " ";
-    }
-    ofs.close();
-
-    std::cout << "inferece out sum" << out_sum << std::endl;
-    delete p;
-  }
-
-  destory_predictor(h);
-}
-
-int main(int argc, char** argv) {
-  //if (true) {
-  //  std::thread t1(func, init_predictor("./infer_model/__model__", "./infer_model/__params__", 0.1f, true, 0));
-  //  std::thread t2(func, init_predictor("./infer_model/__model__", "./infer_model/__params__", 0.1f, true, 0));
-  //  //std::thread t3(func, init_predictor("./infer_model/__model__", "./infer_model/__params__", 0.1f, true, 0));
-  //  //std::thread t4(func, init_predictor("./infer_model/__model__", "./infer_model/__params__", 0.1f, true, 0));
-  //  t1.join();
-  //  t2.join();
-  //  //t3.join();
-  //  //t4.join();
-  //  //Sleep(1);
-  //}
-  test_imgs();
-
-  return 0;
-}
diff --git a/paddle/fluid/inference/api/helper.h b/paddle/fluid/inference/api/helper.h
index 270def69b81b380a2bfa1403e84e656090b7f82f..f5c83bcd546d096e5b0df0a2c5ca4e1f00633b5e 100644
--- a/paddle/fluid/inference/api/helper.h
+++ b/paddle/fluid/inference/api/helper.h
@@ -97,7 +97,7 @@ static void TensorAssignData(PaddleTensor *tensor,
 }
 
 template <typename T>
-static int ZeroCopyTensorAssignData(paddle::ZeroCopyTensor *tensor,
+static int ZeroCopyTensorAssignData(ZeroCopyTensor *tensor,
                                     const std::vector<std::vector<T>> &data) {
   int size{0};
   auto *ptr = tensor->mutable_data<T>(PaddlePlace::kCPU);
diff --git a/paddle/fluid/operators/CMakeLists.txt b/paddle/fluid/operators/CMakeLists.txt
index 3721d7da70448687931d773df7b3498c53ff8e5b..c43f0a21594dbed32bf15a0d7cbe4ad1f7ab2a58 100644
--- a/paddle/fluid/operators/CMakeLists.txt
+++ b/paddle/fluid/operators/CMakeLists.txt
@@ -291,7 +291,7 @@ op_library(gru_op DEPS sequence2batch gru_compute)
 op_library(recurrent_op DEPS executor)
 op_library(warpctc_op DEPS dynload_warpctc sequence_padding sequence_scale)
 op_library(cos_sim_op DEPS cos_sim_functor)
-op_library(parallel_do_op DEPS executor glog)
+op_library(parallel_do_op DEPS executor)
 op_library(unsqueeze_op DEPS reshape_op)
 op_library(squeeze_op DEPS reshape_op)
 op_library(extract_rows_op DEPS memory)
diff --git a/paddle/fluid/operators/batch_norm_op.cu.cc b/paddle/fluid/operators/batch_norm_op.cu.cc
index 08a10757edb9630cc7993768f84e4c8d45971162..ca6cd8669352fd5814f25a04433ca97fe4abe9ff 100644
--- a/paddle/fluid/operators/batch_norm_op.cu.cc
+++ b/paddle/fluid/operators/batch_norm_op.cu.cc
@@ -141,27 +141,6 @@ class BatchNormKernel
           bias->template data<BatchNormParamType<T>>(),
           est_mean->template data<BatchNormParamType<T>>(),
           est_var->template data<BatchNormParamType<T>>(), epsilon));
-
-      VLOG(3) << "before tensor copy";
-      Tensor mean_, var_, x_, y_;
-      framework::TensorCopy(*est_mean, platform::CPUPlace(), dev_ctx, &mean_);
-      framework::TensorCopy(*est_var, platform::CPUPlace(), dev_ctx, &var_);
-      framework::TensorCopy(*x, platform::CPUPlace(), dev_ctx, &x_);
-      framework::TensorCopy(*y, platform::CPUPlace(), dev_ctx, &y_);
-      VLOG(3) << "after tensor copy";
-      auto check_tensor = [&](const Tensor& check) {
-        float sum = .0;
-        for(size_t i=0; i < check.numel(); ++i) {
-          sum += check.data<float>()[i];
-        }
-        return sum;
-      };
-      VLOG(3) << "BatchNormKernel";
-      VLOG(3) << "mean" << check_tensor(mean_);
-      VLOG(3) << "var" << check_tensor(var_);
-      VLOG(3) << "x" << check_tensor(x_);
-      VLOG(3) << "y" << check_tensor(y_);
     } else {
       // Run training mode.
      // obtain running mean and running inv var, and see if we need to
diff --git a/paddle/fluid/operators/conv_cudnn_op.cu.cc b/paddle/fluid/operators/conv_cudnn_op.cu.cc
index 26357c4fc724d2b88da8bda32ee206a6eacdffc8..4a7a6bcf7154d5680de751e3c933be46fb09fd74 100644
--- a/paddle/fluid/operators/conv_cudnn_op.cu.cc
+++ b/paddle/fluid/operators/conv_cudnn_op.cu.cc
@@ -43,7 +43,6 @@ template <typename T>
 class CUDNNConvOpKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
-    VLOG(3) << "inside cudnn";
     PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),
                    "It must use CUDAPlace.");
     auto* input = ctx.Input<Tensor>("Input");
@@ -60,7 +59,7 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> {
     const T* input_data = input->data<T>();
     const T* filter_data = filter->data<T>();
     T* output_data = output->mutable_data<T>(ctx.GetPlace());
-    VLOG(3) << "get all inputs";
+
     // ------------------- cudnn descriptors ---------------------
     ScopedTensorDescriptor input_desc;
     ScopedTensorDescriptor output_desc;
@@ -73,7 +72,7 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> {
     cudnnConvolutionDescriptor_t cudnn_conv_desc =
         conv_desc.descriptor<T>(paddings, strides, dilations);
-    VLOG(3) << "create tensor descriptor";
+
 #if CUDNN_VERSION_MIN(7, 0, 1)
     // cudnn 7 can support groups, no need to do it mannually
     // FIXME(typhoonzero): find a better way to disable groups
@@ -82,7 +81,7 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> {
         cudnn_conv_desc, groups));
     groups = 1;
 #endif
-    VLOG(3) << "before create tensor descriptor";
+
     cudnnTensorDescriptor_t cudnn_input_desc = input_desc.descriptor<T>(
         layout, framework::vectorize2int(input->dims()), groups);
     cudnnTensorDescriptor_t cudnn_output_desc = output_desc.descriptor<T>(
@@ -112,7 +111,7 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> {
       output_height = output->dims()[2];
       output_width = output->dims()[3];
     }
-    VLOG(3) << "after create tensor descriptor";
+
     int group_offset_in =
         input_channels / groups * input_height * input_width * input_depth;
     int group_offset_out =
@@ -129,7 +128,6 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> {
     auto& dev_ctx = ctx.template device_context<platform::CUDADeviceContext>();
     auto handle = dev_ctx.cudnn_handle();
-    VLOG(3) << "set cudnn algorithm";
     CUDNN_ENFORCE(platform::dynload::cudnnGetConvolutionForwardAlgorithm(
         handle, cudnn_input_desc, cudnn_filter_desc, cudnn_conv_desc,
         cudnn_output_desc, CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT,
@@ -150,7 +148,7 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> {
           cudnn_conv_desc, CUDNN_DEFAULT_MATH));
     }
 #endif
-    VLOG(3) << "before get workspace";
+
     // get workspace size able to allocate
     CUDNN_ENFORCE(platform::dynload::cudnnGetConvolutionForwardWorkspaceSize(
         handle, cudnn_input_desc, cudnn_filter_desc, cudnn_conv_desc,
@@ -159,6 +157,7 @@
     // the limit because the algo is overrided to use tensor core.
     PADDLE_ENFORCE_LE(workspace_size_in_bytes, workspace_size_limit,
                       "workspace_size to be allocated exceeds the limit");
+
     // ------------------- cudnn conv forward ---------------------
     ScalingParamType<T> alpha = 1.0f, beta = 0.0f;
     for (int i = 0; i < groups; i++) {
@@ -312,6 +311,7 @@ class CUDNNConvGradOpKernel : public framework::OpKernel<T> {
             cudnn_filter_desc, filter_algo, &tmp_size));
         workspace_size_in_bytes = std::max(workspace_size_in_bytes, tmp_size);
       }
+
       // ------------------- cudnn conv backward data ---------------------
       ScalingParamType<T> alpha = 1.0f, beta = 0.0f;
       if (input_grad) {
diff --git a/paddle/fluid/operators/fetch_op.cc b/paddle/fluid/operators/fetch_op.cc
index 6c19494939cc04cc3a026f6f81e8976090638a7e..c197b45e8196a47def6465128e8ca39d8daefed6 100644
--- a/paddle/fluid/operators/fetch_op.cc
+++ b/paddle/fluid/operators/fetch_op.cc
@@ -42,8 +42,6 @@ class FetchOp : public framework::OperatorBase {
                    "Cannot find out_var in scope, out_var_name is %s",
                    out_name);
 
-    VLOG(3) << "fetch_var ptr " << fetch_var << " is " << (fetch_var == nullptr);
-    VLOG(3) << "out_var ptr " << out_var << " is " << (out_var == nullptr);
     auto col = static_cast<size_t>(Attr<int>("col"));
 
     auto *fetch_list = out_var->GetMutable<framework::FeedFetchList>();
diff --git a/paddle/fluid/operators/label_smooth_op.cc b/paddle/fluid/operators/label_smooth_op.cc
index b73b373dc4291359ae0ed590d78e1c48702f0a43..da59bd53bce010d0d6ad2ab14acaffb9cc2f99e6 100644
--- a/paddle/fluid/operators/label_smooth_op.cc
+++ b/paddle/fluid/operators/label_smooth_op.cc
@@ -34,7 +34,7 @@ class LabelSmoothOp : public framework::OperatorWithKernel {
     auto in_dims = ctx->GetInputDim("X");
     if (ctx->HasInput("PriorDist")) {
       auto noise_dims = ctx->GetInputDim("PriorDist");
-      int64_t noise_numel = paddle::framework::product(noise_dims);
+      auto noise_numel = paddle::framework::product(noise_dims);
       PADDLE_ENFORCE(
           in_dims[1] == noise_numel,
           "The number of elements in Input(PriorDist) must be equal to the "
diff --git a/paddle/fluid/operators/load_combine_op.cc b/paddle/fluid/operators/load_combine_op.cc
index 267313b7f8ac2a69bd2d821f4d942410ce8ce939..59f44b112cddddff5ff423f462650615710856a7 100644
--- a/paddle/fluid/operators/load_combine_op.cc
+++ b/paddle/fluid/operators/load_combine_op.cc
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 #include <fstream>
-#include <iostream>
+#include <memory>
 #include "paddle/fluid/framework/data_type_transform.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/platform/device_context.h"
@@ -33,10 +33,15 @@ class LoadCombineOp : public framework::OperatorBase {
                const platform::Place &place) const override {
     auto filename = Attr<std::string>("file_path");
     auto load_as_fp16 = Attr<bool>("load_as_fp16");
-
-    std::ifstream fin(filename, std::ios_base::in | std::ios_base::binary);
-    //std::ifstream fin(filename, std::ios_base::in);
-    PADDLE_ENFORCE(!fin.bad(),
+    auto format = Attr<std::string>("format");
+    std::unique_ptr<std::ifstream> fin;
+    if (format == "windows") {
+      fin.reset(new std::ifstream(filename,
+                                  std::ios_base::in | std::ios_base::binary));
+    } else {
+      fin.reset(new std::ifstream(filename));
+    }
+    PADDLE_ENFORCE(static_cast<bool>(*fin),
                    "Cannot open file %s for load_combine op", filename);
 
     auto out_var_names = Outputs("Out");
@@ -48,32 +53,20 @@ class LoadCombineOp : public framework::OperatorBase {
     auto &dev_ctx = *pool.Get(place);
 
     for (size_t i = 0; i < out_var_names.size(); i++) {
-      VLOG(3) << "load variable " << out_var_names[i];
       auto *out_var = scope.FindVar(out_var_names[i]);
 
       PADDLE_ENFORCE(out_var != nullptr, "Output variable %s cannot be found",
                      out_var_names[i]);
 
       auto *tensor = out_var->GetMutable<framework::LoDTensor>();
-      VLOG(3) << "Get Tensor";
+
       // Error checking
-      PADDLE_ENFORCE(!fin.bad(), "Cannot read more from file %s",
+      PADDLE_ENFORCE(static_cast<bool>(*fin), "Cannot read more from file %s",
                      filename);
-      VLOG(3) << "before deserialization";
+
       // Get data from fin to tensor
-      DeserializeFromStream(fin, tensor, dev_ctx);
-      // VLOG(3) << "after deserialization";
-      // framework::Tensor check;
-      // framework::TensorCopy(*tensor, platform::CPUPlace(), dev_ctx, &check);
-      // float sum = .0;
-      // for(size_t i=0; i < check.numel(); ++i) {
-      //   if(std::type_index(check.type()) == std::type_index(typeid(int64_t))) {
-      //     sum += static_cast<float>(check.data<int64_t>()[i]);
-      //   } else {
-      //     sum += check.data<float>()[i];
-      //   }
-      // }
-      // VLOG(3) << "sum result" << sum;
+      DeserializeFromStream(*fin, tensor, dev_ctx);
+
       auto in_dtype = framework::ToDataType(tensor->type());
       auto out_dtype =
           load_as_fp16 ? framework::proto::VarType::FP16 : in_dtype;
@@ -93,9 +86,7 @@ class LoadCombineOp : public framework::OperatorBase {
         tensor = out_var->GetMutable<framework::LoDTensor>();
         tensor->set_lod(fp16_tensor.lod());
         tensor->ShareDataWith(fp16_tensor);
-
       }
-      VLOG(3) << "load " << out_var_names[i] << " finished";
     }
   }
 };
@@ -119,6 +110,18 @@ class LoadCombineOpProtoMaker : public framework::OpProtoAndCheckerMaker {
                          "LoDTensors will be loaded from \"file_path\".")
         .AddCustomChecker(
             [](const std::string &path) { return !path.empty(); });
+    AddAttr<std::string>("format",
+                         R"DOC((windows|linux) the format of the model file:
+Windows and Linux use different newline conventions: Windows writes "\r\n"
+while Linux writes "\n". If this attribute is set to "windows", the file is
+opened in binary mode, which behaves identically on Linux and Windows. If it
+is set to "linux", the file is opened in text mode, so the newline
+translation of the current platform applies. Note that the two formats are
+not inter-compatible.)DOC")
+        .SetDefault("linux")
+        .AddCustomChecker([](const std::string &s) {
+          return s == "windows" || s == "linux";
+        });
     AddComment(R"DOC(
 LoadCombine Operator.
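Note: the check above changes from PADDLE_ENFORCE(!fin.bad(), ...) to
PADDLE_ENFORCE(static_cast<bool>(*fin), ...). This is more than a style fix: a
failed open sets the stream's failbit, not its badbit, so !fin.bad() lets a
missing file slip through, while the bool conversion (equivalent to
!fin->fail()) catches it. A minimal standalone sketch of the difference, using
only the standard library (not part of this patch):

    #include <fstream>
    #include <iostream>

    int main() {
      // Opening a file that does not exist sets failbit, but not badbit.
      std::ifstream fin("does_not_exist.bin",
                        std::ios_base::in | std::ios_base::binary);
      std::cout << !fin.bad() << "\n";              // prints 1: check wrongly passes
      std::cout << static_cast<bool>(fin) << "\n";  // prints 0: open failure detected
      return 0;
    }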
diff --git a/paddle/fluid/operators/load_op.cc b/paddle/fluid/operators/load_op.cc
index 51219504ffa2a778b56351f759e8a8dfb951ad91..e0e2c3dc4fa0af6bd6a58106364e21099d7bc517 100644
--- a/paddle/fluid/operators/load_op.cc
+++ b/paddle/fluid/operators/load_op.cc
@@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 #include <fstream>
+#include <memory>
 
 #include "paddle/fluid/framework/data_type_transform.h"
 #include "paddle/fluid/framework/op_registry.h"
@@ -34,8 +35,15 @@ class LoadOp : public framework::OperatorBase {
     // FIXME(yuyang18): We save variable to local file now, but we should change
     // it to save an output stream.
     auto filename = Attr<std::string>("file_path");
-    std::ifstream fin(filename);
-    PADDLE_ENFORCE(static_cast<bool>(fin), "Cannot open file %s for load op",
+    auto format = Attr<std::string>("format");
+    std::unique_ptr<std::ifstream> fin;
+    if (format == "windows") {
+      fin.reset(new std::ifstream(filename,
+                                  std::ios_base::in | std::ios_base::binary));
+    } else {
+      fin.reset(new std::ifstream(filename));
+    }
+    PADDLE_ENFORCE(static_cast<bool>(*fin), "Cannot open file %s for load op",
                    filename);
 
     auto out_var_name = Output("Out");
@@ -44,9 +52,9 @@ class LoadOp : public framework::OperatorBase {
                    out_var_name);
 
     if (out_var->IsType<framework::LoDTensor>()) {
-      LoadLodTensor(fin, place, out_var);
+      LoadLodTensor(*fin, place, out_var);
     } else if (out_var->IsType<framework::SelectedRows>()) {
-      LoadSelectedRows(fin, place, out_var);
+      LoadSelectedRows(*fin, place, out_var);
     } else {
       PADDLE_ENFORCE(
           false,
@@ -110,6 +118,18 @@ class LoadOpProtoMaker : public framework::OpProtoAndCheckerMaker {
             R"(Variable will be loaded from "file_path")")
         .AddCustomChecker(
             [](const std::string &path) { return !path.empty(); });
+    AddAttr<std::string>("format",
+                         R"DOC((windows|linux) the format of the model file:
+Windows and Linux use different newline conventions: Windows writes "\r\n"
+while Linux writes "\n". If this attribute is set to "windows", the file is
+opened in binary mode, which behaves identically on Linux and Windows. If it
+is set to "linux", the file is opened in text mode, so the newline
+translation of the current platform applies. Note that the two formats are
+not inter-compatible.)DOC")
+        .SetDefault("linux")
+        .AddCustomChecker([](const std::string &s) {
+          return s == "windows" || s == "linux";
+        });
     AddComment(
         "Load operator will load a LoDTensor / SelectedRows variable from disk "
         "file.");
diff --git a/paddle/fluid/operators/save_combine_op.cc b/paddle/fluid/operators/save_combine_op.cc
index 6ab50964553cd4ddd1b66d74f96df8bd591c1461..f1cd7c6ff64e43c7c2ddc25e8965e577c357894d 100644
--- a/paddle/fluid/operators/save_combine_op.cc
+++ b/paddle/fluid/operators/save_combine_op.cc
@@ -14,6 +14,7 @@ limitations under the License. */
 
 #include <stdint.h>
 #include <fstream>
+#include <memory>
 #include <numeric>
 #include <sstream>
 #include "paddle/fluid/framework/data_type.h"
@@ -41,6 +42,7 @@ class SaveCombineOp : public framework::OperatorBase {
     auto filename = Attr<std::string>("file_path");
     auto overwrite = Attr<bool>("overwrite");
     auto save_as_fp16 = Attr<bool>("save_as_fp16");
+    auto format = Attr<std::string>("format");
 
     bool is_present = FileExists(filename);
     if (is_present && !overwrite) {
@@ -49,8 +51,14 @@ class SaveCombineOp : public framework::OperatorBase {
     }
 
     MkDirRecursively(DirName(filename).c_str());
-    std::ofstream fout(filename, std::ios_base::out | std::ios_base::binary);
-    PADDLE_ENFORCE(static_cast<bool>(fout), "Cannot open %s to write",
+    std::unique_ptr<std::ofstream> fout;
+    if (format == "windows") {
+      fout.reset(new std::ofstream(filename,
+                                   std::ios_base::out | std::ios_base::binary));
+    } else {
+      fout.reset(new std::ofstream(filename));
+    }
+    PADDLE_ENFORCE(static_cast<bool>(*fout), "Cannot open %s to write",
                    filename);
 
     auto inp_var_names = Inputs("X");
@@ -86,12 +94,11 @@ class SaveCombineOp : public framework::OperatorBase {
         // copy LoD info to the new tensor
         out.set_lod(tensor.lod());
         framework::TransDataType(in_kernel_type, out_kernel_type, tensor, &out);
-        framework::SerializeToStream(fout, out, dev_ctx);
+        framework::SerializeToStream(*fout, out, dev_ctx);
       } else {
-        framework::SerializeToStream(fout, tensor, dev_ctx);
+        framework::SerializeToStream(*fout, tensor, dev_ctx);
       }
     }
-    fout.close();
   }
 };
@@ -124,6 +131,18 @@ to a file on disk.
         "The \"file_path\" where the LoDTensor variables will be saved.")
         .AddCustomChecker(
             [](const std::string &path) { return !path.empty(); });
+    AddAttr<std::string>("format",
+                         R"DOC((windows|linux) the format of the model file:
+Windows and Linux use different newline conventions: Windows writes "\r\n"
+while Linux writes "\n". If this attribute is set to "windows", the file is
+opened in binary mode, which behaves identically on Linux and Windows. If it
+is set to "linux", the file is opened in text mode, so the newline
+translation of the current platform applies. Note that the two formats are
+not inter-compatible.)DOC")
+        .SetDefault("linux")
+        .AddCustomChecker([](const std::string &s) {
+          return s == "windows" || s == "linux";
+        });
   }
 };
diff --git a/paddle/fluid/operators/save_op.cc b/paddle/fluid/operators/save_op.cc
index e79cffcf498c52ed14db235f6221cfdf08399c9d..9eea9e1a9517e84edcb11695ca33c5b7bfdc66f1 100644
--- a/paddle/fluid/operators/save_op.cc
+++ b/paddle/fluid/operators/save_op.cc
@@ -14,6 +14,7 @@ limitations under the License. */
 
 #include <stdint.h>
 #include <fstream>
+#include <memory>
 #include <numeric>
 
 #include "paddle/fluid/framework/data_type.h"
@@ -64,6 +65,7 @@ class SaveOp : public framework::OperatorBase {
                      framework::Variable *var) const {
     auto filename = Attr<std::string>("file_path");
     auto overwrite = Attr<bool>("overwrite");
+    auto format = Attr<std::string>("format");
 
     if (FileExists(filename) && !overwrite) {
       PADDLE_THROW("%s is existed, cannot save to it when overwrite=false",
@@ -80,8 +82,14 @@ class SaveOp : public framework::OperatorBase {
     // FIXME(yuyang18): We save variable to local file now, but we should change
     // it to save an output stream.
-    std::ofstream fout(filename);
-    PADDLE_ENFORCE(static_cast<bool>(fout), "Cannot open %s to write",
+    std::unique_ptr<std::ofstream> fout;
+    if (format == "windows") {
+      fout.reset(new std::ofstream(filename,
+                                   std::ios_base::out | std::ios_base::binary));
+    } else {
+      fout.reset(new std::ofstream(filename));
+    }
+    PADDLE_ENFORCE(static_cast<bool>(*fout), "Cannot open %s to write",
                    filename);
 
     auto save_as_fp16 = Attr<bool>("save_as_fp16");
@@ -95,11 +103,10 @@ class SaveOp : public framework::OperatorBase {
       framework::TransDataType(in_kernel_type, out_kernel_type, tensor, &out);
       // copy LoD info to the new tensor
       out.set_lod(tensor.lod());
-      framework::SerializeToStream(fout, out, dev_ctx);
+      framework::SerializeToStream(*fout, out, dev_ctx);
     } else {
-      framework::SerializeToStream(fout, tensor, dev_ctx);
+      framework::SerializeToStream(*fout, tensor, dev_ctx);
     }
-    fout.close();
   }
 
   void SaveSelectedRows(const framework::Scope &scope,
@@ -110,6 +117,7 @@ class SaveOp : public framework::OperatorBase {
         lt_var != nullptr,
         "Can not find variable kLookupTablePath for SaveSelectedRows");
     std::string filename = lt_var->data();
+    auto format = Attr<std::string>("format");
     VLOG(4) << "SaveSelectedRows get File name: " << filename;
 
     MkDirRecursively(DirName(filename).c_str());
@@ -122,11 +130,16 @@ class SaveOp : public framework::OperatorBase {
     // FIXME(yuyang18): We save variable to local file now, but we should change
     // it to save an output stream.
-    std::ofstream fout(filename);
-    PADDLE_ENFORCE(static_cast<bool>(fout), "Cannot open %s to write",
+    std::unique_ptr<std::ofstream> fout;
+    if (format == "windows") {
+      fout.reset(new std::ofstream(filename,
+                                   std::ios_base::out | std::ios_base::binary));
+    } else {
+      fout.reset(new std::ofstream(filename));
+    }
+    PADDLE_ENFORCE(static_cast<bool>(*fout), "Cannot open %s to write",
                    filename);
-    framework::SerializeToStream(fout, selectedRows, dev_ctx);
-    fout.close();
+    framework::SerializeToStream(*fout, selectedRows, dev_ctx);
   }
 };
 
@@ -154,6 +167,18 @@ This operator will serialize and write LoDTensor / SelectedRows variable to file
         "The \"file_path\" where the variable will be saved.")
         .AddCustomChecker(
             [](const std::string &path) { return !path.empty(); });
+    AddAttr<std::string>("format",
+                         R"DOC((windows|linux) the format of the model file:
+Windows and Linux use different newline conventions: Windows writes "\r\n"
+while Linux writes "\n". If this attribute is set to "windows", the file is
+opened in binary mode, which behaves identically on Linux and Windows. If it
+is set to "linux", the file is opened in text mode, so the newline
+translation of the current platform applies. Note that the two formats are
+not inter-compatible.)DOC")
+        .SetDefault("linux")
+        .AddCustomChecker([](const std::string &s) {
+          return s == "windows" || s == "linux";
+        });
   }
 };
diff --git a/paddle/fluid/platform/init.cc b/paddle/fluid/platform/init.cc
index dd865e139dc0b8a9c40d2733b6832ccf744b7f53..c104cd40cc247ec3c6531ea4dbaebb99bdbcea45 100644
--- a/paddle/fluid/platform/init.cc
+++ b/paddle/fluid/platform/init.cc
@@ -94,9 +94,7 @@ void InitDevices(bool init_p2p, const std::vector<int> devices) {
   int count = 0;
 #ifdef PADDLE_WITH_CUDA
   try {
-    VLOG(3) << "get cuda count";
     count = platform::GetCUDADeviceCount();
-    VLOG(3) << "get cuda pass";
   } catch (const std::exception &exp) {
     LOG(WARNING) << "Compiled with WITH_GPU, but no GPU found in runtime.";
   }
@@ -109,14 +107,11 @@ void InitDevices(bool init_p2p, const std::vector<int> devices) {
     }
     places.emplace_back(platform::CUDAPlace(devices[i]));
   }
-  VLOG(3) << "before p2p";
   if (init_p2p) {
     InitP2P(devices);
   }
-  VLOG(3) << "p2p pass";
   places.emplace_back(platform::CPUPlace());
   platform::DeviceContextPool::Init(places);
-  VLOG(3) << "init pass";
 #ifndef PADDLE_WITH_MKLDNN
   platform::SetNumThreads(FLAGS_paddle_num_threads);
 #endif
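Note: the same format-dependent stream construction is repeated in load_op,
load_combine_op, save_op, and save_combine_op above. The shape of that pattern,
factored into one place as a minimal standalone sketch (OpenModelFile is a
hypothetical helper for illustration, not part of this patch):

    #include <fstream>
    #include <memory>
    #include <string>

    // Hypothetical helper mirroring the pattern the patch adds to the four ops:
    // "windows" opens the stream in binary mode so no newline translation is
    // performed; any other value ("linux") opens it in text mode, where the
    // platform's newline translation applies.
    std::unique_ptr<std::ifstream> OpenModelFile(const std::string &filename,
                                                 const std::string &format) {
      if (format == "windows") {
        return std::unique_ptr<std::ifstream>(new std::ifstream(
            filename, std::ios_base::in | std::ios_base::binary));
      }
      return std::unique_ptr<std::ifstream>(new std::ifstream(filename));
    }

    int main() {
      auto fin = OpenModelFile("__model__", "windows");
      return static_cast<bool>(*fin) ? 0 : 1;  // 0 if the file opened cleanly
    }

The unique_ptr indirection exists because the two constructor calls pick
different open modes at runtime and std::ifstream itself is not assignable
pre-C++11; heap-allocating the stream sidesteps that while keeping one code
path for the subsequent reads.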