diff --git a/paddle/gserver/layers/MKLDNNFcLayer.cpp b/paddle/gserver/layers/MKLDNNFcLayer.cpp index 30f567eaf8248a8fba1b461a2bdbf2aab13f9e08..d201fac65e0459050304195140e1aae081468f43 100644 --- a/paddle/gserver/layers/MKLDNNFcLayer.cpp +++ b/paddle/gserver/layers/MKLDNNFcLayer.cpp @@ -57,11 +57,14 @@ bool MKLDNNFcLayer::init(const LayerMap& layerMap, } void MKLDNNFcLayer::convertWeightsFromPaddle() { - if (FLAGS_use_mkldnn_wgt) { + if (hasInitedWgt_) { return; } - if (hasInitedWgt_) { + // TODO(TJ): dst format should get from wgtVal_ + int dstFmt = PARAM_FORMAT_MKLDNN_OI; + int srcFmt = weight_->getParameterPtr()->getHeaderFormat(); + if (srcFmt == dstFmt) { return; } @@ -78,6 +81,7 @@ void MKLDNNFcLayer::convertWeightsFromPaddle() { MatrixPtr paddleWgtT; paddleWgt->transpose(paddleWgtT, true); weight_->getW()->copyFrom(*paddleWgtT); + weight_->getParameterPtr()->setHeaderFormat(dstFmt); hasInitedWgt_ = true; } diff --git a/paddle/gserver/tests/MKLDNNTester.cpp b/paddle/gserver/tests/MKLDNNTester.cpp index 99c8c4948c9b05ad15d1217ebb70026bbd48453f..de1635be2af37cd0ba49010199a417090865b0e4 100644 --- a/paddle/gserver/tests/MKLDNNTester.cpp +++ b/paddle/gserver/tests/MKLDNNTester.cpp @@ -330,9 +330,7 @@ void MKLDNNTester::run(const TestConfig& dnn, log_ = log; lvl_ = level; - // Firstly test FLAGS_use_mkldnn_wgt = false - FLAGS_use_mkldnn_wgt = false; - // reset and run once + // Firstly test mkldnn init from PARAM_FORMAT_ORIGINAL weight reset(dnn, ref, batchSize); randomWgtDatas(); clearWgtDiffs(); @@ -342,17 +340,32 @@ void MKLDNNTester::run(const TestConfig& dnn, runOnce(); } - // Then test FLAGS_use_mkldnn_wgt = true - FLAGS_use_mkldnn_wgt = true; - // after run once the mkldnn weight has been stored in dnnlayer + if (parameters_[DNN].empty()) { + // has no paramters + return; + } + + // After run some iterations, the mkldnn weight has been stored in dnnLayer + // and we can also get the mkldnn weight parameter header format. + // Weight parameter should always be index 0 (and bias index 1). + // TODO(TJ): should also consider mean and var format when batchnorm ready + int dnnWgtFmt = parameters_[DNN][0]->getHeaderFormat(); + int refWgtFmt = parameters_[REF][0]->getHeaderFormat(); + if (dnnWgtFmt == refWgtFmt) { + // weight format are equal, so no need check more + return; + } + // then save the weights and restart again vector dnnWgts, refWgts; CHECK_EQ(parameters_[DNN].size(), parameters_[REF].size()); saveWgt(parameters_[DNN], dnnWgts); saveWgt(parameters_[REF], refWgts); - // restart again with flag true + // restart again with dnn weight format reset(dnn, ref, batchSize); + // TODO(TJ): should also considerate mean and var format when batchnorm ready + parameters_[DNN][0]->setHeaderFormat(dnnWgtFmt); // restore wgt restoreWgt(dnnWgts, parameters_[DNN]); diff --git a/paddle/gserver/tests/MKLDNNTester.h b/paddle/gserver/tests/MKLDNNTester.h index 522eeaf24b1949abac057a1e59e9977610be23c0..e55e4493ffdfe45b8cfdee423febd1878b8b3d8a 100644 --- a/paddle/gserver/tests/MKLDNNTester.h +++ b/paddle/gserver/tests/MKLDNNTester.h @@ -108,7 +108,7 @@ private: * if many(>failRate) wrong(abs(dnn-ref)/abs(ref)>thres) points return the * max(diff/ref) * else return sum(abs(a-b)) / sum(abs(b)) - * The return value should smaller than eps when passing. + * The return value should be smaller than eps when passing. */ double getDelta(const real* d1, const real* d2, diff --git a/paddle/parameter/Parameter.cpp b/paddle/parameter/Parameter.cpp index ebe36d49376882fe4c1013e19dcf71f452b3e501..f0311095012d944768d80abe423d4a9bfc0e97f5 100644 --- a/paddle/parameter/Parameter.cpp +++ b/paddle/parameter/Parameter.cpp @@ -48,7 +48,8 @@ Parameter::Parameter(const ParameterConfig& config, bool useGpu, bool doInit) deviceId_(-1), sharedCount_(0), updateCounter_(0), - updated_(false) { + updated_(false), + headerFormat_(PARAM_FORMAT_ORIGINAL) { setID(-1); /* capture uninitialized id */ if (useGpu_ && FLAGS_parallel_nn) { /* gpu environment is specified by device property */ @@ -285,7 +286,7 @@ bool Parameter::save(const std::string& filename) const { bool Parameter::save(std::ostream& s) const { CpuVector vec(*bufs_[PARAMETER_VALUE].get()); Header header; - header.version = kFormatVersion; + header.format = headerFormat_; header.valueSize = sizeof(real); header.size = getSize(); @@ -344,8 +345,9 @@ bool Parameter::load(std::istream& s) { Header header; CHECK(s.read(reinterpret_cast(&header), sizeof(header))) << "Fail to read parameter " << getName(); - CHECK_EQ(header.version, kFormatVersion) << "Incorrect format version: " - << header.version; + CHECK(isHeaderFormatSupported(header.format)) << "Incorrect format version: " + << header.format; + headerFormat_ = header.format; CHECK_EQ(header.size, getSize()) << "The size (" << header.size << ") in the file does not match the size " << "(" << getSize() << ") of the parameter: " << getName(); diff --git a/paddle/parameter/Parameter.h b/paddle/parameter/Parameter.h index 0bac76f068ec22bec52766b43e331fe109a34188..e31cbc3dee6c57851c241e117dbbd9b701db9d2c 100644 --- a/paddle/parameter/Parameter.h +++ b/paddle/parameter/Parameter.h @@ -34,6 +34,20 @@ limitations under the License. */ namespace paddle { +typedef enum { + /// The paddle original basic format + PARAM_FORMAT_ORIGINAL = 0, + + /// See mkldnn_memory_format_t in + /// https://github.com/01org/mkl-dnn/blob/master/include/mkldnn_types.h + /// for a detailed description. + /// 2D weights tensor in the format (output channels, input channels). + PARAM_FORMAT_MKLDNN_OI, + + /// The total format items numbers + PARAM_FORMAT_ITEMS, +} PARAM_FORMAT; + class SparsePrefetchRowCpuMatrix; class Parameter; @@ -242,14 +256,30 @@ public: /// Initialize the value to 0 void zeroMem(); - static const int kFormatVersion = 0; /// file header structure struct Header { - int32_t version; // = 0, file format version + int32_t format; // = PARAM_FORMAT uint32_t valueSize; // = sizeof(real) uint64_t size; // = getSize() }; + /** + * @brief Is the header format supported. + */ + static bool isHeaderFormatSupported(int32_t fmt) { + return fmt < PARAM_FORMAT_ITEMS; + } + + /** + * @brief Get the format in header. + */ + int getHeaderFormat() { return headerFormat_; } + + /** + * @brief Set the format in header. + */ + void setHeaderFormat(int32_t fmt) { headerFormat_ = fmt; } + /** * @brief Parameter Update Hook. * @@ -321,6 +351,9 @@ protected: bool updated_; SparseFormat format_; + /// The header format for saving or loading param + int32_t headerFormat_; + std::vector> updaterHooks_; public: diff --git a/paddle/pserver/ParameterServer2.cpp b/paddle/pserver/ParameterServer2.cpp index d7c1d4f788f44c6bfcec040ba24bdc454348c911..54f5c4c0fb4994871edc7a1e52237c9f903ce63b 100644 --- a/paddle/pserver/ParameterServer2.cpp +++ b/paddle/pserver/ParameterServer2.cpp @@ -1032,8 +1032,8 @@ void ParameterServer2::loadValueVector(const LoadValueRequest& request, Parameter::Header header; CHECK(fs.read(reinterpret_cast(&header), sizeof(header))) << "Fail to read parameters in pserver"; - CHECK_EQ(header.version, Parameter::kFormatVersion) - << "Incorrect format version: " << header.version; + CHECK(Parameter::isHeaderFormatSupported(header.format)) + << "Incorrect format version: " << header.format; CHECK_EQ(header.size, (size_t)size_) << "The size (" << header.size << ") in the file does not match the size " << "(" << size_ << ") of the pserver: " << serverId_; @@ -1063,7 +1063,8 @@ void ParameterServer2::saveValueVector(const SaveValueRequest& request, CpuVector& vec = vectors_[PARAMETER_APPLY] ? *vectors_[PARAMETER_APPLY] : *vectors_[PARAMETER_VALUE]; Parameter::Header header; - header.version = Parameter::kFormatVersion; + // TODO(TJ): save param headerFormat_ + header.format = PARAM_FORMAT_ORIGINAL; header.valueSize = sizeof(real); header.size = size_; diff --git a/paddle/trainer/TrainerConfigHelper.cpp b/paddle/trainer/TrainerConfigHelper.cpp index eba40862b926cfe863c569e73a6a3ceabcf1f3b4..a0a365aa0bb0ac26939a02c1cd626d0c17c6a9fe 100644 --- a/paddle/trainer/TrainerConfigHelper.cpp +++ b/paddle/trainer/TrainerConfigHelper.cpp @@ -29,7 +29,6 @@ DECLARE_bool(with_gpu); DECLARE_bool(parallel_nn); DECLARE_string(config_args); DECLARE_bool(use_mkldnn); -DECLARE_bool(use_mkldnn_wgt); const char *kConfigParserModuleName = "paddle.trainer.config_parser"; const char *kConfigParserFuncName = "parse_config_and_serialize"; @@ -47,7 +46,6 @@ TrainerConfigHelper::TrainerConfigHelper(const std::string &configFilePath) << ",with_cost=" << FLAGS_with_cost << ",use_gpu=" << FLAGS_use_gpu << ",parallel_nn=" << FLAGS_parallel_nn << ",use_mkldnn=" << FLAGS_use_mkldnn - << ",use_mkldnn_wgt=" << FLAGS_use_mkldnn_wgt << ",cudnn_version=" << hl_get_cudnn_lib_version(); if (!FLAGS_config_args.empty()) { configArgs << "," << FLAGS_config_args; diff --git a/paddle/utils/Flags.cpp b/paddle/utils/Flags.cpp index 600c83a8487191895de635dd8433f6c44e86ce77..ab1c181c62cdbee8cc5f804ec9aaf63ac5464ad6 100644 --- a/paddle/utils/Flags.cpp +++ b/paddle/utils/Flags.cpp @@ -27,7 +27,6 @@ DEFINE_bool(use_mkldnn, false, "Default still keep use CPU training"); DEFINE_bool(use_mkldnn, false, "Only support CPU training"); #endif -DEFINE_bool(use_mkldnn_wgt, false, "Init weight from CPU weight"); DEFINE_bool(parallel_nn, false, "Whether to use multi-threads to calculate one neural network." diff --git a/paddle/utils/Flags.h b/paddle/utils/Flags.h index 0aca4c0ee036ee8490c0ceca7279df876dc21947..1832bb515ec85df3d7733e01b063a01ad6a3b282 100644 --- a/paddle/utils/Flags.h +++ b/paddle/utils/Flags.h @@ -41,4 +41,3 @@ DECLARE_string(predict_file); DECLARE_bool(prev_batch_state); DECLARE_string(init_model_path); DECLARE_bool(use_mkldnn); -DECLARE_bool(use_mkldnn_wgt);