提交 341486d5 编写于 作者: H hedaoyuan 提交者: GitHub

Merge pull request #107 from qingqing01/cudnn_conv

Fix a cuDNN conv bug that occurs in the image classification demo on GTX GPUs
...@@ -85,6 +85,7 @@ bool CudnnConvLayer::init(const LayerMap &layerMap, ...@@ -85,6 +85,7 @@ bool CudnnConvLayer::init(const LayerMap &layerMap,
biasOffset_ = numFilters_ / groups_[0]; biasOffset_ = numFilters_ / groups_[0];
} }
batchNum_ = 0;
isSelectAlgo_ = false; isSelectAlgo_ = false;
return true; return true;
} }
...@@ -132,6 +133,11 @@ void CudnnConvLayer::reshape(int batchSize) { ...@@ -132,6 +133,11 @@ void CudnnConvLayer::reshape(int batchSize) {
getOutput().setFrameHeight(outputH_); getOutput().setFrameHeight(outputH_);
getOutput().setFrameWidth(outputW_); getOutput().setFrameWidth(outputW_);
// if the batchSize remains the same, set isSelectAlgo_ true.
// Otherwise, set isSelectAlgo_ false and select algo again.
isSelectAlgo_ = (batchSize == batchNum_);
batchNum_ = batchSize;
size_t maxWorkSpace = 0; size_t maxWorkSpace = 0;
for (size_t i = 0; i < inputLayers_.size(); i++) { for (size_t i = 0; i < inputLayers_.size(); i++) {
CHECK_EQ(inputLayers_[i]->getOutput().value->getWidth(), CHECK_EQ(inputLayers_[i]->getOutput().value->getWidth(),
...@@ -160,6 +166,10 @@ void CudnnConvLayer::reshape(int batchSize) { ...@@ -160,6 +166,10 @@ void CudnnConvLayer::reshape(int batchSize) {
maxWorkSpace = std::max(fwdLimitBytes_[i], bwdDataLimitBytes_[i]); maxWorkSpace = std::max(fwdLimitBytes_[i], bwdDataLimitBytes_[i]);
maxWorkSpace = std::max(maxWorkSpace, bwdFilterLimitBytes_[i]); maxWorkSpace = std::max(maxWorkSpace, bwdFilterLimitBytes_[i]);
VLOG(3) << getName() << " Fwd / BwdData / BwdFilter algo: " << fwdAlgo_[i]
<< " / " << bwdDataAlgo_[i]
<< " / " << bwdFilterAlgo_[i];
} }
} }
......
...@@ -87,6 +87,10 @@ protected: ...@@ -87,6 +87,10 @@ protected:
/// Whether the conv algorithm has already been selected. /// Whether the conv algorithm has already been selected.
bool isSelectAlgo_; bool isSelectAlgo_;
/// batchNum is used to record batch size. If the batch size is changed,
/// the selection algorithm will be called.
int batchNum_;
public: public:
explicit CudnnConvLayer(const LayerConfig& config) : ConvBaseLayer(config) {} explicit CudnnConvLayer(const LayerConfig& config) : ConvBaseLayer(config) {}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册