Commit f27fd9dc authored by liaogang

follow comments

Parent 8cde2d11
...@@ -44,7 +44,6 @@ if(MKL_INC_DIR AND MKL_CORE_LIB AND MKL_SEQUENTIAL_LIB AND MKL_INTEL_LP64)
   message(STATUS "Found MKL (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})")
   set(CBLAS_FOUND ON)
   if(${MKL_LAPACK_INC_DIR})
-    add_definitions(-DPADDLE_USE_LAPACK)
     message(STATUS "Found lapack in MKL (include: ${MKL_LAPACK_INC_DIR})")
   endif()
   return() # return file.
...@@ -80,7 +79,6 @@ if(ATLAS_INC_DIR AND ATLAS_CBLAS_LIB AND ATLAS_LIB AND NOT CBLAS_FOUND)
   message(STATUS "Found ATLAS (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})")
   set(CBLAS_FOUND ON)
   if(ATLAS_CLAPACK_INC_DIR)
-    add_definitions(-DPADDLE_USE_LAPACK)
     message(STATUS "Found lapack in ATLAS (include: ${ATLAS_CLAPACK_INC_DIR})")
   endif()
   return()
...@@ -114,7 +112,6 @@ if(OPENBLAS_INC_DIR AND OPENBLAS_LIB)
   message(STATUS "Found OpenBLAS (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})")
   set(CBLAS_FOUND ON)
   if(OPENBLAS_LAPACKE_INC_DIR)
-    add_definitions(-DPADDLE_USE_LAPACK)
     message(STATUS "Found lapack in OpenBLAS (include: ${OPENBLAS_LAPACKE_INC_DIR})")
   endif()
   return()
......
...@@ -27,8 +27,6 @@ IF(NOT ${CBLAS_FOUND})
     SET(CBLAS_LIBRARIES "${CBLAS_INSTALL_DIR}/lib/libopenblas.a" CACHE FILEPATH "openblas library" FORCE)
   ENDIF(WIN32)
-  ADD_DEFINITIONS(-DPADDLE_USE_LAPACK)
   ExternalProject_Add(
     openblas
     ${EXTERNAL_PROJECT_LOG_ARGS}
......
...@@ -40,18 +40,18 @@ public:
 namespace gpu {
 static __device__ Active<real>::forward forward[] = HPPL_ACTIVE_FUNCTION;
 static __device__ Active<real>::backward backward[] = HPPL_ACTIVE_FUNCTION;
-}
+}  // namespace gpu
 #else
 namespace cpu {
 static Active<real>::forward forward[] = HPPL_ACTIVE_FUNCTION;
 static Active<real>::backward backward[] = HPPL_ACTIVE_FUNCTION;
-}
+}  // namespace cpu
 #ifdef __AVX__
 namespace avx {
 static Active<__m256>::forward forward[] = HPPL_ACTIVE_FUNCTION;
 static Active<__m256>::backward backward[] = HPPL_ACTIVE_FUNCTION;
-}
+}  // namespace avx
 #endif
 #endif
......
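For context on the tables in the hunk above: `HPPL_ACTIVE_FUNCTION` expands to an initializer list of activation functions, so each namespace (gpu/cpu/avx) carries a parallel array of function pointers that kernels index by activation type. A minimal sketch of that dispatch-table idea (hypothetical functions, not Paddle's actual macro):

#include <cmath>
#include <cstdio>

// One forward-activation signature, as in Active<real>::forward.
using ActivationFn = float (*)(float);

static float sigmoidForward(float x) { return 1.0f / (1.0f + std::exp(-x)); }
static float reluForward(float x) { return x > 0.0f ? x : 0.0f; }

// Analogue of `static Active<real>::forward forward[] = HPPL_ACTIVE_FUNCTION;`
static ActivationFn forward[] = {sigmoidForward, reluForward};

int main() {
  int activeType = 1;  // in Paddle this index would come from the layer config
  std::printf("relu(-2) = %f\n", forward[activeType](-2.0f));  // dispatch by index
  return 0;
}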
...@@ -273,23 +273,23 @@ extern void hl_bilinear_forward(const real* inData,
                                 const real ratioW);
 /**
  * @brief Bilinear interpolation backward.
  *
  * @param[out] inGrad input gradient.
  * @param[in] inImgH input image height.
  * @param[in] inImgW input image width.
  * @param[in] inputH input batchSize.
  * @param[in] inputW input image data dim.
  * @param[in] outGrad output gradient.
  * @param[in] outImgH output image height.
  * @param[in] outImgW output image width.
  * @param[in] outputH output batchSize.
  * @param[in] outputW output image data dim.
  * @param[in] numChannels number of channels.
  * @param[in] ratioH inImgH / outImgH.
  * @param[in] ratioW inImgW / outImgW.
  *
  */
 extern void hl_bilinear_backward(real* inGrad,
                                  const size_t inImgH,
                                  const size_t inImgW,
......
...@@ -16,7 +16,7 @@ limitations under the License. */
 #include <sys/time.h>
 #include "hl_cuda.h"
 #include "hl_thread.ph"
-#include "paddle/utils/DynamicLoad.h"
+#include "paddle/utils/DynamicLoader.h"
 #include "paddle/utils/Logging.h"
 namespace dynload {
......
...@@ -17,7 +17,7 @@ limitations under the License. */
 #include <gflags/gflags.h>
 #include "hl_cuda_cudnn.ph"
 #include "hl_thread.ph"
-#include "paddle/utils/DynamicLoad.h"
+#include "paddle/utils/DynamicLoader.h"
 #include "paddle/utils/Logging.h"
 DEFINE_int32(cudnn_conv_workspace_limit_in_mb,
......
...@@ -24,7 +24,7 @@ limitations under the License. */
 #include "hl_cuda.ph"
 #include "hl_thread.ph"
 #include "paddle/utils/Logging.h"
-#include "paddle/utils/DynamicLoad.h"
+#include "paddle/utils/DynamicLoader.h"
 // clang-format on
 namespace dynload {
......
...@@ -14,7 +14,7 @@ limitations under the License. */
 #include "hl_warpctc_wrap.h"
 #include <mutex>
-#include "paddle/utils/DynamicLoad.h"
+#include "paddle/utils/DynamicLoader.h"
 #include "paddle/utils/Logging.h"
 namespace dynload {
......
...@@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#include "BufferArg.h"
 #include <gtest/gtest.h>
+#include "BufferArg.h"
 #include "paddle/math/MemoryHandle.h"
 namespace paddle {
......
...@@ -165,12 +165,12 @@ void CosSimBackward<DEVICE_TYPE_CPU>(const CpuMatrix& out_grad,
     real reciprocal_square_sum_x = 1.0f / square_sum_x;
     real reciprocal_square_sum_y = 1.0f / square_sum_y;
     for (size_t j = 0; j < dim; ++j) {
-      prev_grad_x[j] +=
-          out[i] * grad[i] * (prev_out_y[j] * reciprocal_xy -
-                              prev_out_x[j] * reciprocal_square_sum_x);
-      prev_grad_y[j] +=
-          out[i] * grad[i] * (prev_out_x[j] * reciprocal_xy -
-                              prev_out_y[j] * reciprocal_square_sum_y);
+      prev_grad_x[j] += out[i] * grad[i] *
+                        (prev_out_y[j] * reciprocal_xy -
+                         prev_out_x[j] * reciprocal_square_sum_x);
+      prev_grad_y[j] += out[i] * grad[i] *
+                        (prev_out_x[j] * reciprocal_xy -
+                         prev_out_y[j] * reciprocal_square_sum_y);
     }
   }
 }
......
...@@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#include "Function.h"
 #include <gtest/gtest.h>
+#include "Function.h"
 #include "paddle/math/SparseMatrix.h"
 namespace paddle {
......
...@@ -74,9 +74,9 @@ TEST(MulOp, DDDMatrixMul) {
 }
 /**
  * C += A * B, B, C dense, A sparse
  * dense = sparse * dense
  */
 void testFuncDSparseDMatrix(
     size_t dimM, size_t dimN, size_t dimK, size_t nnz, SparseFormat FORMAT) {
   real scaleT = 1.0;
...@@ -119,9 +119,9 @@ TEST(MuLOp, DSparseDMul) {
 }
 /**
  * C += A * B, A, C dense, B sparse
  * dense = dense * sparse
  */
 void testFuncDDSparseMatrix(
     size_t dimM, size_t dimN, size_t dimK, size_t nnz, SparseFormat FORMAT) {
   real scaleT = 1.0;
...@@ -165,9 +165,9 @@ TEST(MulOp, DDSparseMul) {
 }
 /**
  * C += A * B, A sparse, B, C dense
  * sparse = dense * dense
  */
 void testFuncSparseDDMatrix(
     size_t dimM, size_t dimN, size_t dimK, size_t nnz, SparseFormat FORMAT) {
   real scaleT = 1.0;
......
...@@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#include "TensorShape.h"
 #include <gtest/gtest.h>
+#include "TensorShape.h"
 namespace paddle {
......
...@@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#include "TensorType.h"
 #include <gtest/gtest.h>
+#include "TensorType.h"
 namespace paddle {
......
...@@ -194,8 +194,8 @@ void PyDataProvider::fillSlotsByStr(const std::string& samples) {
     auto& slot = slots_[j];
     CHECK(SlotDef::INDEX >= slot.type || SlotDef::STRING == slot.type)
         << " Slot type:" << slot.type << " is out of range.";
-    CHECK_GE(slot.type, SlotDef::VECTOR_DENSE) << " Slot type:" << slot.type
-                                               << " is out of range.";
+    CHECK_GE(slot.type, SlotDef::VECTOR_DENSE)
+        << " Slot type:" << slot.type << " is out of range.";
     switch (slot.type) {
       case SlotDef::VECTOR_DENSE:
         fillDenseSlot(slot, data, dataEnd);
......
...@@ -446,9 +446,9 @@ real AucEvaluator::evalImp(std::vector<Argument>& arguments) {
   for (size_t i = 0; i < insNum; ++i) {
     real value = outputD[pos];
     uint32_t binIdx = static_cast<uint32_t>(value * kBinNum_);
-    CHECK(binIdx <= kBinNum_) << "bin index [" << binIdx
-                              << "] out of range, predict value[" << value
-                              << "]";
+    CHECK(binIdx <= kBinNum_)
+        << "bin index [" << binIdx << "] out of range, predict value[" << value
+        << "]";
     real w = supportWeight ? weightD[i] : 1.0;
     if (labelD[i] == kNegativeLabel_) {
       statNeg_[binIdx] += w;
......
...@@ -21,7 +21,6 @@ limitations under the License. */
 #include "MultiGradientMachine.h"
 #include "MultiNetwork.h"
 #include "NeuralNetwork.h"
-#include "NeuralNetwork.h"
 #include "ParallelNeuralNetwork.h"
 #include "hl_gpu.h"
......
...@@ -637,7 +637,7 @@ void RecurrentGradientMachine::removeBeamSearchStatisticsCallbacks() {
 /* create scattered id infomation for all realLayer of inFrameLines one time.
  * If hasSubseq, will also create scattered sequenceStartPositions infomation
  * for all realLayer of inFrameLines one time.
  */
 void RecurrentGradientMachine::createInFrameInfo(int inlinkId,
                                                  const Argument& input,
......
...@@ -107,18 +107,18 @@ public:
       DropCallback;
   /**
    * @brief NormOrDropNodeCallback
    *
    * Normalize a path's probabilities or just drop it by modifying path.logProb
    *
    * The first parameter is sequence index in a batch
    *
    * The second parameter is path.ids
    *
    * The third parameter is probabilites for each node in this path.
    *
    * The fourth parameter is the probability of the whole path.
    */
   typedef std::function<void(
       int seqId, const std::vector<int>&, std::vector<real>&, real*)>
       NormOrDropNodeCallback;
...@@ -348,9 +348,9 @@ protected:
   int targetInfoInlinkId_;
   /* create scattered id infomation for all realLayer of inFrameLines one time.
    * If hasSubseq, will also create scattered sequenceStartPositions infomation
    * for all realLayer of inFrameLines one time.
    */
   void createInFrameInfo(int inlinks_id,
                          const Argument& input,
                          PassType passType);
......
...@@ -263,8 +263,9 @@ void Layer::zeroGrad() {
 }
 void Layer::initNeedFlags() {
-  auto initFlag = [this](
-      bool& flag, bool (Layer::*flagQueryFunc)() const, ParameterType type) {
+  auto initFlag = [this](bool& flag,
+                         bool (Layer::*flagQueryFunc)() const,
+                         ParameterType type) {
     flag = false;
     if (biasParameter_ && biasParameter_->hasType(type)) {
       flag = true;
......
...@@ -106,9 +106,9 @@ protected:
 public:
   /**
    * Wait until all input value ready.
    * Called before Layer::forward() function.
    */
   virtual void waitInputValue();
   /**
...@@ -118,9 +118,9 @@ public:
   virtual void copyOutputToOtherDevice();
   /**
    * Wait until all output grad ready and merge them to output_.grad.
    * Called before Layer::backward() function.
    */
   virtual void waitAndMergeOutputGrad();
   /**
......
...@@ -29,7 +29,7 @@ namespace paddle {
  *
  * The config file api is rotate_layer
  *
  */
 class RotateLayer : public Layer {
 public:
......
...@@ -60,7 +60,7 @@ void SequencePoolLayer::forward(PassType passType) {
    * thus, in this case, output_ has no sequenceStartPositions.
    * If type_ = kSeq, seq has sub-seq degrades to a seq, thus, only in this
    * case, we should compute the new sequenceStartPositions.
    */
   if (type_) {
     CHECK(input.subSequenceStartPositions)
         << "when trans_type = seq, input must hasSubseq";
......
...@@ -292,26 +292,27 @@ void checkRecurrentLayer(LayerConfig layerConfig,
   TestRecurrentLayer<T> testGpu(layerConfig, true, gpuBatch);
   testCpu.init(batchSize);
   testGpu.init(batchSize);
-  auto checkError = [](
-      MatrixPtr cpu, MatrixPtr gpu, int numSequences, const char* str) {
-    CpuMatrix check(gpu->getHeight(), gpu->getWidth());
-    check.copyFrom(*gpu);
-    int height = cpu->getHeight();
-    int width = cpu->getWidth();
-    const real* data1 = cpu->getData();
-    const real* data2 = check.getData();
-    int count = 0;
-    for (int i = 0; i < height; i++) {
-      for (int j = 0; j < width; j++) {
-        if (fabs(data1[i * width + j] - data2[i * width + j]) / numSequences >
-            1e-4) {
-          count++;
-        }
-      }
-    }
-    EXPECT_EQ(count, 0) << "[" << str << "]"
-                        << "There are " << count << " different element.";
-  };
+  auto checkError =
+      [](MatrixPtr cpu, MatrixPtr gpu, int numSequences, const char* str) {
+        CpuMatrix check(gpu->getHeight(), gpu->getWidth());
+        check.copyFrom(*gpu);
+        int height = cpu->getHeight();
+        int width = cpu->getWidth();
+        const real* data1 = cpu->getData();
+        const real* data2 = check.getData();
+        int count = 0;
+        for (int i = 0; i < height; i++) {
+          for (int j = 0; j < width; j++) {
+            if (fabs(data1[i * width + j] - data2[i * width + j]) /
+                    numSequences >
+                1e-4) {
+              count++;
+            }
+          }
+        }
+        EXPECT_EQ(count, 0) << "[" << str << "]"
+                            << "There are " << count << " different element.";
+      };
   T* cpuLayer = dynamic_cast<T*>(testCpu.testLayer_.get());
   T* gpuLayer = dynamic_cast<T*>(testGpu.testLayer_.get());
......
...@@ -15,7 +15,7 @@ limitations under the License. */
 #include "MathFunctions.h"
 #include "hl_matrix_apply.cuh"
 #include "hl_matrix_ops.cuh"
-#include "paddle/utils/DynamicLoad.h"
+#include "paddle/utils/DynamicLoader.h"
 namespace dynload {
...@@ -32,7 +32,7 @@ void* lapack_dso_handle = nullptr;
 #define DYNAMIC_LOAD_LAPACK_WRAP(__name) \
   struct DynLoad__##__name { \
     template <typename... Args> \
-    auto operator()(Args... args)->decltype(__name(args...)) { \
+    auto operator()(Args... args) -> decltype(__name(args...)) { \
       using lapack_func = decltype(__name(args...)) (*)(Args...); \
       std::call_once(lapack_dso_flag, GetLapackDsoHandle, &lapack_dso_handle); \
       void* p_##__name = dlsym(lapack_dso_handle, #__name); \
...@@ -41,24 +41,27 @@ void* lapack_dso_handle = nullptr;
   } __name;  // struct DynLoad__##__name
 // clang-format off
-#ifdef PADDLE_USE_LAPACK
 #ifdef PADDLE_USE_ATLAS
-#define LAPACK_ROUTINE_EACH(__macro) \
-  __macro(clapack_sgetrf) \
-  __macro(clapack_dgetrf) \
-  __macro(clapack_sgetri) \
-  __macro(clapack_dgetri)
+#define PADDLE_SGETRF clapack_sgetrf
+#define PADDLE_DGETRF clapack_dgetrf
+#define PADDLE_SGETRI clapack_sgetri
+#define PADDLE_DGETRI clapack_dgetri
 #else
-#define LAPACK_ROUTINE_EACH(__macro) \
-  __macro(LAPACKE_sgetrf) \
-  __macro(LAPACKE_dgetrf) \
-  __macro(LAPACKE_sgetri) \
-  __macro(LAPACKE_dgetri)
+#define PADDLE_SGETRF LAPACKE_sgetrf
+#define PADDLE_DGETRF LAPACKE_dgetrf
+#define PADDLE_SGETRI LAPACKE_sgetri
+#define PADDLE_DGETRI LAPACKE_dgetri
 #endif
+
+#define LAPACK_ROUTINE_EACH(__macro) \
+  __macro(PADDLE_SGETRF) \
+  __macro(PADDLE_DGETRF) \
+  __macro(PADDLE_SGETRI) \
+  __macro(PADDLE_DGETRI)
+// clang-format on

 LAPACK_ROUTINE_EACH(DYNAMIC_LOAD_LAPACK_WRAP)
-#endif
-// clang-format on
 }  // namespace dynload
 namespace paddle {
...@@ -130,16 +133,7 @@ int getrf<float>(const CBLAS_ORDER order,
                  float* A,
                  const int lda,
                  int* ipiv) {
-#ifdef PADDLE_USE_LAPACK
-#ifdef PADDLE_USE_ATLAS
-  return dynload::clapack_sgetrf(order, M, N, A, lda, ipiv);
-#else
-  return dynload::LAPACKE_sgetrf(order, M, N, A, lda, ipiv);
-#endif
-#else
-  LOG(FATAL) << "Not implemented";
-#endif
-  return 0;
+  return dynload::PADDLE_SGETRF(order, M, N, A, lda, ipiv);
 }
 template <>
...@@ -149,16 +143,7 @@ int getrf<double>(const CBLAS_ORDER order,
                   double* A,
                   const int lda,
                   int* ipiv) {
-#ifdef PADDLE_USE_LAPACK
-#ifdef PADDLE_USE_ATLAS
-  return dynload::clapack_dgetrf(order, M, N, A, lda, ipiv);
-#else
-  return dynload::LAPACKE_dgetrf(order, M, N, A, lda, ipiv);
-#endif
-#else
-  LOG(FATAL) << "Not implemented";
-#endif
-  return 0;
+  return dynload::PADDLE_DGETRF(order, M, N, A, lda, ipiv);
 }
 template <>
...@@ -167,16 +152,7 @@ int getri<float>(const CBLAS_ORDER order,
                  float* A,
                  const int lda,
                  const int* ipiv) {
-#ifdef PADDLE_USE_LAPACK
-#ifdef PADDLE_USE_ATLAS
-  return dynload::clapack_sgetri(order, N, A, lda, ipiv);
-#else
-  return dynload::LAPACKE_sgetri(order, N, A, lda, ipiv);
-#endif
-#else
-  LOG(FATAL) << "Not implemented";
-#endif
-  return 0;
+  return dynload::PADDLE_SGETRI(order, N, A, lda, ipiv);
 }
 template <>
...@@ -185,15 +161,7 @@ int getri<double>(const CBLAS_ORDER order,
                   double* A,
                   const int lda,
                   const int* ipiv) {
-#ifdef PADDLE_USE_LAPACK
-#ifdef PADDLE_USE_ATLAS
-  return dynload::clapack_dgetri(order, N, A, lda, ipiv);
-#else
-  return dynload::LAPACKE_dgetri(order, N, A, lda, ipiv);
-#endif
-#else
-  LOG(FATAL) << "Not implemented";
-#endif
+  return dynload::PADDLE_DGETRI(order, N, A, lda, ipiv);
   return 0;
 }
......
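For context, each `DYNAMIC_LOAD_LAPACK_WRAP(__name)` above generates a functor that loads the LAPACK shared library once and resolves `__name` through `dlsym` on first use, so nothing is linked at build time. A minimal standalone sketch of that lazy-binding pattern (illustrative names only, not Paddle's actual macro expansion):

#include <dlfcn.h>
#include <mutex>
#include <stdexcept>
#include <string>

static std::once_flag lapack_dso_flag;
static void* lapack_dso_handle = nullptr;

// Resolve `symbol` from liblapack.so on first call, then forward the
// arguments to it. Mirrors what the macro-generated DynLoad__ functors do.
template <typename Ret, typename... Args>
Ret callLapack(const char* symbol, Args... args) {
  std::call_once(lapack_dso_flag, [] {
    lapack_dso_handle = dlopen("liblapack.so", RTLD_LAZY | RTLD_GLOBAL);
  });
  if (lapack_dso_handle == nullptr) {
    throw std::runtime_error("cannot load liblapack.so");
  }
  using Fn = Ret (*)(Args...);
  Fn fn = reinterpret_cast<Fn>(dlsym(lapack_dso_handle, symbol));
  if (fn == nullptr) {
    throw std::runtime_error(std::string("missing symbol: ") + symbol);
  }
  return fn(args...);  // forward to the real LAPACK routine
}

Because binding happens at call time, a binary built this way starts fine on machines without LAPACK; only an actual call into `getrf`/`getri` can fail, which is why the matrix-inverse test further below probes `GetLapackDsoHandle` before running.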
...@@ -17,14 +17,11 @@ limitations under the License. */
 #ifdef PADDLE_USE_MKL
 #include <mkl.h>
-#ifdef PADDLE_USE_LAPACK
 #include <mkl_lapacke.h>
-#endif
 #else
 extern "C" {
 #include <cblas.h>
 }
-#ifdef PADDLE_USE_LAPACK
 #ifdef PADDLE_USE_ATLAS
 extern "C" {
 #include <clapack.h>
...@@ -33,7 +30,6 @@ extern "C" {
 #include <lapacke.h>
 #endif
 #endif
-#endif
 #include <cmath>
......
...@@ -174,8 +174,10 @@ void CpuMatrix::mulByBitCode(size_t numClasses,
                              const IVector& codes,
                              const Matrix& weight,
                              const Matrix& input) {
-  auto op = [](
-      real& t, const real* weightRow, const real* inputRow, size_t inputDim) {
+  auto op = [](real& t,
+               const real* weightRow,
+               const real* inputRow,
+               size_t inputDim) {
     real sum = 0;
     for (size_t k = 0; k < inputDim; ++k) {
       sum += weightRow[k] * inputRow[k];
...@@ -193,12 +195,12 @@ void CpuMatrix::mulByBitCodeBackwardWeight(size_t numClasses,
                                            const IVector& codes,
                                            Matrix& weight,
                                            const Matrix& input) {
-  auto op = [](
-      const real t, real* weightRow, const real* inputRow, size_t inputDim) {
-    for (size_t k = 0; k < inputDim; ++k) {
-      weightRow[k] += t * inputRow[k];
-    }
-  };
+  auto op =
+      [](const real t, real* weightRow, const real* inputRow, size_t inputDim) {
+        for (size_t k = 0; k < inputDim; ++k) {
+          weightRow[k] += t * inputRow[k];
+        }
+      };
   mulByBitCodeT(op, SimpleCodeTable(numClasses), codes, *this, weight, input);
 }
...@@ -210,12 +212,12 @@ void CpuMatrix::mulByBitCodeBackwardError(size_t numClasses,
                                           const IVector& codes,
                                           const Matrix& weight,
                                           Matrix& input) {
-  auto op = [](
-      const real t, const real* weightRow, real* inputRow, size_t inputDim) {
-    for (size_t k = 0; k < inputDim; ++k) {
-      inputRow[k] += t * weightRow[k];
-    }
-  };
+  auto op =
+      [](const real t, const real* weightRow, real* inputRow, size_t inputDim) {
+        for (size_t k = 0; k < inputDim; ++k) {
+          inputRow[k] += t * weightRow[k];
+        }
+      };
   mulByBitCodeT(op, SimpleCodeTable(numClasses), codes, *this, weight, input);
 }
......
...@@ -183,8 +183,8 @@ void TensorCheck(AssertEq compare,
 template <typename AssertEq>
 void TensorCheck(AssertEq compare, real args1, real args2) {
-  EXPECT_EQ(compare(args1, args2), true) << "[Test error] args1 = " << args1
-                                         << ", args2 = " << args2;
+  EXPECT_EQ(compare(args1, args2), true)
+      << "[Test error] args1 = " << args1 << ", args2 = " << args2;
 }
 template <typename AssertEq>
......
...@@ -37,7 +37,7 @@ limitations under the License. */
 *
 * AutoCompare test;
 * test.cmpWithoutArg<I...>(function, height, width)
 */
 #include <gtest/gtest.h>
 #include "TensorCheck.h"
......
...@@ -126,15 +126,15 @@ TEST(SIMDFunction, decayL1_WithLR) {
   typedef std::function<void(float*, float*, float*, float, size_t)>
       DecayL1MethodType;
-  DecayL1MethodType naive = [](
-      float* d, float* s, float* lr, float l, size_t len) {
-    paddle::simd::naive::decayL1<float>(d, s, lr, l, len);
-  };
-  DecayL1MethodType simd = [](
-      float* d, float* s, float* lr, float l, size_t len) {
-    paddle::simd::decayL1<float>(d, s, lr, l, len);
-  };
+  DecayL1MethodType naive =
+      [](float* d, float* s, float* lr, float l, size_t len) {
+        paddle::simd::naive::decayL1<float>(d, s, lr, l, len);
+      };
+  DecayL1MethodType simd =
+      [](float* d, float* s, float* lr, float l, size_t len) {
+        paddle::simd::decayL1<float>(d, s, lr, l, len);
+      };
   naive(dest.get(), src.get(), lr.get(), lambda, VECTOR_LEN);
   simd(simd_dest.get(), src.get(), lr.get(), lambda, VECTOR_LEN);
......
...@@ -21,6 +21,7 @@ limitations under the License. */
 #include "paddle/math/Matrix.h"
 #include "paddle/math/SparseMatrix.h"
 #include "paddle/testing/TestUtil.h"
+#include "paddle/utils/DynamicLoader.h"
 #include "paddle/utils/Stat.h"
 #include "paddle/utils/Util.h"
...@@ -235,10 +236,15 @@ TEST(Matrix, unary) {
       testMatrixTranspose(height, width);
       testMatrixRotate(height, width);
     }
-    // inverse
-#ifdef PADDLE_USE_LAPACK
-    testMatrixInverse(height);
-#endif
+    // inverse matrix
+    void** dso_handler = nullptr;
+    GetLapackDsoHandle(dso_handler);
+    if (nullptr == *dso_handler) {
+      LOG(WARNING) << "Failed to find liblapack.so, please specify its path "
+                      "using LD_LIBRARY_PATH.";
+    } else {
+      testMatrixInverse(height);
+    }
   }
 }
......
...@@ -379,7 +379,7 @@ void Argument::concat(const std::vector<Argument>& args,
   }
   auto copyArg = [batchSize, stream](
       MatrixPtr& dst, MatrixPtr src, int startRow, bool useGpu) {
     if (!src) {
       dst.reset();
       return;
...@@ -395,29 +395,31 @@ void Argument::concat(const std::vector<Argument>& args,
     tmpMatrix->copyFrom(*src, stream);
   };
-  auto copyIds = [batchSize, stream](
-      IVectorPtr& dst, const IVectorPtr& src, int startRow, bool useGpu) {
-    if (!src) {
-      dst.reset();
-      return;
-    }
-    IVector::resizeOrCreate(dst, batchSize, useGpu);
-    dst->subVec(startRow, src->getSize())->copyFrom(*src, stream);
-  };
+  auto copyIds =
+      [batchSize, stream](
+          IVectorPtr& dst, const IVectorPtr& src, int startRow, bool useGpu) {
+        if (!src) {
+          dst.reset();
+          return;
+        }
+        IVector::resizeOrCreate(dst, batchSize, useGpu);
+        dst->subVec(startRow, src->getSize())->copyFrom(*src, stream);
+      };
-  auto copyStrs = [batchSize, stream](
-      SVectorPtr& dst, const SVectorPtr& src, int startRow, bool useGpu) {
-    if (!src) {
-      dst.reset();
-      return;
-    }
-    if (!dst) {
-      dst = std::make_shared<std::vector<std::string>>(batchSize);
-    } else {
-      dst->resize(batchSize);
-    }
-    std::copy(src->begin(), src->end(), dst->begin() + startRow);
-  };
+  auto copyStrs =
+      [batchSize, stream](
+          SVectorPtr& dst, const SVectorPtr& src, int startRow, bool useGpu) {
+        if (!src) {
+          dst.reset();
+          return;
+        }
+        if (!dst) {
+          dst = std::make_shared<std::vector<std::string>>(batchSize);
+        } else {
+          dst->resize(batchSize);
+        }
+        std::copy(src->begin(), src->end(), dst->begin() + startRow);
+      };
   auto copySequencePos = [](ICpuGpuVectorPtr& dstSeq,
                             const ICpuGpuVectorPtr& srcSeq,
......
...@@ -155,8 +155,9 @@ ParameterOptimizer::TraverseCallback AverageOptimizer::restore() {
     return nullptr;
   }
-  return [](
-      const VectorPtr vecs[], const ParameterConfig& config, size_t sparseId) {
+  return [](const VectorPtr vecs[],
+            const ParameterConfig& config,
+            size_t sparseId) {
     vecs[PARAMETER_VALUE]->copyFrom(*vecs[PARAMETER_GRADIENT]);
     vecs[PARAMETER_GRADIENT]->zeroMem();
   };
......
...@@ -126,7 +126,7 @@ protected:
 /*
  * AdaDelta Optimization.
  * http://www.matthewzeiler.com/pubs/googleTR2012/googleTR2012.pdf
  */
 class AdaDeltaParameterOptimizer : public ParameterOptimizer {
 public:
   explicit AdaDeltaParameterOptimizer(const OptimizationConfig& optConfig)
......
...@@ -352,8 +352,8 @@ bool Parameter::load(std::istream& s) {
   Header header;
   CHECK(s.read(reinterpret_cast<char*>(&header), sizeof(header)))
       << "Fail to read parameter " << getName();
-  CHECK_EQ(header.version, kFormatVersion) << "Incorrect format version: "
-                                           << header.version;
+  CHECK_EQ(header.version, kFormatVersion)
+      << "Incorrect format version: " << header.version;
   CHECK_EQ(header.size, getSize())
       << "The size (" << header.size << ") in the file does not match the size "
       << "(" << getSize() << ") of the parameter: " << getName();
......
...@@ -359,8 +359,8 @@ void SocketClient::TcpClient(const std::string &serverAddr, int serverPort) {
 #if defined(__OSX__) || defined(__APPLE__)
   server = getipnodebyname(serverAddr.c_str(), AF_INET, AI_DEFAULT, &errRet);
-  CHECK_NE(HOST_NOT_FOUND, errRet) << "ERROR, no such host: " << serverAddr
-                                   << " ret = " << errRet;
+  CHECK_NE(HOST_NOT_FOUND, errRet)
+      << "ERROR, no such host: " << serverAddr << " ret = " << errRet;
   CHECK(server) << "getipnodebyname error!";
 #else
   struct hostent hostinfo;
......
...@@ -549,9 +549,9 @@ PServerVector ParameterClient2::createVector() {
     if (handle == -1) {
       handle = response.handle();
     } else {
-      CHECK_EQ(handle, response.handle()) << "Inconsistent handle from client"
-                                          << &response - &responses[0] << " "
-                                          << handle << " " << response.handle();
+      CHECK_EQ(handle, response.handle())
+          << "Inconsistent handle from client" << &response - &responses[0]
+          << " " << handle << " " << response.handle();
     }
   }
   return PServerVector{handle};
...@@ -579,9 +579,9 @@ PServerMatrix ParameterClient2::createMatrix(int32_t numCols) {
     if (handle == -1) {
       handle = response.handle();
     } else {
-      CHECK_EQ(handle, response.handle()) << "Inconsistent handle from client"
-                                          << &response - &responses[0] << " "
-                                          << handle << " " << response.handle();
+      CHECK_EQ(handle, response.handle())
+          << "Inconsistent handle from client" << &response - &responses[0]
+          << " " << handle << " " << response.handle();
     }
   }
   return PServerMatrix{handle};
......
...@@ -1213,8 +1213,8 @@ void ParameterServer2::loadValueVector(const LoadValueRequest& request,
   CHECK_EQ(header.size, (size_t)size_)
       << "The size (" << header.size << ") in the file does not match the size "
       << "(" << size_ << ") of the pserver: " << serverId_;
-  CHECK_EQ(header.valueSize, sizeof(real)) << "Unsupported valueSize "
-                                           << header.valueSize;
+  CHECK_EQ(header.valueSize, sizeof(real))
+      << "Unsupported valueSize " << header.valueSize;
   CHECK(fs.read(reinterpret_cast<char*>(vec.getData()),
                 header.size * sizeof(real)));
......
...@@ -545,11 +545,11 @@ protected:
       std::vector<ParameterServer2::Buffer>* buffers);
   const ParameterConfig& getParameterConfig(const ParameterBlock& block) {
-    CHECK_LT(block.para_id(), -1UL) << "invalid parameter id:"
-                                    << block.para_id();
+    CHECK_LT(block.para_id(), -1UL)
+        << "invalid parameter id:" << block.para_id();
     const auto it = configMap_.find(block.para_id());
-    CHECK(it != configMap_.end()) << "can not find parameter id: "
-                                  << block.para_id();
+    CHECK(it != configMap_.end())
+        << "can not find parameter id: " << block.para_id();
     return it->second;
   }
......
...@@ -41,8 +41,8 @@ void ProtoServer::handleRequest(std::unique_ptr<MsgReader> msgReader,
 void ProtoServer::registerServiceFunctionImp(const std::string& funcName,
                                              ServiceFunction func) {
-  CHECK(!nameToFuncMap_.count(funcName)) << "Duplicated registration: "
-                                         << funcName;
+  CHECK(!nameToFuncMap_.count(funcName))
+      << "Duplicated registration: " << funcName;
   nameToFuncMap_[funcName] = func;
 }
......
...@@ -97,7 +97,7 @@ void TrainerInternal::trainOneBatch(int64_t batchId,
   }
   UpdateCallback updateCallback = [this, showStats, &paraStats](
                                       Parameter* para) {
     if (showStats) {
       //! @TODO(yuyang18) Show stats is actually a ParameterHook, refactor
       // it
......
...@@ -1059,14 +1059,14 @@ inline bool operator==(const value& x, const value& y) {
 }
 inline bool operator!=(const value& x, const value& y) { return !(x == y); }
-}
+}  // namespace picojson
 namespace std {
 template <>
 inline void swap(picojson::value& x, picojson::value& y) {
   x.swap(y);
 }
-}
+}  // namespace std
 inline std::istream& operator>>(std::istream& is, picojson::value& x) {
   picojson::set_last_error(std::string());
......
...@@ -344,14 +344,14 @@ private:
   } while (0);
 // check end barrier
 #define __CHECK_BARRIER_TIMER(set, statName, numConnThreads, ...) \
   do { \
     std::string internalName = \
         std::string(statName) + std::string(__VA_ARGS__); \
     BarrierStatPtr __stat = \
         (set).getStat(numConnThreads, internalName, BARRIER_END); \
-    PCHECK(__stat->checkPassBarrier()) << internalName \
-                                       << ": invalid barrier data"; \
+    PCHECK(__stat->checkPassBarrier()) \
+        << internalName << ": invalid barrier data"; \
   } while (0);
 /*
......
...@@ -62,8 +62,8 @@ public:
   // Create a class instance of type @type using args
   BaseClass* createByType(const std::string& type, CreateArgs... args) {
     ClassCreator creator;
-    CHECK(mapGet(type, creatorMap_, &creator)) << "Unknown class type: "
-                                               << type;
+    CHECK(mapGet(type, creatorMap_, &creator))
+        << "Unknown class type: " << type;
     return creator(args...);
   }
......
...@@ -12,9 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#include "DynamicLoad.h"
-#include "Logging.h"
+#include "DynamicLoader.h"
 #include <gflags/gflags.h>
+#include "Logging.h"
 DEFINE_string(cudnn_dir,
               "",
...@@ -165,8 +165,8 @@ void GetWarpCTCDsoHandle(void** dso_handle) {
 void GetLapackDsoHandle(void** dso_handle) {
 #if defined(__APPLE__) || defined(__OSX__)
-  GetDsoHandleFromSearchPath(FLAGS_warpctc_dir, "liblapack.dylib", dso_handle);
+  GetDsoHandleFromSearchPath(FLAGS_lapack_dir, "liblapack.dylib", dso_handle);
 #else
-  GetDsoHandleFromSearchPath(FLAGS_warpctc_dir, "liblapack.so", dso_handle);
+  GetDsoHandleFromSearchPath(FLAGS_lapack_dir, "liblapack.so", dso_handle);
 #endif
 }
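The fix above matters because `GetLapackDsoHandle` is meant to search a user-specified directory (`FLAGS_lapack_dir`) before falling back to the system loader path; pointing it at `FLAGS_warpctc_dir` made the flag useless for LAPACK. A rough sketch of how such a search-path helper typically behaves (assumed semantics, not Paddle's exact implementation):

#include <dlfcn.h>
#include <string>

// Assumed behavior of a GetDsoHandleFromSearchPath-style helper: try the
// configured directory first, then let the dynamic linker search
// LD_LIBRARY_PATH and the default system paths via the bare library name.
void GetDsoHandleFromSearchPath(const std::string& search_root,
                                const char* dso_name,
                                void** dso_handle) {
  *dso_handle = nullptr;
  if (!search_root.empty()) {
    std::string full_path = search_root + "/" + dso_name;
    *dso_handle = dlopen(full_path.c_str(), RTLD_LAZY | RTLD_GLOBAL);
  }
  if (*dso_handle == nullptr) {
    // Fall back to the loader's own search order (LD_LIBRARY_PATH, etc.).
    *dso_handle = dlopen(dso_name, RTLD_LAZY | RTLD_GLOBAL);
  }
}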
...@@ -17,8 +17,8 @@ limitations under the License. */
 #include <dlfcn.h>
 #include <memory>
-#include <string>
 #include <mutex>
+#include <string>
 /**
  * @brief load the DSO of CUBLAS
......