Commit f27fd9dc authored by liaogang

follow comments

Parent 8cde2d11
......@@ -44,7 +44,6 @@ if(MKL_INC_DIR AND MKL_CORE_LIB AND MKL_SEQUENTIAL_LIB AND MKL_INTEL_LP64)
message(STATUS "Found MKL (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})")
set(CBLAS_FOUND ON)
if(${MKL_LAPACK_INC_DIR})
-add_definitions(-DPADDLE_USE_LAPACK)
message(STATUS "Found lapack in MKL (include: ${MKL_LAPACK_INC_DIR})")
endif()
return() # return file.
......@@ -80,7 +79,6 @@ if(ATLAS_INC_DIR AND ATLAS_CBLAS_LIB AND ATLAS_LIB AND NOT CBLAS_FOUND)
message(STATUS "Found ATLAS (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})")
set(CBLAS_FOUND ON)
if(ATLAS_CLAPACK_INC_DIR)
-add_definitions(-DPADDLE_USE_LAPACK)
message(STATUS "Found lapack in ATLAS (include: ${ATLAS_CLAPACK_INC_DIR})")
endif()
return()
......@@ -114,7 +112,6 @@ if(OPENBLAS_INC_DIR AND OPENBLAS_LIB)
message(STATUS "Found OpenBLAS (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})")
set(CBLAS_FOUND ON)
if(OPENBLAS_LAPACKE_INC_DIR)
-add_definitions(-DPADDLE_USE_LAPACK)
message(STATUS "Found lapack in OpenBLAS (include: ${OPENBLAS_LAPACKE_INC_DIR})")
endif()
return()
......
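Each of the three stanzas above (MKL, ATLAS, OpenBLAS) used to bake LAPACK support in at configure time via `-DPADDLE_USE_LAPACK`; the commit deletes that definition so LAPACK availability is decided when the binary runs. A minimal sketch of the runtime alternative, assuming a POSIX `dlopen`/`dlsym` environment; this is an illustration, not PaddlePaddle's actual loader:

```cpp
#include <dlfcn.h>
#include <cstdio>

int main() {
  // Probe for the library at runtime instead of at configure time.
  void* handle = dlopen("liblapack.so", RTLD_LAZY);
  if (handle == nullptr) {
    std::printf("LAPACK unavailable: %s\n", dlerror());
    return 0;  // callers fall back to code paths that need no inversion
  }
  // Resolving one routine confirms the library is actually usable.
  void* sym = dlsym(handle, "LAPACKE_sgetrf");
  std::printf("LAPACKE_sgetrf %s\n", sym != nullptr ? "found" : "missing");
  dlclose(handle);
  return 0;
}
```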
......@@ -27,8 +27,6 @@ IF(NOT ${CBLAS_FOUND})
SET(CBLAS_LIBRARIES "${CBLAS_INSTALL_DIR}/lib/libopenblas.a" CACHE FILEPATH "openblas library" FORCE)
ENDIF(WIN32)
-ADD_DEFINITIONS(-DPADDLE_USE_LAPACK)
ExternalProject_Add(
openblas
${EXTERNAL_PROJECT_LOG_ARGS}
......
......@@ -40,18 +40,18 @@ public:
namespace gpu {
static __device__ Active<real>::forward forward[] = HPPL_ACTIVE_FUNCTION;
static __device__ Active<real>::backward backward[] = HPPL_ACTIVE_FUNCTION;
-}
+} // namespace gpu
#else
namespace cpu {
static Active<real>::forward forward[] = HPPL_ACTIVE_FUNCTION;
static Active<real>::backward backward[] = HPPL_ACTIVE_FUNCTION;
-}
+} // namespace cpu
#ifdef __AVX__
namespace avx {
static Active<__m256>::forward forward[] = HPPL_ACTIVE_FUNCTION;
static Active<__m256>::backward backward[] = HPPL_ACTIVE_FUNCTION;
-}
+} // namespace avx
#endif
#endif
......
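The hunk above only labels the closing braces with their namespace names, but the surrounding code shows the dispatch scheme: each device namespace (`gpu`, `cpu`, `avx`) holds arrays of forward/backward function pointers expanded from `HPPL_ACTIVE_FUNCTION`, and an activation is invoked by indexing the array. A hand-written sketch of the same idea, with illustrative names in place of the macro-generated tables:

```cpp
// Sketch of the function-pointer dispatch used above; the real tables are
// generated by the HPPL_ACTIVE_FUNCTION macro, and these names are illustrative.
#include <cmath>
#include <cstdio>

using real = float;
using ActivationFn = real (*)(real);

real sigmoid(real x) { return 1 / (1 + std::exp(-x)); }
real relu(real x) { return x > 0 ? x : 0; }

// One table per device namespace (cpu/gpu/avx in the real code).
static ActivationFn forward[] = {sigmoid, relu};

enum { kSigmoid = 0, kRelu = 1 };

int main() {
  // Dispatch by index, the same way forward[type](x) is used in the hl code.
  std::printf("%f\n", forward[kSigmoid](0.5f));
  std::printf("%f\n", forward[kRelu](-1.0f));
  return 0;
}
```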
......@@ -273,23 +273,23 @@ extern void hl_bilinear_forward(const real* inData,
const real ratioW);
/**
-* @brief Bilinear interpolation backward.
-*
-* @param[out] inGrad input gradient.
-* @param[in] inImgH input image height.
-* @param[in] inImgW input image width.
-* @param[in] inputH input batchSize.
-* @param[in] inputW input image data dim.
-* @param[in] outGrad output gradient.
-* @param[in] outImgH output image height.
-* @param[in] outImgW output image width.
-* @param[in] outputH output batchSize.
-* @param[in] outputW output image data dim.
-* @param[in] numChannels number of channels.
-* @param[in] ratioH inImgH / outImgH.
-* @param[in] ratioW inImgW / outImgW.
-*
-*/
+* @brief Bilinear interpolation backward.
+*
+* @param[out] inGrad input gradient.
+* @param[in] inImgH input image height.
+* @param[in] inImgW input image width.
+* @param[in] inputH input batchSize.
+* @param[in] inputW input image data dim.
+* @param[in] outGrad output gradient.
+* @param[in] outImgH output image height.
+* @param[in] outImgW output image width.
+* @param[in] outputH output batchSize.
+* @param[in] outputW output image data dim.
+* @param[in] numChannels number of channels.
+* @param[in] ratioH inImgH / outImgH.
+* @param[in] ratioW inImgW / outImgW.
+*
+*/
extern void hl_bilinear_backward(real* inGrad,
const size_t inImgH,
const size_t inImgW,
......
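The comment change above is whitespace alignment only, but the parameter list fully pins down the kernel's contract: `outGrad` (`outImgH` x `outImgW`) is scattered back into `inGrad` (`inImgH` x `inImgW`), with `ratioH = inImgH / outImgH`. A single-image, single-channel sketch of that accumulation, assuming row-major layout; the real kernel also loops over the batch (`inputH`) and `numChannels`:

```cpp
// Single-channel sketch of bilinear interpolation backward, following the
// parameter semantics documented above.
#include <cstddef>

using real = float;

void bilinearBackwardSketch(real* inGrad, size_t inImgH, size_t inImgW,
                            const real* outGrad, size_t outImgH,
                            size_t outImgW, real ratioH, real ratioW) {
  for (size_t i = 0; i < outImgH; ++i) {
    size_t h = static_cast<size_t>(ratioH * i);    // top source row
    size_t hNext = (h < inImgH - 1) ? 1 : 0;       // offset to bottom row
    real hLambda = ratioH * i - h;                 // vertical weight
    for (size_t j = 0; j < outImgW; ++j) {
      size_t w = static_cast<size_t>(ratioW * j);  // left source column
      size_t wNext = (w < inImgW - 1) ? 1 : 0;     // offset to right column
      real wLambda = ratioW * j - w;               // horizontal weight
      real g = outGrad[i * outImgW + j];
      // Scatter the output gradient to the four source pixels with the
      // same weights the forward interpolation used.
      inGrad[h * inImgW + w] += (1 - hLambda) * (1 - wLambda) * g;
      inGrad[h * inImgW + w + wNext] += (1 - hLambda) * wLambda * g;
      inGrad[(h + hNext) * inImgW + w] += hLambda * (1 - wLambda) * g;
      inGrad[(h + hNext) * inImgW + w + wNext] += hLambda * wLambda * g;
    }
  }
}
```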
......@@ -16,7 +16,7 @@ limitations under the License. */
#include <sys/time.h>
#include "hl_cuda.h"
#include "hl_thread.ph"
#include "paddle/utils/DynamicLoad.h"
#include "paddle/utils/DynamicLoader.h"
#include "paddle/utils/Logging.h"
namespace dynload {
......
......@@ -17,7 +17,7 @@ limitations under the License. */
#include <gflags/gflags.h>
#include "hl_cuda_cudnn.ph"
#include "hl_thread.ph"
#include "paddle/utils/DynamicLoad.h"
#include "paddle/utils/DynamicLoader.h"
#include "paddle/utils/Logging.h"
DEFINE_int32(cudnn_conv_workspace_limit_in_mb,
......
......@@ -24,7 +24,7 @@ limitations under the License. */
#include "hl_cuda.ph"
#include "hl_thread.ph"
#include "paddle/utils/Logging.h"
#include "paddle/utils/DynamicLoad.h"
#include "paddle/utils/DynamicLoader.h"
// clang-format on
namespace dynload {
......
......@@ -14,7 +14,7 @@ limitations under the License. */
#include "hl_warpctc_wrap.h"
#include <mutex>
#include "paddle/utils/DynamicLoad.h"
#include "paddle/utils/DynamicLoader.h"
#include "paddle/utils/Logging.h"
namespace dynload {
......
......@@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "BufferArg.h"
#include <gtest/gtest.h>
#include "BufferArg.h"
#include "paddle/math/MemoryHandle.h"
namespace paddle {
......
......@@ -165,12 +165,12 @@ void CosSimBackward<DEVICE_TYPE_CPU>(const CpuMatrix& out_grad,
real reciprocal_square_sum_x = 1.0f / square_sum_x;
real reciprocal_square_sum_y = 1.0f / square_sum_y;
for (size_t j = 0; j < dim; ++j) {
-prev_grad_x[j] +=
-out[i] * grad[i] * (prev_out_y[j] * reciprocal_xy -
-prev_out_x[j] * reciprocal_square_sum_x);
-prev_grad_y[j] +=
-out[i] * grad[i] * (prev_out_x[j] * reciprocal_xy -
-prev_out_y[j] * reciprocal_square_sum_y);
+prev_grad_x[j] += out[i] * grad[i] *
+(prev_out_y[j] * reciprocal_xy -
+prev_out_x[j] * reciprocal_square_sum_x);
+prev_grad_y[j] += out[i] * grad[i] *
+(prev_out_x[j] * reciprocal_xy -
+prev_out_y[j] * reciprocal_square_sum_y);
}
}
}
......
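The change above is purely a clang-format reflow. For reference, the formula it computes follows from out = x·y / (|x||y|): `reciprocal_square_sum_x` is 1/|x|² as defined just above, and assuming `reciprocal_xy` (defined in the elided context) is 1/(x·y), the expression `out[i] * (prev_out_y[j] * reciprocal_xy - prev_out_x[j] * reciprocal_square_sum_x)` equals ∂out/∂x_j. A small finite-difference check of that identity:

```cpp
// Finite-difference check of the cosine-similarity gradient used above,
// under the assumption that reciprocal_xy = 1 / (x . y).
#include <cmath>
#include <cstdio>

double cosSim(const double* x, const double* y, int dim) {
  double xy = 0, xx = 0, yy = 0;
  for (int k = 0; k < dim; ++k) {
    xy += x[k] * y[k];
    xx += x[k] * x[k];
    yy += y[k] * y[k];
  }
  return xy / std::sqrt(xx * yy);
}

int main() {
  double x[3] = {0.3, -0.7, 1.2}, y[3] = {0.9, 0.1, -0.4};
  double xy = 0, xx = 0;
  for (int k = 0; k < 3; ++k) { xy += x[k] * y[k]; xx += x[k] * x[k]; }
  double out = cosSim(x, y, 3);
  // Analytic gradient, same shape as prev_grad_x[j] in the hunk above
  // (with grad[i] factored out): out * (y_j / xy - x_j / |x|^2).
  double analytic = out * (y[0] / xy - x[0] / xx);
  // Numeric gradient by perturbing x[0].
  double eps = 1e-6;
  double xPlus[3] = {x[0] + eps, x[1], x[2]};
  double numeric = (cosSim(xPlus, y, 3) - out) / eps;
  std::printf("analytic=%.8f numeric=%.8f\n", analytic, numeric);
  return 0;
}
```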
......@@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "Function.h"
#include <gtest/gtest.h>
#include "Function.h"
#include "paddle/math/SparseMatrix.h"
namespace paddle {
......
......@@ -74,9 +74,9 @@ TEST(MulOp, DDDMatrixMul) {
}
/**
-* C += A * B, B, C dense, A sparse
-* dense = sparse * dense
-*/
+* C += A * B, B, C dense, A sparse
+* dense = sparse * dense
+*/
void testFuncDSparseDMatrix(
size_t dimM, size_t dimN, size_t dimK, size_t nnz, SparseFormat FORMAT) {
real scaleT = 1.0;
......@@ -119,9 +119,9 @@ TEST(MuLOp, DSparseDMul) {
}
/**
-* C += A * B, A, C dense, B sparse
-* dense = dense * sparse
-*/
+* C += A * B, A, C dense, B sparse
+* dense = dense * sparse
+*/
void testFuncDDSparseMatrix(
size_t dimM, size_t dimN, size_t dimK, size_t nnz, SparseFormat FORMAT) {
real scaleT = 1.0;
......@@ -165,9 +165,9 @@ TEST(MulOp, DDSparseMul) {
}
/**
-* C += A * B, A sparse, B, C dense
-* sparse = dense * dense
-*/
+* C += A * B, A sparse, B, C dense
+* sparse = dense * dense
+*/
void testFuncSparseDDMatrix(
size_t dimM, size_t dimN, size_t dimK, size_t nnz, SparseFormat FORMAT) {
real scaleT = 1.0;
......
......@@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "TensorShape.h"
#include <gtest/gtest.h>
#include "TensorShape.h"
namespace paddle {
......
......@@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "TensorType.h"
#include <gtest/gtest.h>
#include "TensorType.h"
namespace paddle {
......
......@@ -194,8 +194,8 @@ void PyDataProvider::fillSlotsByStr(const std::string& samples) {
auto& slot = slots_[j];
CHECK(SlotDef::INDEX >= slot.type || SlotDef::STRING == slot.type)
<< " Slot type:" << slot.type << " is out of range.";
-CHECK_GE(slot.type, SlotDef::VECTOR_DENSE) << " Slot type:" << slot.type
-<< " is out of range.";
+CHECK_GE(slot.type, SlotDef::VECTOR_DENSE)
+<< " Slot type:" << slot.type << " is out of range.";
switch (slot.type) {
case SlotDef::VECTOR_DENSE:
fillDenseSlot(slot, data, dataEnd);
......
......@@ -446,9 +446,9 @@ real AucEvaluator::evalImp(std::vector<Argument>& arguments) {
for (size_t i = 0; i < insNum; ++i) {
real value = outputD[pos];
uint32_t binIdx = static_cast<uint32_t>(value * kBinNum_);
-CHECK(binIdx <= kBinNum_) << "bin index [" << binIdx
-<< "] out of range, predict value[" << value
-<< "]";
+CHECK(binIdx <= kBinNum_)
+<< "bin index [" << binIdx << "] out of range, predict value[" << value
+<< "]";
real w = supportWeight ? weightD[i] : 1.0;
if (labelD[i] == kNegativeLabel_) {
statNeg_[binIdx] += w;
......
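Context for the reflowed CHECK: `AucEvaluator` maps each prediction in [0, 1] to one of `kBinNum_` bins and accumulates weighted counts into `statPos_`/`statNeg_`; the bound check guards the histogram write. AUC can then be computed from the two histograms in a single pass. A self-contained sketch of that final step, with the bin layout assumed and illustrative names:

```cpp
// AUC from positive/negative score histograms: AUC is the probability that
// a random positive outscores a random negative, ties counted half.
#include <cstdio>
#include <vector>

double aucFromHistograms(const std::vector<double>& statPos,
                         const std::vector<double>& statNeg) {
  double totPos = 0, totNeg = 0, auc = 0;
  // Walk bins from high scores to low.
  for (int i = static_cast<int>(statPos.size()) - 1; i >= 0; --i) {
    double newPos = statPos[i];
    double newNeg = statNeg[i];
    // Each negative in this bin loses to positives above it (totPos)
    // and ties with positives in the same bin (newPos, counted half).
    auc += newNeg * (totPos + newPos / 2.0);
    totPos += newPos;
    totNeg += newNeg;
  }
  return (totPos > 0 && totNeg > 0) ? auc / (totPos * totNeg) : 0.0;
}

int main() {
  // Two bins: most negatives score low (bin 0), positives high (bin 1).
  std::printf("auc=%.3f\n", aucFromHistograms({1, 9}, {8, 2}));  // 0.850
  return 0;
}
```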
......@@ -21,7 +21,6 @@ limitations under the License. */
#include "MultiGradientMachine.h"
#include "MultiNetwork.h"
#include "NeuralNetwork.h"
#include "NeuralNetwork.h"
#include "ParallelNeuralNetwork.h"
#include "hl_gpu.h"
......
......@@ -637,7 +637,7 @@ void RecurrentGradientMachine::removeBeamSearchStatisticsCallbacks() {
/* create scattered id infomation for all realLayer of inFrameLines one time.
* If hasSubseq, will also create scattered sequenceStartPositions infomation
* for all realLayer of inFrameLines one time.
-*/
+*/
void RecurrentGradientMachine::createInFrameInfo(int inlinkId,
const Argument& input,
......
......@@ -107,18 +107,18 @@ public:
DropCallback;
/**
-* @brief NormOrDropNodeCallback
-*
-* Normalize a path's probabilities or just drop it by modifying path.logProb
-*
-* The first parameter is sequence index in a batch
-*
-* The second parameter is path.ids
-*
-* The third parameter is probabilites for each node in this path.
-*
-* The fourth parameter is the probability of the whole path.
-*/
+* @brief NormOrDropNodeCallback
+*
+* Normalize a path's probabilities or just drop it by modifying path.logProb
+*
+* The first parameter is sequence index in a batch
+*
+* The second parameter is path.ids
+*
+* The third parameter is probabilites for each node in this path.
+*
+* The fourth parameter is the probability of the whole path.
+*/
typedef std::function<void(
int seqId, const std::vector<int>&, std::vector<real>&, real*)>
NormOrDropNodeCallback;
......@@ -348,9 +348,9 @@ protected:
int targetInfoInlinkId_;
/* create scattered id infomation for all realLayer of inFrameLines one time.
-* If hasSubseq, will also create scattered sequenceStartPositions infomation
-* for all realLayer of inFrameLines one time.
-*/
+* If hasSubseq, will also create scattered sequenceStartPositions infomation
+* for all realLayer of inFrameLines one time.
+*/
void createInFrameInfo(int inlinks_id,
const Argument& input,
PassType passType);
......
......@@ -263,8 +263,9 @@ void Layer::zeroGrad() {
}
void Layer::initNeedFlags() {
-auto initFlag = [this](
-bool& flag, bool (Layer::*flagQueryFunc)() const, ParameterType type) {
+auto initFlag = [this](bool& flag,
+bool (Layer::*flagQueryFunc)() const,
+ParameterType type) {
flag = false;
if (biasParameter_ && biasParameter_->hasType(type)) {
flag = true;
......
......@@ -106,9 +106,9 @@ protected:
public:
/**
-* Wait until all input value ready.
-* Called before Layer::forward() function.
-*/
+* Wait until all input value ready.
+* Called before Layer::forward() function.
+*/
virtual void waitInputValue();
/**
......@@ -118,9 +118,9 @@ public:
virtual void copyOutputToOtherDevice();
/**
-* Wait until all output grad ready and merge them to output_.grad.
-* Called before Layer::backward() function.
-*/
+* Wait until all output grad ready and merge them to output_.grad.
+* Called before Layer::backward() function.
+*/
virtual void waitAndMergeOutputGrad();
/**
......
......@@ -29,7 +29,7 @@ namespace paddle {
*
* The config file api is rotate_layer
*
-*/
+*/
class RotateLayer : public Layer {
public:
......
......@@ -60,7 +60,7 @@ void SequencePoolLayer::forward(PassType passType) {
* thus, in this case, output_ has no sequenceStartPositions.
* If type_ = kSeq, seq has sub-seq degrades to a seq, thus, only in this
* case, we should compute the new sequenceStartPositions.
-*/
+*/
if (type_) {
CHECK(input.subSequenceStartPositions)
<< "when trans_type = seq, input must hasSubseq";
......
......@@ -292,26 +292,27 @@ void checkRecurrentLayer(LayerConfig layerConfig,
TestRecurrentLayer<T> testGpu(layerConfig, true, gpuBatch);
testCpu.init(batchSize);
testGpu.init(batchSize);
-auto checkError = [](
-MatrixPtr cpu, MatrixPtr gpu, int numSequences, const char* str) {
-CpuMatrix check(gpu->getHeight(), gpu->getWidth());
-check.copyFrom(*gpu);
-int height = cpu->getHeight();
-int width = cpu->getWidth();
-const real* data1 = cpu->getData();
-const real* data2 = check.getData();
-int count = 0;
-for (int i = 0; i < height; i++) {
-for (int j = 0; j < width; j++) {
-if (fabs(data1[i * width + j] - data2[i * width + j]) / numSequences >
-1e-4) {
-count++;
-}
-}
-}
-EXPECT_EQ(count, 0) << "[" << str << "]"
-<< "There are " << count << " different element.";
-};
+auto checkError =
+[](MatrixPtr cpu, MatrixPtr gpu, int numSequences, const char* str) {
+CpuMatrix check(gpu->getHeight(), gpu->getWidth());
+check.copyFrom(*gpu);
+int height = cpu->getHeight();
+int width = cpu->getWidth();
+const real* data1 = cpu->getData();
+const real* data2 = check.getData();
+int count = 0;
+for (int i = 0; i < height; i++) {
+for (int j = 0; j < width; j++) {
+if (fabs(data1[i * width + j] - data2[i * width + j]) /
+numSequences >
+1e-4) {
+count++;
+}
+}
+}
+EXPECT_EQ(count, 0) << "[" << str << "]"
+<< "There are " << count << " different element.";
+};
T* cpuLayer = dynamic_cast<T*>(testCpu.testLayer_.get());
T* gpuLayer = dynamic_cast<T*>(testGpu.testLayer_.get());
......
......@@ -15,7 +15,7 @@ limitations under the License. */
#include "MathFunctions.h"
#include "hl_matrix_apply.cuh"
#include "hl_matrix_ops.cuh"
#include "paddle/utils/DynamicLoad.h"
#include "paddle/utils/DynamicLoader.h"
namespace dynload {
......@@ -32,7 +32,7 @@ void* lapack_dso_handle = nullptr;
#define DYNAMIC_LOAD_LAPACK_WRAP(__name) \
struct DynLoad__##__name { \
template <typename... Args> \
-auto operator()(Args... args)->decltype(__name(args...)) { \
+auto operator()(Args... args) -> decltype(__name(args...)) { \
using lapack_func = decltype(__name(args...)) (*)(Args...); \
std::call_once(lapack_dso_flag, GetLapackDsoHandle, &lapack_dso_handle); \
void* p_##__name = dlsym(lapack_dso_handle, #__name); \
......@@ -41,24 +41,27 @@ void* lapack_dso_handle = nullptr;
} __name; // struct DynLoad__##__name
// clang-format off
-#ifdef PADDLE_USE_LAPACK
#ifdef PADDLE_USE_ATLAS
-#define LAPACK_ROUTINE_EACH(__macro) \
-__macro(clapack_sgetrf) \
-__macro(clapack_dgetrf) \
-__macro(clapack_sgetri) \
-__macro(clapack_dgetri)
+#define PADDLE_SGETRF clapack_sgetrf
+#define PADDLE_DGETRF clapack_dgetrf
+#define PADDLE_SGETRI clapack_sgetri
+#define PADDLE_DGETRI clapack_dgetri
#else
-#define LAPACK_ROUTINE_EACH(__macro) \
-__macro(LAPACKE_sgetrf) \
-__macro(LAPACKE_dgetrf) \
-__macro(LAPACKE_sgetri) \
-__macro(LAPACKE_dgetri)
-#endif
+#define PADDLE_SGETRF LAPACKE_sgetrf
+#define PADDLE_DGETRF LAPACKE_dgetrf
+#define PADDLE_SGETRI LAPACKE_sgetri
+#define PADDLE_DGETRI LAPACKE_dgetri
+#endif
+#define LAPACK_ROUTINE_EACH(__macro) \
+__macro(PADDLE_SGETRF) \
+__macro(PADDLE_DGETRF) \
+__macro(PADDLE_SGETRI) \
+__macro(PADDLE_DGETRI)
+// clang-format on
LAPACK_ROUTINE_EACH(DYNAMIC_LOAD_LAPACK_WRAP)
-#endif
-// clang-format on
} // namespace dynload
namespace paddle {
......@@ -130,16 +133,7 @@ int getrf<float>(const CBLAS_ORDER order,
float* A,
const int lda,
int* ipiv) {
-#ifdef PADDLE_USE_LAPACK
-#ifdef PADDLE_USE_ATLAS
-return dynload::clapack_sgetrf(order, M, N, A, lda, ipiv);
-#else
-return dynload::LAPACKE_sgetrf(order, M, N, A, lda, ipiv);
-#endif
-#else
-LOG(FATAL) << "Not implemented";
-#endif
-return 0;
+return dynload::PADDLE_SGETRF(order, M, N, A, lda, ipiv);
}
template <>
......@@ -149,16 +143,7 @@ int getrf<double>(const CBLAS_ORDER order,
double* A,
const int lda,
int* ipiv) {
-#ifdef PADDLE_USE_LAPACK
-#ifdef PADDLE_USE_ATLAS
-return dynload::clapack_dgetrf(order, M, N, A, lda, ipiv);
-#else
-return dynload::LAPACKE_dgetrf(order, M, N, A, lda, ipiv);
-#endif
-#else
-LOG(FATAL) << "Not implemented";
-#endif
-return 0;
+return dynload::PADDLE_DGETRF(order, M, N, A, lda, ipiv);
}
template <>
......@@ -167,16 +152,7 @@ int getri<float>(const CBLAS_ORDER order,
float* A,
const int lda,
const int* ipiv) {
-#ifdef PADDLE_USE_LAPACK
-#ifdef PADDLE_USE_ATLAS
-return dynload::clapack_sgetri(order, N, A, lda, ipiv);
-#else
-return dynload::LAPACKE_sgetri(order, N, A, lda, ipiv);
-#endif
-#else
-LOG(FATAL) << "Not implemented";
-#endif
-return 0;
+return dynload::PADDLE_SGETRI(order, N, A, lda, ipiv);
}
template <>
......@@ -185,15 +161,7 @@ int getri<double>(const CBLAS_ORDER order,
double* A,
const int lda,
const int* ipiv) {
-#ifdef PADDLE_USE_LAPACK
-#ifdef PADDLE_USE_ATLAS
-return dynload::clapack_dgetri(order, N, A, lda, ipiv);
-#else
-return dynload::LAPACKE_dgetri(order, N, A, lda, ipiv);
-#endif
-#else
-LOG(FATAL) << "Not implemented";
-#endif
-return 0;
+return dynload::PADDLE_DGETRI(order, N, A, lda, ipiv);
}
......
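The rewritten block above folds the ATLAS/LAPACKE split into `PADDLE_SGETRF`-style aliases, so each `getrf`/`getri` specialization becomes a single call through `dynload::`. Behind that sits the `DYNAMIC_LOAD_LAPACK_WRAP` functor, which resolves the symbol with `dlsym` on first use. A stand-alone sketch of the same `std::call_once` + `dlsym` pattern, with error handling trimmed and illustrative names:

```cpp
#include <dlfcn.h>
#include <mutex>
#include <cstdio>

static std::once_flag lapack_dso_flag;
static void* lapack_dso_handle = nullptr;

// Each wrapped routine becomes a callable object that lazily resolves the
// real symbol, then forwards its arguments to it.
template <typename Ret, typename... Args>
struct DynLoad {
  const char* name;
  Ret operator()(Args... args) {
    std::call_once(lapack_dso_flag, [] {
      lapack_dso_handle = dlopen("liblapack.so", RTLD_LAZY);
    });
    using Fn = Ret (*)(Args...);
    auto fn = reinterpret_cast<Fn>(dlsym(lapack_dso_handle, name));
    return fn(args...);  // a real implementation checks both lookups first
  }
};

// LAPACKE_sgetrf(int matrix_layout, int m, int n, float* a, int lda, int* ipiv)
static DynLoad<int, int, int, int, float*, int, int*> sgetrf{"LAPACKE_sgetrf"};

int main() {
  float a[4] = {4, 3, 6, 3};
  int ipiv[2];
  // 101 == LAPACK_ROW_MAJOR in lapacke.h.
  std::printf("info=%d\n", sgetrf(101, 2, 2, a, 2, ipiv));
  return 0;
}
```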
......@@ -17,14 +17,11 @@ limitations under the License. */
#ifdef PADDLE_USE_MKL
#include <mkl.h>
-#ifdef PADDLE_USE_LAPACK
#include <mkl_lapacke.h>
-#endif
#else
extern "C" {
#include <cblas.h>
}
-#ifdef PADDLE_USE_LAPACK
#ifdef PADDLE_USE_ATLAS
extern "C" {
#include <clapack.h>
......@@ -33,7 +30,6 @@ extern "C" {
#include <lapacke.h>
#endif
#endif
-#endif
#include <cmath>
......
......@@ -174,8 +174,10 @@ void CpuMatrix::mulByBitCode(size_t numClasses,
const IVector& codes,
const Matrix& weight,
const Matrix& input) {
-auto op = [](
-real& t, const real* weightRow, const real* inputRow, size_t inputDim) {
+auto op = [](real& t,
+const real* weightRow,
+const real* inputRow,
+size_t inputDim) {
real sum = 0;
for (size_t k = 0; k < inputDim; ++k) {
sum += weightRow[k] * inputRow[k];
......@@ -193,12 +195,12 @@ void CpuMatrix::mulByBitCodeBackwardWeight(size_t numClasses,
const IVector& codes,
Matrix& weight,
const Matrix& input) {
-auto op = [](
-const real t, real* weightRow, const real* inputRow, size_t inputDim) {
-for (size_t k = 0; k < inputDim; ++k) {
-weightRow[k] += t * inputRow[k];
-}
-};
+auto op =
+[](const real t, real* weightRow, const real* inputRow, size_t inputDim) {
+for (size_t k = 0; k < inputDim; ++k) {
+weightRow[k] += t * inputRow[k];
+}
+};
mulByBitCodeT(op, SimpleCodeTable(numClasses), codes, *this, weight, input);
}
......@@ -210,12 +212,12 @@ void CpuMatrix::mulByBitCodeBackwardError(size_t numClasses,
const IVector& codes,
const Matrix& weight,
Matrix& input) {
-auto op = [](
-const real t, const real* weightRow, real* inputRow, size_t inputDim) {
-for (size_t k = 0; k < inputDim; ++k) {
-inputRow[k] += t * weightRow[k];
-}
-};
+auto op =
+[](const real t, const real* weightRow, real* inputRow, size_t inputDim) {
+for (size_t k = 0; k < inputDim; ++k) {
+inputRow[k] += t * weightRow[k];
+}
+};
mulByBitCodeT(op, SimpleCodeTable(numClasses), codes, *this, weight, input);
}
......
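All three hunks above are lambda reflows, but together they outline the hierarchical-softmax kernel family: one op accumulates a weight-row/input-row dot product (forward), one adds the scaled input into the weight row (weight gradient), one adds the scaled weight row into the input (input gradient), and `mulByBitCodeT` applies the chosen op at every node on a sample's code path. A sketch of such a traversal over a complete binary tree with `numClasses` leaves; the heap-style node indexing is an assumption for illustration, not the exact `SimpleCodeTable` layout:

```cpp
#include <cstddef>

using real = float;

// Apply `op` at every internal node on a sample's path, mirroring how the
// three lambdas above are driven by mulByBitCodeT (names illustrative).
template <typename Op>
void forEachCodeNode(Op op, size_t numClasses, const int* codes,
                     size_t numSamples, real* tmat, size_t maxCodeLength,
                     real* weight, real* input, size_t inputDim) {
  for (size_t i = 0; i < numSamples; ++i) {
    // Leaf c sits at heap slot c + numClasses; halving walks up through
    // its ancestors, which are the internal nodes on the path.
    size_t code = static_cast<size_t>(codes[i]) + numClasses;
    for (size_t j = 0; code > 1; ++j, code >>= 1) {
      size_t node = code / 2 - 1;  // 0-based index of the parent node
      op(tmat[i * maxCodeLength + j], weight + node * inputDim,
         input + i * inputDim, inputDim);
    }
  }
}
```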
......@@ -183,8 +183,8 @@ void TensorCheck(AssertEq compare,
template <typename AssertEq>
void TensorCheck(AssertEq compare, real args1, real args2) {
-EXPECT_EQ(compare(args1, args2), true) << "[Test error] args1 = " << args1
-<< ", args2 = " << args2;
+EXPECT_EQ(compare(args1, args2), true)
+<< "[Test error] args1 = " << args1 << ", args2 = " << args2;
}
template <typename AssertEq>
......
......@@ -37,7 +37,7 @@ limitations under the License. */
*
* AutoCompare test;
* test.cmpWithoutArg<I...>(function, height, width)
-*/
+*/
#include <gtest/gtest.h>
#include "TensorCheck.h"
......
......@@ -126,15 +126,15 @@ TEST(SIMDFunction, decayL1_WithLR) {
typedef std::function<void(float*, float*, float*, float, size_t)>
DecayL1MethodType;
-DecayL1MethodType naive = [](
-float* d, float* s, float* lr, float l, size_t len) {
-paddle::simd::naive::decayL1<float>(d, s, lr, l, len);
-};
-DecayL1MethodType simd = [](
-float* d, float* s, float* lr, float l, size_t len) {
-paddle::simd::decayL1<float>(d, s, lr, l, len);
-};
+DecayL1MethodType naive =
+[](float* d, float* s, float* lr, float l, size_t len) {
+paddle::simd::naive::decayL1<float>(d, s, lr, l, len);
+};
+DecayL1MethodType simd =
+[](float* d, float* s, float* lr, float l, size_t len) {
+paddle::simd::decayL1<float>(d, s, lr, l, len);
+};
naive(dest.get(), src.get(), lr.get(), lambda, VECTOR_LEN);
simd(simd_dest.get(), src.get(), lr.get(), lambda, VECTOR_LEN);
......
......@@ -21,6 +21,7 @@ limitations under the License. */
#include "paddle/math/Matrix.h"
#include "paddle/math/SparseMatrix.h"
#include "paddle/testing/TestUtil.h"
#include "paddle/utils/DynamicLoader.h"
#include "paddle/utils/Stat.h"
#include "paddle/utils/Util.h"
......@@ -235,10 +236,15 @@ TEST(Matrix, unary) {
testMatrixTranspose(height, width);
testMatrixRotate(height, width);
}
-// inverse
-#ifdef PADDLE_USE_LAPACK
-testMatrixInverse(height);
-#endif
+// inverse matrix
+void** dso_handler = nullptr;
+GetLapackDsoHandle(dso_handler);
+if (nullptr == *dso_handler) {
+LOG(WARNING) << "Failed to find liblapack.so, please specify its path "
+"using LD_LIBRARY_PATH.";
+} else {
+testMatrixInverse(height);
+}
}
}
......
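The test above now probes for LAPACK at runtime instead of compiling `testMatrixInverse` conditionally. One subtlety of the probe's `void(void**)` signature: the caller has to own a real `void*` slot and pass its address, since the loader writes the handle through the pointer. A usage sketch, under the assumption that the loader leaves the slot null (rather than aborting) when the library is missing:

```cpp
#include <cstddef>

#include "paddle/utils/DynamicLoader.h"
#include "paddle/utils/Logging.h"

void maybeTestInverse(size_t height) {
  void* dso_handle = nullptr;       // storage the loader writes into
  GetLapackDsoHandle(&dso_handle);  // pass the address of an actual slot
  if (dso_handle == nullptr) {
    LOG(WARNING) << "liblapack not found; set LD_LIBRARY_PATH or --lapack_dir";
    return;
  }
  // Safe to exercise the LAPACK-backed path now, e.g.:
  // testMatrixInverse(height);
}
```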
......@@ -379,7 +379,7 @@ void Argument::concat(const std::vector<Argument>& args,
}
auto copyArg = [batchSize, stream](
MatrixPtr& dst, MatrixPtr src, int startRow, bool useGpu) {
MatrixPtr& dst, MatrixPtr src, int startRow, bool useGpu) {
if (!src) {
dst.reset();
return;
......@@ -395,29 +395,31 @@ void Argument::concat(const std::vector<Argument>& args,
tmpMatrix->copyFrom(*src, stream);
};
-auto copyIds = [batchSize, stream](
-IVectorPtr& dst, const IVectorPtr& src, int startRow, bool useGpu) {
-if (!src) {
-dst.reset();
-return;
-}
-IVector::resizeOrCreate(dst, batchSize, useGpu);
-dst->subVec(startRow, src->getSize())->copyFrom(*src, stream);
-};
-auto copyStrs = [batchSize, stream](
-SVectorPtr& dst, const SVectorPtr& src, int startRow, bool useGpu) {
-if (!src) {
-dst.reset();
-return;
-}
-if (!dst) {
-dst = std::make_shared<std::vector<std::string>>(batchSize);
-} else {
-dst->resize(batchSize);
-}
-std::copy(src->begin(), src->end(), dst->begin() + startRow);
-};
+auto copyIds =
+[batchSize, stream](
+IVectorPtr& dst, const IVectorPtr& src, int startRow, bool useGpu) {
+if (!src) {
+dst.reset();
+return;
+}
+IVector::resizeOrCreate(dst, batchSize, useGpu);
+dst->subVec(startRow, src->getSize())->copyFrom(*src, stream);
+};
+auto copyStrs =
+[batchSize, stream](
+SVectorPtr& dst, const SVectorPtr& src, int startRow, bool useGpu) {
+if (!src) {
+dst.reset();
+return;
+}
+if (!dst) {
+dst = std::make_shared<std::vector<std::string>>(batchSize);
+} else {
+dst->resize(batchSize);
+}
+std::copy(src->begin(), src->end(), dst->begin() + startRow);
+};
auto copySequencePos = [](ICpuGpuVectorPtr& dstSeq,
const ICpuGpuVectorPtr& srcSeq,
......
......@@ -155,8 +155,9 @@ ParameterOptimizer::TraverseCallback AverageOptimizer::restore() {
return nullptr;
}
-return [](
-const VectorPtr vecs[], const ParameterConfig& config, size_t sparseId) {
+return [](const VectorPtr vecs[],
+const ParameterConfig& config,
+size_t sparseId) {
vecs[PARAMETER_VALUE]->copyFrom(*vecs[PARAMETER_GRADIENT]);
vecs[PARAMETER_GRADIENT]->zeroMem();
};
......
......@@ -126,7 +126,7 @@ protected:
/*
* AdaDelta Optimization.
* http://www.matthewzeiler.com/pubs/googleTR2012/googleTR2012.pdf
-*/
+*/
class AdaDeltaParameterOptimizer : public ParameterOptimizer {
public:
explicit AdaDeltaParameterOptimizer(const OptimizationConfig& optConfig)
......
......@@ -352,8 +352,8 @@ bool Parameter::load(std::istream& s) {
Header header;
CHECK(s.read(reinterpret_cast<char*>(&header), sizeof(header)))
<< "Fail to read parameter " << getName();
-CHECK_EQ(header.version, kFormatVersion) << "Incorrect format version: "
-<< header.version;
+CHECK_EQ(header.version, kFormatVersion)
+<< "Incorrect format version: " << header.version;
CHECK_EQ(header.size, getSize())
<< "The size (" << header.size << ") in the file does not match the size "
<< "(" << getSize() << ") of the parameter: " << getName();
......
......@@ -359,8 +359,8 @@ void SocketClient::TcpClient(const std::string &serverAddr, int serverPort) {
#if defined(__OSX__) || defined(__APPLE__)
server = getipnodebyname(serverAddr.c_str(), AF_INET, AI_DEFAULT, &errRet);
-CHECK_NE(HOST_NOT_FOUND, errRet) << "ERROR, no such host: " << serverAddr
-<< " ret = " << errRet;
+CHECK_NE(HOST_NOT_FOUND, errRet)
+<< "ERROR, no such host: " << serverAddr << " ret = " << errRet;
CHECK(server) << "getipnodebyname error!";
#else
struct hostent hostinfo;
......
......@@ -549,9 +549,9 @@ PServerVector ParameterClient2::createVector() {
if (handle == -1) {
handle = response.handle();
} else {
-CHECK_EQ(handle, response.handle()) << "Inconsistent handle from client"
-<< &response - &responses[0] << " "
-<< handle << " " << response.handle();
+CHECK_EQ(handle, response.handle())
+<< "Inconsistent handle from client" << &response - &responses[0]
+<< " " << handle << " " << response.handle();
}
}
return PServerVector{handle};
......@@ -579,9 +579,9 @@ PServerMatrix ParameterClient2::createMatrix(int32_t numCols) {
if (handle == -1) {
handle = response.handle();
} else {
-CHECK_EQ(handle, response.handle()) << "Inconsistent handle from client"
-<< &response - &responses[0] << " "
-<< handle << " " << response.handle();
+CHECK_EQ(handle, response.handle())
+<< "Inconsistent handle from client" << &response - &responses[0]
+<< " " << handle << " " << response.handle();
}
}
return PServerMatrix{handle};
......
......@@ -1213,8 +1213,8 @@ void ParameterServer2::loadValueVector(const LoadValueRequest& request,
CHECK_EQ(header.size, (size_t)size_)
<< "The size (" << header.size << ") in the file does not match the size "
<< "(" << size_ << ") of the pserver: " << serverId_;
-CHECK_EQ(header.valueSize, sizeof(real)) << "Unsupported valueSize "
-<< header.valueSize;
+CHECK_EQ(header.valueSize, sizeof(real))
+<< "Unsupported valueSize " << header.valueSize;
CHECK(fs.read(reinterpret_cast<char*>(vec.getData()),
header.size * sizeof(real)));
......
......@@ -545,11 +545,11 @@ protected:
std::vector<ParameterServer2::Buffer>* buffers);
const ParameterConfig& getParameterConfig(const ParameterBlock& block) {
-CHECK_LT(block.para_id(), -1UL) << "invalid parameter id:"
-<< block.para_id();
+CHECK_LT(block.para_id(), -1UL)
+<< "invalid parameter id:" << block.para_id();
const auto it = configMap_.find(block.para_id());
-CHECK(it != configMap_.end()) << "can not find parameter id: "
-<< block.para_id();
+CHECK(it != configMap_.end())
+<< "can not find parameter id: " << block.para_id();
return it->second;
}
......
......@@ -41,8 +41,8 @@ void ProtoServer::handleRequest(std::unique_ptr<MsgReader> msgReader,
void ProtoServer::registerServiceFunctionImp(const std::string& funcName,
ServiceFunction func) {
-CHECK(!nameToFuncMap_.count(funcName)) << "Duplicated registration: "
-<< funcName;
+CHECK(!nameToFuncMap_.count(funcName))
+<< "Duplicated registration: " << funcName;
nameToFuncMap_[funcName] = func;
}
......
......@@ -97,7 +97,7 @@ void TrainerInternal::trainOneBatch(int64_t batchId,
}
UpdateCallback updateCallback = [this, showStats, &paraStats](
-Parameter* para) {
+Parameter* para) {
if (showStats) {
//! @TODO(yuyang18) Show stats is actually a ParameterHook, refactor
// it
......
......@@ -1059,14 +1059,14 @@ inline bool operator==(const value& x, const value& y) {
}
inline bool operator!=(const value& x, const value& y) { return !(x == y); }
-}
+} // namespace picojson
namespace std {
template <>
inline void swap(picojson::value& x, picojson::value& y) {
x.swap(y);
}
-}
+} // namespace std
inline std::istream& operator>>(std::istream& is, picojson::value& x) {
picojson::set_last_error(std::string());
......
......@@ -344,14 +344,14 @@ private:
} while (0);
// check end barrier
-#define __CHECK_BARRIER_TIMER(set, statName, numConnThreads, ...) \
-do { \
-std::string internalName = \
-std::string(statName) + std::string(__VA_ARGS__); \
-BarrierStatPtr __stat = \
-(set).getStat(numConnThreads, internalName, BARRIER_END); \
-PCHECK(__stat->checkPassBarrier()) << internalName \
-<< ": invalid barrier data"; \
+#define __CHECK_BARRIER_TIMER(set, statName, numConnThreads, ...) \
+do { \
+std::string internalName = \
+std::string(statName) + std::string(__VA_ARGS__); \
+BarrierStatPtr __stat = \
+(set).getStat(numConnThreads, internalName, BARRIER_END); \
+PCHECK(__stat->checkPassBarrier()) \
+<< internalName << ": invalid barrier data"; \
} while (0);
/*
......
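The macro keeps its `do { ... } while (0)` wrapper through the reflow. That wrapper is what makes a multi-statement macro expand to a single statement, so it nests correctly under `if`/`else`. A minimal illustration:

```cpp
#include <cstdio>

// Without the wrapper, only the first statement belongs to the if.
#define LOG_TWICE_BAD(msg) \
  std::printf("%s\n", msg); \
  std::printf("%s\n", msg)

// With it, the whole body behaves as one statement.
#define LOG_TWICE_OK(msg)     \
  do {                        \
    std::printf("%s\n", msg); \
    std::printf("%s\n", msg); \
  } while (0)

int main(int argc, char**) {
  if (argc > 1)
    LOG_TWICE_OK("got args");  // expands to a single, else-safe statement
  else
    LOG_TWICE_OK("no args");
  // With LOG_TWICE_BAD here, the second printf would escape the if, and
  // the else branch would not even compile.
  return 0;
}
```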
......@@ -62,8 +62,8 @@ public:
// Create a class instance of type @type using args
BaseClass* createByType(const std::string& type, CreateArgs... args) {
ClassCreator creator;
CHECK(mapGet(type, creatorMap_, &creator)) << "Unknown class type: "
<< type;
CHECK(mapGet(type, creatorMap_, &creator))
<< "Unknown class type: " << type;
return creator(args...);
}
......
......@@ -12,9 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "DynamicLoad.h"
#include "Logging.h"
#include "DynamicLoader.h"
#include <gflags/gflags.h>
#include "Logging.h"
DEFINE_string(cudnn_dir,
"",
......@@ -165,8 +165,8 @@ void GetWarpCTCDsoHandle(void** dso_handle) {
void GetLapackDsoHandle(void** dso_handle) {
#if defined(__APPLE__) || defined(__OSX__)
GetDsoHandleFromSearchPath(FLAGS_warpctc_dir, "liblapack.dylib", dso_handle);
GetDsoHandleFromSearchPath(FLAGS_lapack_dir, "liblapack.dylib", dso_handle);
#else
GetDsoHandleFromSearchPath(FLAGS_warpctc_dir, "liblapack.so", dso_handle);
GetDsoHandleFromSearchPath(FLAGS_lapack_dir, "liblapack.so", dso_handle);
#endif
}
......@@ -17,8 +17,8 @@ limitations under the License. */
#include <dlfcn.h>
#include <memory>
#include <string>
#include <mutex>
#include <string>
/**
* @brief load the DSO of CUBLAS
......