remove redundant HPPL_TYPE_DOUBLE (#200)

c13bdb15 · gangliao · Yu Yang · 91df6062 · c13bdb15 · c13bdb15
12 changed files
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -104,7 +104,7 @@ else()
 endif(NOT WITH_GPU)

 if(WITH_DOUBLE)
-    add_definitions(-DPADDLE_TYPE_DOUBLE -DHPPL_TYPE_DOUBLE)
+    add_definitions(-DPADDLE_TYPE_DOUBLE)
    set(ACCURACY double)
 else(WITH_DOUBLE)
    set(ACCURACY float)

--- a/paddle/cuda/include/hl_base.h
+++ b/paddle/cuda/include/hl_base.h
@@ -185,7 +185,7 @@ typedef struct {
    size_t                  nnz;
 } _hl_sparse_matrix_s, *hl_sparse_matrix_s;

-#ifndef HPPL_TYPE_DOUBLE
+#ifndef PADDLE_TYPE_DOUBLE
 /**
 * HPPL data type: real (float or double)
 *

--- a/paddle/cuda/include/hl_cpu_gru.cuh
+++ b/paddle/cuda/include/hl_cpu_gru.cuh
@@ -20,7 +20,7 @@ limitations under the License. */

 #include "paddle/math/MathFunctions.h"

-#ifndef HPPL_TYPE_DOUBLE
+#ifndef PADDLE_TYPE_DOUBLE
 #define     CBLAS_GEMM     paddle::gemm<float>
 #else
 #define     CBLAS_GEMM     paddle::gemm<double>

--- a/paddle/cuda/include/hl_gpu_functions.cuh
+++ b/paddle/cuda/include/hl_gpu_functions.cuh
@@ -28,7 +28,7 @@ namespace hppl {
    const real min = SIGMOID_THRESHOLD_MIN;
    const real max = SIGMOID_THRESHOLD_MAX;
    real tmp = (a < min) ? min : ((a > max) ? max : a);
-#ifndef HPPL_TYPE_DOUBLE
+#ifndef PADDLE_TYPE_DOUBLE
    return __fdividef(1.0f, 1.0f + __expf(-tmp));
 #else
    return 1.0 / (1.0 + exp(-tmp));
@@ -36,7 +36,7 @@ namespace hppl {
  }

  __device__ static real tanh(const real a) {
-#ifndef HPPL_TYPE_DOUBLE
+#ifndef PADDLE_TYPE_DOUBLE
    return __fdividef(2.0f, (1.0f + __expf(-2.0f*a))) - 1.0f;
 #else
    return (2.0 / (1.0 + exp(-2.0*a))) - 1.0;

--- a/paddle/cuda/include/hl_matrix_base.cuh
+++ b/paddle/cuda/include/hl_matrix_base.cuh
@@ -30,7 +30,7 @@ limitations under the License. */
 #define INLINE   inline
 #endif

-#ifndef HPPL_TYPE_DOUBLE
+#ifndef PADDLE_TYPE_DOUBLE
 #define     DEVICE_FMAX     fmaxf
 #define     DEVICE_FMIN     fminf
 #else

--- a/paddle/cuda/include/hl_matrix_type.cuh
+++ b/paddle/cuda/include/hl_matrix_type.cuh
@@ -21,7 +21,7 @@ limitations under the License. */
 #ifdef __CUDA_ARCH__
 // typedef void*  vecType;
 #include <vector_types.h>
-#ifndef HPPL_TYPE_DOUBLE
+#ifndef PADDLE_TYPE_DOUBLE
 typedef float4 vecType;
 #else
 typedef double2 vecType;
@@ -30,7 +30,7 @@ typedef double2 vecType;
 #include <mmintrin.h>
 #include <xmmintrin.h>
 #include <emmintrin.h>
-#ifndef HPPL_TYPE_DOUBLE
+#ifndef PADDLE_TYPE_DOUBLE
 typedef __m128  vecType;
 #else
 typedef __m128d vecType;

--- a/paddle/cuda/include/hl_sse_matrix_kernel.cuh
+++ b/paddle/cuda/include/hl_sse_matrix_kernel.cuh
@@ -20,7 +20,7 @@ limitations under the License. */

 #define VECTOR_SIZE     16

-#ifndef HPPL_TYPE_DOUBLE
+#ifndef PADDLE_TYPE_DOUBLE
 /* number of float in vector */
 #define     VECTOR_LEN      4
 #define     VECTOR_SET      _mm_set_ps1
@@ -41,7 +41,7 @@ inline bool hl_check_align(void *ptr) {
  return hl_check_align(reinterpret_cast<size_t>(ptr));
 }

-#ifndef HPPL_TYPE_DOUBLE
+#ifndef PADDLE_TYPE_DOUBLE
 template <class Agg>
 inline real hl_agg_op(Agg agg, vecType mm) {
  __m128 lo = _mm_unpacklo_ps(mm, mm);

--- a/paddle/cuda/src/hl_cuda_cublas.cc
+++ b/paddle/cuda/src/hl_cuda_cublas.cc
@@ -84,7 +84,7 @@ CUBLAS_BLAS_ROUTINE_EACH(DYNAMIC_LOAD_CUBLAS_V2_WRAP)
 } /* namespace dynload */


-#ifndef HPPL_TYPE_DOUBLE
+#ifndef PADDLE_TYPE_DOUBLE
 #define     CUBLAS_GEAM     dynload::cublasSgeam
 #define     CUBLAS_GEMV     dynload::cublasSgemv
 #define     CUBLAS_GEMM     dynload::cublasSgemm

--- a/paddle/cuda/src/hl_cuda_cudnn.cc
+++ b/paddle/cuda/src/hl_cuda_cudnn.cc
@@ -340,7 +340,7 @@ void hl_create_tensor_descriptor(hl_tensor_descriptor* image_desc,
        (cudnn_tensor_descriptor)malloc(sizeof(_cudnn_tensor_descriptor));
    CHECK_NOTNULL(hl_desc);

-#ifndef HPPL_TYPE_DOUBLE
+#ifndef PADDLE_TYPE_DOUBLE
    cudnnDataType_t data_type = CUDNN_DATA_FLOAT;
 #else
    cudnnDataType_t data_type = CUDNN_DATA_DOUBLE;
@@ -373,7 +373,7 @@ void hl_create_tensor_descriptor(hl_tensor_descriptor* image_desc) {
        (cudnn_tensor_descriptor)malloc(sizeof(_cudnn_tensor_descriptor));
    CHECK_NOTNULL(hl_desc);

-#ifndef HPPL_TYPE_DOUBLE
+#ifndef PADDLE_TYPE_DOUBLE
    cudnnDataType_t data_type = CUDNN_DATA_FLOAT;
 #else
    cudnnDataType_t data_type = CUDNN_DATA_DOUBLE;
@@ -611,7 +611,7 @@ void hl_create_filter_descriptor(hl_filter_descriptor* filter,

    CHECK_CUDNN(dynload::cudnnCreateFilterDescriptor(&hl_filter->desc));

-#ifndef HPPL_TYPE_DOUBLE
+#ifndef PADDLE_TYPE_DOUBLE
    cudnnDataType_t data_type = CUDNN_DATA_FLOAT;
 #else
    cudnnDataType_t data_type = CUDNN_DATA_DOUBLE;
@@ -921,7 +921,7 @@ void hl_softmax_forward(real *input,
                        int height,
                        int width)
 {
-#ifndef HPPL_TYPE_DOUBLE
+#ifndef PADDLE_TYPE_DOUBLE
    cudnnDataType_t data_type = CUDNN_DATA_FLOAT;
 #else
    cudnnDataType_t data_type = CUDNN_DATA_DOUBLE;
@@ -955,7 +955,7 @@ void hl_softmax_backward(real *output_value,
                         int height,
                         int width)
 {
-#ifndef HPPL_TYPE_DOUBLE
+#ifndef PADDLE_TYPE_DOUBLE
    cudnnDataType_t data_type = CUDNN_DATA_FLOAT;
 #else
    cudnnDataType_t data_type = CUDNN_DATA_DOUBLE;

--- a/paddle/cuda/src/hl_cuda_device.cc
+++ b/paddle/cuda/src/hl_cuda_device.cc
@@ -626,7 +626,7 @@ void hl_specify_devices_start(int* device, int number) {
 void hl_rand(real *dest_d, size_t num) {
  pthread_mutex_lock(t_resource.gen_mutex);
  CHECK_EQ(
-#ifndef HPPL_TYPE_DOUBLE
+#ifndef PADDLE_TYPE_DOUBLE
  dynload::curandGenerateUniform(t_resource.gen, dest_d, num),
 #else
  dynload::curandGenerateUniformDouble(t_resource.gen, dest_d, num),

--- a/paddle/cuda/src/hl_cuda_matrix.cu
+++ b/paddle/cuda/src/hl_cuda_matrix.cu
@@ -47,7 +47,7 @@ void hl_matrix_add(real *A_d,
  CHECK_SYNC("hl_matrix_add failed");
 }

-#ifdef HPPL_TYPE_DOUBLE
+#ifdef PADDLE_TYPE_DOUBLE
    #define THRESHOLD   128
 #else
    #define THRESHOLD   64
@@ -102,7 +102,7 @@ void subMaxAndExp(real* I,
      val = -THRESHOLD;
    }
    I[nextIdx] = val;
-#ifndef HPPL_TYPE_DOUBLE
+#ifndef PADDLE_TYPE_DOUBLE
    O[nextIdx] = __expf(val);
 #else
    O[nextIdx] = exp(val);

--- a/paddle/cuda/src/hl_cuda_sparse.cuh
+++ b/paddle/cuda/src/hl_cuda_sparse.cuh
@@ -355,7 +355,7 @@ __global__ void KeSMatrixCscMulDense(real *C_d,
 }

 /* best perf */
-#ifndef HPPL_TYPE_DOUBLE
+#ifndef PADDLE_TYPE_DOUBLE
 #define CU_CSCMM_THREAD_M_BEST          9
 #else
 #define CU_CSCMM_THREAD_M_BEST          4