Commit 903d5c7e authored by: H He; committed by: Yu Yang

bug fix for hl_matrix_classification_error

Parent: fdd40e55
@@ -266,25 +266,21 @@ template<int blockSize>
 __global__ void KeMatrixClassificationError(real* in_A,
                                             int* in_B,
                                             real* out_C,
-                                            int dimM,
                                             int dimN) {
   __shared__ real max_s[blockSize];
   __shared__ int max_l[blockSize];
-  int cnt = (dimN + blockSize -1) / blockSize;
-  int tid = threadIdx.x;
-  int lmt = tid;
-  int index = 0;
-  real t;
+  const int tid = threadIdx.x;
+  const int rowId = blockIdx.x;
 
   max_s[tid] = -1e30f;
-  for (int ii = 0; ii < cnt && lmt < dimN; ii++) {
-    index = blockIdx.y*dimN + lmt;
-    t = in_A[index];
-    if (max_s[tid] < t) {
-      max_s[tid] = t;
-      max_l[tid] = lmt;
+  in_A += rowId * dimN;
+  real tmp;
+  for (int colId = tid; colId < dimN; colId += blockSize) {
+    tmp = in_A[colId];
+    if (max_s[tid] < tmp) {
+      max_s[tid] = tmp;
+      max_l[tid] = colId;
     }
-    lmt += blockSize;
   }
 
   __syncthreads();
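(The viewer collapses the unchanged middle of the kernel here. That elided section performs the intra-block reduction that folds each thread's running maximum down to max_s[0]/max_l[0] before the final hunk below. The following is an illustrative sketch of the standard tree-reduction pattern it follows, assuming the usual power-of-two blockSize; it is not the file's verbatim code.)

```cuda
// Illustrative only: classic shared-memory argmax reduction. When it
// finishes, max_s[0] holds the row's maximum score and max_l[0] the
// column index (argmax) that produced it.
for (int stride = blockSize / 2; stride > 0; stride >>= 1) {
  if (tid < stride && max_s[tid] < max_s[tid + stride]) {
    max_s[tid] = max_s[tid + stride];
    max_l[tid] = max_l[tid + stride];
  }
  __syncthreads();
}
```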
@@ -300,7 +296,7 @@ __global__ void KeMatrixClassificationError(real* in_A,
   __syncthreads();
 
   if (tid == 0) {
-    out_C[blockIdx.y] = (max_l[0] == in_B[blockIdx.y] ? 0 : 1.0f);
+    out_C[rowId] = (max_l[0] == in_B[rowId] ? 0 : 1.0f);
   }
 }
@@ -313,12 +309,9 @@ void hl_matrix_classification_error(real* A_d,
   CHECK_NOTNULL(B_d);
   CHECK_NOTNULL(C_d);
 
-  int blocksX = 1;
-  int blocksY = dimM;
-  dim3 threads(1024, 1);
-  dim3 grid(blocksX, blocksY);
-  KeMatrixClassificationError<1024><<< grid, threads, 0, STREAM_DEFAULT >>>
-      (A_d, B_d, C_d, dimM, dimN);
+  // each sample is calculated by one block
+  KeMatrixClassificationError<1024><<< dimM, 1024, 0, STREAM_DEFAULT >>>
+      (A_d, B_d, C_d, dimN);
 
   CHECK_SYNC("hl_matrix_classification_error");
 }
......
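With the launch above switched to a 1-D grid, the routine's contract is: block x handles sample row x, finds the argmax of that length-dimN row of A_d, and writes 0 (match with the label in B_d) or 1 (mismatch) into C_d. Below is a minimal host-side sketch of that semantics, assuming real is float; the helper name classificationErrorRef is illustrative, not part of the codebase.

```cuda
#include <vector>

typedef float real;  // assumption: library built with real = float

// Reference semantics of hl_matrix_classification_error:
// C[row] = 0 if argmax of A[row, 0..dimN) equals label B[row], else 1.
void classificationErrorRef(const std::vector<real>& A,  // dimM x dimN, row-major
                            const std::vector<int>& B,   // dimM labels
                            std::vector<real>& C,        // dimM error flags
                            int dimM, int dimN) {
  for (int row = 0; row < dimM; ++row) {
    int argmax = 0;
    for (int col = 1; col < dimN; ++col) {
      if (A[row * dimN + col] > A[row * dimN + argmax]) argmax = col;
    }
    C[row] = (argmax == B[row]) ? 0.0f : 1.0f;
  }
}
```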
@@ -1697,7 +1697,6 @@ TEST(Matrix, cosSimDerivate) {
   }
 }
 
-
 void testParamReluForward(int height, int width, int w_height,
                           int w_width) {
   MatrixPtr output = CpuMatrix::create(height, width, false, false);
@@ -1736,7 +1735,6 @@ TEST(Matrix, paramReluForward) {
   }
 }
 
-
 void testParamReluBackwardW(int height, int width, int w_height,
                             int w_width) {
   MatrixPtr oGrad = CpuMatrix::create(height, width, false, false);
@@ -1775,7 +1773,6 @@ TEST(Matrix, paramReluBackwardW) {
   }
 }
 
-
 void testParamReluBackwardDiff(int height, int width, int w_height,
                                int w_width) {
   MatrixPtr oGrad = CpuMatrix::create(height, width, false, false);
@@ -1819,6 +1816,36 @@ TEST(Matrix, paramReluBackwardDiff) {
   }
 }
 
+void testClassificationError(int numSamples, int dim) {
+  MatrixPtr cpuError = std::make_shared<CpuMatrix>(numSamples, 1);
+  MatrixPtr gpuError = std::make_shared<GpuMatrix>(numSamples, 1);
+  MatrixPtr cpuOutput = std::make_shared<CpuMatrix>(numSamples, dim);
+  MatrixPtr gpuOutput = std::make_shared<GpuMatrix>(numSamples, dim);
+  IVectorPtr cpuLabel = std::make_shared<CpuIVector>(numSamples);
+  IVectorPtr gpuLabel = std::make_shared<GpuIVector>(numSamples);
+
+  cpuOutput->randomizeUniform();
+  cpuLabel->rand(dim);
+  gpuOutput->copyFrom(*cpuOutput);
+  gpuLabel->copyFrom(*cpuLabel);
+
+  cpuError->classificationError(cpuOutput, cpuLabel);
+  gpuError->classificationError(gpuOutput, gpuLabel);
+
+  MatrixPtr check = std::make_shared<CpuMatrix>(numSamples, 1);
+  check->copyFrom(*gpuError);
+  MatrixCheckEqual(*cpuError, *check);
+}
+
+TEST(Matrix, classificationError) {
+  for (auto numSamples : {1, 10, 100, 1000, 70000}) {
+    for (auto dim : {1, 10, 100, 1000}) {
+      VLOG(3) << " numSamples=" << numSamples << " dim=" << dim;
+      testClassificationError(numSamples, dim);
+    }
+  }
+}
+
 int main(int argc, char** argv) {
   testing::InitGoogleTest(&argc, argv);
   initMain(argc, argv);
......
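A note on the new test's inputs: cpuLabel->rand(dim) draws integer labels uniformly from [0, dim), independent of the uniformly random scores, so each sample is classified correctly with probability 1/dim and the error column should average roughly 1 - 1/dim. The standalone sketch below illustrates that expectation; it is hypothetical and not part of the test file.

```cuda
#include <cstdio>
#include <random>

int main() {
  const int numSamples = 100000, dim = 10;
  std::mt19937 gen(42);
  std::uniform_real_distribution<float> score(0.f, 1.f);
  std::uniform_int_distribution<int> label(0, dim - 1);

  double errors = 0;
  for (int i = 0; i < numSamples; ++i) {
    // Argmax of dim independent uniform scores for this sample.
    int argmax = 0;
    float best = score(gen);
    for (int j = 1; j < dim; ++j) {
      float v = score(gen);
      if (v > best) { best = v; argmax = j; }
    }
    errors += (argmax == label(gen)) ? 0.0 : 1.0;
  }
  // Expect about 1 - 1/dim = 0.9 for dim = 10.
  std::printf("mean error = %.4f\n", errors / numSamples);
  return 0;
}
```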