Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
s920243400
PaddleDetection
提交
8de4e3bd
P
PaddleDetection
项目概览
s920243400
/
PaddleDetection
与 Fork 源项目一致
Fork自
PaddlePaddle / PaddleDetection
通知
2
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleDetection
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
8de4e3bd
编写于
8月 10, 2017
作者:
Q
qijun
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
disable gpu implementation temporarily
上级
a821fec1
变更
3
隐藏空白更改
内联
并排
Showing
3 changed files
with
16 additions
and
22 deletions
+16
-22
paddle/operators/math/math_function.cu
paddle/operators/math/math_function.cu
+6
-0
paddle/operators/math/math_function.h
paddle/operators/math/math_function.h
+8
-21
paddle/operators/mul_op.cu
paddle/operators/mul_op.cu
+2
-1
未找到文件。
paddle/operators/math/math_function.cu
浏览文件 @
8de4e3bd
...
...
@@ -26,6 +26,7 @@ void gemm<platform::GPUPlace, float>(
platform
::
DeviceContext
*
context
)
{
// Note that cublas follows fortran order, so the order is different from
// the cblas convention.
/*
cublasOperation_t cuTransA =
(transA == CblasNoTrans) ? CUBLAS_OP_N : CUBLAS_OP_T;
cublasOperation_t cuTransB =
...
...
@@ -34,6 +35,8 @@ void gemm<platform::GPUPlace, float>(
PADDLE_ENFORCE(platform::dynload::cublasSgemm(
reinterpret_cast<platform::CUDADeviceContext*>(context)->cublas_handle(),
cuTransB, cuTransA, N, M, K, &alpha, B, ldb, A, lda, &beta, C, ldc));
*/
PADDLE_THROW
(
"not implemented now"
);
}
template
<
>
...
...
@@ -44,6 +47,7 @@ void gemm<platform::GPUPlace, double>(
const
int
ldc
,
platform
::
DeviceContext
*
context
)
{
// Note that cublas follows fortran order, so the order is different from
// the cblas convention.
/*
cublasOperation_t cuTransA =
(transA == CblasNoTrans) ? CUBLAS_OP_N : CUBLAS_OP_T;
cublasOperation_t cuTransB =
...
...
@@ -51,6 +55,8 @@ void gemm<platform::GPUPlace, double>(
PADDLE_ENFORCE(platform::dynload::cublasDgemm(
reinterpret_cast<platform::CUDADeviceContext*>(context)->cublas_handle(),
cuTransB, cuTransA, N, M, K, &alpha, B, ldb, A, lda, &beta, C, ldc));
*/
PADDLE_THROW
(
"not implemented now"
);
}
template
<
>
...
...
paddle/operators/math/math_function.h
浏览文件 @
8de4e3bd
...
...
@@ -40,36 +40,23 @@ extern "C" {
#include <cmath>
#include "paddle/framework/tensor.h"
#include "paddle/platform/device_context.h"
#include "paddle/platform/enforce.h"
namespace
paddle
{
namespace
operators
{
namespace
math
{
template
<
typename
Place
,
typename
T
>
void
gemm
(
const
CBLAS_TRANSPOSE
transA
,
const
CBLAS_TRANSPOSE
transB
,
const
int
M
,
const
int
N
,
const
int
K
,
const
T
alpha
,
const
T
*
A
,
const
int
lda
,
const
T
*
B
,
const
int
ldb
,
const
T
beta
,
T
*
C
,
const
int
ldc
,
platform
::
DeviceContext
*
context
);
void
gemm
(
const
CBLAS_TRANSPOSE
transA
,
const
CBLAS_TRANSPOSE
transB
,
const
int
M
,
const
int
N
,
const
int
K
,
const
T
alpha
,
const
T
*
A
,
const
int
lda
,
const
T
*
B
,
const
int
ldb
,
const
T
beta
,
T
*
C
,
const
int
ldc
,
platform
::
DeviceContext
*
context
);
// matrix multiply with continuous memory
template
<
typename
Place
,
typename
T
>
void
matmul
(
const
framework
::
Tensor
&
in1
,
bool
in1_T
,
const
framework
::
Tensor
&
in2
,
bool
in2_T
,
float
alpha
,
framework
::
Tensor
*
out
,
float
beta
,
void
matmul
(
const
framework
::
Tensor
&
in1
,
bool
in1_T
,
const
framework
::
Tensor
&
in2
,
bool
in2_T
,
float
alpha
,
framework
::
Tensor
*
out
,
float
beta
,
platform
::
DeviceContext
*
context
);
}
// namespace math
...
...
paddle/operators/mul_op.cu
浏览文件 @
8de4e3bd
...
...
@@ -15,4 +15,5 @@
#define EIGEN_USE_GPU
#include "paddle/operators/mul_op.h"
REGISTER_OP_GPU_KERNEL
(
mul
,
ops
::
MulKernel
<
paddle
::
platform
::
GPUPlace
,
float
>
);
// REGISTER_OP_GPU_KERNEL(mul, ops::MulKernel<paddle::platform::GPUPlace,
// float>);
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录