PaddlePaddle / PaddleDetection
Commit 2803cf57 (unverified)
Authored Dec 18, 2018 by Yu Yang; committed via GitHub on Dec 18, 2018

Merge pull request #14868 from reyoung/feature/refine_w2v

Feature/refine w2v

Parents: 6aa6b8cf, 4de1a8bd

Showing 7 changed files with 409 additions and 222 deletions (+409 -222)
Changed files:

  cmake/external/python.cmake                        +2   -3
  paddle/fluid/operators/hierarchical_sigmoid_op.h   +18  -10
  paddle/fluid/operators/math/blas.h                 +8   -0
  paddle/fluid/operators/math/blas_impl.h            +21  -0
  paddle/fluid/operators/math/matrix_bit_code.cc     +321 -159
  paddle/fluid/operators/math/matrix_bit_code.h      +37  -50
  paddle/fluid/platform/dynload/mklml.h              +2   -0
cmake/external/python.cmake @ 2803cf57

@@ -18,8 +18,8 @@ ENDIF()

 INCLUDE(python_module)

-FIND_PACKAGE(PythonInterp ${PY_VERSION})
-FIND_PACKAGE(PythonLibs ${PY_VERSION})
+FIND_PACKAGE(PythonInterp ${PY_VERSION} REQUIRED)
+FIND_PACKAGE(PythonLibs ${PY_VERSION} REQUIRED)

 if(WIN32)
   execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c"

@@ -79,6 +79,5 @@ IF(PYTHONINTERP_FOUND)
         "please use pip to upgrade protobuf. pip install -U protobuf")
   ENDIF()
 ENDIF(PYTHONINTERP_FOUND)
-
 INCLUDE_DIRECTORIES(${PYTHON_INCLUDE_DIR})
 INCLUDE_DIRECTORIES(${PYTHON_NUMPY_INCLUDE_DIR})
paddle/fluid/operators/hierarchical_sigmoid_op.h @ 2803cf57

@@ -150,19 +150,27 @@ class HierarchicalSigmoidGradOpKernel : public framework::OpKernel<T> {
                            label.data<int64_t>()));
   }

-  auto& place = *ctx.template device_context<DeviceContext>().eigen_device();
-  auto pre_out_mat = EigenMatrix<T>::From(pre_out);
-  auto pre_out_grad_mat = EigenMatrix<T>::From(pre_out_grad);
-  auto out_grad_mat = EigenMatrix<T>::From(out_grad);
-
-  Eigen::array<int, 2> bcast{1, static_cast<int>(pre_out_grad.dims()[1])};
   // softrelu derivative
-  pre_out_grad_mat.device(place) =
-      static_cast<T>(1.0) - static_cast<T>(1.0) / pre_out_mat.exp();
+
+  auto blas = math::GetBlas<DeviceContext, T>(ctx);
+
+  auto* pre_out_grad_data = pre_out_grad.data<T>();
+  auto* pre_out_data = pre_out.data<T>();
+  auto n = pre_out.numel();
+  blas.VEXP(n, pre_out_data, pre_out_grad_data);
+  blas.VINV(n, pre_out_grad_data, pre_out_grad_data);
+  for (int64_t i = 0; i < n; ++i) {
+    pre_out_grad_data[i] = 1.0 - pre_out_grad_data[i];
+  }
   bit_code->Sub(&pre_out_grad);  // the gradient of clip(w * x + b)
-  pre_out_grad_mat.device(place) =
-      pre_out_grad_mat * out_grad_mat.broadcast(bcast);
+  auto* out_grad_data = out_grad.data<T>();
+  int64_t dim0 = pre_out_grad.dims()[0];
+  int64_t dim1 = pre_out_grad.dims()[1];
+  for (int64_t i = 0; i < dim0; ++i) {
+    T tmp = out_grad_data[i];
+    blas.SCAL(dim1, tmp, pre_out_grad_data + i * dim1);
+  }
   // TODO(guosheng): multiply pre_out_grad with subgradient of clipping to
   // be consistent with the clipping in forward.
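The rewrite above drops the Eigen expression `1 - 1/exp(pre_out)` in favor of explicit VEXP/VINV calls on the saved forward output. The identity it relies on: for softrelu, y = log(1 + e^x), the derivative is sigmoid(x) = 1 - e^(-y) = 1 - 1/exp(y), so the gradient can be recovered from pre_out alone. A minimal standalone C++ sketch of that identity (toy code, not Paddle's):

    #include <cassert>
    #include <cmath>
    #include <cstdio>

    // Verifies d/dx softrelu(x) == 1 - 1/exp(softrelu(x)), the identity the
    // kernel above exploits via blas.VEXP + blas.VINV + (1 - .).
    int main() {
      for (double x = -5.0; x <= 5.0; x += 0.5) {
        double y = std::log(1.0 + std::exp(x));           // forward: softrelu
        double grad_direct = 1.0 / (1.0 + std::exp(-x));  // sigmoid(x)
        double grad_from_y = 1.0 - 1.0 / std::exp(y);     // from saved output
        assert(std::fabs(grad_direct - grad_from_y) < 1e-12);
      }
      std::printf("softrelu derivative identity holds\n");
      return 0;
    }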
paddle/fluid/operators/math/blas.h @ 2803cf57

@@ -181,6 +181,9 @@ class Blas {
                   const framework::Tensor& mat_b, const MatDescriptor& dim_b,
                   T alpha, framework::Tensor* mat_out, T beta) const;

+  template <typename T>
+  void VINV(int n, const T* a, T* y) const;
+
  private:
   const DeviceContext& context_;
 };

@@ -282,6 +285,11 @@ class BlasT : private Blas<DeviceContext> {
     Base()->template BatchedGEMM<T>(args...);
   }

+  template <typename... ARGS>
+  void VINV(ARGS... args) const {
+    Base()->template VINV<T>(args...);
+  }
+
  private:
   const Blas<DeviceContext>* Base() const {
     return static_cast<const Blas<DeviceContext>*>(this);
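The VINV additions follow BlasT's forwarding idiom: BlasT fixes the element type T once and trampolines every call to the type-erased Blas through a variadic template, so each new kernel costs a three-line member. A simplified, self-contained sketch of the pattern (toy stand-ins, not Paddle's real classes):

    #include <cstdio>

    template <typename DeviceContext>
    class Blas {
     public:
      template <typename T>
      void VINV(int n, const T* a, T* y) const {
        for (int i = 0; i < n; ++i) y[i] = static_cast<T>(1.0) / a[i];
      }
    };

    template <typename DeviceContext, typename T>
    class BlasT : private Blas<DeviceContext> {
     public:
      template <typename... ARGS>
      void VINV(ARGS... args) const {
        Base()->template VINV<T>(args...);  // re-attach the element type T
      }

     private:
      const Blas<DeviceContext>* Base() const {
        return static_cast<const Blas<DeviceContext>*>(this);
      }
    };

    struct CPUDeviceContext {};

    int main() {
      const float a[3] = {1.f, 2.f, 4.f};
      float y[3];
      BlasT<CPUDeviceContext, float> blas;
      blas.VINV(3, a, y);
      std::printf("%g %g %g\n", y[0], y[1], y[2]);  // 1 0.5 0.25
      return 0;
    }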
paddle/fluid/operators/math/blas_impl.h @ 2803cf57

@@ -118,6 +118,11 @@ struct CBlas<float> {
   static void VPOW(ARGS... args) {
     platform::dynload::vsPowx(args...);
   }
+
+  template <typename... ARGS>
+  static void VINV(ARGS... args) {
+    platform::dynload::vsInv(args...);
+  }
 };

 template <>

@@ -213,6 +218,11 @@ struct CBlas<double> {
   static void VPOW(ARGS... args) {
     platform::dynload::vdPowx(args...);
   }
+
+  template <typename... ARGS>
+  static void VINV(ARGS... args) {
+    platform::dynload::vdInv(args...);
+  }
 };

 #else

@@ -603,6 +613,17 @@ void Blas<DeviceContext>::MatMul(const framework::Tensor &mat_a,
                     dim_a.stride_, dim_b.stride_);
   }
 }

+template <typename DeviceContext>
+template <typename T>
+void Blas<DeviceContext>::VINV(int n, const T* a, T* y) const {
+#ifdef PADDLE_WITH_MKLML
+  CBlas<T>::VINV(n, a, y);
+#else
+  for (int i = 0; i < n; ++i) {
+    y[i] = 1.0 / a[i];
+  }
+#endif
+}
+
 }  // namespace math
 }  // namespace operators
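CBlas<T> selects the MKL entry point by element type through full specialization: the float specialization routes VINV to vsInv, the double one to vdInv, and non-MKLML builds fall back to the scalar loop in Blas<DeviceContext>::VINV above. A standalone sketch with stand-in functions in place of the real MKL symbols:

    #include <cstdio>

    // fake_vsInv / fake_vdInv stand in for MKL's vsInv / vdInv.
    static void fake_vsInv(int n, const float* a, float* y) {
      for (int i = 0; i < n; ++i) y[i] = 1.0f / a[i];
    }
    static void fake_vdInv(int n, const double* a, double* y) {
      for (int i = 0; i < n; ++i) y[i] = 1.0 / a[i];
    }

    template <typename T>
    struct CBlas;

    template <>
    struct CBlas<float> {
      template <typename... ARGS>
      static void VINV(ARGS... args) { fake_vsInv(args...); }
    };

    template <>
    struct CBlas<double> {
      template <typename... ARGS>
      static void VINV(ARGS... args) { fake_vdInv(args...); }
    };

    int main() {
      const double a[2] = {2.0, 8.0};
      double y[2];
      CBlas<double>::VINV(2, a, y);        // dispatches to the double routine
      std::printf("%g %g\n", y[0], y[1]);  // 0.5 0.125
      return 0;
    }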
paddle/fluid/operators/math/matrix_bit_code.cc @ 2803cf57

@@ -14,218 +14,380 @@ limitations under the License. */

 #include "paddle/fluid/operators/math/matrix_bit_code.h"
 #include <iostream>
+#include <map>

 namespace paddle {
 namespace operators {
 namespace math {

 template <typename T>
-void MatrixBitCodeFunctor<T>::Add(const framework::Tensor& vec,
-                                  framework::Tensor* tmat) {
-  size_t batch_size = tmat->dims()[0];
-  size_t width = tmat->dims()[1];
-  for (size_t i = 0; i < batch_size; ++i) {
-    auto code = code_table_->get_code(i);
-    int code_length = code->get_length();
-    for (int j = 0; j < code_length; ++j) {
-      size_t index = code->calc_index(j);
-      tmat->data<T>()[i * width + j] += vec.data<T>()[index];
+struct MatrixBitCodeFunctorAdd : public boost::static_visitor<void> {
+  const framework::Tensor &vec_;
+  framework::Tensor *tmat_;
+
+  MatrixBitCodeFunctorAdd(const framework::Tensor &vec,
+                          framework::Tensor *tmat)
+      : vec_(vec), tmat_(tmat) {}
+
+  template <typename CodeTable>
+  void operator()(const CodeTable &code_table) {
+    size_t batch_size = tmat_->dims()[0];
+    size_t width = tmat_->dims()[1];
+    auto *tmat_data = tmat_->data<T>();
+    auto *vec_data = vec_.data<T>();
+    for (size_t i = 0; i < batch_size; ++i) {
+      auto code = code_table.get_code(i);
+      int code_length = code.get_length();
+      for (int j = 0; j < code_length; ++j) {
+        size_t index = code.calc_index(j);
+        tmat_data[i * width + j] += vec_data[index];
+      }
     }
   }
+};
+
+template <typename T>
+void MatrixBitCodeFunctor<T>::Add(const framework::Tensor &vec,
+                                  framework::Tensor *tmat) {
+  MatrixBitCodeFunctorAdd<T> func(vec, tmat);
+  code_table_.apply_visitor(func);
 }

 template <typename T>
-void MatrixBitCodeFunctor<T>::AddGrad(const framework::Tensor& tmat,
-                                      framework::Tensor* vec) {
-  size_t batch_size = tmat.dims()[0];
-  size_t width = tmat.dims()[1];
-  for (size_t i = 0; i < batch_size; ++i) {
-    auto code = code_table_->get_code(i);
-    int code_length = code->get_length();
-    for (int j = 0; j < code_length; ++j) {
-      size_t index = code->calc_index(j);
-      vec->data<T>()[index] += tmat.data<T>()[i * width + j];
+struct MatrixBitCodeFunctorAddGrad : public boost::static_visitor<void> {
+  const framework::Tensor &tmat_;
+  framework::Tensor *vec_;
+  MatrixBitCodeFunctorAddGrad(const framework::Tensor &tmat,
+                              framework::Tensor *vec)
+      : tmat_(tmat), vec_(vec) {}
+
+  template <typename CodeTable>
+  void operator()(const CodeTable &table) {
+    size_t batch_size = tmat_.dims()[0];
+    size_t width = tmat_.dims()[1];
+    auto *vec_data = vec_->data<T>();
+    auto *tmat_data = tmat_.data<T>();
+    for (size_t i = 0; i < batch_size; ++i) {
+      auto code = table.get_code(i);
+      int code_length = code.get_length();
+      for (int j = 0; j < code_length; ++j) {
+        size_t index = code.calc_index(j);
+        vec_data[index] += tmat_data[i * width + j];
+      }
     }
   }
+};
+
+template <typename T>
+void MatrixBitCodeFunctor<T>::AddGrad(const framework::Tensor &tmat,
+                                      framework::Tensor *vec) {
+  MatrixBitCodeFunctorAddGrad<T> func(tmat, vec);
+  code_table_.apply_visitor(func);
 }

 template <typename T>
-void MatrixBitCodeFunctor<T>::AddGrad(const framework::Tensor& tmat,
-                                      framework::SelectedRows* vec) {
-  size_t batch_size = tmat.dims()[0];
-  size_t width = tmat.dims()[1];
-  for (size_t i = 0; i < batch_size; ++i) {
-    auto code = code_table_->get_code(i);
-    int code_length = code->get_length();
-    for (int j = 0; j < code_length; ++j) {
-      size_t index = code->calc_index(j);
-      int64_t row_index = vec->GetIndexFromId(static_cast<int64_t>(index));
-      vec->mutable_value()->data<T>()[row_index] +=
-          tmat.data<T>()[i * width + j];
+struct MatrixBitCodeFunctorSelectedRowsAddGrad
+    : public boost::static_visitor<void> {
+  const framework::Tensor &tmat_;
+  framework::SelectedRows *vec_;
+
+  MatrixBitCodeFunctorSelectedRowsAddGrad(const framework::Tensor &tmat,
+                                          framework::SelectedRows *vec)
+      : tmat_(tmat), vec_(vec) {}
+
+  template <typename CodeTable>
+  void operator()(const CodeTable &code_table) {
+    size_t batch_size = tmat_.dims()[0];
+    size_t width = tmat_.dims()[1];
+    auto *vec_data = vec_->mutable_value()->template data<T>();
+    auto *tmat_data = tmat_.data<T>();
+    for (size_t i = 0; i < batch_size; ++i) {
+      auto code = code_table.get_code(i);
+      int code_length = code.get_length();
+      for (int j = 0; j < code_length; ++j) {
+        size_t index = code.calc_index(j);
+        int64_t row_index = vec_->GetIndexFromId(static_cast<int64_t>(index));
+        vec_data[row_index] += tmat_data[i * width + j];
+      }
     }
   }
+};
+
+template <typename T>
+void MatrixBitCodeFunctor<T>::AddGrad(const framework::Tensor &tmat,
+                                      framework::SelectedRows *vec) {
+  MatrixBitCodeFunctorSelectedRowsAddGrad<T> func(tmat, vec);
+  code_table_.apply_visitor(func);
 }

 template <typename T>
-void MatrixBitCodeFunctor<T>::Sum(const framework::Tensor& tmat,
-                                  framework::Tensor* sum, T scale_sum) {
-  size_t num_samples = tmat.dims()[0];
-  size_t o_width = tmat.dims()[1];
-  for (size_t i = 0; i < num_samples; ++i) {
-    T sm = static_cast<T>(0.0);
-    auto code = code_table_->get_code(i);
-    int code_length = code->get_length();
-    for (int j = 0; j < code_length; ++j) {
-      if (code->calc_bit(j)) {
-        // calc_bit starts from right most bit, while data in tmat[i] is in the
-        // reverse order.
-        sm += tmat.data<T>()[i * o_width + j];
+struct MatrixBitCodeFunctorSum : public boost::static_visitor<void> {
+  const framework::Tensor &tmat_;
+  framework::Tensor *sum_;
+  T scale_sum_;
+
+  MatrixBitCodeFunctorSum(const framework::Tensor &tmat,
+                          framework::Tensor *sum, T scale_sum)
+      : tmat_(tmat), sum_(sum), scale_sum_(scale_sum) {}
+
+  template <typename CodeTable>
+  void operator()(const CodeTable &code_table) {
+    size_t num_samples = tmat_.dims()[0];
+    size_t o_width = tmat_.dims()[1];
+    auto *tmat_data = tmat_.data<T>();
+    auto *sum_data = sum_->data<T>();
+    for (size_t i = 0; i < num_samples; ++i) {
+      T sm = static_cast<T>(0.0);
+      auto code = code_table.get_code(i);
+      int code_length = code.get_length();
+      for (int j = 0; j < code_length; ++j) {
+        if (code.calc_bit(j)) {
+          // calc_bit starts from right most bit, while data in tmat[i] is in
+          // the reverse order.
+          sm += tmat_data[i * o_width + j];
+        }
       }
+      sum_data[i] = scale_sum_ * sm;
     }
-    sum->data<T>()[i] = scale_sum * sm;
   }
+};
+
+template <typename T>
+void MatrixBitCodeFunctor<T>::Sum(const framework::Tensor &tmat,
+                                  framework::Tensor *sum, T scale_sum) {
+  MatrixBitCodeFunctorSum<T> func(tmat, sum, scale_sum);
+  code_table_.apply_visitor(func);
 }

 template <typename T>
-void MatrixBitCodeFunctor<T>::Mul(framework::Tensor* tmat,
-                                  const framework::Tensor& weight,
-                                  const framework::Tensor& input) {
-  auto blas =
-      GetBlas<platform::CPUDeviceContext, T>(platform::CPUDeviceContext());
-  size_t num_samples = tmat->dims()[0];
-  size_t tmat_width = tmat->dims()[1];
-  size_t input_width = input.dims()[1];
-  size_t weight_width = weight.dims()[1];
-  auto tmat_value = tmat->data<T>();
-  auto weight_value = weight.data<T>();
-  auto input_value = input.data<T>();
-  for (size_t i = 0; i < num_samples; ++i) {
-    auto code = code_table_->get_code(i);
-    int code_length = code->get_length();
-    const T* input_row = input_value + input_width * i;
-    for (int j = 0; j < code_length; ++j) {
-      size_t index = code->calc_index(j);
-      const T* weight_row = weight_value + weight_width * index;
-      T sum = static_cast<T>(0.0);
-      sum = blas.DOT(input_width, weight_row, input_row);
-      tmat_value[i * tmat_width + j] += sum;
+struct MatrixBitCodeFunctorMul : public boost::static_visitor<void> {
+  framework::Tensor *tmat_;
+  const framework::Tensor &weight_;
+  const framework::Tensor &input_;
+
+  MatrixBitCodeFunctorMul(framework::Tensor *tmat,
+                          const framework::Tensor &weight,
+                          const framework::Tensor &input)
+      : tmat_(tmat), weight_(weight), input_(input) {}
+
+  template <typename CodeTable>
+  void operator()(const CodeTable &code_table) {
+    auto blas =
+        GetBlas<platform::CPUDeviceContext, T>(platform::CPUDeviceContext());
+    size_t num_samples = tmat_->dims()[0];
+    size_t tmat_width = tmat_->dims()[1];
+    size_t input_width = input_.dims()[1];
+    size_t weight_width = weight_.dims()[1];
+    auto tmat_value = tmat_->data<T>();
+    auto weight_value = weight_.data<T>();
+    auto input_value = input_.data<T>();
+    for (size_t i = 0; i < num_samples; ++i) {
+      auto code = code_table.get_code(i);
+      int code_length = code.get_length();
+      const T *input_row = input_value + input_width * i;
+      for (int j = 0; j < code_length; ++j) {
+        size_t index = code.calc_index(j);
+        const T *weight_row = weight_value + weight_width * index;
+        T sum = blas.DOT(input_width, weight_row, input_row);
+        tmat_value[i * tmat_width + j] += sum;
+      }
     }
   }
+};
+
+template <typename T>
+void MatrixBitCodeFunctor<T>::Mul(framework::Tensor *tmat,
+                                  const framework::Tensor &weight,
+                                  const framework::Tensor &input) {
+  MatrixBitCodeFunctorMul<T> func(tmat, weight, input);
+  code_table_.apply_visitor(func);
 }

+template <typename T, size_t N>
+class ReservedVector : public std::vector<T> {
+ public:
+  ReservedVector() { this->reserve(N); }
+};
+
 template <typename T>
-void MatrixBitCodeFunctor<T>::MulGradWeight(const framework::Tensor& tmat,
-                                            framework::Tensor* weight,
-                                            const framework::Tensor& input) {
-  auto blas =
-      GetBlas<platform::CPUDeviceContext, T>(platform::CPUDeviceContext());
-  size_t num_samples = tmat.dims()[0];
-  size_t input_width = input.dims()[1];
-  size_t tmat_width = tmat.dims()[1];
-  size_t weight_width = weight->dims()[1];
-  auto tmat_value = tmat.data<T>();
-  auto weight_value = weight->data<T>();
-  auto input_value = input.data<T>();
-
-  std::unordered_map<int, std::vector<std::pair<T, const T*>>> ops;
-
-  for (size_t i = 0; i < num_samples; ++i) {
-    auto code = code_table_->get_code(i);
-    int code_length = code->get_length();
-    const T* input_value_row = input_value + input_width * i;
-    const T* tmat_row = tmat_value + i * tmat_width;
-    for (int j = 0; j < code_length; ++j) {
-      ops[code->calc_index(j)].emplace_back(tmat_row[j], input_value_row);
+struct MatrixBitCodeFunctorMulGradWeight : public boost::static_visitor<void> {
+  const framework::Tensor &tmat_;
+  framework::Tensor *weight_;
+  const framework::Tensor &input_;
+  MatrixBitCodeFunctorMulGradWeight(const framework::Tensor &tmat,
+                                    framework::Tensor *weight,
+                                    const framework::Tensor &input)
+      : tmat_(tmat), weight_(weight), input_(input) {}
+
+  template <typename CodeTable>
+  void operator()(const CodeTable &code_table) {
+    auto blas =
+        GetBlas<platform::CPUDeviceContext, T>(platform::CPUDeviceContext());
+    size_t num_samples = tmat_.dims()[0];
+    size_t input_width = input_.dims()[1];
+    size_t tmat_width = tmat_.dims()[1];
+    size_t weight_width = weight_->dims()[1];
+    auto tmat_value = tmat_.data<T>();
+    auto weight_value = weight_->data<T>();
+    auto input_value = input_.data<T>();
+
+    std::map<int, ReservedVector<std::pair<T, const T *>, 8u>> ops;
+    for (size_t i = 0; i < num_samples; ++i) {
+      auto code = code_table.get_code(i);
+      int code_length = code.get_length();
+      const T *input_value_row = input_value + input_width * i;
+      const T *tmat_row = tmat_value + i * tmat_width;
+      for (int j = 0; j < code_length; ++j) {
+        ops[code.calc_index(j)].emplace_back(tmat_row[j], input_value_row);
+      }
     }
-  }
-  for (auto& op : ops) {
-    auto& op_in_row = op.second;
-    for (auto& pair : op_in_row) {
-      auto& scale = pair.first;
-      auto* input_row = pair.second;
-      T* weight_row = weight_value + op.first * weight_width;
-      blas.AXPY(input_width, scale, input_row, weight_row);
+    for (auto &op : ops) {
+      auto &op_in_row = op.second;
+      for (auto &pair : op_in_row) {
+        auto &scale = pair.first;
+        auto *input_row = pair.second;
+        T *weight_row = weight_value + op.first * weight_width;
+        blas.AXPY(input_width, scale, input_row, weight_row);
+      }
     }
   }
+};
+
+template <typename T>
+void MatrixBitCodeFunctor<T>::MulGradWeight(const framework::Tensor &tmat,
+                                            framework::Tensor *weight,
+                                            const framework::Tensor &input) {
+  MatrixBitCodeFunctorMulGradWeight<T> func(tmat, weight, input);
+  code_table_.apply_visitor(func);
 }

 template <typename T>
-void MatrixBitCodeFunctor<T>::MulGradWeight(const framework::Tensor& tmat,
-                                            framework::SelectedRows* weight,
-                                            const framework::Tensor& input) {
-  auto blas =
-      GetBlas<platform::CPUDeviceContext, T>(platform::CPUDeviceContext());
-  size_t num_samples = tmat.dims()[0];
-  size_t input_width = input.dims()[1];
-  size_t tmat_width = tmat.dims()[1];
-  size_t weight_width = weight->value().dims()[1];
-  auto tmat_value = tmat.data<T>();
-  auto weight_value = weight->mutable_value()->data<T>();
-  auto input_value = input.data<T>();
-
-  std::unordered_map<int, std::vector<std::pair<T, const T*>>> ops;
-  ops.reserve(weight->rows().size());
-
-  for (size_t i = 0; i < num_samples; ++i) {
-    auto code = code_table_->get_code(i);
-    int code_length = code->get_length();
-    const T* input_value_row = input_value + input_width * i;
-    const T* tmat_row = tmat_value + i * tmat_width;
-    for (int j = 0; j < code_length; ++j) {
-      ops[code->calc_index(j)].emplace_back(tmat_row[j], input_value_row);
+struct MatrixBitCodeFunctorMulGradWeightSR
+    : public boost::static_visitor<void> {
+  const framework::Tensor &tmat_;
+  framework::SelectedRows *weight_;
+  const framework::Tensor &input_;
+
+  MatrixBitCodeFunctorMulGradWeightSR(const framework::Tensor &tmat,
+                                      framework::SelectedRows *weight,
+                                      const framework::Tensor &input)
+      : tmat_(tmat), weight_(weight), input_(input) {}
+
+  template <typename CodeTable>
+  void operator()(const CodeTable &code_table) {
+    auto blas =
+        GetBlas<platform::CPUDeviceContext, T>(platform::CPUDeviceContext());
+    size_t num_samples = tmat_.dims()[0];
+    size_t input_width = input_.dims()[1];
+    size_t tmat_width = tmat_.dims()[1];
+    size_t weight_width = weight_->value().dims()[1];
+    auto tmat_value = tmat_.data<T>();
+    auto weight_value = weight_->mutable_value()->data<T>();
+    auto input_value = input_.data<T>();
+
+    std::unordered_map<int, std::vector<std::pair<T, const T *>>> ops;
+    ops.reserve(weight_->rows().size());
+
+    for (size_t i = 0; i < num_samples; ++i) {
+      auto code = code_table.get_code(i);
+      int code_length = code.get_length();
+      const T *input_value_row = input_value + input_width * i;
+      const T *tmat_row = tmat_value + i * tmat_width;
+      for (int j = 0; j < code_length; ++j) {
+        ops[code.calc_index(j)].emplace_back(tmat_row[j], input_value_row);
+      }
     }
-  }
-
-  for (auto& row : weight->rows()) {
-    auto& op_in_row = ops[row];
-    for (auto& pair : op_in_row) {
-      auto& scale = pair.first;
-      auto* input_row = pair.second;
-      blas.AXPY(input_width, scale, input_row, weight_value);
+
+    for (auto &row : weight_->rows()) {
+      auto &op_in_row = ops[row];
+      for (auto &pair : op_in_row) {
+        auto &scale = pair.first;
+        auto *input_row = pair.second;
+        blas.AXPY(input_width, scale, input_row, weight_value);
+      }
+      weight_value += weight_width;
     }
-    weight_value += weight_width;
   }
+};
+
+template <typename T>
+void MatrixBitCodeFunctor<T>::MulGradWeight(const framework::Tensor &tmat,
+                                            framework::SelectedRows *weight,
+                                            const framework::Tensor &input) {
+  MatrixBitCodeFunctorMulGradWeightSR<T> func(tmat, weight, input);
+  code_table_.apply_visitor(func);
 }

 template <typename T>
-void MatrixBitCodeFunctor<T>::MulGradError(const framework::Tensor& tmat,
-                                           const framework::Tensor& weight,
-                                           framework::Tensor* input) {
-  size_t num_samples = tmat.dims()[0];
-  size_t tmat_width = tmat.dims()[1];
-  size_t input_width = input->dims()[1];
-  size_t weight_width = weight.dims()[1];
-  auto tmat_value = tmat.data<T>();
-  auto weight_value = weight.data<T>();
-  auto input_value = input->data<T>();
-  for (size_t i = 0; i < num_samples; ++i) {
-    auto code = code_table_->get_code(i);
-    int code_length = code->get_length();
-    for (int j = 0; j < code_length; ++j) {
-      size_t index = code->calc_index(j);
-
-      for (size_t k = 0; k < input_width; ++k) {
-        input_value[input_width * i + k] +=
-            tmat_value[i * tmat_width + j] *
-            weight_value[weight_width * index + k];
+struct MatrixBitCodeFunctorMulGradError : public boost::static_visitor<void> {
+  const framework::Tensor &tmat_;
+  const framework::Tensor &weight_;
+  framework::Tensor *input_;
+
+  MatrixBitCodeFunctorMulGradError(const framework::Tensor &tmat,
+                                   const framework::Tensor &weight,
+                                   framework::Tensor *input)
+      : tmat_(tmat), weight_(weight), input_(input) {}
+
+  template <typename CodeTable>
+  void operator()(const CodeTable &code_table) {
+    size_t num_samples = tmat_.dims()[0];
+    size_t tmat_width = tmat_.dims()[1];
+    size_t input_width = input_->dims()[1];
+    size_t weight_width = weight_.dims()[1];
+    auto tmat_value = tmat_.data<T>();
+    auto weight_value = weight_.data<T>();
+    auto input_value = input_->data<T>();
+
+    for (size_t i = 0; i < num_samples; ++i) {
+      auto code = code_table.get_code(i);
+      int code_length = code.get_length();
+      for (int j = 0; j < code_length; ++j) {
+        size_t index = code.calc_index(j);
+        for (size_t k = 0; k < input_width; ++k) {
+          input_value[input_width * i + k] +=
+              tmat_value[i * tmat_width + j] *
+              weight_value[weight_width * index + k];
+        }
       }
     }
   }
+};
+
+template <typename T>
+void MatrixBitCodeFunctor<T>::MulGradError(const framework::Tensor &tmat,
+                                           const framework::Tensor &weight,
+                                           framework::Tensor *input) {
+  MatrixBitCodeFunctorMulGradError<T> func(tmat, weight, input);
+  code_table_.apply_visitor(func);
 }

 template <typename T>
-void MatrixBitCodeFunctor<T>::Sub(framework::Tensor* tmat) {
-  size_t num_samples = tmat->dims()[0];
-  size_t o_width = tmat->dims()[1];
-  for (size_t i = 0; i < num_samples; ++i) {
-    auto code = code_table_->get_code(i);
-    int code_length = code->get_length();
-    for (int j = 0; j < code_length; ++j) {
-      if (code->calc_bit(j)) {
-        tmat->data<T>()[i * o_width + j] -= 1;
+struct MatrixBitCodeFunctorSub : public boost::static_visitor<void> {
+  framework::Tensor *tmat_;
+
+  explicit MatrixBitCodeFunctorSub(framework::Tensor *tmat) : tmat_(tmat) {}
+
+  template <typename CodeTable>
+  void operator()(const CodeTable &code_table) {
+    size_t num_samples = tmat_->dims()[0];
+    size_t o_width = tmat_->dims()[1];
+    auto *tmat_data = tmat_->data<T>();
+    for (size_t i = 0; i < num_samples; ++i) {
+      auto code = code_table.get_code(i);
+      int code_length = code.get_length();
+      for (int j = 0; j < code_length; ++j) {
+        if (code.calc_bit(j)) {
+          tmat_data[i * o_width + j] -= 1;
+        }
       }
     }
   }
+};
+
+template <typename T>
+void MatrixBitCodeFunctor<T>::Sub(framework::Tensor *tmat) {
+  MatrixBitCodeFunctorSub<T> func(tmat);
+  code_table_.apply_visitor(func);
 }

 template class MatrixBitCodeFunctor<float>;
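The common thread in this file is devirtualization: instead of one virtual call per code and per bit through Code*/CodeTable*, each functor is applied once with boost::variant::apply_visitor, and inside operator() the concrete table type is a template parameter, so get_code can return by value and its methods can be inlined in the hot loops. A toy sketch of the pattern (requires Boost; toy types, not Paddle's):

    #include <boost/variant.hpp>
    #include <cstdio>

    struct SimpleTable {
      int get_code(int i) const { return i * 2; }
    };
    struct CustomTable {
      int get_code(int i) const { return i * 3; }
    };

    using Table = boost::variant<SimpleTable, CustomTable>;

    struct SumCodes : public boost::static_visitor<void> {
      int n_;
      int result_ = 0;
      explicit SumCodes(int n) : n_(n) {}

      template <typename CodeTable>
      void operator()(const CodeTable& table) {  // statically typed: no vtable
        for (int i = 0; i < n_; ++i) result_ += table.get_code(i);
      }
    };

    int main() {
      Table t = CustomTable{};
      SumCodes visitor(4);
      t.apply_visitor(visitor);              // one dispatch for the whole loop
      std::printf("%d\n", visitor.result_);  // 0 + 3 + 6 + 9 = 18
      return 0;
    }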
paddle/fluid/operators/math/matrix_bit_code.h @ 2803cf57

@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #pragma once
+#include <map>
 #include <unordered_map>
 #include <utility>
 #include <vector>

@@ -22,6 +23,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/tensor.h"
 #include "paddle/fluid/operators/math/blas.h"
 #include "paddle/fluid/platform/device_context.h"
+#include "paddle/fluid/platform/variant.h"

 #if defined(_WIN32)
 #include <intrin.h>

@@ -98,24 +100,7 @@ inline int clz(const T& value) {
 inline size_t FindLastSet(size_t x) { return sizeof(size_t) * 8 - clz(x); }
 #endif  // !_WIN32

-// set a code interface to create multiple code
-class Code {
- public:
-  virtual ~Code() {}
-  virtual size_t calc_index(int bit) const = 0;
-  virtual bool calc_bit(int bit) const = 0;
-  virtual int get_length() const = 0;
-};
-
-// set a CodeTable interface to create multiple code table
-class CodeTable {
- public:
-  virtual std::unique_ptr<Code> get_code(int64_t code) const = 0;
-  virtual size_t size() const = 0;
-  virtual int get_max_code_length() const = 0;
-  virtual ~CodeTable() {}
-};
-
-class SimpleCode : public Code {
+class SimpleCode {
  public:
   SimpleCode(size_t code, size_t num_classes, const int64_t* ids)
       : c_(static_cast<size_t>(ids[code]) + num_classes) {}

@@ -137,16 +122,16 @@ class SimpleCode : public Code {
 };

 template <typename T>
-class CustomCode : public Code {
+class CustomCode {
  public:
   CustomCode(const framework::Tensor& ptable, const framework::Tensor& pcode,
-             const int64_t* ids, int index)
-      : ids_(ids), index_(index) {
-    ptable_ = ptable.Slice(index, index + 1);
-    pcode_ = pcode.Slice(index, index + 1);
+             const int64_t* ids, int index) {
+    seq_len_ = ptable.dims()[1];
+    ptable_data_ = ptable.data<T>() + seq_len_ * index;
+    pcode_data_ = pcode.data<T>() + seq_len_ * index;
   }
   /**
-   * Here the id of root shoud be 1 rather than 0, thus the encoding of class c
+   * Here the id of root should be 1 rather than 0, thus the encoding of class c
    * is `c + num_classes` and all siblings can get the same weight indice using
    * prefixes.
    * Weight index is the prefixes of encoding, thus leave out the right most

@@ -154,36 +139,37 @@ class CustomCode : public Code {
    * Binary classification path is the suffixes of encoding, thus leave out the
    * left most bit in calc_bit.
    */
-  size_t calc_index(int bit) const { return ptable_.data<T>()[bit]; }
-  bool calc_bit(int bit) const { return pcode_.data<T>()[bit]; }
-  int get_length() const {
-    int length = 0;
-
-    for (int i = 0; i < static_cast<int>(ptable_.dims()[1]); i++) {
-      if (ptable_.data<T>()[i] >= 0) {
-        length++;
-      } else {
-        return length;
-      }
-    }
-    return length;
-  }
+  size_t calc_index(int bit) const { return ptable_data_[bit]; }
+  bool calc_bit(int bit) const { return pcode_data_[bit]; }
+
+  // NOTE: this function is not thread-safe.
+  int get_length() const {
+    if (length_ < 0) {
+      auto len = seq_len_;
+      length_ =
+          static_cast<int>(std::find_if(ptable_data_, ptable_data_ + len,
+                                        [](const T& val) { return val < 0; }) -
+                           ptable_data_);
+    }
+    return length_;
+  }

  private:
-  framework::Tensor ptable_;
-  framework::Tensor pcode_;
-  const int64_t* ids_;
-  const int index_;
+  int64_t seq_len_;
+  const T* ptable_data_;
+  const T* pcode_data_;
+  mutable int length_{-1};
 };

-class SimpleCodeTable : public CodeTable {
+class SimpleCodeTable {
  public:
   SimpleCodeTable(size_t num_classes, const int64_t* ids)
       : num_classes_(num_classes), ids_(ids) {}
-  std::unique_ptr<Code> get_code(int64_t code) const {
-    std::unique_ptr<Code> coder(new SimpleCode(code, num_classes_, ids_));
-    return coder;
-  }
+
+  SimpleCode get_code(int64_t code) const {
+    return SimpleCode(code, num_classes_, ids_);
+  }
+
   size_t size() const { return num_classes_; }
   int get_max_code_length() const { return FindLastSet(num_classes_ - 1); }

@@ -193,15 +179,14 @@ class SimpleCodeTable : public CodeTable {
 };

 template <typename T>
-class CustomCodeTable : public CodeTable {
+class CustomCodeTable {
  public:
   CustomCodeTable(const framework::Tensor& ptable,
                   const framework::Tensor& pcode, const int64_t* ids)
       : ptable_(ptable), pcode_(pcode), ids_(ids) {}

-  std::unique_ptr<Code> get_code(int64_t code) const {
-    std::unique_ptr<Code> coder(new CustomCode<T>(ptable_, pcode_, ids_, code));
-    return coder;
-  }
+  CustomCode<T> get_code(int64_t code) const {
+    return CustomCode<T>(ptable_, pcode_, ids_, code);
+  }

   size_t size() const { return static_cast<size_t>(ptable_.dims()[1]); }

@@ -215,19 +200,21 @@ class CustomCodeTable : public CodeTable {
   const int64_t* ids_;
 };

+using CodeTable = boost::variant<SimpleCodeTable, CustomCodeTable<int64_t>>;
+
 template <typename T>
 class MatrixBitCodeFunctor {
  public:
   MatrixBitCodeFunctor(size_t num_classes, const int64_t* ids)
       : num_classes_(num_classes),
         ids_(ids),
-        code_table_(new SimpleCodeTable(num_classes, ids)) {}
+        code_table_(SimpleCodeTable(num_classes, ids)) {}

   MatrixBitCodeFunctor(const framework::Tensor& ptable,
                        const framework::Tensor& pcode, const int64_t* ids)
       : num_classes_(static_cast<size_t>(ptable.dims()[1])),
         ids_(ids),
-        code_table_(new CustomCodeTable<int64_t>(ptable, pcode, ids)) {}
+        code_table_(CustomCodeTable<int64_t>(ptable, pcode, ids)) {}
   /* For j < code_length
        tmat(i, j) += vec(0, index(i, j))
    */

@@ -277,7 +264,7 @@ class MatrixBitCodeFunctor {
   size_t num_classes_;
   const int64_t* ids_;
-  std::unique_ptr<CodeTable> code_table_;
+  CodeTable code_table_;
 };
 }  // namespace math
 }  // namespace operators
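CustomCode now keeps raw pointers into the ptable/pcode tensors and memoizes get_length in a mutable member, replacing a per-call scan with one std::find_if on first use; the NOTE in the diff flags the lazy write as not thread-safe. A small sketch of that memoization (toy data, not Paddle's tensor types):

    #include <algorithm>
    #include <cstdint>
    #include <cstdio>

    // Code length = length of the leading run of non-negative entries,
    // computed once and cached, as in CustomCode::get_length above.
    template <typename T>
    class CachedCode {
     public:
      CachedCode(const T* data, int64_t seq_len)
          : ptable_data_(data), seq_len_(seq_len) {}

      int get_length() const {
        if (length_ < 0) {  // first call: scan for the first negative entry
          length_ = static_cast<int>(
              std::find_if(ptable_data_, ptable_data_ + seq_len_,
                           [](const T& val) { return val < 0; }) -
              ptable_data_);
        }
        return length_;  // later calls hit the cache (not thread-safe)
      }

     private:
      const T* ptable_data_;
      int64_t seq_len_;
      mutable int length_{-1};
    };

    int main() {
      const int64_t table[6] = {5, 3, 8, -1, -1, -1};
      CachedCode<int64_t> code(table, 6);
      std::printf("%d %d\n", code.get_length(), code.get_length());  // 3 3
      return 0;
    }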
paddle/fluid/platform/dynload/mklml.h @ 2803cf57

@@ -82,6 +82,8 @@ extern void* mklml_dso_handle;
   __macro(vdSqr);                 \
   __macro(vsPowx);                \
   __macro(vdPowx);                \
+  __macro(vsInv);                 \
+  __macro(vdInv);                 \
   __macro(MKL_Set_Num_Threads)

 MKLML_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_MKLML_WRAP);
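MKLML_ROUTINE_EACH is an X-macro: each symbol listed once in the routine list gets a dynamic-load wrapper generated wherever the list is expanded, which is why exposing vsInv/vdInv is a two-line diff. A toy illustration of the idiom (hypothetical stub generator, not Paddle's actual loader macro):

    #include <cstdio>

    // The routine list: add a name here and every expansion site picks it up.
    #define ROUTINE_EACH(__macro) \
      __macro(vsInv);             \
      __macro(vdInv)

    // One possible expansion: generate a stub per routine. Paddle's
    // DECLARE_DYNAMIC_LOAD_MKLML_WRAP instead resolves each name from the
    // MKLML shared library at first call.
    #define DECLARE_STUB(__name) \
      void __name() { std::printf("resolved symbol: %s\n", #__name); }

    ROUTINE_EACH(DECLARE_STUB);

    int main() {
      vsInv();  // each listed routine now exists as a callable wrapper
      vdInv();
      return 0;
    }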