Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
15550a27
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
15550a27
编写于
12月 13, 2018
作者:
Y
Yu Yang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Polish code
上级
9e0b33d7
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
340 addition
and
215 deletion
+340
-215
cmake/external/python.cmake
cmake/external/python.cmake
+3
-3
paddle/fluid/operators/math/matrix_bit_code.cc
paddle/fluid/operators/math/matrix_bit_code.cc
+320
-166
paddle/fluid/operators/math/matrix_bit_code.h
paddle/fluid/operators/math/matrix_bit_code.h
+17
-46
未找到文件。
cmake/external/python.cmake
浏览文件 @
15550a27
...
...
@@ -18,8 +18,8 @@ ENDIF()
INCLUDE
(
python_module
)
FIND_PACKAGE
(
PythonInterp
${
PY_VERSION
}
)
FIND_PACKAGE
(
PythonLibs
${
PY_VERSION
}
)
FIND_PACKAGE
(
PythonInterp
${
PY_VERSION
}
REQUIRED
)
FIND_PACKAGE
(
PythonLibs
${
PY_VERSION
}
REQUIRED
)
if
(
WIN32
)
execute_process
(
COMMAND
"
${
PYTHON_EXECUTABLE
}
"
"-c"
...
...
@@ -79,6 +79,6 @@ IF(PYTHONINTERP_FOUND)
"please use pip to upgrade protobuf. pip install -U protobuf"
)
ENDIF
()
ENDIF
(
PYTHONINTERP_FOUND
)
message
(
STATUS
${
PYTHON_INCLUDE_DIR
}
)
INCLUDE_DIRECTORIES
(
${
PYTHON_INCLUDE_DIR
}
)
INCLUDE_DIRECTORIES
(
${
PYTHON_NUMPY_INCLUDE_DIR
}
)
paddle/fluid/operators/math/matrix_bit_code.cc
浏览文件 @
15550a27
...
...
@@ -15,225 +15,379 @@ limitations under the License. */
#include "paddle/fluid/operators/math/matrix_bit_code.h"
#include <iostream>
#include <map>
namespace
paddle
{
namespace
operators
{
namespace
math
{
template
<
typename
T
>
void
MatrixBitCodeFunctor
<
T
>::
Add
(
const
framework
::
Tensor
&
vec
,
framework
::
Tensor
*
tmat
)
{
size_t
batch_size
=
tmat
->
dims
()[
0
];
size_t
width
=
tmat
->
dims
()[
1
];
auto
*
tmat_data
=
tmat
->
data
<
T
>
();
auto
*
vec_data
=
vec
.
data
<
T
>
();
for
(
size_t
i
=
0
;
i
<
batch_size
;
++
i
)
{
auto
code
=
code_table_
->
get_code
(
i
);
int
code_length
=
code
->
get_length
();
for
(
int
j
=
0
;
j
<
code_length
;
++
j
)
{
size_t
index
=
code
->
calc_index
(
j
);
tmat_data
[
i
*
width
+
j
]
+=
vec_data
[
index
];
struct
MatrixBitCodeFunctorAdd
:
public
boost
::
static_visitor
<
void
>
{
const
framework
::
Tensor
&
vec_
;
framework
::
Tensor
*
tmat_
;
MatrixBitCodeFunctorAdd
(
const
framework
::
Tensor
&
vec
,
framework
::
Tensor
*
tmat
)
:
vec_
(
vec
),
tmat_
(
tmat
)
{}
template
<
typename
CodeTable
>
void
operator
()(
const
CodeTable
&
code_table
)
{
size_t
batch_size
=
tmat_
->
dims
()[
0
];
size_t
width
=
tmat_
->
dims
()[
1
];
auto
*
tmat_data
=
tmat_
->
data
<
T
>
();
auto
*
vec_data
=
vec_
.
data
<
T
>
();
for
(
size_t
i
=
0
;
i
<
batch_size
;
++
i
)
{
auto
code
=
code_table
.
get_code
(
i
);
int
code_length
=
code
.
get_length
();
for
(
int
j
=
0
;
j
<
code_length
;
++
j
)
{
size_t
index
=
code
.
calc_index
(
j
);
tmat_data
[
i
*
width
+
j
]
+=
vec_data
[
index
];
}
}
}
};
template
<
typename
T
>
void
MatrixBitCodeFunctor
<
T
>::
Add
(
const
framework
::
Tensor
&
vec
,
framework
::
Tensor
*
tmat
)
{
MatrixBitCodeFunctorAdd
<
T
>
func
(
vec
,
tmat
);
code_table_
.
apply_visitor
(
func
);
}
template
<
typename
T
>
void
MatrixBitCodeFunctor
<
T
>::
AddGrad
(
const
framework
::
Tensor
&
tmat
,
framework
::
Tensor
*
vec
)
{
size_t
batch_size
=
tmat
.
dims
()[
0
];
size_t
width
=
tmat
.
dims
()[
1
];
auto
*
vec_data
=
vec
->
data
<
T
>
();
auto
*
tmat_data
=
tmat
.
data
<
T
>
();
for
(
size_t
i
=
0
;
i
<
batch_size
;
++
i
)
{
auto
code
=
code_table_
->
get_code
(
i
);
int
code_length
=
code
->
get_length
();
for
(
int
j
=
0
;
j
<
code_length
;
++
j
)
{
size_t
index
=
code
->
calc_index
(
j
);
vec_data
[
index
]
+=
tmat_data
[
i
*
width
+
j
];
struct
MatrixBitCodeFunctorAddGrad
:
public
boost
::
static_visitor
<
void
>
{
const
framework
::
Tensor
&
tmat_
;
framework
::
Tensor
*
vec_
;
MatrixBitCodeFunctorAddGrad
(
const
framework
::
Tensor
&
tmat
,
framework
::
Tensor
*
vec
)
:
tmat_
(
tmat
),
vec_
(
vec
)
{}
template
<
typename
CodeTable
>
void
operator
()(
const
CodeTable
&
table
)
{
size_t
batch_size
=
tmat_
.
dims
()[
0
];
size_t
width
=
tmat_
.
dims
()[
1
];
auto
*
vec_data
=
vec_
->
data
<
T
>
();
auto
*
tmat_data
=
tmat_
.
data
<
T
>
();
for
(
size_t
i
=
0
;
i
<
batch_size
;
++
i
)
{
auto
code
=
table
.
get_code
(
i
);
int
code_length
=
code
.
get_length
();
for
(
int
j
=
0
;
j
<
code_length
;
++
j
)
{
size_t
index
=
code
.
calc_index
(
j
);
vec_data
[
index
]
+=
tmat_data
[
i
*
width
+
j
];
}
}
}
};
template
<
typename
T
>
void
MatrixBitCodeFunctor
<
T
>::
AddGrad
(
const
framework
::
Tensor
&
tmat
,
framework
::
Tensor
*
vec
)
{
MatrixBitCodeFunctorAddGrad
<
T
>
func
(
tmat
,
vec
);
code_table_
.
apply_visitor
(
func
);
}
template
<
typename
T
>
void
MatrixBitCodeFunctor
<
T
>::
AddGrad
(
const
framework
::
Tensor
&
tmat
,
framework
::
SelectedRows
*
vec
)
{
size_t
batch_size
=
tmat
.
dims
()[
0
];
size_t
width
=
tmat
.
dims
()[
1
];
auto
*
vec_data
=
vec
->
mutable_value
()
->
data
<
T
>
();
auto
*
tmat_data
=
tmat
.
data
<
T
>
();
for
(
size_t
i
=
0
;
i
<
batch_size
;
++
i
)
{
auto
code
=
code_table_
->
get_code
(
i
);
int
code_length
=
code
->
get_length
();
for
(
int
j
=
0
;
j
<
code_length
;
++
j
)
{
size_t
index
=
code
->
calc_index
(
j
);
int64_t
row_index
=
vec
->
GetIndexFromId
(
static_cast
<
int64_t
>
(
index
));
vec_data
[
row_index
]
+=
tmat_data
[
i
*
width
+
j
];
struct
MatrixBitCodeFunctorSelectedRowsAddGrad
:
public
boost
::
static_visitor
<
void
>
{
const
framework
::
Tensor
&
tmat_
;
framework
::
SelectedRows
*
vec_
;
MatrixBitCodeFunctorSelectedRowsAddGrad
(
const
framework
::
Tensor
&
tmat
,
framework
::
SelectedRows
*
vec
)
:
tmat_
(
tmat
),
vec_
(
vec
)
{}
template
<
typename
CodeTable
>
void
operator
()(
const
CodeTable
&
code_table
)
{
size_t
batch_size
=
tmat_
.
dims
()[
0
];
size_t
width
=
tmat_
.
dims
()[
1
];
auto
*
vec_data
=
vec_
->
mutable_value
()
->
template
data
<
T
>();
auto
*
tmat_data
=
tmat_
.
data
<
T
>
();
for
(
size_t
i
=
0
;
i
<
batch_size
;
++
i
)
{
auto
code
=
code_table
.
get_code
(
i
);
int
code_length
=
code
.
get_length
();
for
(
int
j
=
0
;
j
<
code_length
;
++
j
)
{
size_t
index
=
code
.
calc_index
(
j
);
int64_t
row_index
=
vec_
->
GetIndexFromId
(
static_cast
<
int64_t
>
(
index
));
vec_data
[
row_index
]
+=
tmat_data
[
i
*
width
+
j
];
}
}
}
};
template
<
typename
T
>
void
MatrixBitCodeFunctor
<
T
>::
AddGrad
(
const
framework
::
Tensor
&
tmat
,
framework
::
SelectedRows
*
vec
)
{
MatrixBitCodeFunctorSelectedRowsAddGrad
<
T
>
func
(
tmat
,
vec
);
code_table_
.
apply_visitor
(
func
);
}
template
<
typename
T
>
void
MatrixBitCodeFunctor
<
T
>::
Sum
(
const
framework
::
Tensor
&
tmat
,
framework
::
Tensor
*
sum
,
T
scale_sum
)
{
size_t
num_samples
=
tmat
.
dims
()[
0
];
size_t
o_width
=
tmat
.
dims
()[
1
];
auto
*
tmat_data
=
tmat
.
data
<
T
>
();
auto
*
sum_data
=
sum
->
data
<
T
>
();
for
(
size_t
i
=
0
;
i
<
num_samples
;
++
i
)
{
T
sm
=
static_cast
<
T
>
(
0.0
);
auto
code
=
code_table_
->
get_code
(
i
);
int
code_length
=
code
->
get_length
();
for
(
int
j
=
0
;
j
<
code_length
;
++
j
)
{
if
(
code
->
calc_bit
(
j
))
{
// calc_bit starts from right most bit, while data in tmat[i] is in the
// reverse order.
sm
+=
tmat_data
[
i
*
o_width
+
j
];
struct
MatrixBitCodeFunctorSum
:
public
boost
::
static_visitor
<
void
>
{
const
framework
::
Tensor
&
tmat_
;
framework
::
Tensor
*
sum_
;
T
scale_sum_
;
MatrixBitCodeFunctorSum
(
const
framework
::
Tensor
&
tmat
,
framework
::
Tensor
*
sum
,
T
scale_sum
)
:
tmat_
(
tmat
),
sum_
(
sum
),
scale_sum_
(
scale_sum
)
{}
template
<
typename
CodeTable
>
void
operator
()(
const
CodeTable
&
code_table
)
{
size_t
num_samples
=
tmat_
.
dims
()[
0
];
size_t
o_width
=
tmat_
.
dims
()[
1
];
auto
*
tmat_data
=
tmat_
.
data
<
T
>
();
auto
*
sum_data
=
sum_
->
data
<
T
>
();
for
(
size_t
i
=
0
;
i
<
num_samples
;
++
i
)
{
T
sm
=
static_cast
<
T
>
(
0.0
);
auto
code
=
code_table
.
get_code
(
i
);
int
code_length
=
code
.
get_length
();
for
(
int
j
=
0
;
j
<
code_length
;
++
j
)
{
if
(
code
.
calc_bit
(
j
))
{
// calc_bit starts from right most bit, while data in tmat[i] is in
// the
// reverse order.
sm
+=
tmat_data
[
i
*
o_width
+
j
];
}
}
sum_data
[
i
]
=
scale_sum_
*
sm
;
}
sum_data
[
i
]
=
scale_sum
*
sm
;
}
};
template
<
typename
T
>
void
MatrixBitCodeFunctor
<
T
>::
Sum
(
const
framework
::
Tensor
&
tmat
,
framework
::
Tensor
*
sum
,
T
scale_sum
)
{
MatrixBitCodeFunctorSum
<
T
>
func
(
tmat
,
sum
,
scale_sum
);
code_table_
.
apply_visitor
(
func
);
}
template
<
typename
T
>
void
MatrixBitCodeFunctor
<
T
>::
Mul
(
framework
::
Tensor
*
tmat
,
const
framework
::
Tensor
&
weight
,
const
framework
::
Tensor
&
input
)
{
auto
blas
=
GetBlas
<
platform
::
CPUDeviceContext
,
T
>
(
platform
::
CPUDeviceContext
());
size_t
num_samples
=
tmat
->
dims
()[
0
];
size_t
tmat_width
=
tmat
->
dims
()[
1
];
size_t
input_width
=
input
.
dims
()[
1
];
size_t
weight_width
=
weight
.
dims
()[
1
];
auto
tmat_value
=
tmat
->
data
<
T
>
();
auto
weight_value
=
weight
.
data
<
T
>
();
auto
input_value
=
input
.
data
<
T
>
();
for
(
size_t
i
=
0
;
i
<
num_samples
;
++
i
)
{
auto
code
=
code_table_
->
get_code
(
i
);
int
code_length
=
code
->
get_length
();
const
T
*
input_row
=
input_value
+
input_width
*
i
;
for
(
int
j
=
0
;
j
<
code_length
;
++
j
)
{
size_t
index
=
code
->
calc_index
(
j
);
const
T
*
weight_row
=
weight_value
+
weight_width
*
index
;
T
sum
=
static_cast
<
T
>
(
0.0
);
sum
=
blas
.
DOT
(
input_width
,
weight_row
,
input_row
);
tmat_value
[
i
*
tmat_width
+
j
]
+=
sum
;
struct
MatrixBitCodeFunctorMul
:
public
boost
::
static_visitor
<
void
>
{
framework
::
Tensor
*
tmat_
;
const
framework
::
Tensor
&
weight_
;
const
framework
::
Tensor
&
input_
;
MatrixBitCodeFunctorMul
(
framework
::
Tensor
*
tmat
,
const
framework
::
Tensor
&
weight
,
const
framework
::
Tensor
&
input
)
:
tmat_
(
tmat
),
weight_
(
weight
),
input_
(
input
)
{}
template
<
typename
CodeTable
>
void
operator
()(
const
CodeTable
&
code_table
)
{
auto
blas
=
GetBlas
<
platform
::
CPUDeviceContext
,
T
>
(
platform
::
CPUDeviceContext
());
size_t
num_samples
=
tmat_
->
dims
()[
0
];
size_t
tmat_width
=
tmat_
->
dims
()[
1
];
size_t
input_width
=
input_
.
dims
()[
1
];
size_t
weight_width
=
weight_
.
dims
()[
1
];
auto
tmat_value
=
tmat_
->
data
<
T
>
();
auto
weight_value
=
weight_
.
data
<
T
>
();
auto
input_value
=
input_
.
data
<
T
>
();
for
(
size_t
i
=
0
;
i
<
num_samples
;
++
i
)
{
auto
code
=
code_table
.
get_code
(
i
);
int
code_length
=
code
.
get_length
();
const
T
*
input_row
=
input_value
+
input_width
*
i
;
for
(
int
j
=
0
;
j
<
code_length
;
++
j
)
{
size_t
index
=
code
.
calc_index
(
j
);
const
T
*
weight_row
=
weight_value
+
weight_width
*
index
;
T
sum
=
blas
.
DOT
(
input_width
,
weight_row
,
input_row
);
tmat_value
[
i
*
tmat_width
+
j
]
+=
sum
;
}
}
}
};
template
<
typename
T
>
void
MatrixBitCodeFunctor
<
T
>::
Mul
(
framework
::
Tensor
*
tmat
,
const
framework
::
Tensor
&
weight
,
const
framework
::
Tensor
&
input
)
{
MatrixBitCodeFunctorMul
<
T
>
func
(
tmat
,
weight
,
input
);
code_table_
.
apply_visitor
(
func
);
}
template
<
typename
T
,
size_t
N
>
class
ReservedVector
:
public
std
::
vector
<
T
>
{
public:
ReservedVector
()
{
this
->
reserve
(
N
);
}
};
template
<
typename
T
>
void
MatrixBitCodeFunctor
<
T
>::
MulGradWeight
(
const
framework
::
Tensor
&
tmat
,
framework
::
Tensor
*
weight
,
const
framework
::
Tensor
&
input
)
{
auto
blas
=
GetBlas
<
platform
::
CPUDeviceContext
,
T
>
(
platform
::
CPUDeviceContext
());
size_t
num_samples
=
tmat
.
dims
()[
0
];
size_t
input_width
=
input
.
dims
()[
1
];
size_t
tmat_width
=
tmat
.
dims
()[
1
];
size_t
weight_width
=
weight
->
dims
()[
1
];
auto
tmat_value
=
tmat
.
data
<
T
>
();
auto
weight_value
=
weight
->
data
<
T
>
();
auto
input_value
=
input
.
data
<
T
>
();
std
::
map
<
int
,
std
::
vector
<
std
::
pair
<
T
,
const
T
*>>>
ops
;
for
(
size_t
i
=
0
;
i
<
num_samples
;
++
i
)
{
auto
code
=
code_table_
->
get_code
(
i
);
int
code_length
=
code
->
get_length
();
const
T
*
input_value_row
=
input_value
+
input_width
*
i
;
const
T
*
tmat_row
=
tmat_value
+
i
*
tmat_width
;
for
(
int
j
=
0
;
j
<
code_length
;
++
j
)
{
ops
[
code
->
calc_index
(
j
)].
emplace_back
(
tmat_row
[
j
],
input_value_row
);
struct
MatrixBitCodeFunctorMulGradWeight
:
public
boost
::
static_visitor
<
void
>
{
const
framework
::
Tensor
&
tmat_
;
framework
::
Tensor
*
weight_
;
const
framework
::
Tensor
&
input_
;
MatrixBitCodeFunctorMulGradWeight
(
const
framework
::
Tensor
&
tmat
,
framework
::
Tensor
*
weight
,
const
framework
::
Tensor
&
input
)
:
tmat_
(
tmat
),
weight_
(
weight
),
input_
(
input
)
{}
template
<
typename
CodeTable
>
void
operator
()(
const
CodeTable
&
code_table
)
{
auto
blas
=
GetBlas
<
platform
::
CPUDeviceContext
,
T
>
(
platform
::
CPUDeviceContext
());
size_t
num_samples
=
tmat_
.
dims
()[
0
];
size_t
input_width
=
input_
.
dims
()[
1
];
size_t
tmat_width
=
tmat_
.
dims
()[
1
];
size_t
weight_width
=
weight_
->
dims
()[
1
];
auto
tmat_value
=
tmat_
.
data
<
T
>
();
auto
weight_value
=
weight_
->
data
<
T
>
();
auto
input_value
=
input_
.
data
<
T
>
();
std
::
map
<
int
,
ReservedVector
<
std
::
pair
<
T
,
const
T
*>
,
8u
>>
ops
;
for
(
size_t
i
=
0
;
i
<
num_samples
;
++
i
)
{
auto
code
=
code_table
.
get_code
(
i
);
int
code_length
=
code
.
get_length
();
const
T
*
input_value_row
=
input_value
+
input_width
*
i
;
const
T
*
tmat_row
=
tmat_value
+
i
*
tmat_width
;
for
(
int
j
=
0
;
j
<
code_length
;
++
j
)
{
ops
[
code
.
calc_index
(
j
)].
emplace_back
(
tmat_row
[
j
],
input_value_row
);
}
}
}
for
(
auto
&
op
:
ops
)
{
auto
&
op_in_row
=
op
.
second
;
for
(
auto
&
pair
:
op_in_row
)
{
auto
&
scale
=
pair
.
first
;
auto
*
input_row
=
pair
.
second
;
T
*
weight_row
=
weight_value
+
op
.
first
*
weight_width
;
blas
.
AXPY
(
input_width
,
scale
,
input_row
,
weight_row
);
for
(
auto
&
op
:
ops
)
{
auto
&
op_in_row
=
op
.
second
;
for
(
auto
&
pair
:
op_in_row
)
{
auto
&
scale
=
pair
.
first
;
auto
*
input_row
=
pair
.
second
;
T
*
weight_row
=
weight_value
+
op
.
first
*
weight_width
;
blas
.
AXPY
(
input_width
,
scale
,
input_row
,
weight_row
)
;
}
}
}
};
template
<
typename
T
>
void
MatrixBitCodeFunctor
<
T
>::
MulGradWeight
(
const
framework
::
Tensor
&
tmat
,
framework
::
Tensor
*
weight
,
const
framework
::
Tensor
&
input
)
{
MatrixBitCodeFunctorMulGradWeight
<
T
>
func
(
tmat
,
weight
,
input
);
code_table_
.
apply_visitor
(
func
);
}
template
<
typename
T
>
void
MatrixBitCodeFunctor
<
T
>::
MulGradWeight
(
const
framework
::
Tensor
&
tmat
,
framework
::
SelectedRows
*
weight
,
const
framework
::
Tensor
&
input
)
{
auto
blas
=
GetBlas
<
platform
::
CPUDeviceContext
,
T
>
(
platform
::
CPUDeviceContext
());
size_t
num_samples
=
tmat
.
dims
()[
0
];
size_t
input_width
=
input
.
dims
()[
1
];
size_t
tmat_width
=
tmat
.
dims
()[
1
];
size_t
weight_width
=
weight
->
value
().
dims
()[
1
];
auto
tmat_value
=
tmat
.
data
<
T
>
();
auto
weight_value
=
weight
->
mutable_value
()
->
data
<
T
>
();
auto
input_value
=
input
.
data
<
T
>
();
std
::
unordered_map
<
int
,
std
::
vector
<
std
::
pair
<
T
,
const
T
*>>>
ops
;
ops
.
reserve
(
weight
->
rows
().
size
());
for
(
size_t
i
=
0
;
i
<
num_samples
;
++
i
)
{
auto
code
=
code_table_
->
get_code
(
i
);
int
code_length
=
code
->
get_length
();
const
T
*
input_value_row
=
input_value
+
input_width
*
i
;
const
T
*
tmat_row
=
tmat_value
+
i
*
tmat_width
;
for
(
int
j
=
0
;
j
<
code_length
;
++
j
)
{
ops
[
code
->
calc_index
(
j
)].
emplace_back
(
tmat_row
[
j
],
input_value_row
);
struct
MatrixBitCodeFunctorMulGradWeightSR
:
public
boost
::
static_visitor
<
void
>
{
const
framework
::
Tensor
&
tmat_
;
framework
::
SelectedRows
*
weight_
;
const
framework
::
Tensor
&
input_
;
MatrixBitCodeFunctorMulGradWeightSR
(
const
framework
::
Tensor
&
tmat
,
framework
::
SelectedRows
*
weight
,
const
framework
::
Tensor
&
input
)
:
tmat_
(
tmat
),
weight_
(
weight
),
input_
(
input
)
{}
template
<
typename
CodeTable
>
void
operator
()(
const
CodeTable
&
code_table
)
{
auto
blas
=
GetBlas
<
platform
::
CPUDeviceContext
,
T
>
(
platform
::
CPUDeviceContext
());
size_t
num_samples
=
tmat_
.
dims
()[
0
];
size_t
input_width
=
input_
.
dims
()[
1
];
size_t
tmat_width
=
tmat_
.
dims
()[
1
];
size_t
weight_width
=
weight_
->
value
().
dims
()[
1
];
auto
tmat_value
=
tmat_
.
data
<
T
>
();
auto
weight_value
=
weight_
->
mutable_value
()
->
data
<
T
>
();
auto
input_value
=
input_
.
data
<
T
>
();
std
::
unordered_map
<
int
,
std
::
vector
<
std
::
pair
<
T
,
const
T
*>>>
ops
;
ops
.
reserve
(
weight_
->
rows
().
size
());
for
(
size_t
i
=
0
;
i
<
num_samples
;
++
i
)
{
auto
code
=
code_table
.
get_code
(
i
);
int
code_length
=
code
.
get_length
();
const
T
*
input_value_row
=
input_value
+
input_width
*
i
;
const
T
*
tmat_row
=
tmat_value
+
i
*
tmat_width
;
for
(
int
j
=
0
;
j
<
code_length
;
++
j
)
{
ops
[
code
.
calc_index
(
j
)].
emplace_back
(
tmat_row
[
j
],
input_value_row
);
}
}
}
for
(
auto
&
row
:
weight
->
rows
())
{
auto
&
op_in_row
=
ops
[
row
];
for
(
auto
&
pair
:
op_in_row
)
{
auto
&
scale
=
pair
.
first
;
auto
*
input_row
=
pair
.
second
;
blas
.
AXPY
(
input_width
,
scale
,
input_row
,
weight_value
);
for
(
auto
&
row
:
weight_
->
rows
())
{
auto
&
op_in_row
=
ops
[
row
];
for
(
auto
&
pair
:
op_in_row
)
{
auto
&
scale
=
pair
.
first
;
auto
*
input_row
=
pair
.
second
;
blas
.
AXPY
(
input_width
,
scale
,
input_row
,
weight_value
);
}
weight_value
+=
weight_width
;
}
weight_value
+=
weight_width
;
}
};
template
<
typename
T
>
void
MatrixBitCodeFunctor
<
T
>::
MulGradWeight
(
const
framework
::
Tensor
&
tmat
,
framework
::
SelectedRows
*
weight
,
const
framework
::
Tensor
&
input
)
{
MatrixBitCodeFunctorMulGradWeightSR
<
T
>
func
(
tmat
,
weight
,
input
);
code_table_
.
apply_visitor
(
func
);
}
template
<
typename
T
>
void
MatrixBitCodeFunctor
<
T
>::
MulGradError
(
const
framework
::
Tensor
&
tmat
,
const
framework
::
Tensor
&
weight
,
framework
::
Tensor
*
input
)
{
size_t
num_samples
=
tmat
.
dims
()[
0
];
size_t
tmat_width
=
tmat
.
dims
()[
1
];
size_t
input_width
=
input
->
dims
()[
1
];
size_t
weight_width
=
weight
.
dims
()[
1
];
auto
tmat_value
=
tmat
.
data
<
T
>
();
auto
weight_value
=
weight
.
data
<
T
>
();
auto
input_value
=
input
->
data
<
T
>
();
for
(
size_t
i
=
0
;
i
<
num_samples
;
++
i
)
{
auto
code
=
code_table_
->
get_code
(
i
);
int
code_length
=
code
->
get_length
();
for
(
int
j
=
0
;
j
<
code_length
;
++
j
)
{
size_t
index
=
code
->
calc_index
(
j
);
for
(
size_t
k
=
0
;
k
<
input_width
;
++
k
)
{
input_value
[
input_width
*
i
+
k
]
+=
tmat_value
[
i
*
tmat_width
+
j
]
*
weight_value
[
weight_width
*
index
+
k
];
struct
MatrixBitCodeFunctorMulGradError
:
public
boost
::
static_visitor
<
void
>
{
const
framework
::
Tensor
&
tmat_
;
const
framework
::
Tensor
&
weight_
;
framework
::
Tensor
*
input_
;
MatrixBitCodeFunctorMulGradError
(
const
framework
::
Tensor
&
tmat
,
const
framework
::
Tensor
&
weight
,
framework
::
Tensor
*
input
)
:
tmat_
(
tmat
),
weight_
(
weight
),
input_
(
input
)
{}
template
<
typename
CodeTable
>
void
operator
()(
const
CodeTable
&
code_table
)
{
size_t
num_samples
=
tmat_
.
dims
()[
0
];
size_t
tmat_width
=
tmat_
.
dims
()[
1
];
size_t
input_width
=
input_
->
dims
()[
1
];
size_t
weight_width
=
weight_
.
dims
()[
1
];
auto
tmat_value
=
tmat_
.
data
<
T
>
();
auto
weight_value
=
weight_
.
data
<
T
>
();
auto
input_value
=
input_
->
data
<
T
>
();
for
(
size_t
i
=
0
;
i
<
num_samples
;
++
i
)
{
auto
code
=
code_table
.
get_code
(
i
);
int
code_length
=
code
.
get_length
();
for
(
int
j
=
0
;
j
<
code_length
;
++
j
)
{
size_t
index
=
code
.
calc_index
(
j
);
for
(
size_t
k
=
0
;
k
<
input_width
;
++
k
)
{
input_value
[
input_width
*
i
+
k
]
+=
tmat_value
[
i
*
tmat_width
+
j
]
*
weight_value
[
weight_width
*
index
+
k
];
}
}
}
}
};
template
<
typename
T
>
void
MatrixBitCodeFunctor
<
T
>::
MulGradError
(
const
framework
::
Tensor
&
tmat
,
const
framework
::
Tensor
&
weight
,
framework
::
Tensor
*
input
)
{
MatrixBitCodeFunctorMulGradError
<
T
>
func
(
tmat
,
weight
,
input
);
code_table_
.
apply_visitor
(
func
);
}
template
<
typename
T
>
void
MatrixBitCodeFunctor
<
T
>::
Sub
(
framework
::
Tensor
*
tmat
)
{
size_t
num_samples
=
tmat
->
dims
()[
0
];
size_t
o_width
=
tmat
->
dims
()[
1
];
auto
*
tmat_data
=
tmat
->
data
<
T
>
();
for
(
size_t
i
=
0
;
i
<
num_samples
;
++
i
)
{
auto
code
=
code_table_
->
get_code
(
i
);
int
code_length
=
code
->
get_length
();
for
(
int
j
=
0
;
j
<
code_length
;
++
j
)
{
if
(
code
->
calc_bit
(
j
))
{
tmat_data
[
i
*
o_width
+
j
]
-=
1
;
struct
MatrixBitCodeFunctorSub
:
public
boost
::
static_visitor
<
void
>
{
framework
::
Tensor
*
tmat_
;
explicit
MatrixBitCodeFunctorSub
(
framework
::
Tensor
*
tmat
)
:
tmat_
(
tmat
)
{}
template
<
typename
CodeTable
>
void
operator
()(
const
CodeTable
&
code_table
)
{
size_t
num_samples
=
tmat_
->
dims
()[
0
];
size_t
o_width
=
tmat_
->
dims
()[
1
];
auto
*
tmat_data
=
tmat_
->
data
<
T
>
();
for
(
size_t
i
=
0
;
i
<
num_samples
;
++
i
)
{
auto
code
=
code_table
.
get_code
(
i
);
int
code_length
=
code
.
get_length
();
for
(
int
j
=
0
;
j
<
code_length
;
++
j
)
{
if
(
code
.
calc_bit
(
j
))
{
tmat_data
[
i
*
o_width
+
j
]
-=
1
;
}
}
}
}
};
template
<
typename
T
>
void
MatrixBitCodeFunctor
<
T
>::
Sub
(
framework
::
Tensor
*
tmat
)
{
MatrixBitCodeFunctorSub
<
T
>
func
(
tmat
);
code_table_
.
apply_visitor
(
func
);
}
template
class
MatrixBitCodeFunctor
<
float
>;
...
...
paddle/fluid/operators/math/matrix_bit_code.h
浏览文件 @
15550a27
...
...
@@ -23,6 +23,7 @@ limitations under the License. */
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/operators/math/blas.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/variant.h"
#if defined(_WIN32)
#include <intrin.h>
...
...
@@ -99,24 +100,7 @@ inline int clz(const T& value) {
inline
size_t
FindLastSet
(
size_t
x
)
{
return
sizeof
(
size_t
)
*
8
-
clz
(
x
);
}
#endif // !_WIN32
// set a code interface to create multiple code
class
Code
{
public:
virtual
~
Code
()
{}
virtual
size_t
calc_index
(
int
bit
)
const
=
0
;
virtual
bool
calc_bit
(
int
bit
)
const
=
0
;
virtual
int
get_length
()
const
=
0
;
};
// set a CodeTable interface to create multiple code table
class
CodeTable
{
public:
virtual
Code
*
get_code
(
int64_t
code
)
const
=
0
;
virtual
size_t
size
()
const
=
0
;
virtual
int
get_max_code_length
()
const
=
0
;
virtual
~
CodeTable
()
{}
};
class
SimpleCode
:
public
Code
{
class
SimpleCode
{
public:
SimpleCode
(
size_t
code
,
size_t
num_classes
,
const
int64_t
*
ids
)
:
c_
(
static_cast
<
size_t
>
(
ids
[
code
])
+
num_classes
)
{}
...
...
@@ -138,7 +122,7 @@ class SimpleCode : public Code {
};
template
<
typename
T
>
class
CustomCode
:
public
Code
{
class
CustomCode
{
public:
CustomCode
(
const
framework
::
Tensor
&
ptable
,
const
framework
::
Tensor
&
pcode
,
const
int64_t
*
ids
,
int
index
)
{
...
...
@@ -155,11 +139,11 @@ class CustomCode : public Code {
* Binary classification path is the suffixes of encoding, thus leave out the
* left most bit in calc_bit.
*/
size_t
calc_index
(
int
bit
)
const
override
{
return
ptable_data_
[
bit
];
}
bool
calc_bit
(
int
bit
)
const
override
{
return
pcode_data_
[
bit
];
}
size_t
calc_index
(
int
bit
)
const
{
return
ptable_data_
[
bit
];
}
bool
calc_bit
(
int
bit
)
const
{
return
pcode_data_
[
bit
];
}
// NOTE: this function is not thread-safe.
int
get_length
()
const
override
{
int
get_length
()
const
{
if
(
length_
<
0
)
{
auto
len
=
seq_len_
;
length_
=
...
...
@@ -177,46 +161,32 @@ class CustomCode : public Code {
mutable
int
length_
{
-
1
};
};
class
SimpleCodeTable
:
public
CodeTable
{
class
SimpleCodeTable
{
public:
SimpleCodeTable
(
size_t
num_classes
,
const
int64_t
*
ids
)
:
num_classes_
(
num_classes
),
ids_
(
ids
)
{}
Code
*
get_code
(
int64_t
code
)
const
{
auto
it
=
codes_
.
find
(
code
);
if
(
it
!=
codes_
.
end
())
{
return
it
->
second
.
get
();
}
auto
*
result
=
new
SimpleCode
(
code
,
num_classes_
,
ids_
);
codes_
.
emplace
(
code
,
std
::
unique_ptr
<
Code
>
(
result
));
return
result
;
SimpleCode
get_code
(
int64_t
code
)
const
{
return
SimpleCode
(
code
,
num_classes_
,
ids_
);
}
size_t
size
()
const
{
return
num_classes_
;
}
int
get_max_code_length
()
const
{
return
FindLastSet
(
num_classes_
-
1
);
}
private:
mutable
std
::
map
<
int64_t
,
std
::
unique_ptr
<
Code
>>
codes_
;
size_t
num_classes_
;
const
int64_t
*
ids_
;
};
template
<
typename
T
>
class
CustomCodeTable
:
public
CodeTable
{
class
CustomCodeTable
{
public:
CustomCodeTable
(
const
framework
::
Tensor
&
ptable
,
const
framework
::
Tensor
&
pcode
,
const
int64_t
*
ids
)
:
ptable_
(
ptable
),
pcode_
(
pcode
),
ids_
(
ids
)
{}
Code
*
get_code
(
int64_t
code
)
const
{
auto
it
=
codes_
.
find
(
code
);
if
(
it
!=
codes_
.
end
())
{
return
it
->
second
.
get
();
}
auto
*
result
=
new
CustomCode
<
T
>
(
ptable_
,
pcode_
,
ids_
,
code
);
codes_
.
emplace
(
code
,
std
::
unique_ptr
<
Code
>
(
result
));
return
result
;
CustomCode
<
T
>
get_code
(
int64_t
code
)
const
{
return
CustomCode
<
T
>
(
ptable_
,
pcode_
,
ids_
,
code
);
}
size_t
size
()
const
{
return
static_cast
<
size_t
>
(
ptable_
.
dims
()[
1
]);
}
...
...
@@ -225,25 +195,26 @@ class CustomCodeTable : public CodeTable {
}
private:
mutable
std
::
unordered_map
<
int64_t
,
std
::
unique_ptr
<
Code
>>
codes_
;
const
framework
::
Tensor
&
ptable_
;
const
framework
::
Tensor
&
pcode_
;
const
int64_t
*
ids_
;
};
using
CodeTable
=
boost
::
variant
<
SimpleCodeTable
,
CustomCodeTable
<
int64_t
>>
;
template
<
typename
T
>
class
MatrixBitCodeFunctor
{
public:
MatrixBitCodeFunctor
(
size_t
num_classes
,
const
int64_t
*
ids
)
:
num_classes_
(
num_classes
),
ids_
(
ids
),
code_table_
(
new
SimpleCodeTable
(
num_classes
,
ids
))
{}
code_table_
(
SimpleCodeTable
(
num_classes
,
ids
))
{}
MatrixBitCodeFunctor
(
const
framework
::
Tensor
&
ptable
,
const
framework
::
Tensor
&
pcode
,
const
int64_t
*
ids
)
:
num_classes_
(
static_cast
<
size_t
>
(
ptable
.
dims
()[
1
])),
ids_
(
ids
),
code_table_
(
new
CustomCodeTable
<
int64_t
>
(
ptable
,
pcode
,
ids
))
{}
code_table_
(
CustomCodeTable
<
int64_t
>
(
ptable
,
pcode
,
ids
))
{}
/* For j < code_length
tmat(i, j) += vec(0, index(i, j))
*/
...
...
@@ -293,7 +264,7 @@ class MatrixBitCodeFunctor {
size_t
num_classes_
;
const
int64_t
*
ids_
;
std
::
unique_ptr
<
CodeTable
>
code_table_
;
CodeTable
code_table_
;
};
}
// namespace math
}
// namespace operators
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录