Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
qq_38905368
tensorflow
提交
fb83aa5d
T
tensorflow
项目概览
qq_38905368
/
tensorflow
与 Fork 源项目一致
从无法访问的项目Fork
通知
5
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
T
tensorflow
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
提交
fb83aa5d
编写于
12月 13, 2018
作者:
J
Jiri Simsa
提交者:
TensorFlower Gardener
12月 13, 2018
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[tf.data] Reduce the overhead of performance modeling when there are no autotunable knobs.
PiperOrigin-RevId: 225405978
上级
7b4bfd90
变更
8
隐藏空白更改
内联
并排
Showing
8 changed files
with
72 additions
and
41 deletions
+72
-41
tensorflow/core/framework/dataset.h
tensorflow/core/framework/dataset.h
+19
-16
tensorflow/core/framework/model.cc
tensorflow/core/framework/model.cc
+4
-2
tensorflow/core/framework/model.h
tensorflow/core/framework/model.h
+29
-3
tensorflow/core/kernels/data/experimental/map_and_batch_dataset_op.cc
...ore/kernels/data/experimental/map_and_batch_dataset_op.cc
+5
-5
tensorflow/core/kernels/data/experimental/numa_map_and_batch_dataset_op.cc
...ernels/data/experimental/numa_map_and_batch_dataset_op.cc
+5
-5
tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc
...rflow/core/kernels/data/parallel_interleave_dataset_op.cc
+5
-5
tensorflow/core/kernels/data/parallel_map_dataset_op.cc
tensorflow/core/kernels/data/parallel_map_dataset_op.cc
+4
-3
tensorflow/core/kernels/data/parallel_map_iterator.cc
tensorflow/core/kernels/data/parallel_map_iterator.cc
+1
-2
未找到文件。
tensorflow/core/framework/dataset.h
浏览文件 @
fb83aa5d
...
...
@@ -50,8 +50,6 @@ class GraphDefBuilder;
class
Node
;
namespace
data
{
// A constant that can be used to enable auto-tuning.
constexpr
int
kAutoTune
=
-
1
;
constexpr
int
kInfiniteCardinality
=
-
1
;
constexpr
int
kUnknownCardinality
=
-
2
;
...
...
@@ -723,36 +721,36 @@ class DatasetBaseIterator : public IteratorBase {
return
model
::
MakeUnknownNode
(
std
::
move
(
args
));
}
// When
performance modeling is enabled, this method records the fact that
//
this iterator has dequeued a
element from an internal buffer.
// When
modeling is enabled, this method records the fact that this iterator
//
has dequeued an
element from an internal buffer.
void
RecordBufferDequeue
(
IteratorContext
*
ctx
,
const
std
::
vector
<
Tensor
>&
element
)
{
if
(
node_
)
{
if
(
collect_resource_usage
(
ctx
)
)
{
node_
->
add_buffered_bytes
(
-
GetAllocatedBytes
(
element
));
}
}
// When
performance modeling is enabled, this method records the fact that
//
this iterator has enqueued a
element in an internal buffer.
// When
modeling is enabled, this method records the fact that this iterator
//
has enqueued an
element in an internal buffer.
void
RecordBufferEnqueue
(
IteratorContext
*
ctx
,
const
std
::
vector
<
Tensor
>&
element
)
{
if
(
node_
)
{
if
(
collect_resource_usage
(
ctx
)
)
{
node_
->
add_buffered_bytes
(
GetAllocatedBytes
(
element
));
}
}
// When
performance modeling is enabled, this method records the fact that
//
this iterator
has produced an element.
// When
modeling is enabled, this method records the fact that this iterator
// has produced an element.
void
RecordElement
(
IteratorContext
*
ctx
)
{
if
(
node_
)
{
node_
->
record_element
();
}
}
// When
performance modeling is enabled, this method records the fact that
//
a thread of
this iterator has started work.
// When
modeling is enabled, this method records the fact that a thread of
// this iterator has started work.
void
RecordStart
(
IteratorContext
*
ctx
,
bool
stop_output
=
false
)
{
if
(
node_
)
{
if
(
collect_resource_usage
(
ctx
)
)
{
int64
now_nanos
=
Env
::
Default
()
->
NowNanos
();
if
(
stop_output
&&
node_
->
output
())
{
node_
->
output
()
->
record_stop
(
now_nanos
);
...
...
@@ -761,10 +759,10 @@ class DatasetBaseIterator : public IteratorBase {
}
}
// When
performance modeling is enabled, this method records the fact that
//
a thread of
this iterator has stopped work.
// When
modeling is enabled, this method records the fact that a thread of
// this iterator has stopped work.
void
RecordStop
(
IteratorContext
*
ctx
,
bool
start_output
=
false
)
{
if
(
node_
)
{
if
(
collect_resource_usage
(
ctx
)
)
{
int64
now_nanos
=
Env
::
Default
()
->
NowNanos
();
node_
->
record_stop
(
now_nanos
);
if
(
start_output
&&
node_
->
output
())
{
...
...
@@ -774,6 +772,11 @@ class DatasetBaseIterator : public IteratorBase {
}
private:
inline
bool
collect_resource_usage
(
IteratorContext
*
ctx
)
{
auto
model
=
ctx
->
model
();
return
model
&&
model
->
collect_resource_usage
()
&&
node_
;
}
BaseParams
params_
;
};
...
...
tensorflow/core/framework/model.cc
浏览文件 @
fb83aa5d
...
...
@@ -356,6 +356,8 @@ std::shared_ptr<Node> Model::AddNode(Node::Factory factory, const string& name,
if
(
output
)
{
output
->
add_input
(
node
);
}
collect_resource_usage_
=
collect_resource_usage_
||
node
->
has_tunable_parameters
();
lookup_table_
.
insert
(
std
::
make_pair
(
name
,
node
));
return
node
;
}
...
...
@@ -441,7 +443,7 @@ void Model::RecordElement(const string& name) {
void
Model
::
RecordStart
(
const
string
&
name
,
bool
stop_output
)
{
tf_shared_lock
l
(
mu_
);
auto
node
=
gtl
::
FindOrNull
(
lookup_table_
,
name
);
if
(
node
)
{
if
(
collect_resource_usage_
&&
node
)
{
int64
now_nanos
=
Env
::
Default
()
->
NowNanos
();
if
(
stop_output
&&
(
*
node
)
->
output
())
{
(
*
node
)
->
output
()
->
record_stop
(
now_nanos
);
...
...
@@ -453,7 +455,7 @@ void Model::RecordStart(const string& name, bool stop_output) {
void
Model
::
RecordStop
(
const
string
&
name
,
bool
start_output
)
{
tf_shared_lock
l
(
mu_
);
auto
node
=
gtl
::
FindOrNull
(
lookup_table_
,
name
);
if
(
node
)
{
if
(
collect_resource_usage_
&&
node
)
{
int64
now_nanos
=
Env
::
Default
()
->
NowNanos
();
(
*
node
)
->
record_stop
(
now_nanos
);
if
(
start_output
&&
(
*
node
)
->
output
())
{
...
...
tensorflow/core/framework/model.h
浏览文件 @
fb83aa5d
...
...
@@ -34,18 +34,24 @@ namespace tensorflow {
namespace
data
{
namespace
model
{
// A constant that can be used to enable auto-tuning.
constexpr
int
kAutoTune
=
-
1
;
// Represents thread-safe state that can be shared between an input pipeline and
// the performance model.
struct
SharedState
{
public:
SharedState
(
int64
value
,
std
::
shared_ptr
<
mutex
>
mu
,
std
::
shared_ptr
<
condition_variable
>
cond_var
)
:
value
(
value
),
mu
(
std
::
move
(
mu
)),
cond_var
(
std
::
move
(
cond_var
))
{}
:
value
(
value
),
mu
(
std
::
move
(
mu
)),
cond_var
(
std
::
move
(
cond_var
)),
tunable
(
value
==
kAutoTune
)
{}
int64
value
;
std
::
shared_ptr
<
mutex
>
mu
;
std
::
shared_ptr
<
condition_variable
>
cond_var
;
bool
tunable
=
false
;
const
bool
tunable
;
};
// Represents a parameter.
...
...
@@ -136,6 +142,15 @@ class Node {
return
buffered_bytes_
;
}
// Indicates whether the node has tunable parameters.
bool
has_tunable_parameters
()
const
LOCKS_EXCLUDED
(
mu_
)
{
tf_shared_lock
l
(
mu_
);
for
(
const
auto
&
pair
:
parameters_
)
{
if
(
pair
.
second
->
state
->
tunable
)
return
true
;
}
return
false
;
}
// Returns the unique node ID.
int64
id
()
const
LOCKS_EXCLUDED
(
mu_
)
{
return
id_
;
}
...
...
@@ -344,7 +359,10 @@ std::shared_ptr<Node> MakeUnknownNode(Node::Args args);
// implementation of `DatasetBase` and `DatasetBaseIterator` respectively.
class
Model
{
public:
Model
()
=
default
;
Model
()
:
collect_resource_usage_
(
false
)
{}
// Indicates whether to collect resource usage.
bool
collect_resource_usage
()
const
{
return
collect_resource_usage_
;
}
// Adds a node with the given name and given output.
std
::
shared_ptr
<
Node
>
AddNode
(
Node
::
Factory
factory
,
const
string
&
name
,
...
...
@@ -388,6 +406,14 @@ class Model {
int64
id_counter_
GUARDED_BY
(
mu_
)
=
1
;
std
::
shared_ptr
<
Node
>
output_
GUARDED_BY
(
mu_
);
std
::
map
<
string
,
std
::
shared_ptr
<
Node
>>
lookup_table_
GUARDED_BY
(
mu_
);
// Indicates whether the modeling framework should collect resource usage
// (e.g. CPU, memory). The logic for collecting this information assumes that
// the collection is not repeatedly disabled and enabled. As a consequence,
// the implementation starts collecting resource usage when it encounters a
// tunable parameter (because the information is used for tuning the value
// of the parameter) and never stops.
std
::
atomic
<
bool
>
collect_resource_usage_
;
};
}
// namespace model
...
...
tensorflow/core/kernels/data/experimental/map_and_batch_dataset_op.cc
浏览文件 @
fb83aa5d
...
...
@@ -71,9 +71,10 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
int64
num_parallel_calls
;
OP_REQUIRES_OK
(
ctx
,
ParseScalarArgument
(
ctx
,
"num_parallel_calls"
,
&
num_parallel_calls
));
OP_REQUIRES
(
ctx
,
num_parallel_calls
>
0
||
num_parallel_calls
==
kAutoTune
,
errors
::
InvalidArgument
(
"num_parallel_calls must be greater than zero."
));
OP_REQUIRES
(
ctx
,
num_parallel_calls
>
0
||
num_parallel_calls
==
model
::
kAutoTune
,
errors
::
InvalidArgument
(
"num_parallel_calls must be greater than zero."
));
bool
drop_remainder
;
OP_REQUIRES_OK
(
ctx
,
...
...
@@ -268,9 +269,8 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
Status
Initialize
(
IteratorContext
*
ctx
)
override
{
mutex_lock
l
(
*
mu_
);
if
(
num_parallel_calls_
->
value
==
kAutoTune
)
{
if
(
num_parallel_calls_
->
value
==
model
::
kAutoTune
)
{
num_parallel_calls_
->
value
=
ctx
->
runner_threadpool_size
();
num_parallel_calls_
->
tunable
=
true
;
}
TF_RETURN_IF_ERROR
(
dataset
()
->
input_
->
MakeIterator
(
ctx
,
prefix
(),
&
input_impl_
));
...
...
tensorflow/core/kernels/data/experimental/numa_map_and_batch_dataset_op.cc
浏览文件 @
fb83aa5d
...
...
@@ -76,9 +76,10 @@ class NumaMapAndBatchDatasetOp : public UnaryDatasetOpKernel {
int64
num_parallel_calls
;
OP_REQUIRES_OK
(
ctx
,
ParseScalarArgument
(
ctx
,
"num_parallel_calls"
,
&
num_parallel_calls
));
OP_REQUIRES
(
ctx
,
num_parallel_calls
>
0
||
num_parallel_calls
==
kAutoTune
,
errors
::
InvalidArgument
(
"num_parallel_calls must be greater than zero."
));
OP_REQUIRES
(
ctx
,
num_parallel_calls
>
0
||
num_parallel_calls
==
model
::
kAutoTune
,
errors
::
InvalidArgument
(
"num_parallel_calls must be greater than zero."
));
bool
drop_remainder
;
OP_REQUIRES_OK
(
ctx
,
...
...
@@ -214,9 +215,8 @@ class NumaMapAndBatchDatasetOp : public UnaryDatasetOpKernel {
Status
Initialize
(
IteratorContext
*
ctx
)
override
{
mutex_lock
l
(
*
mu_
);
if
(
num_parallel_calls_
->
value
==
kAutoTune
)
{
if
(
num_parallel_calls_
->
value
==
model
::
kAutoTune
)
{
num_parallel_calls_
->
value
=
ctx
->
runner_threadpool_size
();
num_parallel_calls_
->
tunable
=
true
;
}
TF_RETURN_IF_ERROR
(
dataset
()
->
input_
->
MakeIterator
(
ctx
,
prefix
(),
&
input_impl_
));
...
...
tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc
浏览文件 @
fb83aa5d
...
...
@@ -76,9 +76,10 @@ class ParallelInterleaveDatasetOp : public UnaryDatasetOpKernel {
int64
num_parallel_calls
;
OP_REQUIRES_OK
(
ctx
,
ParseScalarArgument
(
ctx
,
"num_parallel_calls"
,
&
num_parallel_calls
));
OP_REQUIRES
(
ctx
,
num_parallel_calls
>
0
||
num_parallel_calls
==
kAutoTune
,
errors
::
InvalidArgument
(
"num_parallel_calls must be greater than zero."
));
OP_REQUIRES
(
ctx
,
num_parallel_calls
>
0
||
num_parallel_calls
==
model
::
kAutoTune
,
errors
::
InvalidArgument
(
"num_parallel_calls must be greater than zero."
));
OP_REQUIRES
(
ctx
,
num_parallel_calls
<=
cycle_length
,
errors
::
InvalidArgument
(
...
...
@@ -220,9 +221,8 @@ class ParallelInterleaveDatasetOp : public UnaryDatasetOpKernel {
Status
Initialize
(
IteratorContext
*
ctx
)
override
{
mutex_lock
l
(
*
mu_
);
if
(
num_parallel_calls_
->
value
==
kAutoTune
)
{
if
(
num_parallel_calls_
->
value
==
model
::
kAutoTune
)
{
num_parallel_calls_
->
value
=
dataset
()
->
cycle_length_
;
num_parallel_calls_
->
tunable
=
true
;
}
TF_RETURN_IF_ERROR
(
dataset
()
->
input_
->
MakeIterator
(
ctx
,
prefix
(),
&
input_impl_
));
...
...
tensorflow/core/kernels/data/parallel_map_dataset_op.cc
浏览文件 @
fb83aa5d
...
...
@@ -51,9 +51,10 @@ class ParallelMapDatasetOp : public UnaryDatasetOpKernel {
int32
num_parallel_calls
;
OP_REQUIRES_OK
(
ctx
,
ParseScalarArgument
(
ctx
,
"num_parallel_calls"
,
&
num_parallel_calls
));
OP_REQUIRES
(
ctx
,
num_parallel_calls
>
0
||
num_parallel_calls
==
kAutoTune
,
errors
::
InvalidArgument
(
"num_parallel_calls must be greater than zero."
));
OP_REQUIRES
(
ctx
,
num_parallel_calls
>
0
||
num_parallel_calls
==
model
::
kAutoTune
,
errors
::
InvalidArgument
(
"num_parallel_calls must be greater than zero."
));
std
::
unique_ptr
<
CapturedFunction
>
captured_func
;
OP_REQUIRES_OK
(
ctx
,
CapturedFunction
::
Create
(
func_
,
ctx
,
"other_arguments"
,
...
...
tensorflow/core/kernels/data/parallel_map_iterator.cc
浏览文件 @
fb83aa5d
...
...
@@ -76,9 +76,8 @@ class ParallelMapIterator : public DatasetBaseIterator {
Status
Initialize
(
IteratorContext
*
ctx
)
override
{
mutex_lock
l
(
*
mu_
);
if
(
num_parallel_calls_
->
value
==
kAutoTune
)
{
if
(
num_parallel_calls_
->
value
==
model
::
kAutoTune
)
{
num_parallel_calls_
->
value
=
ctx
->
runner_threadpool_size
();
num_parallel_calls_
->
tunable
=
true
;
}
TF_RETURN_IF_ERROR
(
input_dataset_
->
MakeIterator
(
ctx
,
prefix
(),
&
input_impl_
));
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录