Commit cb4e3951 authored by A. Unique TensorFlower, committed by TensorFlower Gardener


Add a convenience method "forward_input_or_allocate_temp" to use buffer forwarding for temporary tensors.

Add buffer forwarding in a few additional places, including pooling ops and fused RNN kernels.
Change: 149958948
Parent 35a4183c
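
For context, a minimal sketch of how a kernel can use the new convenience method. This example is not part of the commit: the kernel name is hypothetical and a float input is assumed; only the OpKernelContext calls correspond to the API added here.

#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/tensor.h"

namespace tensorflow {

// Hypothetical kernel that squares its input element-wise. If the buffer of
// input 0 can be reused (the kernel holds the only reference to it),
// "scratch" takes ownership of that buffer and no new allocation happens;
// otherwise a fresh temporary is allocated.
class SquareInPlaceOp : public OpKernel {
 public:
  explicit SquareInPlaceOp(OpKernelConstruction* ctx) : OpKernel(ctx) {}

  void Compute(OpKernelContext* ctx) override {
    const Tensor& input = ctx->input(0);
    Tensor scratch;
    OP_REQUIRES_OK(ctx, ctx->forward_input_or_allocate_temp(
                            {0}, input.dtype(), input.shape(), &scratch));
    // Element-wise, so this is safe even when "scratch" aliases the input.
    scratch.flat<float>() = input.flat<float>().square();
    ctx->set_output(0, scratch);
  }
};

}  // namespace tensorflow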
@@ -122,9 +122,9 @@ class GRUCellBlockOp : public OpKernel {
                             &c_tensor));
     Tensor* h_tensor = nullptr;
-    OP_REQUIRES_OK(
-        ctx, ctx->allocate_output("h", TensorShape({batch_size, cell_size}),
-                                  &h_tensor));
+    OP_REQUIRES_OK(ctx, ctx->forward_input_or_allocate_output(
+                            {"h_prev"}, "h",
+                            TensorShape({batch_size, cell_size}), &h_tensor));
     // Allocate temp tensors.
     Tensor x_h_prev_tensor;
@@ -304,14 +304,15 @@ class GRUBlockCellGradOp : public OpKernel {
     // Create output tensors.
     Tensor* d_x_tensor = nullptr;
-    OP_REQUIRES_OK(
-        ctx, ctx->allocate_output("d_x", TensorShape({batch_size, input_size}),
-                                  &d_x_tensor));
+    OP_REQUIRES_OK(ctx, ctx->forward_input_or_allocate_output(
+                            {"x"}, "d_x", TensorShape({batch_size, input_size}),
+                            &d_x_tensor));
     Tensor* d_h_prev_tensor = nullptr;
-    OP_REQUIRES_OK(ctx, ctx->allocate_output(
-                            "d_h_prev", TensorShape({batch_size, cell_size}),
-                            &d_h_prev_tensor));
+    OP_REQUIRES_OK(
+        ctx, ctx->forward_input_or_allocate_output(
+                 {"h_prev"}, "d_h_prev", TensorShape({batch_size, cell_size}),
+                 &d_h_prev_tensor));
     Tensor* d_c_bar_tensor;
     OP_REQUIRES_OK(ctx, ctx->allocate_output(
......
@@ -112,9 +112,9 @@ class LSTMBlockCellOp : public OpKernel {
     // Allocate our output tensors.
     Tensor* i_tensor = nullptr;
-    OP_REQUIRES_OK(
-        ctx, ctx->allocate_output("i", TensorShape({batch_size, cell_size}),
-                                  &i_tensor));
+    OP_REQUIRES_OK(ctx, ctx->forward_input_or_allocate_output(
+                            {"h_prev"}, "i",
+                            TensorShape({batch_size, cell_size}), &i_tensor));
     Tensor* cs_tensor = nullptr;
     OP_REQUIRES_OK(
@@ -127,9 +127,9 @@ class LSTMBlockCellOp : public OpKernel {
                             &f_tensor));
     Tensor* o_tensor = nullptr;
-    OP_REQUIRES_OK(
-        ctx, ctx->allocate_output("o", TensorShape({batch_size, cell_size}),
-                                  &o_tensor));
+    OP_REQUIRES_OK(ctx, ctx->forward_input_or_allocate_output(
+                            {"cs_prev"}, "o",
+                            TensorShape({batch_size, cell_size}), &o_tensor));
     Tensor* ci_tensor = nullptr;
     OP_REQUIRES_OK(
@@ -387,10 +387,10 @@ class LSTMBlockCellGradOp : public OpKernel {
     // Allocate our output tensors.
     Tensor* cs_prev_grad_tensor = nullptr;
-    OP_REQUIRES_OK(ctx,
-                   ctx->allocate_output("cs_prev_grad",
-                                        TensorShape({batch_size, cell_size}),
-                                        &cs_prev_grad_tensor));
+    OP_REQUIRES_OK(
+        ctx, ctx->forward_input_or_allocate_output(
+                 {"cs_grad"}, "cs_prev_grad",
+                 TensorShape({batch_size, cell_size}), &cs_prev_grad_tensor));
     Tensor* dicfo_tensor = nullptr;
     OP_REQUIRES_OK(ctx, ctx->allocate_output(
@@ -398,16 +398,19 @@ class LSTMBlockCellGradOp : public OpKernel {
                             &dicfo_tensor));
     Tensor* wci_grad_tensor = nullptr;
-    OP_REQUIRES_OK(ctx, ctx->allocate_output("wci_grad", wci_tensor->shape(),
-                                             &wci_grad_tensor));
+    OP_REQUIRES_OK(
+        ctx, ctx->forward_input_or_allocate_output(
+                 {"wci"}, "wci_grad", wci_tensor->shape(), &wci_grad_tensor));
     Tensor* wcf_grad_tensor = nullptr;
-    OP_REQUIRES_OK(ctx, ctx->allocate_output("wcf_grad", wcf_tensor->shape(),
-                                             &wcf_grad_tensor));
+    OP_REQUIRES_OK(
+        ctx, ctx->forward_input_or_allocate_output(
+                 {"wcf"}, "wcf_grad", wcf_tensor->shape(), &wcf_grad_tensor));
     Tensor* wco_grad_tensor = nullptr;
-    OP_REQUIRES_OK(ctx, ctx->allocate_output("wco_grad", wco_tensor->shape(),
-                                             &wco_grad_tensor));
+    OP_REQUIRES_OK(
+        ctx, ctx->forward_input_or_allocate_output(
+                 {"wco"}, "wco_grad", wco_tensor->shape(), &wco_grad_tensor));

     // Allocate our temp tensors.
     Tensor do_tensor;
......
@@ -438,6 +438,21 @@ std::unique_ptr<Tensor> OpKernelContext::forward_input(
   return output_tensor;
 }

+Status OpKernelContext::forward_input_or_allocate_temp(
+    gtl::ArraySlice<int> candidate_input_indices, DataType type,
+    const TensorShape& shape, const AllocatorAttributes& allocator_attr,
+    Tensor* out_temp) {
+  for (int input_index : candidate_input_indices) {
+    std::unique_ptr<Tensor> new_tensor =
+        forward_input(input_index, type, shape, DEVICE_MEMORY, allocator_attr);
+    if (new_tensor != nullptr) {
+      *out_temp = std::move(*new_tensor);
+      return Status::OK();
+    }
+  }
+  return allocate_temp(type, shape, out_temp, allocator_attr);
+}
+
 void OpKernelContext::delete_ref_input(int index, bool lock_held) {
   DCHECK_GE(index, 0);
   DCHECK_LT(index, num_inputs());
......
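
The loop above tries each candidate index in order and falls back to allocate_temp() only when no candidate can be forwarded; forward_input() succeeds only when the input's buffer can be safely reused (roughly: matching type, shape, and allocator attributes, with no other holder of the buffer). A hedged usage sketch follows; the helper name and the candidate indices {0, 1} are placeholders, not from this commit.

#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/tensor.h"

namespace tensorflow {

// Hypothetical helper: reuse the buffer of input 0 or input 1 as float
// scratch space of the given shape, allocating only as a last resort.
Status GetFloatScratch(OpKernelContext* ctx, const TensorShape& shape,
                       Tensor* scratch) {
  return ctx->forward_input_or_allocate_temp(
      {0, 1}, DataTypeToEnum<float>::v(), shape, scratch);
}

}  // namespace tensorflow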
@@ -715,6 +715,21 @@ class OpKernelContext {
       StringPiece output_name, const TensorShape& output_shape,
       Tensor** output) TF_MUST_USE_RESULT;

+  // Tries to reuse one of the inputs given in input_indices as a temporary.
+  // If none of the given inputs can be forwarded, calls
+  // allocate_temp() to allocate a new temporary buffer.
+  Status forward_input_or_allocate_temp(
+      gtl::ArraySlice<int> candidate_input_indices, DataType type,
+      const TensorShape& shape, const AllocatorAttributes& allocator_attr,
+      Tensor* out_temp) TF_MUST_USE_RESULT;
+
+  Status forward_input_or_allocate_temp(
+      gtl::ArraySlice<int> candidate_input_indices, DataType type,
+      const TensorShape& shape, Tensor* out_temp) TF_MUST_USE_RESULT {
+    return forward_input_or_allocate_temp(candidate_input_indices, type, shape,
+                                          AllocatorAttributes(), out_temp);
+  }
+
   // Output

   // Returns the named list-valued output in "list", as defined in the OpDef.
......
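
A short sketch exercising both overloads declared above. The input indices and the host-memory attribute are illustrative assumptions, not part of this commit.

#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/lib/core/errors.h"

namespace tensorflow {

// Hypothetical helper allocating two scratch tensors.
Status AllocateScratchPair(OpKernelContext* ctx, const TensorShape& shape,
                           Tensor* a, Tensor* b) {
  // Overload without attributes: default AllocatorAttributes are used.
  TF_RETURN_IF_ERROR(
      ctx->forward_input_or_allocate_temp({0}, DT_FLOAT, shape, a));
  // Explicit-attributes overload, here asking for host-resident memory.
  // If input 1's buffer is not compatible with these attributes, forwarding
  // fails and allocate_temp() runs with host_attr instead.
  AllocatorAttributes host_attr;
  host_attr.set_on_host(true);
  return ctx->forward_input_or_allocate_temp({1}, DT_FLOAT, shape, host_attr,
                                             b);
}

}  // namespace tensorflow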
@@ -271,9 +271,9 @@ class FractionalAvgPoolGradOp : public OpKernel {
     // Create intermediate in_backprop.
     Tensor in_backprop_tensor_temp;
-    OP_REQUIRES_OK(context,
-                   context->allocate_temp(DataTypeToEnum<double>::v(), in_shape,
-                                          &in_backprop_tensor_temp));
+    OP_REQUIRES_OK(context, context->forward_input_or_allocate_temp(
+                                {0}, DataTypeToEnum<double>::v(), in_shape,
+                                &in_backprop_tensor_temp));
     in_backprop_tensor_temp.flat<double>().setZero();
     // Transform 4D tensor to 2D matrix.
     EigenDoubleMatrixMap in_backprop_tensor_temp_mat(
......
@@ -256,9 +256,9 @@ class FractionalMaxPoolGradOp : public OpKernel {
     // Step 1
     // ---------
     Tensor tensor_out_dup;
-    OP_REQUIRES_OK(context,
-                   context->allocate_temp(DataTypeToEnum<T>::v(),
-                                          tensor_out.shape(), &tensor_out_dup));
+    OP_REQUIRES_OK(context, context->forward_input_or_allocate_temp(
+                                {1}, DataTypeToEnum<T>::v(), tensor_out.shape(),
+                                &tensor_out_dup));
     Tensor tensor_out_arg_max;
     OP_REQUIRES_OK(context, context->allocate_temp(DataTypeToEnum<int64>::v(),
                                                    tensor_out.shape(),
......
@@ -275,9 +275,9 @@ class MaxPoolingGradOp : public OpKernel {
     const TensorShape& output_shape = tensor_in.shape();
     Tensor tensor_out_dup;
-    OP_REQUIRES_OK(context,
-                   context->allocate_temp(DataTypeToEnum<T>::v(),
-                                          tensor_out.shape(), &tensor_out_dup));
+    OP_REQUIRES_OK(context, context->forward_input_or_allocate_temp(
+                                {1}, DataTypeToEnum<T>::v(), tensor_out.shape(),
+                                &tensor_out_dup));
     Tensor tensor_out_arg_max;
     OP_REQUIRES_OK(context, context->allocate_temp(DataTypeToEnum<int64>::v(),
                                                    tensor_out.shape(),
@@ -552,7 +552,8 @@ class MaxPoolingGradWithArgmaxOp : public OpKernel {
     TensorShape out_shape({params.tensor_in_batch, params.tensor_in_rows,
                            params.tensor_in_cols, params.depth});
     Tensor* grad_out = nullptr;
-    OP_REQUIRES_OK(context, context->allocate_output(0, out_shape, &grad_out));
+    OP_REQUIRES_OK(context, context->forward_input_or_allocate_output(
+                                {1}, 0, out_shape, &grad_out));
     LaunchMaxPoolingGradWithArgmax<Device, T>::launch(context, params, grad_in,
                                                       argmax, grad_out);
......