Commit d9cfc64a authored by Vijay Vasudevan

TensorFlow: merge changes from internal

Change 109945903
	Make unsorted_segment_sum detect negative indices

	Previously it crashed.  This fixes #466.

	Also improve the error message to say which index is problematic.
Change 109942557
	Fix conv_grad_input with stride 2.
	+ We now always call the cuDNN implementation, even when the padding is
	  incompatible.

Base CL: 109948577
Parent 475edf8e
@@ -337,6 +337,7 @@ Status DirectSession::GetOrCreateExecutors(
for (const auto& graph : graphs) {
const string& partition_name = graph.first;
Graph* partition_graph = graph.second;
const int graph_def_version = partition_graph->version();
Device* d;
s = device_mgr_->LookupDevice(partition_name, &d);
@@ -347,8 +348,10 @@ Status DirectSession::GetOrCreateExecutors(
LocalExecutorParams params;
params.has_control_flow = has_control_flow;
params.device = d;
params.create_kernel = [this, d](const NodeDef& ndef, OpKernel** kernel) {
return CreateCachedKernel(d, session_handle_, nullptr, ndef, kernel);
params.create_kernel = [this, d, graph_def_version](const NodeDef& ndef,
OpKernel** kernel) {
return CreateCachedKernel(d, session_handle_, nullptr, ndef,
graph_def_version, kernel);
};
params.delete_kernel = [this, d](OpKernel* kernel) {
DeleteCachedKernel(d, session_handle_, kernel);
......
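The create_kernel closure above captures graph_def_version by value so that kernels created later still see the version of the graph they were compiled from. A minimal standalone sketch of that capture pattern (all names here are illustrative mocks, not TensorFlow's types):

#include <cstdio>
#include <functional>

// Hypothetical stand-ins for illustration only.
struct NodeDef {};
using CreateKernelFn = std::function<void(const NodeDef&)>;

CreateKernelFn MakeCreateKernelFn(int graph_def_version) {
  // Capture by value: the factory outlives this scope, so every kernel it
  // creates still sees the version recorded when the executor was built.
  return [graph_def_version](const NodeDef& /*ndef*/) {
    std::printf("creating kernel for GraphDef version %d\n",
                graph_def_version);
  };
}

int main() {
  CreateKernelFn create_kernel = MakeCreateKernelFn(9);
  create_kernel(NodeDef{});  // prints: creating kernel for GraphDef version 9
}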
@@ -2140,20 +2140,22 @@ Status NewLocalExecutor(const LocalExecutorParams& params, const Graph* graph,
}
Status CreateNonCachedKernel(Device* device, FunctionLibraryRuntime* flib,
const NodeDef& ndef, OpKernel** kernel) {
const NodeDef& ndef, int graph_def_version,
OpKernel** kernel) {
auto device_type = DeviceType(device->attributes().device_type());
auto allocator = device->GetAllocator(AllocatorAttributes());
return CreateOpKernel(device_type, device, allocator, flib, ndef, kernel);
return CreateOpKernel(device_type, device, allocator, flib, ndef,
graph_def_version, kernel);
}
void DeleteNonCachedKernel(OpKernel* kernel) { delete kernel; }
Status CreateCachedKernel(Device* device, const string& session,
FunctionLibraryRuntime* flib, const NodeDef& ndef,
OpKernel** kernel) {
int graph_def_version, OpKernel** kernel) {
auto op_seg = device->op_segment();
auto create_fn = [device, flib, &ndef](OpKernel** kernel) {
return CreateNonCachedKernel(device, flib, ndef, kernel);
auto create_fn = [device, flib, &ndef, graph_def_version](OpKernel** kernel) {
return CreateNonCachedKernel(device, flib, ndef, graph_def_version, kernel);
};
return op_seg->FindOrCreate(session, ndef.name(), kernel, create_fn);
}
......
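CreateCachedKernel above defers to the device's OpSegment, which the header below describes as returning the same kernel instance for the same (session, ndef.name()) pair. A simplified stand-in for that find-or-create behavior (assumed semantics sketched with mock types; not OpSegment's actual code):

#include <cstdio>
#include <functional>
#include <map>
#include <string>
#include <utility>

struct OpKernel {};  // mock

// Memoize kernels per (session, node name); create_fn runs only on a miss.
OpKernel* FindOrCreate(const std::string& session, const std::string& node,
                       const std::function<OpKernel*()>& create_fn) {
  static std::map<std::pair<std::string, std::string>, OpKernel*> cache;
  auto key = std::make_pair(session, node);
  auto it = cache.find(key);
  if (it != cache.end()) return it->second;  // hit: reuse the kernel
  OpKernel* kernel = create_fn();            // miss: build and remember it
  cache[key] = kernel;
  return kernel;
}

int main() {
  auto create = [] { std::puts("created"); return new OpKernel; };
  FindOrCreate("session1", "MatMul_1", create);  // prints "created"
  FindOrCreate("session1", "MatMul_1", create);  // silent: served from cache
}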
@@ -202,7 +202,8 @@ class ExecutorBarrier {
// access the functions in the "flib". The caller takes ownership of
// returned "*kernel".
Status CreateNonCachedKernel(Device* device, FunctionLibraryRuntime* flib,
const NodeDef& ndef, OpKernel** kernel);
const NodeDef& ndef, int graph_def_version,
OpKernel** kernel);
// Deletes "kernel" returned by CreateKernel.
void DeleteNonCachedKernel(OpKernel* kernel);
@@ -213,7 +214,7 @@ void DeleteNonCachedKernel(OpKernel* kernel);
// ndef.name(), returns the same kernel instance.
Status CreateCachedKernel(Device* device, const string& session,
FunctionLibraryRuntime* flib, const NodeDef& ndef,
OpKernel** kernel);
int graph_def_version, OpKernel** kernel);
// Deletes "kernel" returned by CreateCachedKernel.
void DeleteCachedKernel(Device* device, const string& session,
......
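The comments above pin down ownership: non-cached kernels belong to the caller and must be released with the matching delete function. A hedged sketch of a call pattern that keeps that contract explicit (mock types, not TensorFlow's literal code):

#include <memory>

struct OpKernel { virtual ~OpKernel() = default; };  // mock
void DeleteNonCachedKernel(OpKernel* kernel) { delete kernel; }

// Pair the raw pointer with its designated deleter so ownership cannot leak.
using KernelPtr = std::unique_ptr<OpKernel, void (*)(OpKernel*)>;

int main() {
  KernelPtr kernel(new OpKernel, &DeleteNonCachedKernel);
}  // DeleteNonCachedKernel runs automatically when kernel goes out of scope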
@@ -241,6 +241,7 @@ static const FunctionLibraryRuntime::Handle kInvalidHandle = -1;
class FunctionLibraryRuntimeImpl : public FunctionLibraryRuntime {
public:
FunctionLibraryRuntimeImpl(Device* device, Runner runner,
int graph_def_version,
const FunctionLibraryDefinition* lib_def);
~FunctionLibraryRuntimeImpl() override;
@@ -263,6 +264,7 @@ class FunctionLibraryRuntimeImpl : public FunctionLibraryRuntime {
Device* const device_;
Runner runner_ = nullptr;
const int graph_def_version_;
const FunctionLibraryDefinition* const lib_def_;
std::function<Status(const string&, const OpDef**)> get_func_sig_;
std::function<Status(const NodeDef&, OpKernel**)> create_kernel_;
@@ -298,8 +300,12 @@ class FunctionLibraryRuntimeImpl : public FunctionLibraryRuntime {
};
FunctionLibraryRuntimeImpl::FunctionLibraryRuntimeImpl(
Device* device, Runner runner, const FunctionLibraryDefinition* lib_def)
: device_(device), runner_(runner), lib_def_(lib_def) {
Device* device, Runner runner, int graph_def_version,
const FunctionLibraryDefinition* lib_def)
: device_(device),
runner_(runner),
graph_def_version_(graph_def_version),
lib_def_(lib_def) {
get_func_sig_ = [this](const string& op, const OpDef** sig) {
Status s;
*sig = lib_def_->LookUp(op, &s);
@@ -368,7 +374,8 @@ const FunctionBody* FunctionLibraryRuntimeImpl::GetFunctionBody(Handle h) {
Status FunctionLibraryRuntimeImpl::CreateKernel(const NodeDef& ndef,
OpKernel** kernel) {
if (ndef.op() != kGradientOp && (lib_def_->Find(ndef.op()) == nullptr)) {
return CreateNonCachedKernel(device_, this, ndef, kernel);
return CreateNonCachedKernel(device_, this, ndef, graph_def_version_,
kernel);
}
// Try to instantiate this function for the func/attr. Maybe its
@@ -384,7 +391,8 @@ Status FunctionLibraryRuntimeImpl::CreateKernel(const NodeDef& ndef,
auto device_type = DeviceType(device_->attributes().device_type());
OpKernelConstruction construction(
device_type, device_, device_->GetAllocator(AllocatorAttributes()), &ndef,
&fbody->fdef.signature(), this, fbody->arg_types, fbody->ret_types, &s);
&fbody->fdef.signature(), this, fbody->arg_types, fbody->ret_types,
graph_def_version_, &s);
*kernel = new CallOp(handle, &construction);
if (!s.ok()) {
delete kernel;
@@ -628,8 +636,10 @@ bool FunctionLibraryRuntimeImpl::IsDefined(const string& function_name) {
}
FunctionLibraryRuntime* NewFunctionLibraryRuntime(
Device* device, Runner runner, const FunctionLibraryDefinition* lib_def) {
return new FunctionLibraryRuntimeImpl(device, runner, lib_def);
Device* device, Runner runner, int graph_def_version,
const FunctionLibraryDefinition* lib_def) {
return new FunctionLibraryRuntimeImpl(device, runner, graph_def_version,
lib_def);
}
bool RemoveDeadNodes(Graph* g) {
......
@@ -33,7 +33,8 @@ namespace tensorflow {
typedef std::function<void()> Closure;
typedef std::function<void(Closure)> Runner;
FunctionLibraryRuntime* NewFunctionLibraryRuntime(
Device* device, Runner runner, const FunctionLibraryDefinition* lib_def);
Device* device, Runner runner, int graph_def_version,
const FunctionLibraryDefinition* lib_def);
// FunctionLibraryRuntime::GetFunctionBody returns a description of an
// instantiated function that is represented as a Graph with arg/ret
......
@@ -29,6 +29,7 @@ limitations under the License.
#include "tensorflow/core/platform/port.h"
#include "tensorflow/core/platform/test_benchmark.h"
#include "tensorflow/core/public/session_options.h"
#include "tensorflow/core/public/version.h"
#include "tensorflow/core/util/device_name_utils.h"
#if defined(PLATFORM_GOOGLE)
@@ -66,13 +67,16 @@ Benchmark::Benchmark(const string& device, Graph* g,
rendez_ = NewLocalRendezvous();
const int graph_def_version = g->version();
if (init) {
Executor* init_exec;
TF_CHECK_OK(NewLocalExecutor(
{
device_, nullptr, false,
[this](const NodeDef& ndef, OpKernel** kernel) {
return CreateNonCachedKernel(device_, nullptr, ndef, kernel);
[this, graph_def_version](const NodeDef& ndef, OpKernel** kernel) {
return CreateNonCachedKernel(device_, nullptr, ndef,
graph_def_version, kernel);
},
[](OpKernel* kernel) { DeleteNonCachedKernel(kernel); },
},
@@ -87,8 +91,9 @@ Benchmark::Benchmark(const string& device, Graph* g,
TF_CHECK_OK(NewLocalExecutor(
{
device_, nullptr, false,
[this](const NodeDef& ndef, OpKernel** kernel) {
return CreateNonCachedKernel(device_, nullptr, ndef, kernel);
[this, graph_def_version](const NodeDef& ndef, OpKernel** kernel) {
return CreateNonCachedKernel(device_, nullptr, ndef,
graph_def_version, kernel);
},
[](OpKernel* kernel) { DeleteNonCachedKernel(kernel); },
},
......
@@ -578,20 +578,19 @@ Status SupportedDeviceTypesForNode(
return Status::OK();
}
std::unique_ptr<OpKernel> CreateOpKernel(DeviceType device_type,
DeviceBase* device,
Allocator* allocator,
const NodeDef& node_def,
Status* status) {
std::unique_ptr<OpKernel> CreateOpKernel(
DeviceType device_type, DeviceBase* device, Allocator* allocator,
const NodeDef& node_def, int graph_def_version, Status* status) {
OpKernel* kernel = nullptr;
*status = CreateOpKernel(device_type, device, allocator, nullptr, node_def,
&kernel);
graph_def_version, &kernel);
return std::unique_ptr<OpKernel>(kernel);
}
Status CreateOpKernel(DeviceType device_type, DeviceBase* device,
Allocator* allocator, FunctionLibraryRuntime* flib,
const NodeDef& node_def, OpKernel** kernel) {
const NodeDef& node_def, int graph_def_version,
OpKernel** kernel) {
VLOG(1) << "Instantiating kernel for node: " << SummarizeNodeDef(node_def);
// Look up the Op registered for this op name.
@@ -629,7 +628,8 @@ Status CreateOpKernel(DeviceType device_type, DeviceBase* device,
// Everything needed for OpKernel construction.
OpKernelConstruction context(device_type, device, allocator, &node_def,
op_def, flib, inputs, outputs, &s);
op_def, flib, inputs, outputs, graph_def_version,
&s);
*kernel = (*registration->factory)(&context);
if (!s.ok()) {
delete *kernel;
......
@@ -179,7 +179,8 @@ class OpKernelConstruction {
Allocator* allocator, const NodeDef* node_def,
const OpDef* op_def, FunctionLibraryRuntime* flib,
const DataTypeSlice& input_types,
const DataTypeSlice& output_types, Status* status)
const DataTypeSlice& output_types, int graph_def_version,
Status* status)
: device_type_(device_type),
device_(device),
allocator_(allocator),
@@ -188,6 +189,7 @@ class OpKernelConstruction {
flib_(flib),
input_types_(input_types),
output_types_(output_types),
graph_def_version_(graph_def_version),
status_(status) {}
Env* env() const { return device_->env(); }
@@ -270,6 +272,9 @@ class OpKernelConstruction {
// CHECK_NOTNULL(function_library())->Instantiate("Foo", ...).
FunctionLibraryRuntime* function_library() const { return flib_; }
// The GraphDef version whose behavior we should follow.
const int graph_def_version() const { return graph_def_version_; }
private:
const DeviceType device_type_;
DeviceBase* const device_;
@@ -279,6 +284,7 @@ class OpKernelConstruction {
FunctionLibraryRuntime* flib_;
DataTypeSlice input_types_;
DataTypeSlice output_types_;
const int graph_def_version_;
Status* status_;
TF_DISALLOW_COPY_AND_ASSIGN(OpKernelConstruction);
@@ -903,10 +909,12 @@ class OpKernelContext {
std::unique_ptr<OpKernel> CreateOpKernel(DeviceType device_type,
DeviceBase* device,
Allocator* allocator,
const NodeDef& def, Status* status);
const NodeDef& def,
int graph_def_version, Status* status);
Status CreateOpKernel(DeviceType device_type, DeviceBase* device,
Allocator* allocator, FunctionLibraryRuntime* flib,
const NodeDef& def, OpKernel** kernel);
const NodeDef& def, int graph_def_version,
OpKernel** kernel);
// Returns into 'device_types' the subset of prioritized_types that this
// binary has registered for the given NodeDef.
......
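The new graph_def_version() accessor lets a kernel capture, at construction time, the version of the GraphDef it is running under and gate behavior changes on it. A standalone analog of that pattern (mock classes for illustration; the real accessor lives on OpKernelConstruction as shown above):

#include <cstdio>

struct OpKernelConstruction {  // mock of the construction-time context
  int version;
  int graph_def_version() const { return version; }
};

struct MyOp {
  explicit MyOp(const OpKernelConstruction& ctx)
      // A hypothetical semantics change introduced at version 5.
      : use_new_semantics_(ctx.graph_def_version() >= 5) {}
  void Compute() const {
    std::puts(use_new_semantics_ ? "new semantics" : "legacy semantics");
  }
  bool use_new_semantics_;
};

int main() {
  MyOp from_old_graph(OpKernelConstruction{4});
  MyOp from_new_graph(OpKernelConstruction{5});
  from_old_graph.Compute();  // legacy semantics
  from_new_graph.Compute();  // new semantics
}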
@@ -30,6 +30,7 @@ limitations under the License.
#include "tensorflow/core/lib/strings/strcat.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/protobuf.h"
#include "tensorflow/core/public/version.h"
class DummyKernel : public tensorflow::OpKernel {
public:
@@ -131,9 +132,9 @@ class OpKernelTest : public ::testing::Test {
const DataTypeVector& inputs,
const DataTypeVector& outputs) {
Status status;
std::unique_ptr<OpKernel> op(
CreateOpKernel(device_type, &device_, cpu_allocator(),
CreateNodeDef(op_type, inputs), &status));
std::unique_ptr<OpKernel> op(CreateOpKernel(
device_type, &device_, cpu_allocator(), CreateNodeDef(op_type, inputs),
TF_GRAPH_DEF_VERSION, &status));
EXPECT_TRUE(status.ok()) << status;
EXPECT_TRUE(op != nullptr);
if (op != nullptr) {
@@ -147,8 +148,9 @@ class OpKernelTest : public ::testing::Test {
NodeDef node_def;
protobuf::TextFormat::ParseFromString(ascii_node_def, &node_def);
Status status;
std::unique_ptr<OpKernel> op(CreateOpKernel(
device_type, &device_, cpu_allocator(), node_def, &status));
std::unique_ptr<OpKernel> op(CreateOpKernel(device_type, &device_,
cpu_allocator(), node_def,
TF_GRAPH_DEF_VERSION, &status));
EXPECT_TRUE(op == nullptr);
EXPECT_FALSE(status.ok());
if (!status.ok()) {
@@ -286,7 +288,8 @@ TEST_F(OpKernelTest, SaveTempFalse) {
Status status;
std::unique_ptr<OpKernel> op(
CreateOpKernel(DEVICE_CPU, params.device, cpu_allocator(),
CreateNodeDef("Test1", {DT_FLOAT, DT_INT32}), &status));
CreateNodeDef("Test1", {DT_FLOAT, DT_INT32}),
TF_GRAPH_DEF_VERSION, &status));
EXPECT_TRUE(status.ok());
params.op_kernel = op.get();
OpKernelContext* ctx = new OpKernelContext(params);
@@ -307,7 +310,8 @@ TEST_F(OpKernelTest, SaveTempTrue) {
Status status;
std::unique_ptr<OpKernel> op(
CreateOpKernel(DEVICE_CPU, params.device, cpu_allocator(),
CreateNodeDef("Test1", {DT_FLOAT, DT_INT32}), &status));
CreateNodeDef("Test1", {DT_FLOAT, DT_INT32}),
TF_GRAPH_DEF_VERSION, &status));
EXPECT_TRUE(status.ok());
params.op_kernel = op.get();
OpKernelContext* ctx = new OpKernelContext(params);
@@ -354,8 +358,9 @@ class OpKernelBuilderTest : public ::testing::Test {
DeviceBase device(env);
// Test CreateOpKernel()
std::unique_ptr<OpKernel> op(
CreateOpKernel(device_type, &device, cpu_allocator(), def, &status));
std::unique_ptr<OpKernel> op(CreateOpKernel(device_type, &device,
cpu_allocator(), def,
TF_GRAPH_DEF_VERSION, &status));
EXPECT_TRUE(status.ok()) << status;
EXPECT_TRUE(op != nullptr);
if (op != nullptr) {
@@ -387,8 +392,9 @@ class OpKernelBuilderTest : public ::testing::Test {
DeviceBase device(env);
// Test CreateOpKernel().
std::unique_ptr<OpKernel> op(
CreateOpKernel(device_type, &device, cpu_allocator(), def, &status));
std::unique_ptr<OpKernel> op(CreateOpKernel(device_type, &device,
cpu_allocator(), def,
TF_GRAPH_DEF_VERSION, &status));
EXPECT_TRUE(op == nullptr);
EXPECT_FALSE(status.ok());
if (!status.ok()) {
......
@@ -25,6 +25,7 @@ limitations under the License.
#include "tensorflow/core/lib/core/status_test_util.h"
#include "tensorflow/core/lib/strings/strcat.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/public/version.h"
namespace tensorflow {
@@ -64,8 +65,8 @@ class OpSegmentTest : public ::testing::Test {
OpSegment::CreateKernelFn GetFn(const NodeDef* ndef) {
return [this, ndef](OpKernel** kernel) {
Status s;
auto created =
CreateOpKernel(DEVICE_CPU, &device_, cpu_allocator(), *ndef, &s);
auto created = CreateOpKernel(DEVICE_CPU, &device_, cpu_allocator(),
*ndef, TF_GRAPH_DEF_VERSION, &s);
if (s.ok()) {
*kernel = created.release();
}
......
@@ -808,13 +808,13 @@ class Conv2DSlowBackpropInputOp : public OpKernel {
// TODO(keveman): cuDNN only supports equal padding on both sides, so only
// calling it when that is true. Remove this check when (if?) cuDNN starts
// supporting different padding.
bool padding_compatible =
(padding_rows % 2 == 0) && (padding_cols % 2 == 0);
bool rows_odd = (padding_rows % 2 != 0);
bool cols_odd = (padding_cols % 2 != 0);
auto* stream = context->op_device_context<GPUDeviceContext>()->stream();
OP_REQUIRES(context, stream, errors::Internal("No GPU stream available."));
if (use_cudnn_ && padding_compatible) {
if (use_cudnn_) {
if (filter_rows == 1 && filter_cols == 1 && stride == 1) {
// 1x1 filter, so call cublas directly.
const uint64 m = batch * input_rows * input_cols;
@@ -842,10 +842,22 @@ class Conv2DSlowBackpropInputOp : public OpKernel {
return;
}
TensorShape compatible_input_shape;
if (rows_odd || cols_odd) {
// If a padding dimension is odd, we have one more element on the right
// side or the bottom side. This is unsupported in cudnn. Therefore,
// we pad that extra element and make it compatible.
compatible_input_shape = TensorShape(
{input_shape.dim_size(0), input_shape.dim_size(1) + rows_odd,
input_shape.dim_size(2) + cols_odd, input_shape.dim_size(3)});
} else {
compatible_input_shape = input_shape;
}
perftools::gputools::dnn::BatchDescriptor input_desc;
input_desc.set_count(batch)
.set_height(input_rows)
.set_width(input_cols)
.set_height(compatible_input_shape.dim_size(1))
.set_width(compatible_input_shape.dim_size(2))
.set_feature_map_count(in_depth)
.set_layout(perftools::gputools::dnn::DataLayout::kBatchDepthYX);
perftools::gputools::dnn::BatchDescriptor output_desc;
@@ -903,11 +915,15 @@ class Conv2DSlowBackpropInputOp : public OpKernel {
transformed_out_backprop.tensor<T, 4>());
Tensor pre_transformed_in_backprop;
OP_REQUIRES_OK(context,
context->allocate_temp(
DataTypeToEnum<T>::value,
TensorShape({batch, in_depth, input_rows, input_cols}),
&pre_transformed_in_backprop));
OP_REQUIRES_OK(context, context->allocate_temp(
DataTypeToEnum<T>::value,
TensorShape({
compatible_input_shape.dim_size(0),
compatible_input_shape.dim_size(3),
compatible_input_shape.dim_size(1),
compatible_input_shape.dim_size(2),
}),
&pre_transformed_in_backprop));
auto out_backprop_ptr =
AsDeviceMemory(transformed_out_backprop.template flat<T>().data(),
@@ -937,6 +953,28 @@ class Conv2DSlowBackpropInputOp : public OpKernel {
filter_shape.DebugString(), ")"));
}
if (rows_odd || cols_odd) {
Tensor in_backprop_remove_padding;
OP_REQUIRES_OK(context,
context->allocate_temp(
DataTypeToEnum<T>::value,
TensorShape({
input_shape.dim_size(0), input_shape.dim_size(3),
input_shape.dim_size(1), input_shape.dim_size(2),
}),
&in_backprop_remove_padding));
// Remove the padding for odd rows or cols.
functor::PadInput<GPUDevice, T, int>()(
context->template eigen_device<GPUDevice>(),
To32Bit(const_cast<const Tensor&>(pre_transformed_in_backprop)
.tensor<T, 4>()),
0, -rows_odd, 0, -cols_odd,
To32Bit(in_backprop_remove_padding.tensor<T, 4>()));
pre_transformed_in_backprop = in_backprop_remove_padding;
}
auto toConstTensor = [](const Tensor& x) -> const Tensor { return x; };
functor::NCHWToNHWC<Device, T>()(
context->eigen_device<Device>(),
......
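The rows_odd/cols_odd handling exists because SAME padding can be asymmetric: when the total padding along a dimension is odd, the extra element goes on the bottom/right, which cuDNN cannot express. A worked example of the arithmetic, assuming the usual SAME-padding formula (a sketch, not the kernel's exact code):

#include <algorithm>
#include <cstdio>

int main() {
  const int in = 6, filter = 3, stride = 2;
  const int out = (in + stride - 1) / stride;                     // ceil(6/2) = 3
  const int pad = std::max((out - 1) * stride + filter - in, 0);  // 1
  // pad = 1 is odd: one unmatched element on the bottom/right. The kernel
  // therefore pads the input from 6 to 7 rows, runs cuDNN with symmetric
  // padding, then slices the extra gradient row off via PadInput(..., -1).
  std::printf("output=%d total_pad=%d odd=%d\n", out, pad, pad % 2 != 0);
}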
@@ -47,6 +47,7 @@ limitations under the License.
#include "tensorflow/core/platform/test_benchmark.h"
#include "tensorflow/core/public/session.h"
#include "tensorflow/core/public/tensor.h"
#include "tensorflow/core/public/version.h"
#include "tensorflow/core/util/padding.h"
#include "tensorflow/core/util/port.h"
@@ -432,8 +433,9 @@ static void BM_LRNFloat(int iters, int depth, int cols, int rows,
.Finalize(&lrn_node_def));
Status status;
std::unique_ptr<OpKernel> op(CreateOpKernel(
DEVICE_CPU, device.get(), cpu_allocator(), lrn_node_def, &status));
std::unique_ptr<OpKernel> op(CreateOpKernel(DEVICE_CPU, device.get(),
cpu_allocator(), lrn_node_def,
TF_GRAPH_DEF_VERSION, &status));
TF_CHECK_OK(status);
OpKernelContext::Params params;
@@ -516,8 +518,9 @@ static void BM_AvgPool(int iters, int batch_size, int rows, int cols, int depth,
.Finalize(&avgpool_node_def);
TF_CHECK_OK(status);
std::unique_ptr<OpKernel> op(CreateOpKernel(
DEVICE_CPU, device.get(), cpu_allocator(), avgpool_node_def, &status));
std::unique_ptr<OpKernel> op(CreateOpKernel(DEVICE_CPU, device.get(),
cpu_allocator(), avgpool_node_def,
TF_GRAPH_DEF_VERSION, &status));
TF_CHECK_OK(status);
OpKernelContext::Params params;
params.device = device.get();
@@ -623,8 +626,9 @@ static void BM_AvgPoolBk(int iters, int batch_size, int rows, int cols,
.Attr("padding", padding == VALID ? "VALID" : "SAME")
.Finalize(&avgpool_grad_node_def);
TF_CHECK_OK(status);
std::unique_ptr<OpKernel> op(CreateOpKernel(
DEVICE_CPU, nullptr, cpu_allocator(), avgpool_grad_node_def, &status));
std::unique_ptr<OpKernel> op(
CreateOpKernel(DEVICE_CPU, nullptr, cpu_allocator(),
avgpool_grad_node_def, TF_GRAPH_DEF_VERSION, &status));
TF_CHECK_OK(status);
OpKernelContext::Params params;
params.device = device.get();
@@ -712,8 +716,9 @@ static void BM_MaxPool(int iters, int batch_size, int rows, int cols, int depth,
.Attr("padding", padding == VALID ? "VALID" : "SAME")
.Finalize(&maxpool_node_def);
TF_CHECK_OK(status);
std::unique_ptr<OpKernel> op(CreateOpKernel(
DEVICE_CPU, device.get(), cpu_allocator(), maxpool_node_def, &status));
std::unique_ptr<OpKernel> op(CreateOpKernel(DEVICE_CPU, device.get(),
cpu_allocator(), maxpool_node_def,
TF_GRAPH_DEF_VERSION, &status));
TF_CHECK_OK(status);
OpKernelContext::Params params;
params.device = device.get();
@@ -889,8 +894,9 @@ static void BM_ReluFloat(int iters, int batch_size, int rows, int cols,
.Input(FakeInput(DT_FLOAT))
.Finalize(&relu_node_def);
TF_CHECK_OK(status);
std::unique_ptr<OpKernel> op(CreateOpKernel(
DEVICE_CPU, device.get(), cpu_allocator(), relu_node_def, &status));
std::unique_ptr<OpKernel> op(CreateOpKernel(DEVICE_CPU, device.get(),
cpu_allocator(), relu_node_def,
TF_GRAPH_DEF_VERSION, &status));
TF_CHECK_OK(status);
OpKernelContext::Params params;
params.device = device.get();
@@ -960,8 +966,9 @@ static void BM_ImageNetSoftmaxFwd(int iters, int batch_size, int node_depth,
.Input("input", 0, DT_FLOAT)
.Finalize(&softmax_node_def));
Status status;
std::unique_ptr<OpKernel> op(CreateOpKernel(
DEVICE_CPU, device.get(), cpu_allocator(), softmax_node_def, &status));
std::unique_ptr<OpKernel> op(CreateOpKernel(DEVICE_CPU, device.get(),
cpu_allocator(), softmax_node_def,
TF_GRAPH_DEF_VERSION, &status));
TF_CHECK_OK(status);
OpKernelContext::Params params;
params.device = device.get();
......
@@ -20,6 +20,7 @@ limitations under the License.
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/lib/gtl/inlined_vector.h"
#include "tensorflow/core/public/tensor.h"
#include "tensorflow/core/util/util.h"
namespace tensorflow {
@@ -57,7 +58,7 @@ class DynamicPartitionOp_Shared : public OpKernel {
const int32 p = e_partitions(i);
OP_REQUIRES(c, p >= 0 && p < num_partitions_,
errors::InvalidArgument(
"partitions", SliceString((*partitions)->shape(), i),
"partitions", SliceDebugString((*partitions)->shape(), i),
" = ", p, " is not in [0, ", num_partitions_, ")"));
partition_count[p]++;
}
@@ -77,30 +78,6 @@ class DynamicPartitionOp_Shared : public OpKernel {
protected:
int num_partitions_;
static string SliceString(const TensorShape& shape, const int64 flat) {
// Special case rank 0 and 1
const int dims = shape.dims();
if (dims == 0) return "";
if (dims == 1) return strings::StrCat("[", flat, "]");
// Compute strides
gtl::InlinedVector<int64, 32> strides(dims);
strides.back() = 1;
for (int i = dims - 2; i >= 0; i--) {
strides[i] = strides[i + 1] * shape.dim_size(i + 1);
}
// Unflatten index
int64 left = flat;
string result;
for (int i = 0; i < dims; i++) {
strings::StrAppend(&result, i ? "," : "[", left / strides[i]);
left %= strides[i];
}
strings::StrAppend(&result, "]");
return result;
}
};
template <class T>
......
@@ -39,6 +39,7 @@ limitations under the License.
#include "tensorflow/core/public/session_options.h"
#include "tensorflow/core/public/status.h"
#include "tensorflow/core/public/tensor.h"
#include "tensorflow/core/public/version.h"
#include "tensorflow/core/util/tensor_slice_reader_cache.h"
namespace tensorflow {
@@ -80,7 +81,7 @@ class OpsTestBase : public ::testing::Test {
Status InitOp() {
Status status;
kernel_ = CreateOpKernel(device_type_, device_.get(), allocator(),
node_def_, &status);
node_def_, TF_GRAPH_DEF_VERSION, &status);
if (kernel_ != nullptr) input_types_ = kernel_->input_types();
return status;
}
......
@@ -72,8 +72,9 @@ TEST_F(RestoreOpTest, RestoreInt) {
gtl::InlinedVector<TensorValue, 4> inputs;
Status status;
std::unique_ptr<OpKernel> op(CreateOpKernel(
DEVICE_CPU, device.get(), cpu_allocator(), save, &status));
std::unique_ptr<OpKernel> op(CreateOpKernel(DEVICE_CPU, device.get(),
cpu_allocator(), save,
TF_GRAPH_DEF_VERSION, &status));
EXPECT_OK(status);
// Run it
@@ -153,8 +154,9 @@ TEST_F(RestoreOpTest, RestoreFloat) {
gtl::InlinedVector<TensorValue, 4> inputs;
Status status;
std::unique_ptr<OpKernel> op(CreateOpKernel(
DEVICE_CPU, device.get(), cpu_allocator(), save, &status));
std::unique_ptr<OpKernel> op(CreateOpKernel(DEVICE_CPU, device.get(),
cpu_allocator(), save,
TF_GRAPH_DEF_VERSION, &status));
EXPECT_OK(status);
// Run it
@@ -249,8 +251,9 @@ TEST_F(RestoreSliceOpTest, RestoreInt) {
gtl::InlinedVector<TensorValue, 4> inputs;
Status status;
std::unique_ptr<OpKernel> op(CreateOpKernel(
DEVICE_CPU, device.get(), cpu_allocator(), save, &status));
std::unique_ptr<OpKernel> op(CreateOpKernel(DEVICE_CPU, device.get(),
cpu_allocator(), save,
TF_GRAPH_DEF_VERSION, &status));
EXPECT_OK(status);
// Run it
......
@@ -27,6 +27,7 @@ limitations under the License.
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/public/status.h"
#include "tensorflow/core/public/tensor.h"
#include "tensorflow/core/util/util.h"
namespace tensorflow {
@@ -198,11 +199,12 @@ class UnsortedSegmentSumOp : public OpKernel {
const int32 N = segment_flat.dimension(0);
const int32 output_rows = num_segments.scalar<int32>()();
if (N > 0) {
Eigen::Tensor<Index, 0, Eigen::RowMajor> m = segment_flat.maximum();
OP_REQUIRES(
context, m() < output_rows,
errors::InvalidArgument("More segments found than output size"));
for (int i = 0; i < N; i++) {
int j = segment_flat(i);
OP_REQUIRES(context, 0 <= j && j < output_rows,
errors::InvalidArgument(
"segment_ids", SliceDebugString(segment_ids.shape(), i),
" = ", j, " is out of range [0, ", output_rows, ")"));
}
TensorShape output_shape;
......
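The old guard only checked segment_flat.maximum() against output_rows, so a negative id sailed through and indexed before the start of the output buffer (the crash in #466). The replacement checks every element, roughly as in this standalone sketch that mirrors the new error message:

#include <cstdio>
#include <vector>

int main() {
  std::vector<int> segment_ids = {-1};  // the failing case from issue #466
  const int output_rows = 2;
  for (std::size_t i = 0; i < segment_ids.size(); ++i) {
    const int j = segment_ids[i];
    if (!(0 <= j && j < output_rows)) {  // per-element range check
      std::printf("segment_ids[%zu] = %d is out of range [0, %d)\n",
                  i, j, output_rows);
      return 1;
    }
  }
  return 0;
}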
@@ -34,6 +34,7 @@ limitations under the License.
#include "tensorflow/core/kernels/ops_util.h"
#include "tensorflow/core/platform/test_benchmark.h"
#include "tensorflow/core/public/tensor.h"
#include "tensorflow/core/public/version.h"
namespace tensorflow {
@@ -63,8 +64,9 @@ static void BM_SegmentReduction(int iters, string reduction, Index num_rows,
.Input(FakeInput(DataTypeToEnum<Index>::v()))
.Finalize(&reduction_node_def));
Status status;
std::unique_ptr<OpKernel> reduction_op(CreateOpKernel(
DEVICE_CPU, device.get(), cpu_allocator(), reduction_node_def, &status));
std::unique_ptr<OpKernel> reduction_op(
CreateOpKernel(DEVICE_CPU, device.get(), cpu_allocator(),
reduction_node_def, TF_GRAPH_DEF_VERSION, &status));
OpKernelContext::Params params;
params.device = device.get();
params.frame_iter = FrameAndIter(0, 0);
......
@@ -246,8 +246,9 @@ static void BM_SparseToDense(int iters, const int bm_arg) {
.Finalize(&sparse_node_def));
Status status;
std::unique_ptr<OpKernel> op(CreateOpKernel(
DEVICE_CPU, device.get(), cpu_allocator(), sparse_node_def, &status));
std::unique_ptr<OpKernel> op(CreateOpKernel(DEVICE_CPU, device.get(),
cpu_allocator(), sparse_node_def,
TF_GRAPH_DEF_VERSION, &status));
OpKernelContext::Params params;
params.device = device.get();
......
@@ -15,7 +15,10 @@ limitations under the License.
#include "tensorflow/core/util/util.h"
#include "tensorflow/core/lib/gtl/inlined_vector.h"
#include "tensorflow/core/lib/strings/strcat.h"
#include "tensorflow/core/platform/logging.h"
namespace tensorflow {
StringPiece NodeNamePrefix(const StringPiece& op_name) {
@@ -93,4 +96,28 @@ string PrintMemory(const char* ptr, int n) {
return ret;
}
string SliceDebugString(const TensorShape& shape, const int64 flat) {
// Special case rank 0 and 1
const int dims = shape.dims();
if (dims == 0) return "";
if (dims == 1) return strings::StrCat("[", flat, "]");
// Compute strides
gtl::InlinedVector<int64, 32> strides(dims);
strides.back() = 1;
for (int i = dims - 2; i >= 0; i--) {
strides[i] = strides[i + 1] * shape.dim_size(i + 1);
}
// Unflatten index
int64 left = flat;
string result;
for (int i = 0; i < dims; i++) {
strings::StrAppend(&result, i ? "," : "[", left / strides[i]);
left %= strides[i];
}
strings::StrAppend(&result, "]");
return result;
}
} // namespace tensorflow
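To see how SliceDebugString turns a flat index back into a Python-style subscript, here is a standalone mirror of the function with a worked example (a hypothetical re-implementation for illustration, not the TensorFlow code itself):

#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

std::string SliceDebugString(const std::vector<int64_t>& dims, int64_t flat) {
  const int n = static_cast<int>(dims.size());
  if (n == 0) return "";
  if (n == 1) return "[" + std::to_string(flat) + "]";
  // Row-major strides: for {2, 3, 4} they are {12, 4, 1}.
  std::vector<int64_t> strides(n);
  strides.back() = 1;
  for (int i = n - 2; i >= 0; i--) strides[i] = strides[i + 1] * dims[i + 1];
  // Peel off one coordinate per dimension.
  std::string result;
  int64_t left = flat;
  for (int i = 0; i < n; i++) {
    result += (i ? "," : "[") + std::to_string(left / strides[i]);
    left %= strides[i];
  }
  return result + "]";
}

int main() {
  // 17 = 1*12 + 1*4 + 1*1, so flat index 17 in a {2, 3, 4} tensor names
  // element [1,1,1].
  std::cout << SliceDebugString({2, 3, 4}, 17) << "\n";  // prints [1,1,1]
}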
@@ -17,6 +17,7 @@ limitations under the License.
#define TENSORFLOW_UTIL_UTIL_H_
#include "tensorflow/core/lib/core/stringpiece.h"
#include "tensorflow/core/public/tensor_shape.h"
namespace tensorflow {
@@ -50,6 +51,11 @@ class MovingAverage {
// like "00 01 ef cd cd ef".
string PrintMemory(const char* ptr, int n);
// Given a flattened index into a tensor, computes a string s so that
// StrAppend("tensor", s) is a Python indexing expression. E.g.,
// "tensor", "tensor[i]", "tensor[i, j]", etc.
string SliceDebugString(const TensorShape& shape, const int64 flat);
} // namespace tensorflow
#endif // TENSORFLOW_UTIL_UTIL_H_
@@ -236,14 +236,14 @@ py_test(
)
tf_gen_op_wrapper_py(
name = "test_kernel_label_op",
out = "framework/test_kernel_label_op.py",
deps = [":test_kernel_label_op_kernel"],
name = "test_ops",
out = "framework/test_ops.py",
deps = [":test_ops_kernels"],
)
cc_library(
name = "test_kernel_label_op_kernel",
srcs = ["framework/test_kernel_label_op.cc"],
name = "test_ops_kernels",
srcs = ["framework/test_ops.cc"],
linkstatic = 1,
deps = ["//tensorflow/core:framework"],
alwayslink = 1,
@@ -259,7 +259,7 @@ py_test(
":ops",
":platform_test",
":session",
":test_kernel_label_op",
":test_ops",
],
)
@@ -751,7 +751,7 @@ tf_cuda_library(
hdrs = ["client/tf_session_helper.h"],
deps = [
":construction_fails_op",
":test_kernel_label_op_kernel",
":test_ops_kernels",
"//tensorflow/core",
"//tensorflow/core:direct_session",
"//tensorflow/core:kernels",
......
@@ -24,8 +24,9 @@ from tensorflow.python.framework import device as pydev
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.framework import tensor_shape
from tensorflow.python.framework import test_kernel_label_op
from tensorflow.python.framework import test_ops
from tensorflow.python.framework import test_util
from tensorflow.python.framework import versions
from tensorflow.python.ops import common_shapes
from tensorflow.python.ops import variables
from tensorflow.python.platform import googletest
@@ -927,21 +928,21 @@ class KernelLabelTest(test_util.TensorFlowTestCase):
def testNoLabel(self):
with self.test_session():
self.assertAllEqual(b"My label is: default",
test_kernel_label_op.kernel_label().eval())
test_ops.kernel_label().eval())
def testLabelMap(self):
with self.test_session() as sess:
default_1 = test_kernel_label_op.kernel_label()
default_1 = test_ops.kernel_label()
# pylint: disable=protected-access
with sess.graph._kernel_label_map({"KernelLabel": "overload_1"}):
overload_1_1 = test_kernel_label_op.kernel_label()
overload_1_1 = test_ops.kernel_label()
with sess.graph._kernel_label_map({"KernelLabel": "overload_2"}):
overload_2 = test_kernel_label_op.kernel_label()
overload_2 = test_ops.kernel_label()
with sess.graph._kernel_label_map({"KernelLabel": ""}):
default_2 = test_kernel_label_op.kernel_label()
overload_1_2 = test_kernel_label_op.kernel_label()
default_2 = test_ops.kernel_label()
overload_1_2 = test_ops.kernel_label()
# pylint: enable=protected-access
default_3 = test_kernel_label_op.kernel_label()
default_3 = test_ops.kernel_label()
self.assertAllEqual(b"My label is: default", default_1.eval())
self.assertAllEqual(b"My label is: default", default_2.eval())
@@ -951,5 +952,18 @@ class KernelLabelTest(test_util.TensorFlowTestCase):
self.assertAllEqual(b"My label is: overload_2", overload_2.eval())
class GraphDefVersionTest(test_util.TensorFlowTestCase):
def testGraphDefVersion(self):
"""Test that the graphdef version is plumbed through to kernels."""
for version in range(versions.GRAPH_DEF_VERSION_MIN,
versions.GRAPH_DEF_VERSION_MAX + 1):
with ops.Graph().as_default() as g:
g.graph_def_version = version
with self.test_session(graph=g):
v = test_ops.graph_def_version().eval()
self.assertEqual(version, v)
if __name__ == "__main__":
googletest.main()
@@ -21,6 +21,8 @@ namespace tensorflow {
REGISTER_OP("KernelLabel").Output("result: string");
REGISTER_OP("GraphDefVersion").Output("version: int32");
namespace {
enum KernelLabel { DEFAULT_LABEL, OVERLOAD_1_LABEL, OVERLOAD_2_LABEL };
} // namespace
@@ -59,4 +61,22 @@ REGISTER_KERNEL_BUILDER(Name("KernelLabel")
.Label("overload_2"),
KernelLabelOp<OVERLOAD_2_LABEL>);
class GraphDefVersionOp : public OpKernel {
public:
GraphDefVersionOp(OpKernelConstruction* ctx)
: OpKernel(ctx), graph_def_version_(ctx->graph_def_version()) {}
void Compute(OpKernelContext* ctx) override {
Tensor* output;
OP_REQUIRES_OK(ctx, ctx->allocate_output(0, TensorShape({}), &output));
output->scalar<int>()() = graph_def_version_;
}
private:
const int graph_def_version_;
};
REGISTER_KERNEL_BUILDER(Name("GraphDefVersion").Device(DEVICE_CPU),
GraphDefVersionOp);
} // end namespace tensorflow
@@ -217,6 +217,14 @@ class UnsortedSegmentSumTest(SegmentReductionHelper):
self.assertAllClose(unsorted_jacob_t, sorted_jacob_t, rtol=1e-3, atol=1e-3)
self.assertAllClose(unsorted_jacob_n, sorted_jacob_n, rtol=1e-3, atol=1e-3)
def testBadIndices(self):
with self.test_session():
for bad in [[-1]], [[7]]:
unsorted = tf.unsorted_segment_sum([[17]], bad, num_segments=2)
with self.assertRaisesOpError(
r"segment_ids\[0,0\] = %d is out of range \[0, 2\)" % bad[0][0]):
unsorted.eval()
class SparseSegmentReductionHelper(SegmentReductionHelper):
......