提交 46e4de6e 编写于 作者: A A. Unique TensorFlower 提交者: TensorFlower Gardener

Undo loop fusion changes for now as they seem to be altering a few results.

END_PUBLIC
RELNOTES: n/a

BEGIN_PUBLIC
Automated g4 rollback of changelist 164825735

PiperOrigin-RevId: 164883478
上级 36c1e8b0
......@@ -463,16 +463,6 @@ cc_library(
],
)
# Small unit test target built from cpu_instruction_fusion_test.cc; it depends
# on the :cpu_instruction_fusion library and the shared hlo_test_base fixture.
cc_test(
name = "cpu_instruction_fusion_test",
size = "small",
srcs = ["cpu_instruction_fusion_test.cc"],
deps = [
":cpu_instruction_fusion",
"//tensorflow/compiler/xla/tests:hlo_test_base",
],
)
cc_library(
name = "cpu_parallelization_preparation",
srcs = ["cpu_parallelization_preparation.cc"],
......
......@@ -20,28 +20,6 @@ limitations under the License.
namespace xla {
namespace cpu {
namespace {
// Returns true when `hlo` can be emitted one output element at a time.
//
// Elementwise instructions qualify only when they have at least one operand.
// Otherwise the answer depends on whether the opcode belongs to the fixed set
// of non-elementwise ops that have a per-element lowering.
bool CanBeLoweredIntoElementalLoop(const HloInstruction& hlo) {
  if (hlo.IsElementwise()) {
    return hlo.operand_count() > 0;
  }

  switch (hlo.opcode()) {
    // Non-elementwise ops whose lowering produces the output for a single
    // specified element at a time.
    case HloOpcode::kConcatenate:
    case HloOpcode::kReverse:
    case HloOpcode::kBroadcast:
    case HloOpcode::kSlice:
    case HloOpcode::kDynamicSlice:
    case HloOpcode::kDynamicUpdateSlice:
    case HloOpcode::kReshape:
    case HloOpcode::kTranspose:
    case HloOpcode::kPad:
      return true;
    default:
      return false;
  }
}
} // namespace
bool CpuInstructionFusion::ShouldFuse(HloInstruction* consumer,
int64 operand_index) {
HloInstruction* producer = consumer->mutable_operand(operand_index);
......@@ -51,33 +29,20 @@ bool CpuInstructionFusion::ShouldFuse(HloInstruction* consumer,
return false;
}
// Condition for consumer: must act elementwise on the operand. This permits
// only elementwise ops or (potentially) fusion ops to act as consumers.
if (!consumer->IsElementwiseOnOperand(operand_index)) {
// Condition for consumer: must be elementwise or a fusion op
// (which necessarily only contains elementwise operations)
if (!(consumer->opcode() == HloOpcode::kFusion ||
consumer->IsElementwise())) {
return false;
}
// Producer or consumer cannot be Map. Maps are technically elementwise but of
// a slightly different form (call instead of a computation). These are not
// Producer or consumer cannot be Map. Maps are technically elementwise but
// of a slightly different form (call instead of a computation). These are not
// yet supported in the CPU backend.
if (producer->opcode() == HloOpcode::kMap ||
consumer->opcode() == HloOpcode::kMap) {
return false;
}
// Avoid dragging something that could otherwise be implemented as a
// bitcast into the loop.
if (producer->CouldBeBitcast()) {
return false;
}
// Check to make sure that the producer can generate output a specified
// element at a time.
if (!CanBeLoweredIntoElementalLoop(*producer)) {
return false;
}
return InstructionFusion::ShouldFuse(consumer, operand_index);
return producer->IsElementwise() && producer->operand_count() > 0 &&
producer->opcode() != HloOpcode::kMap &&
consumer->opcode() != HloOpcode::kMap &&
InstructionFusion::ShouldFuse(consumer, operand_index);
}
} // namespace cpu
......
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion.h"
#include "tensorflow/compiler/xla/tests/hlo_test_base.h"
namespace xla {
namespace cpu {
using InstructionFusionTest = HloTestBase;
// Builds negate(broadcast(param)) and checks that CpuInstructionFusion turns
// it into a single loop fusion whose fused root is the negate, with the
// broadcast fused in as the negate's operand.
TEST_F(InstructionFusionTest, BroadcastFused) {
HloComputation::Builder builder(TestName());
Shape param_shape = ShapeUtil::MakeShape(F32, {8});
Shape result_shape = ShapeUtil::MakeShape(F32, {8, 8});
auto param0 = builder.AddInstruction(
HloInstruction::CreateParameter(0, param_shape, "param"));
auto broadcast1 = builder.AddInstruction(
HloInstruction::CreateBroadcast(result_shape, param0, {1}));
// The negate is added last, so it is the instruction the root checks below
// expect to have been replaced by a fusion.
builder.AddInstruction(HloInstruction::CreateUnary(
result_shape, HloOpcode::kNegate, broadcast1));
auto module = CreateNewModule();
module->AddEntryComputation(builder.Build());
auto computation = module->entry_computation();
// Run the pass under test; it must succeed and report that it changed the
// module.
auto did_fusion = CpuInstructionFusion().Run(module.get());
ASSERT_TRUE(did_fusion.ok());
EXPECT_TRUE(did_fusion.ValueOrDie());
// ASSERT (not EXPECT) on the opcode: the fusion-specific accessors below are
// only queried once the root is known to be a fusion instruction.
HloInstruction* root = computation->root_instruction();
ASSERT_EQ(HloOpcode::kFusion, root->opcode());
EXPECT_EQ(root->fusion_kind(), HloInstruction::FusionKind::kLoop);
HloInstruction* fused_root = root->fused_expression_root();
EXPECT_EQ(HloOpcode::kNegate, fused_root->opcode());
EXPECT_EQ(HloOpcode::kBroadcast, fused_root->operand(0)->opcode());
}
// Builds negate(reverse(slice(param))) and checks that CpuInstructionFusion
// fuses only the (reverse, negate) pair into a loop fusion, leaving the slice
// outside as the operand of the fusion instruction.
TEST_F(InstructionFusionTest, SliceBeforeReverseNotFused) {
HloComputation::Builder builder(TestName());
Shape param_shape = ShapeUtil::MakeShape(F32, {8});
Shape slice_shape = ShapeUtil::MakeShape(F32, {4});
auto param0 = builder.AddInstruction(
HloInstruction::CreateParameter(0, param_shape, "param"));
// The (slice, reverse) pair can't be fused into a loop because reverse
// doesn't act elementwise on slice.
auto slice1 = builder.AddInstruction(
HloInstruction::CreateSlice(slice_shape, param0, {0}, {4}, {1}));
auto reverse2 = builder.AddInstruction(
HloInstruction::CreateReverse(slice_shape, slice1, {0}));
builder.AddInstruction(
HloInstruction::CreateUnary(slice_shape, HloOpcode::kNegate, reverse2));
auto module = CreateNewModule();
module->AddEntryComputation(builder.Build());
auto computation = module->entry_computation();
// The pass is still expected to report a change, because the reverse is fused
// into the negate.
auto did_fusion = CpuInstructionFusion().Run(module.get());
ASSERT_TRUE(did_fusion.ok());
EXPECT_TRUE(did_fusion.ValueOrDie());
HloInstruction* root = computation->root_instruction();
ASSERT_EQ(HloOpcode::kFusion, root->opcode());
EXPECT_EQ(root->fusion_kind(), HloInstruction::FusionKind::kLoop);
HloInstruction* fused_root = root->fused_expression_root();
EXPECT_EQ(HloOpcode::kNegate, fused_root->opcode());
EXPECT_EQ(HloOpcode::kReverse, fused_root->operand(0)->opcode());
// The slice must remain a separate instruction that feeds the fusion.
EXPECT_EQ(HloOpcode::kSlice, root->operand(0)->opcode());
}
} // namespace cpu
} // namespace xla
// Test binary entry point: forwards the command-line arguments to
// xla::ParseDebugOptionsFlagsAndRunTests and returns its result as the exit
// code.
int main(int argc, char** argv) {
return xla::ParseDebugOptionsFlagsAndRunTests(argc, argv);
}
......@@ -420,182 +420,6 @@ XLA_TEST_F(FusionTest, Reverse) {
*ExecuteAndTransfer(std::move(hlo_module), {}));
}
// Concatenates the constants {1, 2, 3} and {4} along dimension 0, negates the
// result, manually fuses (negate, concatenate) into a loop fusion, and checks
// that execution yields {-1, -2, -3, -4}.
XLA_TEST_F(FusionTest, ConcatenateNegate) {
auto builder = HloComputation::Builder(TestName());
auto hlo_module = CreateNewModule();
auto const0 = builder.AddInstruction(
HloInstruction::CreateConstant(Literal::CreateR1<int32>({1, 2, 3})));
auto const1 = builder.AddInstruction(
HloInstruction::CreateConstant(Literal::CreateR1<int32>({4})));
auto concat2 = builder.AddInstruction(HloInstruction::CreateConcatenate(
ShapeUtil::MakeShape(S32, {4}), {const0, const1}, 0));
auto negate3 = builder.AddInstruction(HloInstruction::CreateUnary(
ShapeUtil::MakeShape(S32, {4}), HloOpcode::kNegate, concat2));
// The fusion is created explicitly here rather than by running a fusion pass,
// so the test exercises code generation for this fused pair directly.
hlo_module->AddEntryComputation(builder.Build())
->CreateFusionInstruction(/*instructions_to_fuse=*/{negate3, concat2},
HloInstruction::FusionKind::kLoop);
LiteralTestUtil::ExpectEqual(*Literal::CreateR1<int32>({-1, -2, -3, -4}),
*ExecuteAndTransfer(std::move(hlo_module), {}));
}
// Reverses the constant {1, 2, 3} along dimension 0, negates it, manually
// fuses (negate, reverse) into a loop fusion, and checks that execution
// yields {-3, -2, -1}.
XLA_TEST_F(FusionTest, ReverseNegate) {
auto builder = HloComputation::Builder(TestName());
auto hlo_module = CreateNewModule();
auto const0 = builder.AddInstruction(
HloInstruction::CreateConstant(Literal::CreateR1<int32>({1, 2, 3})));
auto reverse1 = builder.AddInstruction(HloInstruction::CreateReverse(
ShapeUtil::MakeShape(S32, {3}), const0, {0}));
auto negate2 = builder.AddInstruction(HloInstruction::CreateUnary(
ShapeUtil::MakeShape(S32, {3}), HloOpcode::kNegate, reverse1));
// Fuse explicitly instead of relying on a fusion pass.
hlo_module->AddEntryComputation(builder.Build())
->CreateFusionInstruction(/*instructions_to_fuse=*/{negate2, reverse1},
HloInstruction::FusionKind::kLoop);
LiteralTestUtil::ExpectEqual(*Literal::CreateR1<int32>({-3, -2, -1}),
*ExecuteAndTransfer(std::move(hlo_module), {}));
}
// Broadcasts the scalar constant 1 to a two-element vector, negates it,
// manually fuses (negate, broadcast) into a loop fusion, and checks that
// execution yields {-1, -1}.
XLA_TEST_F(FusionTest, BroadcastNegate) {
auto builder = HloComputation::Builder(TestName());
auto hlo_module = CreateNewModule();
auto const0 = builder.AddInstruction(
HloInstruction::CreateConstant(Literal::CreateR0<int32>(1)));
// The dimension list is empty because the broadcast operand is a scalar.
auto broadcast1 = builder.AddInstruction(HloInstruction::CreateBroadcast(
ShapeUtil::MakeShape(S32, {2}), const0, {}));
auto negate2 = builder.AddInstruction(HloInstruction::CreateUnary(
ShapeUtil::MakeShape(S32, {2}), HloOpcode::kNegate, broadcast1));
// Fuse explicitly instead of relying on a fusion pass.
hlo_module->AddEntryComputation(builder.Build())
->CreateFusionInstruction(/*instructions_to_fuse=*/{negate2, broadcast1},
HloInstruction::FusionKind::kLoop);
LiteralTestUtil::ExpectEqual(*Literal::CreateR1<int32>({-1, -1}),
*ExecuteAndTransfer(std::move(hlo_module), {}));
}
// Slices the constant {1, 2, 3, 4} down to two elements, negates the result,
// manually fuses (negate, slice) into a loop fusion, and checks that
// execution yields {-1, -3}.
XLA_TEST_F(FusionTest, SliceNegate) {
auto builder = HloComputation::Builder(TestName());
auto hlo_module = CreateNewModule();
auto const0 = builder.AddInstruction(
HloInstruction::CreateConstant(Literal::CreateR1<int32>({1, 2, 3, 4})));
// NOTE(review): the trailing arguments {0}, {4}, {2} are presumably start,
// limit and stride; that reading matches the expected result {1, 3} before
// negation -- confirm against HloInstruction::CreateSlice.
auto slice1 = builder.AddInstruction(HloInstruction::CreateSlice(
ShapeUtil::MakeShape(S32, {2}), const0, {0}, {4}, {2}));
auto negate2 = builder.AddInstruction(HloInstruction::CreateUnary(
ShapeUtil::MakeShape(S32, {2}), HloOpcode::kNegate, slice1));
// Fuse explicitly instead of relying on a fusion pass.
hlo_module->AddEntryComputation(builder.Build())
->CreateFusionInstruction(/*instructions_to_fuse=*/{negate2, slice1},
HloInstruction::FusionKind::kLoop);
LiteralTestUtil::ExpectEqual(*Literal::CreateR1<int32>({-1, -3}),
*ExecuteAndTransfer(std::move(hlo_module), {}));
}
// Takes a two-element dynamic slice of the constant {1, 2, 3, 4}, negates it,
// manually fuses (negate, dynamic-slice) into a loop fusion, and checks that
// execution yields {-2, -3}.
XLA_TEST_F(FusionTest, DynamicSliceNegate) {
auto builder = HloComputation::Builder(TestName());
auto hlo_module = CreateNewModule();
auto const0 = builder.AddInstruction(
HloInstruction::CreateConstant(Literal::CreateR1<int32>({1, 2, 3, 4})));
auto const1 = builder.AddInstruction(
HloInstruction::CreateConstant(Literal::CreateR1<int32>({1})));
// NOTE(review): const1 ({1}) is presumably the start index and {2} the slice
// size; that reading matches the expected result {2, 3} before negation --
// confirm against HloInstruction::CreateDynamicSlice.
auto dynamic_slice2 =
builder.AddInstruction(HloInstruction::CreateDynamicSlice(
ShapeUtil::MakeShape(S32, {2}), const0, const1, {2}));
auto negate3 = builder.AddInstruction(HloInstruction::CreateUnary(
ShapeUtil::MakeShape(S32, {2}), HloOpcode::kNegate, dynamic_slice2));
// Fuse explicitly instead of relying on a fusion pass.
hlo_module->AddEntryComputation(builder.Build())
->CreateFusionInstruction(
/*instructions_to_fuse=*/{negate3, dynamic_slice2},
HloInstruction::FusionKind::kLoop);
LiteralTestUtil::ExpectEqual(*Literal::CreateR1<int32>({-2, -3}),
*ExecuteAndTransfer(std::move(hlo_module), {}));
}
// Applies a dynamic-update-slice that writes {6, 7} into the constant
// {1, 2, 3, 4}, negates the result, manually fuses
// (negate, dynamic-update-slice) into a loop fusion, and checks that
// execution yields {-1, -6, -7, -4}.
XLA_TEST_F(FusionTest, DynamicUpdateSliceNegate) {
auto builder = HloComputation::Builder(TestName());
auto hlo_module = CreateNewModule();
auto const0 = builder.AddInstruction(
HloInstruction::CreateConstant(Literal::CreateR1<int32>({1, 2, 3, 4})));
auto const1 = builder.AddInstruction(
HloInstruction::CreateConstant(Literal::CreateR1<int32>({6, 7})));
auto const2 = builder.AddInstruction(
HloInstruction::CreateConstant(Literal::CreateR1<int32>({1})));
// NOTE(review): const2 ({1}) is presumably the start index at which const1
// overwrites const0; that reading matches the expected result -- confirm
// against HloInstruction::CreateDynamicUpdateSlice.
auto dynamic_update_slice3 =
builder.AddInstruction(HloInstruction::CreateDynamicUpdateSlice(
ShapeUtil::MakeShape(S32, {4}), const0, const1, const2));
auto negate4 = builder.AddInstruction(
HloInstruction::CreateUnary(ShapeUtil::MakeShape(S32, {4}),
HloOpcode::kNegate, dynamic_update_slice3));
// Fuse explicitly instead of relying on a fusion pass.
hlo_module->AddEntryComputation(builder.Build())
->CreateFusionInstruction(
/*instructions_to_fuse=*/{negate4, dynamic_update_slice3},
HloInstruction::FusionKind::kLoop);
LiteralTestUtil::ExpectEqual(*Literal::CreateR1<int32>({-1, -6, -7, -4}),
*ExecuteAndTransfer(std::move(hlo_module), {}));
}
// Reshapes the constant vector {1, 2, 3, 4} to a 2x2 array, negates it,
// manually fuses (negate, reshape) into a loop fusion, and checks that
// execution yields {{-1, -2}, {-3, -4}}.
XLA_TEST_F(FusionTest, ReshapeNegate) {
auto builder = HloComputation::Builder(TestName());
auto hlo_module = CreateNewModule();
auto const0 = builder.AddInstruction(
HloInstruction::CreateConstant(Literal::CreateR1<int32>({1, 2, 3, 4})));
auto reshape1 = builder.AddInstruction(
HloInstruction::CreateReshape(ShapeUtil::MakeShape(S32, {2, 2}), const0));
auto negate2 = builder.AddInstruction(HloInstruction::CreateUnary(
ShapeUtil::MakeShape(S32, {2, 2}), HloOpcode::kNegate, reshape1));
// Fuse explicitly instead of relying on a fusion pass.
hlo_module->AddEntryComputation(builder.Build())
->CreateFusionInstruction(/*instructions_to_fuse=*/{negate2, reshape1},
HloInstruction::FusionKind::kLoop);
LiteralTestUtil::ExpectEqual(*Literal::CreateR2<int32>({{-1, -2}, {-3, -4}}),
*ExecuteAndTransfer(std::move(hlo_module), {}));
}
// Transposes the 2x2 constant {{1, 2}, {3, 4}} with permutation {1, 0},
// negates it, manually fuses (negate, transpose) into a loop fusion, and
// checks that execution yields {{-1, -3}, {-2, -4}}. The test name is wrapped
// in DISABLED_ON_GPU pending the investigation below.
// TODO(b/64070202): Investigate failure.
XLA_TEST_F(FusionTest, DISABLED_ON_GPU(TransposeNegate)) {
auto builder = HloComputation::Builder(TestName());
auto hlo_module = CreateNewModule();
auto const0 = builder.AddInstruction(HloInstruction::CreateConstant(
Literal::CreateR2<int32>({{1, 2}, {3, 4}})));
auto transpose1 = builder.AddInstruction(HloInstruction::CreateTranspose(
ShapeUtil::MakeShape(S32, {2, 2}), const0, {1, 0}));
auto negate2 = builder.AddInstruction(HloInstruction::CreateUnary(
ShapeUtil::MakeShape(S32, {2, 2}), HloOpcode::kNegate, transpose1));
// Fuse explicitly instead of relying on a fusion pass.
hlo_module->AddEntryComputation(builder.Build())
->CreateFusionInstruction(/*instructions_to_fuse=*/{negate2, transpose1},
HloInstruction::FusionKind::kLoop);
LiteralTestUtil::ExpectEqual(*Literal::CreateR2<int32>({{-1, -3}, {-2, -4}}),
*ExecuteAndTransfer(std::move(hlo_module), {}));
}
// Pads the constant {1, 2, 3, 4} with the value -1 (one element of low, high
// and interior padding, giving nine elements), negates the result, manually
// fuses (negate, pad) into a loop fusion, and checks the executed output.
XLA_TEST_F(FusionTest, PadNegate) {
auto builder = HloComputation::Builder(TestName());
auto hlo_module = CreateNewModule();
auto const0 = builder.AddInstruction(
HloInstruction::CreateConstant(Literal::CreateR1<int32>({1, 2, 3, 4})));
// Scalar padding value; after negation every padded position becomes 1.
auto const1 = builder.AddInstruction(
HloInstruction::CreateConstant(Literal::CreateR0<int32>(-1)));
// Set up the padding configuration {low: 1, high: 1, interior: 1}.
PaddingConfig padding_config;
auto dimension = padding_config.add_dimensions();
dimension->set_edge_padding_low(1);
dimension->set_edge_padding_high(1);
dimension->set_interior_padding(1);
auto pad2 = builder.AddInstruction(HloInstruction::CreatePad(
ShapeUtil::MakeShape(S32, {9}), const0, const1, padding_config));
auto negate3 = builder.AddInstruction(HloInstruction::CreateUnary(
ShapeUtil::MakeShape(S32, {9}), HloOpcode::kNegate, pad2));
// Fuse explicitly instead of relying on a fusion pass.
hlo_module->AddEntryComputation(builder.Build())
->CreateFusionInstruction(/*instructions_to_fuse=*/{negate3, pad2},
HloInstruction::FusionKind::kLoop);
LiteralTestUtil::ExpectEqual(
*Literal::CreateR1<int32>({1, -1, 1, -2, 1, -3, 1, -4, 1}),
*ExecuteAndTransfer(std::move(hlo_module), {}));
}
std::unique_ptr<HloComputation> MakeReduceTestComputation() {
auto builder = HloComputation::Builder("add");
auto lhs = builder.AddInstruction(HloInstruction::CreateParameter(
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册