Commit 61d520b8 authored by George Karpenkov, committed by TensorFlower Gardener

[XLA:GPU] [NFC] Remove unneeded gpu_constants.cc file

PiperOrigin-RevId: 481152442
Parent 85ea1f4f
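For context: the header diff below replaces constants that were declared extern in gpu_constants.h and defined in gpu_constants.cc with C++17 inline constexpr variables, which is why the .cc file and its srcs entry in the BUILD rule can be deleted. A minimal, self-contained sketch of that pattern (the namespace and layout here are illustrative, not the actual XLA sources):

// Sketch of the before/after pattern applied by this commit.
//
// Before (pre-C++17 style): the header only declares the constant and a
// separate translation unit provides the single definition:
//   gpu_constants.h:  extern const int64_t kEntryParameterAlignBytes;
//   gpu_constants.cc: const int64_t kEntryParameterAlignBytes = 16;
//
// After (C++17): an inline variable may be defined in the header itself; the
// "inline" keyword guarantees a single object shared by every translation
// unit that includes the header, so no .cc definition is needed.
#include <cstdint>

namespace example {  // illustrative namespace, not part of XLA
inline constexpr int64_t kEntryParameterAlignBytes = 16;
}  // namespace example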
tensorflow/compiler/xla/service/gpu/BUILD
......
@@ -80,7 +80,6 @@ cc_library(
 cc_library(
     name = "gpu_constants",
-    srcs = ["gpu_constants.cc"],
     hdrs = ["gpu_constants.h"],
     deps = [
         "//tensorflow/compiler/xla:types",
......
tensorflow/compiler/xla/service/gpu/gpu_constants.cc (deleted file)
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/compiler/xla/service/gpu/gpu_constants.h"
-
-#include "tensorflow/tsl/framework/allocator.h"
-
-namespace xla {
-namespace gpu {
-
-// kEntryParameterAlignBytes is equal to EIGEN_MAX_ALIGN_BYTES, though including
-// Eigen headers here to get that symbol may not be a good idea.
-// EIGEN_MAX_ALIGN_BYTES may differ between CUDA-enabled builds vs CUDA-disabled
-// builds and we don't want the IR generated by XLA:GPU to depend on that.
-//
-// TODO(b/111767313): Consider raising EIGEN_MAX_ALIGN_BYTES if it helps.
-const int64_t kEntryParameterAlignBytes = 16;
-
-// cudnn requires 128-bit (16-byte) alignment for TensorCore operations, but
-// says that 1024-bit (128-byte) alignment "may deliver better performance".
-// https://docs.nvidia.com/deeplearning/cudnn/developer-guide/index.html#tensor-ops-guidelines-for-dl-compiler
-const int64_t kXlaAllocatedBufferAlignBytes = 128;
-
-const int64_t kConstantBufferAlignBytes = kXlaAllocatedBufferAlignBytes;
-
-}  // namespace gpu
-}  // namespace xla
tensorflow/compiler/xla/service/gpu/gpu_constants.h
......
@@ -16,20 +16,33 @@ limitations under the License.
 #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_GPU_CONSTANTS_H_
 #define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_GPU_CONSTANTS_H_
 
-#include "tensorflow/compiler/xla/types.h"
+#include <cstdint>
 
 namespace xla {
 namespace gpu {
 
 // Minimum alignment for buffers passed as incoming arguments by TensorFlow.
-extern const int64_t kEntryParameterAlignBytes;
+//
+// kEntryParameterAlignBytes is equal to EIGEN_MAX_ALIGN_BYTES, though including
+// Eigen headers here to get that symbol may not be a good idea.
+// EIGEN_MAX_ALIGN_BYTES may differ between CUDA-enabled builds vs CUDA-disabled
+// builds and we don't want the IR generated by XLA:GPU to depend on that.
+//
+// TODO(b/111767313): Consider raising EIGEN_MAX_ALIGN_BYTES if it helps.
+inline constexpr int64_t kEntryParameterAlignBytes = 16;
 
 // Minimum alignment for buffers allocated by XLA: the temp buffers and the live
 // out (result) buffers.
-extern const int64_t kXlaAllocatedBufferAlignBytes;
+//
+// cudnn requires 128-bit (16-byte) alignment for TensorCore operations, but
+// says that 1024-bit (128-byte) alignment "may deliver better performance".
+// https://docs.nvidia.com/deeplearning/cudnn/developer-guide/index.html#tensor-ops-guidelines-for-dl-compiler
+//
+inline constexpr int64_t kXlaAllocatedBufferAlignBytes = 128;
 
 // Minimum alignment for constant buffers.
-extern const int64_t kConstantBufferAlignBytes;
+inline constexpr int64_t kConstantBufferAlignBytes =
+    kXlaAllocatedBufferAlignBytes;
 
 }  // namespace gpu
 }  // namespace xla
......
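As a hedged illustration (not code from this commit or from XLA) of how alignment constants like kXlaAllocatedBufferAlignBytes are typically consumed: an allocator honoring the constant rounds every buffer size or offset up to the next 128-byte multiple. RoundUpTo below is a hypothetical helper written for this sketch:

// Hedged sketch: rounding sizes up to the 128-byte XLA buffer alignment.
// RoundUpTo is a hypothetical helper for illustration, not an XLA API.
#include <cstdint>

inline constexpr int64_t kXlaAllocatedBufferAlignBytes = 128;

constexpr int64_t RoundUpTo(int64_t value, int64_t alignment) {
  return (value + alignment - 1) / alignment * alignment;
}

// A 100-byte buffer and a 129-byte buffer are padded to 128 and 256 bytes.
static_assert(RoundUpTo(100, kXlaAllocatedBufferAlignBytes) == 128, "");
static_assert(RoundUpTo(129, kXlaAllocatedBufferAlignBytes) == 256, "");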