Commit 73603a0a authored by Shenghang Tsai, committed by GitHub

upgrade XRT XLA to TF 2.3.0 (#3531)

* compile tf 2.3.0 with gcc 7.3

* fix oneflow eigen

* minor fix

* fix include

* update protobuf if xla is on

* update path of tf proto generated cpp files

* fix path in script

* add .clangd to git ignore

* update xla ifs

* update scripts

* update path in script for clangd

* add gitignore

* add cmake flag XRT_TF_URL

* rm comment

* check in changes

* bash tricks to enable gcc 7.3

* use arg to control tuna

* bumpversion

* fix build wheel

* use real path

* add dir for cpu

* fix unwanted yum update cublas

* uncomment all

* rm suffix of wheelhouse_dir

* add log info
Co-authored-by: tsai <caishenghang@1f-dev.kbaeegfb1x0ubnoznzequyxzve.bx.internal.cloudapp.net>
Co-authored-by: tsai <caishenghang@oneflow.org>
Former-commit-id: da12e8db
Parent 7f8aae21
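Note: the "bash tricks to enable gcc 7.3" item refers to CentOS Software Collections; a minimal sketch of enabling the toolchain, mirroring the scl_source lines added in the Dockerfile and build script below:

    # CentOS 7 with centos-release-scl and devtoolset-7-gcc* installed (see Dockerfile changes)
    source scl_source enable devtoolset-7
    gcc --version   # should now report gcc 7.3.x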
@@ -21,3 +21,5 @@ wheelhouse*
/oneflow/python/__export_symbols__.py
/oneflow/python/compatibility.py
/oneflow/python/framework/sysconfig_gen.py
.clangd
compile_commands.json
@@ -214,6 +214,7 @@ include_directories(${ONEFLOW_INCLUDE_SRC_DIRS})
if(WITH_XLA)
list(APPEND oneflow_third_party_dependencies tensorflow_copy_libs_to_destination)
list(APPEND oneflow_third_party_dependencies tensorflow_symlink_headers)
list(APPEND oneflow_third_party_libs ${TENSORFLOW_XLA_LIBRARIES})
endif()
......
@@ -4,8 +4,8 @@ set(EIGEN_INCLUDE_DIR ${THIRD_PARTY_DIR}/eigen/include/eigen3)
set(EIGEN_INSTALL_DIR ${THIRD_PARTY_DIR}/eigen)
if(WITH_XLA)
#set(EIGEN_URL "https://storage.googleapis.com/mirror.tensorflow.org/bitbucket.org/eigen/eigen/get/8071cda5714d.tar.gz")
set(EIGEN_URL "https://bitbucket.org/eigen/eigen/get/8071cda5714d.tar.gz")
#set(EIGEN_URL "https://storage.googleapis.com/mirror.tensorflow.org/gitlab.com/libeigen/eigen/-/archive/386d809bde475c65b7940f290efe80e6a05878c4/eigen-386d809bde475c65b7940f290efe80e6a05878c4.tar.gz")
set(EIGEN_URL "https://gitlab.com/libeigen/eigen/-/archive/386d809bde475c65b7940f290efe80e6a05878c4/eigen-386d809bde475c65b7940f290efe80e6a05878c4.tar.gz")
else()
set(EIGEN_URL ${THIRD_PARTY_SUBMODULE_DIR}/eigen/src/eigen)
endif()
@@ -17,7 +17,7 @@ endif()
#add_definitions(-DEIGEN_NO_AUTOMATIC_RESIZING -DEIGEN_NO_MALLOC -DEIGEN_USE_GPU)
if (THIRD_PARTY)
ExternalProject_Add(eigen
PREFIX eigen
URL ${EIGEN_URL}
......
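Note: the Eigen mirror is swapped because the project migrated from Bitbucket to GitLab, leaving the old archive URL dead. A quick, illustrative way to sanity-check the pinned archive before a long third-party build:

    # expect an HTTP 200 (or a redirect) if the pinned Eigen tarball is still reachable
    curl -fsSLI "https://gitlab.com/libeigen/eigen/-/archive/386d809bde475c65b7940f290efe80e6a05878c4/eigen-386d809bde475c65b7940f290efe80e6a05878c4.tar.gz" | head -n 1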
@@ -6,7 +6,7 @@ set(PROTOBUF_BINARY_DIR ${THIRD_PARTY_DIR}/protobuf/bin)
set(PROTOBUF_SRC_DIR ${CMAKE_CURRENT_BINARY_DIR}/protobuf/src/protobuf/src)
if(WITH_XLA)
set(PROTOBUF_URL "https://storage.googleapis.com/mirror.tensorflow.org/github.com/protocolbuffers/protobuf/archive/310ba5ee72661c081129eb878c1bbcec936b20f0.tar.gz")
set(PROTOBUF_URL "https://github.com/protocolbuffers/protobuf/archive/v3.9.2.zip")
else()
set(PROTOBUF_URL ${THIRD_PARTY_SUBMODULE_DIR}/protobuf/src/protobuf)
endif()
......
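Note: with WITH_XLA on, protobuf is pinned to v3.9.2, presumably to match what TF 2.3 builds against. Once the third-party step finishes, the locally built compiler can be checked against the pin (a sketch, assuming the PROTOBUF_BINARY_DIR layout above):

    # PROTOBUF_BINARY_DIR is ${THIRD_PARTY_DIR}/protobuf/bin per the cmake file above
    "$THIRD_PARTY_DIR/protobuf/bin/protoc" --version   # expect: libprotoc 3.9.2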
@@ -36,8 +36,7 @@ set(TENSORFLOW_INSTALL_DIR ${THIRD_PARTY_DIR}/tensorflow)
set(PATCHES_DIR ${PROJECT_SOURCE_DIR}/oneflow/xrt/patches)
set(TENSORFLOW_JIT_DIR ${TENSORFLOW_SRCS_DIR}/tensorflow/compiler/jit)
set(TENSORFLOW_GEN_DIR ${TENSORFLOW_SRCS_DIR}/bazel-out/${TENSORFLOW_GENFILE_DIR}/genfiles)
set(TENSORFLOW_GEN_DIR ${TENSORFLOW_SRCS_DIR}/bazel-out/${TENSORFLOW_GENFILE_DIR}/bin)
set(TENSORFLOW_EXTERNAL_DIR ${TENSORFLOW_SRCS_DIR}/bazel-tensorflow/external)
set(THIRD_ABSL_DIR ${TENSORFLOW_EXTERNAL_DIR}/com_google_absl)
set(THIRD_PROTOBUF_DIR ${TENSORFLOW_EXTERNAL_DIR}/com_google_protobuf/src)
@@ -54,36 +53,68 @@ list(APPEND TENSORFLOW_XLA_INCLUDE_DIR
${THIRD_SNAPPY_DIR}
${THIRD_RE2_DIR}
)
include_directories(${TENSORFLOW_XLA_INCLUDE_DIR})
list(APPEND TENSORFLOW_XLA_INCLUDE_INSTALL_DIR
"${TENSORFLOW_INSTALL_DIR}/include/tensorflow_inc"
"${TENSORFLOW_INSTALL_DIR}/include/tensorflow_gen"
"${TENSORFLOW_INSTALL_DIR}/include/absl"
"${TENSORFLOW_INSTALL_DIR}/include/protobuf"
"${TENSORFLOW_INSTALL_DIR}/include/boringssl"
"${TENSORFLOW_INSTALL_DIR}/include/snappy"
"${TENSORFLOW_INSTALL_DIR}/include/re2"
)
list(APPEND TENSORFLOW_XLA_LIBRARIES libtensorflow_framework.so.1)
list(APPEND TENSORFLOW_XLA_LIBRARIES libxla_core.so)
link_directories(${TENSORFLOW_INSTALL_DIR}/lib)
if(NOT XRT_TF_URL)
set(XRT_TF_URL https://github.com/Oneflow-Inc/tensorflow/archive/1f_dep_v2.3.0r4.zip)
endif()
if (THIRD_PARTY)
ExternalProject_Add(${TENSORFLOW_PROJECT}
PREFIX ${TENSORFLOW_SOURCES_DIR}
GIT_REPOSITORY ${TENSORFLOW_GIT_URL}
GIT_TAG ${TENSORFLOW_GIT_TAG}
URL ${XRT_TF_URL}
CONFIGURE_COMMAND ""
BUILD_COMMAND cd ${TENSORFLOW_SRCS_DIR} &&
bazel build ${TENSORFLOW_BUILD_CMD} -j 20 //tensorflow/compiler/jit/xla_lib:libxla_core.so
bazel build ${TENSORFLOW_BUILD_CMD} -j HOST_CPUS //tensorflow/compiler/jit/xla_lib:libxla_core.so
INSTALL_COMMAND ""
)
set(TENSORFLOW_XLA_FRAMEWORK_LIB ${TENSORFLOW_SRCS_DIR}/bazel-bin/tensorflow/libtensorflow_framework.so.1)
set(TENSORFLOW_XLA_CORE_LIB ${TENSORFLOW_SRCS_DIR}/bazel-bin/tensorflow/compiler/jit/xla_lib/libxla_core.so)
set(TENSORFLOW_XLA_FRAMEWORK_LIB ${TENSORFLOW_SRCS_DIR}/bazel-bin/tensorflow/libtensorflow_framework.so.2)
set(TENSORFLOW_XLA_CORE_LIB ${TENSORFLOW_SRCS_DIR}/bazel-bin/tensorflow/compiler/jit/xla_lib/libxla_core.so)
add_custom_target(tensorflow_create_library_dir
COMMAND ${CMAKE_COMMAND} -E make_directory ${TENSORFLOW_INSTALL_DIR}/lib
DEPENDS ${TENSORFLOW_PROJECT})
add_custom_target(tensorflow_create_library_dir
COMMAND ${CMAKE_COMMAND} -E make_directory ${TENSORFLOW_INSTALL_DIR}/lib
DEPENDS ${TENSORFLOW_PROJECT})
add_custom_target(tensorflow_copy_libs_to_destination
COMMAND ${CMAKE_COMMAND} -E copy_if_different
${TENSORFLOW_XLA_FRAMEWORK_LIB} ${TENSORFLOW_XLA_CORE_LIB} ${TENSORFLOW_INSTALL_DIR}/lib
COMMAND ${CMAKE_COMMAND} -E create_symlink
${TENSORFLOW_INSTALL_DIR}/lib/libtensorflow_framework.so.2
${TENSORFLOW_INSTALL_DIR}/lib/libtensorflow_framework.so
DEPENDS tensorflow_create_library_dir)
add_custom_target(tensorflow_create_include_dir
COMMAND ${CMAKE_COMMAND} -E make_directory ${TENSORFLOW_INSTALL_DIR}/include
DEPENDS ${TENSORFLOW_PROJECT})
add_custom_target(tensorflow_symlink_headers
DEPENDS tensorflow_create_include_dir)
foreach(src_dst_pair IN ZIP_LISTS TENSORFLOW_XLA_INCLUDE_DIR TENSORFLOW_XLA_INCLUDE_INSTALL_DIR)
set(src ${src_dst_pair_0})
set(dst ${src_dst_pair_1})
add_custom_command(TARGET tensorflow_symlink_headers
COMMAND ${CMAKE_COMMAND} -E create_symlink
${src}
${dst}
)
endforeach()
add_custom_target(tensorflow_copy_libs_to_destination
COMMAND ${CMAKE_COMMAND} -E copy_if_different
${TENSORFLOW_XLA_FRAMEWORK_LIB} ${TENSORFLOW_XLA_CORE_LIB} ${TENSORFLOW_INSTALL_DIR}/lib
COMMAND ${CMAKE_COMMAND} -E create_symlink
${TENSORFLOW_INSTALL_DIR}/lib/libtensorflow_framework.so.1
${TENSORFLOW_INSTALL_DIR}/lib/libtensorflow_framework.so
DEPENDS tensorflow_create_library_dir)
endif(THIRD_PARTY)
include_directories(${TENSORFLOW_XLA_INCLUDE_INSTALL_DIR})
endif(WITH_XLA)
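Note: the XRT_TF_URL fallback means the TensorFlow source archive can be swapped at configure time without editing the cmake files; an illustrative override (build directory and other flags assumed):

    # any TF 2.3-compatible source zip should work here
    cmake .. -DWITH_XLA=ON -DTHIRD_PARTY=ON \
        -DXRT_TF_URL=https://github.com/Oneflow-Inc/tensorflow/archive/1f_dep_v2.3.0r4.zip

Also note that foreach(... IN ZIP_LISTS ...), used above for the header symlinks, requires CMake 3.17 or newer.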
ARG from
ARG use_tuna_yum=1
ARG pip_args="-i https://pypi.tuna.tsinghua.edu.cn/simple"
FROM ${from}
LABEL maintainer="OneFlow Maintainers"
@@ -13,9 +15,11 @@ ENV LD_LIBRARY_PATH /usr/local/lib64:/usr/local/lib
ENV PKG_CONFIG_PATH /usr/local/lib/pkgconfig
# use tuna mirror
COPY docker/package/manylinux/CentOS-Base.repo /etc/yum.repos.d/CentOS-Base.repo
RUN yum makecache
COPY docker/package/manylinux/CentOS-Base.repo /tmp/CentOS-Base.repo
RUN if [ "${use_tuna}" = "1" ]; then mv /etc/yum.repos.d/CentOS-Base.repo /etc/yum.repos.d/ && yum makecache ; fi
# in 10.1, the cuda yum repo will update cublas to 10.2 and break the build
RUN yum-config-manager --disable cuda
ARG MANYLINUX_SHA=f5da004
RUN yum -y install unzip && curl -L -o manylinux.zip https://github.com/pypa/manylinux/archive/${MANYLINUX_SHA}.zip && unzip manylinux.zip -d tmp && cp -r tmp/*/docker/build_scripts /build_scripts && bash build_scripts/build.sh && rm -r build_scripts tmp manylinux.zip
@@ -25,10 +29,10 @@ ENV SSL_CERT_FILE=/opt/_internal/certs.pem
RUN yum-config-manager --add-repo https://yum.repos.intel.com/setup/intelproducts.repo && \
rpm --import https://yum.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB && \
yum update -y && yum install -y epel-release && \
yum install -y intel-mkl-64bit-2020.0-088 nasm swig rdma-core-devel
yum -y install centos-release-scl && \
yum install -y intel-mkl-64bit-2020.0-088 nasm swig rdma-core-devel devtoolset-7-gcc*
ENV TUNA_INDEX="-i https://pypi.tuna.tsinghua.edu.cn/simple"
RUN /opt/python/cp35-cp35m/bin/pip install $TUNA_INDEX -U cmake && ln -s /opt/_internal/cpython-3.5.9/bin/cmake /usr/bin/cmake
RUN /opt/python/cp35-cp35m/bin/pip install $pip_args -U cmake && ln -s /opt/_internal/cpython-3.5.9/bin/cmake /usr/bin/cmake
# overwrite patchelf to fix "maximum size exceed" problem
RUN mkdir -p /tmp && curl -L -o 0.11.zip https://github.com/NixOS/patchelf/archive/0.11.zip && unzip 0.11.zip && cd patchelf-0.11 && sed -i 's/32/64/g' src/patchelf.cc && ./bootstrap.sh && ./configure && make -j`nproc` && make install && cd .. && rm -rf patchelf-0.11 0.11.zip
@@ -40,4 +44,10 @@ RUN /opt/python/cp35-cp35m/bin/pip install $TUNA_INDEX -r /tmp/dev-requirements.
&& /opt/python/cp38-cp38/bin/pip install $TUNA_INDEX -r /tmp/dev-requirements.txt --user \
&& rm /tmp/dev-requirements.txt
RUN curl -L https://github.com/bazelbuild/bazel/releases/download/3.4.1/bazel-3.4.1-linux-x86_64 -o /usr/local/bin/bazel \
&& chmod +x /usr/local/bin/bazel \
&& bazel
RUN echo "source scl_source enable devtoolset-7" >> ~/.bashrc
CMD ["/oneflow-src/docker/package/manylinux/build_wheel.sh"]
@@ -48,6 +48,14 @@ fi
cd $ONEFLOW_SRC_DIR
# TF requires py3 to build
export PATH=/opt/python/cp37-cp37m/bin:$PATH
python --version
gcc --version
# specify a mounted dir as bazel cache dir
export TEST_TMPDIR=$CACHE_DIR/bazel_cache
THIRD_PARTY_BUILD_DIR=$CACHE_DIR/build-third-party
THIRD_PARTY_INSTALL_DIR=$CACHE_DIR/build-third-party-install
COMMON_CMAKE_ARGS="-DCMAKE_BUILD_TYPE=Release -DBUILD_RDMA=ON -DTHIRD_PARTY_DIR=$THIRD_PARTY_INSTALL_DIR"
@@ -86,9 +94,10 @@ do
rm -rf $ONEFLOW_BUILD_DIR/python_scripts/oneflow/*.so
rm -rf $ONEFLOW_SRC_DIR/build/bdist.linux-x86_64
rm -rf $ONEFLOW_SRC_DIR/build/lib
cmake -DTHIRD_PARTY=OFF -DONEFLOW=ON\
cmake -DTHIRD_PARTY=OFF -DONEFLOW=ON \
-DCMAKE_EXPORT_COMPILE_COMMANDS=1 \
$COMMON_CMAKE_ARGS \
-DPython3_ROOT_DIR=$PY_ROOT \
-DPython3_EXECUTABLE=${PY_BIN} \
$EXTRA_ONEFLOW_CMAKE_ARGS \
$ONEFLOW_SRC_DIR
cmake --build . -j `nproc`
......
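Note: exporting TEST_TMPDIR is what anchors bazel's output root under the mounted cache directory, so the TF build cache survives container restarts. An illustrative check after configuring:

    export TEST_TMPDIR=$CACHE_DIR/bazel_cache
    bazel info output_base   # should print a path under $TEST_TMPDIR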
set -ex
ONEFLOW_SRC_DIR=${ONEFLOW_SRC_DIR:-${PWD}}
wheelhouse_dir=${ONEFLOW_SRC_DIR}/wheelhouse-xla
# TF requires py3 to build
PY_ROOT=/opt/python/cp37-cp37m
PY_BIN=${PY_ROOT}/bin
export PATH=$PY_BIN:$PATH
python --version
source scl_source enable devtoolset-7
cache_dir=$ONEFLOW_SRC_DIR/manylinux2014-build-cache-cuda-10.2-xla
cache_dir=$ONEFLOW_SRC_DIR/manylinux2014-build-cache-cuda-11.0-xla
export TEST_TMPDIR=$cache_dir/bazel_cache
gcc --version
bash docker/package/manylinux/build_wheel.sh \
--python3.6 \
--cache-dir $cache_dir \
--house-dir $wheelhouse_dir \
-DCMAKE_EXPORT_COMPILE_COMMANDS=1 \
-DWITH_XLA=ON
set -ex
docker run --rm -it \
-v `pwd`:`pwd` \
-w `pwd` oneflow:rel-manylinux2014-cuda-11.0 bash
set -ex
wheelhouse_dir=/oneflow-src/wheelhouse
wheelhouse_dir=`pwd`/wheelhouse
package_name=oneflow
tuna_build_args=""
tuna_build_args="--build-arg use_tuna_yum=0 --build-arg pip_args="""
function release() {
set -ex
docker_tag=oneflow:rel-manylinux2014-cuda-$1
@@ -12,22 +15,41 @@ function release() {
else
cudnn_version=7
fi
docker build --build-arg from=nvidia/cuda:$1-cudnn${cudnn_version}-devel-centos7 -f docker/package/manylinux/Dockerfile -t $docker_tag .
docker run --rm -it -v `pwd`:/oneflow-src -w /oneflow-src $docker_tag \
/oneflow-src/docker/package/manylinux/build_wheel.sh --cache-dir /oneflow-src/manylinux2014-build-cache-cuda-$1 \
--house-dir $wheelhouse_dir \
docker build --build-arg from=nvidia/cuda:$1-cudnn${cudnn_version}-devel-centos7 \
${tuna_build_args} \
-f docker/package/manylinux/Dockerfile -t $docker_tag .
docker run --rm -it -v `pwd`:`pwd` -w `pwd` $docker_tag \
docker/package/manylinux/build_wheel.sh --cache-dir `pwd`/manylinux2014-build-cache-cuda-$1 \
--house-dir ${wheelhouse_dir} \
--package-name ${package_name}_cu`echo $1 | tr -d .`
}
function release_cpu() {
docker run --rm -it -v `pwd`:/oneflow-src -w /oneflow-src oneflow:rel-manylinux2014-cuda-10.2 \
/oneflow-src/docker/package/manylinux/build_wheel.sh --cache-dir /oneflow-src/manylinux2014-build-cache-cpu \
--house-dir $wheelhouse_dir \
docker run --rm -it -v `pwd`:`pwd` -w `pwd` oneflow:rel-manylinux2014-cuda-10.2 \
docker/package/manylinux/build_wheel.sh --cache-dir `pwd`/manylinux2014-build-cache-cpu \
--house-dir ${wheelhouse_dir} \
-DBUILD_CUDA=OFF \
--package-name "${package_name}_cpu"
}
release_cpu
function release_xla() {
set -ex
docker_tag=oneflow:rel-manylinux2014-cuda-$1
if [ "$1" == "11.0" ]; then
cudnn_version=8
else
cudnn_version=7
fi
docker build --build-arg from=nvidia/cuda:$1-cudnn${cudnn_version}-devel-centos7 \
${tuna_build_args} \
-f docker/package/manylinux/Dockerfile -t $docker_tag .
docker run --rm -it -v `pwd`:`pwd` -w `pwd` $docker_tag \
bash -l docker/package/manylinux/build_wheel.sh --cache-dir `pwd`/manylinux2014-build-cache-cuda-$1-xla \
--house-dir ${wheelhouse_dir} \
--package-name ${package_name}_cu`echo $1 | tr -d .`_xla \
-DWITH_XLA=ON
}
release 11.0
release 10.2
release 10.1
@@ -35,3 +57,11 @@ release 10.0
release 9.2
release 9.1
release 9.0
release_cpu
release_xla 11.0
release_xla 10.2
release_xla 10.1
release_xla 10.0
# building XLA with CUDA 9.X failed, so no release_xla for 9.x
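Note: the per-CUDA package suffix above comes from stripping the dot out of the CUDA version with tr, e.g.:

    echo oneflow_cu$(echo 10.2 | tr -d .)_xla   # -> oneflow_cu102_xla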
@@ -87,7 +87,9 @@ def compare_with_tensorflow_rmsprop(
gradients = tape.gradient(loss, var)
opt.apply_gradients(zip([gradients], [var]))
assert np.allclose(x.flatten(), var.numpy().flatten(), rtol=1e-3, atol=1e-3,)
assert np.allclose(x.flatten(), var.numpy().flatten(), rtol=1e-3, atol=1e-3,), (
x.flatten() - var.numpy().flatten()
)
def compare_with_tensorflow_adam(
......
@@ -13,4 +13,4 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
__version__ = "0.1.10"
__version__ = "0.1.11b1"
@@ -38,7 +38,8 @@ XlaAllocator::XlaAllocator(const se::Platform *platform, DeviceBufferAllocator *
XlaAllocator::~XlaAllocator() {}
xla::StatusOr<se::OwningDeviceMemory> XlaAllocator::Allocate(int device_ordinal, uint64 size,
bool retry_on_failure) {
bool retry_on_failure,
int64 /*memory_space*/) {
se::DeviceMemoryBase memory_base;
if (allocate_index_ < populated_buffers_.size()
&& populated_buffers_[allocate_index_].populated) {
......
@@ -16,6 +16,7 @@ limitations under the License.
#ifndef ONEFLOW_XRT_XLA_XLA_ALLOCATOR_H_
#define ONEFLOW_XRT_XLA_XLA_ALLOCATOR_H_
#include "oneflow/core/common/util.h"
#include "oneflow/xrt/xla/memory/device_buffer_allocator.h"
#include "tensorflow/compiler/xla/statusor.h"
@@ -28,14 +29,16 @@ namespace mola {
namespace se = tensorflow::se;
using uint64 = tensorflow::uint64;
using int64 = tensorflow::int64;
class XlaAllocator : public se::DeviceMemoryAllocator {
public:
explicit XlaAllocator(const se::Platform *platform, DeviceBufferAllocator *allocator);
virtual ~XlaAllocator();
using se::DeviceMemoryAllocator::Allocate;
xla::StatusOr<se::OwningDeviceMemory> Allocate(int device_ordinal, uint64 size,
bool retry_on_failure) override;
bool retry_on_failure,
int64 /*memory_space*/) override;
tensorflow::Status Deallocate(int device_ordinal, se::DeviceMemoryBase mem) override;
bool AllowsAsynchronousDeallocation() const override { return true; }
@@ -47,6 +50,10 @@ class XlaAllocator : public se::DeviceMemoryAllocator {
void PopulateDeviceMemory(const std::vector<se::DeviceMemoryBase> &device_buffers,
const std::vector<int64_t> &allocation_indices);
stream_executor::port::StatusOr<stream_executor::Stream *> GetStream(
int device_ordinal) override {
UNIMPLEMENTED();
};
private:
DeviceBufferAllocator *allocator_;
......
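Note: both allocator changes track the se::DeviceMemoryAllocator interface in TF 2.3: Allocate() gained a memory_space argument, and GetStream() appears to have become a required virtual member, so the subclass now overrides it with an UNIMPLEMENTED() stub.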
@@ -137,10 +137,11 @@ std::shared_ptr<Executable> XlaGraphCompiler::BuildExecutable(
xla::ExecutableBuildOptions build_options;
build_options.set_device_ordinal(this->device_ordinal_);
build_options.set_result_layout(xla_output_shape);
MOLA_CHECK_AND_ASSIGN(auto executable,
MOLA_CHECK_AND_ASSIGN(auto executables,
client->Compile(computation, argument_layouts, build_options));
return std::make_shared<XlaExecutable>(builder_->name(), this->device_, xla_input_shapes, xla_output_shape,
std::move(executable));
CHECK(executables.size() == 1);
return std::make_shared<XlaExecutable>(builder_->name(), this->device_, xla_input_shapes,
xla_output_shape, std::move(executables.at(0)));
}
void XlaGraphCompiler::BuildEntryParameters(const std::vector<Parameter> &entry_params,
......
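Note: in TF 2.3, xla::LocalClient::Compile returns a vector of executables rather than a single one, hence the CHECK that exactly one executable came back and the std::move of executables.at(0).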