Commit 73603a0a authored by Shenghang Tsai, committed by GitHub

upgrade XRT XLA to TF 2.3.0 (#3531)

* compile tf 2.3.0 with gcc 7.3

* fix oneflow eigen

* minor fix

* fix include

* update protobuf if xla is on

* update path of tf proto generated cpp files

* fix path in script

* add .clangd to git ignore

* update xla ifs

* update scripts

* update path in script for clangd

* add gitignore

* add cmake flag XRT_TF_URL

* rm comment

* check in changes

* bash tricks to enable gcc 7.3

* use arg to control tuna

* bumpversion

* fix build wheel

* use real path

* add dir for cpu

* fix unwanted yum update cublas

* uncomment all

* rm suffix of wheelhouse_dir

* add log info
Co-authored-by: tsai <caishenghang@1f-dev.kbaeegfb1x0ubnoznzequyxzve.bx.internal.cloudapp.net>
Co-authored-by: tsai <caishenghang@oneflow.org>
Former-commit-id: da12e8db
Parent 7f8aae21
@@ -21,3 +21,5 @@ wheelhouse*
 /oneflow/python/__export_symbols__.py
 /oneflow/python/compatibility.py
 /oneflow/python/framework/sysconfig_gen.py
+.clangd
+compile_commands.json
@@ -214,6 +214,7 @@ include_directories(${ONEFLOW_INCLUDE_SRC_DIRS})
 if(WITH_XLA)
   list(APPEND oneflow_third_party_dependencies tensorflow_copy_libs_to_destination)
+  list(APPEND oneflow_third_party_dependencies tensorflow_symlink_headers)
   list(APPEND oneflow_third_party_libs ${TENSORFLOW_XLA_LIBRARIES})
 endif()
...
@@ -4,8 +4,8 @@ set(EIGEN_INCLUDE_DIR ${THIRD_PARTY_DIR}/eigen/include/eigen3)
 set(EIGEN_INSTALL_DIR ${THIRD_PARTY_DIR}/eigen)
 if(WITH_XLA)
-  #set(EIGEN_URL "https://storage.googleapis.com/mirror.tensorflow.org/bitbucket.org/eigen/eigen/get/8071cda5714d.tar.gz")
-  set(EIGEN_URL "https://bitbucket.org/eigen/eigen/get/8071cda5714d.tar.gz")
+  #set(EIGEN_URL "https://storage.googleapis.com/mirror.tensorflow.org/gitlab.com/libeigen/eigen/-/archive/386d809bde475c65b7940f290efe80e6a05878c4/eigen-386d809bde475c65b7940f290efe80e6a05878c4.tar.gz")
+  set(EIGEN_URL "https://gitlab.com/libeigen/eigen/-/archive/386d809bde475c65b7940f290efe80e6a05878c4/eigen-386d809bde475c65b7940f290efe80e6a05878c4.tar.gz")
 else()
   set(EIGEN_URL ${THIRD_PARTY_SUBMODULE_DIR}/eigen/src/eigen)
 endif()
...
@@ -6,7 +6,7 @@ set(PROTOBUF_BINARY_DIR ${THIRD_PARTY_DIR}/protobuf/bin)
 set(PROTOBUF_SRC_DIR ${CMAKE_CURRENT_BINARY_DIR}/protobuf/src/protobuf/src)
 if(WITH_XLA)
-  set(PROTOBUF_URL "https://storage.googleapis.com/mirror.tensorflow.org/github.com/protocolbuffers/protobuf/archive/310ba5ee72661c081129eb878c1bbcec936b20f0.tar.gz")
+  set(PROTOBUF_URL "https://github.com/protocolbuffers/protobuf/archive/v3.9.2.zip")
 else()
   set(PROTOBUF_URL ${THIRD_PARTY_SUBMODULE_DIR}/protobuf/src/protobuf)
 endif()
...
@@ -36,8 +36,7 @@ set(TENSORFLOW_INSTALL_DIR ${THIRD_PARTY_DIR}/tensorflow)
 set(PATCHES_DIR ${PROJECT_SOURCE_DIR}/oneflow/xrt/patches)
 set(TENSORFLOW_JIT_DIR ${TENSORFLOW_SRCS_DIR}/tensorflow/compiler/jit)
-set(TENSORFLOW_GEN_DIR ${TENSORFLOW_SRCS_DIR}/bazel-out/${TENSORFLOW_GENFILE_DIR}/genfiles)
+set(TENSORFLOW_GEN_DIR ${TENSORFLOW_SRCS_DIR}/bazel-out/${TENSORFLOW_GENFILE_DIR}/bin)
 set(TENSORFLOW_EXTERNAL_DIR ${TENSORFLOW_SRCS_DIR}/bazel-tensorflow/external)
 set(THIRD_ABSL_DIR ${TENSORFLOW_EXTERNAL_DIR}/com_google_absl)
 set(THIRD_PROTOBUF_DIR ${TENSORFLOW_EXTERNAL_DIR}/com_google_protobuf/src)
@@ -54,36 +53,68 @@ list(APPEND TENSORFLOW_XLA_INCLUDE_DIR
   ${THIRD_SNAPPY_DIR}
   ${THIRD_RE2_DIR}
 )
-include_directories(${TENSORFLOW_XLA_INCLUDE_DIR})
+list(APPEND TENSORFLOW_XLA_INCLUDE_INSTALL_DIR
+  "${TENSORFLOW_INSTALL_DIR}/include/tensorflow_inc"
+  "${TENSORFLOW_INSTALL_DIR}/include/tensorflow_gen"
+  "${TENSORFLOW_INSTALL_DIR}/include/absl"
+  "${TENSORFLOW_INSTALL_DIR}/include/protobuf"
+  "${TENSORFLOW_INSTALL_DIR}/include/boringssl"
+  "${TENSORFLOW_INSTALL_DIR}/include/snappy"
+  "${TENSORFLOW_INSTALL_DIR}/include/re2"
+)
 list(APPEND TENSORFLOW_XLA_LIBRARIES libtensorflow_framework.so.1)
 list(APPEND TENSORFLOW_XLA_LIBRARIES libxla_core.so)
 link_directories(${TENSORFLOW_INSTALL_DIR}/lib)
+if(NOT XRT_TF_URL)
+  set(XRT_TF_URL https://github.com/Oneflow-Inc/tensorflow/archive/1f_dep_v2.3.0r4.zip)
+endif()
 if (THIRD_PARTY)
 ExternalProject_Add(${TENSORFLOW_PROJECT}
   PREFIX ${TENSORFLOW_SOURCES_DIR}
-  GIT_REPOSITORY ${TENSORFLOW_GIT_URL}
-  GIT_TAG ${TENSORFLOW_GIT_TAG}
+  URL ${XRT_TF_URL}
   CONFIGURE_COMMAND ""
   BUILD_COMMAND cd ${TENSORFLOW_SRCS_DIR} &&
-    bazel build ${TENSORFLOW_BUILD_CMD} -j 20 //tensorflow/compiler/jit/xla_lib:libxla_core.so
+    bazel build ${TENSORFLOW_BUILD_CMD} -j HOST_CPUS //tensorflow/compiler/jit/xla_lib:libxla_core.so
   INSTALL_COMMAND ""
 )
-set(TENSORFLOW_XLA_FRAMEWORK_LIB ${TENSORFLOW_SRCS_DIR}/bazel-bin/tensorflow/libtensorflow_framework.so.1)
+set(TENSORFLOW_XLA_FRAMEWORK_LIB ${TENSORFLOW_SRCS_DIR}/bazel-bin/tensorflow/libtensorflow_framework.so.2)
 set(TENSORFLOW_XLA_CORE_LIB ${TENSORFLOW_SRCS_DIR}/bazel-bin/tensorflow/compiler/jit/xla_lib/libxla_core.so)
 add_custom_target(tensorflow_create_library_dir
   COMMAND ${CMAKE_COMMAND} -E make_directory ${TENSORFLOW_INSTALL_DIR}/lib
   DEPENDS ${TENSORFLOW_PROJECT})
 add_custom_target(tensorflow_copy_libs_to_destination
   COMMAND ${CMAKE_COMMAND} -E copy_if_different
     ${TENSORFLOW_XLA_FRAMEWORK_LIB} ${TENSORFLOW_XLA_CORE_LIB} ${TENSORFLOW_INSTALL_DIR}/lib
   COMMAND ${CMAKE_COMMAND} -E create_symlink
-    ${TENSORFLOW_INSTALL_DIR}/lib/libtensorflow_framework.so.1
+    ${TENSORFLOW_INSTALL_DIR}/lib/libtensorflow_framework.so.2
     ${TENSORFLOW_INSTALL_DIR}/lib/libtensorflow_framework.so
   DEPENDS tensorflow_create_library_dir)
+add_custom_target(tensorflow_create_include_dir
+  COMMAND ${CMAKE_COMMAND} -E make_directory ${TENSORFLOW_INSTALL_DIR}/include
+  DEPENDS ${TENSORFLOW_PROJECT})
+add_custom_target(tensorflow_symlink_headers
+  DEPENDS tensorflow_create_include_dir)
+foreach(src_dst_pair IN ZIP_LISTS TENSORFLOW_XLA_INCLUDE_DIR TENSORFLOW_XLA_INCLUDE_INSTALL_DIR)
+  set(src ${src_dst_pair_0})
+  set(dst ${src_dst_pair_1})
+  add_custom_command(TARGET tensorflow_symlink_headers
+    COMMAND ${CMAKE_COMMAND} -E create_symlink
+      ${src}
+      ${dst}
+  )
+endforeach()
 endif(THIRD_PARTY)
+include_directories(${TENSORFLOW_XLA_INCLUDE_INSTALL_DIR})
 endif(WITH_XLA)
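
The header symlinks above are paired up with foreach(... IN ZIP_LISTS ...), which walks the two lists in lockstep (this needs CMake 3.17 or newer). The new XRT_TF_URL cache variable makes the pinned TensorFlow fork relocatable; a minimal sketch of overriding it at configure time, where the mirror URL is a hypothetical placeholder for any archive with the same layout as the default:

    # hypothetical mirror; defaults to the Oneflow-Inc/tensorflow 1f_dep_v2.3.0r4 archive
    cmake -DTHIRD_PARTY=ON -DWITH_XLA=ON \
          -DXRT_TF_URL=https://example.com/mirror/tensorflow-1f_dep_v2.3.0r4.zip \
          $ONEFLOW_SRC_DIR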
 ARG from
+ARG use_tuna_yum=1
+ARG pip_args="-i https://pypi.tuna.tsinghua.edu.cn/simple"
 FROM ${from}
 LABEL maintainer="OneFlow Maintainers"
@@ -13,9 +15,11 @@ ENV LD_LIBRARY_PATH /usr/local/lib64:/usr/local/lib
 ENV PKG_CONFIG_PATH /usr/local/lib/pkgconfig
 
 # use tuna mirror
-COPY docker/package/manylinux/CentOS-Base.repo /etc/yum.repos.d/CentOS-Base.repo
-RUN yum makecache
+COPY docker/package/manylinux/CentOS-Base.repo /tmp/CentOS-Base.repo
+RUN if [ "${use_tuna_yum}" = "1" ]; then mv /tmp/CentOS-Base.repo /etc/yum.repos.d/ && yum makecache ; fi
+
+# in 10.1, the cuda yum repo will update cublas to 10.2 and break the build
+RUN yum-config-manager --disable cuda
 
 ARG MANYLINUX_SHA=f5da004
 RUN yum -y install unzip && curl -L -o manylinux.zip https://github.com/pypa/manylinux/archive/${MANYLINUX_SHA}.zip && unzip manylinux.zip -d tmp && cp -r tmp/*/docker/build_scripts /build_scripts && bash build_scripts/build.sh && rm -r build_scripts tmp manylinux.zip
@@ -25,10 +29,10 @@ ENV SSL_CERT_FILE=/opt/_internal/certs.pem
 RUN yum-config-manager --add-repo https://yum.repos.intel.com/setup/intelproducts.repo && \
     rpm --import https://yum.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB && \
     yum update -y && yum install -y epel-release && \
-    yum install -y intel-mkl-64bit-2020.0-088 nasm swig rdma-core-devel
+    yum -y install centos-release-scl && \
+    yum install -y intel-mkl-64bit-2020.0-088 nasm swig rdma-core-devel devtoolset-7-gcc*
 
-ENV TUNA_INDEX="-i https://pypi.tuna.tsinghua.edu.cn/simple"
-RUN /opt/python/cp35-cp35m/bin/pip install $TUNA_INDEX -U cmake && ln -s /opt/_internal/cpython-3.5.9/bin/cmake /usr/bin/cmake
+RUN /opt/python/cp35-cp35m/bin/pip install $pip_args -U cmake && ln -s /opt/_internal/cpython-3.5.9/bin/cmake /usr/bin/cmake
 
 # overwrite patchelf to fix "maximum size exceed" problem
 RUN mkdir -p /tmp && curl -L -o 0.11.zip https://github.com/NixOS/patchelf/archive/0.11.zip && unzip 0.11.zip && cd patchelf-0.11 && sed -i 's/32/64/g' src/patchelf.cc && ./bootstrap.sh && ./configure && make -j`nproc` && make install && cd .. && rm -rf patchelf-0.11 0.11.zip
@@ -40,4 +44,10 @@ RUN /opt/python/cp35-cp35m/bin/pip install $TUNA_INDEX -r /tmp/dev-requirements.
     && /opt/python/cp38-cp38/bin/pip install $TUNA_INDEX -r /tmp/dev-requirements.txt --user \
     && rm /tmp/dev-requirements.txt
 
+RUN curl -L https://github.com/bazelbuild/bazel/releases/download/3.4.1/bazel-3.4.1-linux-x86_64 -o /usr/local/bin/bazel \
+    && chmod +x /usr/local/bin/bazel \
+    && bazel
+
+RUN echo "source scl_source enable devtoolset-7" >> ~/.bashrc
+
 CMD ["/oneflow-src/docker/package/manylinux/build_wheel.sh"]
@@ -48,6 +48,14 @@ fi
 cd $ONEFLOW_SRC_DIR
 
+# TF requires py3 to build
+export PATH=/opt/python/cp37-cp37m/bin:$PATH
+python --version
+gcc --version
+
+# specify a mounted dir as bazel cache dir
+export TEST_TMPDIR=$CACHE_DIR/bazel_cache
+
 THIRD_PARTY_BUILD_DIR=$CACHE_DIR/build-third-party
 THIRD_PARTY_INSTALL_DIR=$CACHE_DIR/build-third-party-install
 COMMON_CMAKE_ARGS="-DCMAKE_BUILD_TYPE=Release -DBUILD_RDMA=ON -DTHIRD_PARTY_DIR=$THIRD_PARTY_INSTALL_DIR"
@@ -86,9 +94,10 @@ do
     rm -rf $ONEFLOW_BUILD_DIR/python_scripts/oneflow/*.so
     rm -rf $ONEFLOW_SRC_DIR/build/bdist.linux-x86_64
     rm -rf $ONEFLOW_SRC_DIR/build/lib
-    cmake -DTHIRD_PARTY=OFF -DONEFLOW=ON\
+    cmake -DTHIRD_PARTY=OFF -DONEFLOW=ON \
+        -DCMAKE_EXPORT_COMPILE_COMMANDS=1 \
         $COMMON_CMAKE_ARGS \
-        -DPython3_ROOT_DIR=$PY_ROOT \
+        -DPython3_EXECUTABLE=${PY_BIN} \
        $EXTRA_ONEFLOW_CMAKE_ARGS \
        $ONEFLOW_SRC_DIR
     cmake --build . -j `nproc`
...
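
The TEST_TMPDIR export works because bazel, when that variable is set, places its output user root under it, so the TensorFlow build cache lands in the mounted cache dir and survives container restarts. A quick check, assuming CACHE_DIR is set as in the script above:

    export TEST_TMPDIR=$CACHE_DIR/bazel_cache
    bazel info output_base  # should print a path under $CACHE_DIR/bazel_cache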
set -ex
ONEFLOW_SRC_DIR=${ONEFLOW_SRC_DIR:-${PWD}}
wheelhouse_dir=${ONEFLOW_SRC_DIR}/wheelhouse-xla
# TF requires py3 to build
PY_ROOT=/opt/python/cp37-cp37m
PY_BIN=${PY_ROOT}/bin
export PATH=$PY_BIN:$PATH
python --version
source scl_source enable devtoolset-7
cache_dir=$ONEFLOW_SRC_DIR/manylinux2014-build-cache-cuda-10.2-xla
cache_dir=$ONEFLOW_SRC_DIR/manylinux2014-build-cache-cuda-11.0-xla
export TEST_TMPDIR=$cache_dir/bazel_cache
gcc --version
bash docker/package/manylinux/build_wheel.sh \
--python3.6 \
--cache-dir $cache_dir \
--house-dir $wheelhouse_dir \
-DCMAKE_EXPORT_COMPILE_COMMANDS=1 \
-DWITH_XLA=ON
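
For reference, a sketch of invoking the helper above; the script path is an assumption (its file name is not visible in this view), and it must run inside the manylinux image built earlier so that /opt/python and devtoolset-7 exist:

    # assumed path for the new helper script
    docker run --rm -v `pwd`:`pwd` -w `pwd` oneflow:rel-manylinux2014-cuda-11.0 \
        bash -l docker/package/manylinux/build_wheel_xla.sh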
set -ex
docker run --rm -it \
-v `pwd`:`pwd` \
-w `pwd` oneflow:rel-manylinux2014-cuda-11.0 bash
 set -ex
-wheelhouse_dir=/oneflow-src/wheelhouse
+wheelhouse_dir=`pwd`/wheelhouse
 package_name=oneflow
+tuna_build_args=""
+tuna_build_args="--build-arg use_tuna_yum=0 --build-arg pip_args="""
 
 function release() {
     set -ex
     docker_tag=oneflow:rel-manylinux2014-cuda-$1
@@ -12,22 +15,41 @@ function release() {
     else
         cudnn_version=7
     fi
-    docker build --build-arg from=nvidia/cuda:$1-cudnn${cudnn_version}-devel-centos7 -f docker/package/manylinux/Dockerfile -t $docker_tag .
-    docker run --rm -it -v `pwd`:/oneflow-src -w /oneflow-src $docker_tag \
-        /oneflow-src/docker/package/manylinux/build_wheel.sh --cache-dir /oneflow-src/manylinux2014-build-cache-cuda-$1 \
-        --house-dir $wheelhouse_dir \
+    docker build --build-arg from=nvidia/cuda:$1-cudnn${cudnn_version}-devel-centos7 \
+        ${tuna_build_args} \
+        -f docker/package/manylinux/Dockerfile -t $docker_tag .
+    docker run --rm -it -v `pwd`:`pwd` -w `pwd` $docker_tag \
+        docker/package/manylinux/build_wheel.sh --cache-dir `pwd`/manylinux2014-build-cache-cuda-$1 \
+        --house-dir ${wheelhouse_dir} \
         --package-name ${package_name}_cu`echo $1 | tr -d .`
 }
 
 function release_cpu() {
-    docker run --rm -it -v `pwd`:/oneflow-src -w /oneflow-src oneflow:rel-manylinux2014-cuda-10.2 \
-        /oneflow-src/docker/package/manylinux/build_wheel.sh --cache-dir /oneflow-src/manylinux2014-build-cache-cpu \
-        --house-dir $wheelhouse_dir \
+    docker run --rm -it -v `pwd`:`pwd` -w `pwd` oneflow:rel-manylinux2014-cuda-10.2 \
+        docker/package/manylinux/build_wheel.sh --cache-dir `pwd`/manylinux2014-build-cache-cpu \
+        --house-dir ${wheelhouse_dir} \
         -DBUILD_CUDA=OFF \
         --package-name "${package_name}_cpu"
 }
 
-release_cpu
+function release_xla() {
+    set -ex
+    docker_tag=oneflow:rel-manylinux2014-cuda-$1
+    if [ "$1" == "11.0" ]; then
+        cudnn_version=8
+    else
+        cudnn_version=7
+    fi
+    docker build --build-arg from=nvidia/cuda:$1-cudnn${cudnn_version}-devel-centos7 \
+        ${tuna_build_args} \
+        -f docker/package/manylinux/Dockerfile -t $docker_tag .
+    docker run --rm -it -v `pwd`:`pwd` -w `pwd` $docker_tag \
+        bash -l docker/package/manylinux/build_wheel.sh --cache-dir `pwd`/manylinux2014-build-cache-cuda-$1-xla \
+        --house-dir ${wheelhouse_dir} \
+        --package-name ${package_name}_cu`echo $1 | tr -d .`_xla \
+        -DWITH_XLA=ON
+}
+
 release 11.0
 release 10.2
 release 10.1
@@ -35,3 +57,11 @@ release 10.0
 release 9.2
 release 9.1
 release 9.0
+release_cpu
+release_xla 11.0
+release_xla 10.2
+release_xla 10.1
+release_xla 10.0
+# failed to build XLA with CUDA 9.X
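
To rebuild a single XLA variant instead of the whole matrix, the command release_xla runs can be issued directly; a sketch for CUDA 10.2 using the same paths as the script above:

    docker run --rm -it -v `pwd`:`pwd` -w `pwd` oneflow:rel-manylinux2014-cuda-10.2 \
        bash -l docker/package/manylinux/build_wheel.sh \
        --cache-dir `pwd`/manylinux2014-build-cache-cuda-10.2-xla \
        --house-dir `pwd`/wheelhouse \
        --package-name oneflow_cu102_xla \
        -DWITH_XLA=ON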
@@ -87,7 +87,9 @@ def compare_with_tensorflow_rmsprop(
         gradients = tape.gradient(loss, var)
         opt.apply_gradients(zip([gradients], [var]))
 
-    assert np.allclose(x.flatten(), var.numpy().flatten(), rtol=1e-3, atol=1e-3,)
+    assert np.allclose(x.flatten(), var.numpy().flatten(), rtol=1e-3, atol=1e-3,), (
+        x.flatten() - var.numpy().flatten()
+    )
 
 def compare_with_tensorflow_adam(
...
@@ -13,4 +13,4 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 """
-__version__ = "0.1.10"
+__version__ = "0.1.11b1"
@@ -38,7 +38,8 @@ XlaAllocator::XlaAllocator(const se::Platform *platform, DeviceBufferAllocator *
 XlaAllocator::~XlaAllocator() {}
 
 xla::StatusOr<se::OwningDeviceMemory> XlaAllocator::Allocate(int device_ordinal, uint64 size,
-                                                             bool retry_on_failure) {
+                                                             bool retry_on_failure,
+                                                             int64 /*memory_space*/) {
   se::DeviceMemoryBase memory_base;
   if (allocate_index_ < populated_buffers_.size()
       && populated_buffers_[allocate_index_].populated) {
...
@@ -16,6 +16,7 @@ limitations under the License.
 #ifndef ONEFLOW_XRT_XLA_XLA_ALLOCATOR_H_
 #define ONEFLOW_XRT_XLA_XLA_ALLOCATOR_H_
 
+#include "oneflow/core/common/util.h"
 #include "oneflow/xrt/xla/memory/device_buffer_allocator.h"
 #include "tensorflow/compiler/xla/statusor.h"
@@ -28,14 +29,16 @@ namespace mola {
 
 namespace se = tensorflow::se;
 using uint64 = tensorflow::uint64;
+using int64 = tensorflow::int64;
 
 class XlaAllocator : public se::DeviceMemoryAllocator {
  public:
   explicit XlaAllocator(const se::Platform *platform, DeviceBufferAllocator *allocator);
   virtual ~XlaAllocator();
 
+  using se::DeviceMemoryAllocator::Allocate;
   xla::StatusOr<se::OwningDeviceMemory> Allocate(int device_ordinal, uint64 size,
-                                                 bool retry_on_failure) override;
+                                                 bool retry_on_failure,
+                                                 int64 /*memory_space*/) override;
   tensorflow::Status Deallocate(int device_ordinal, se::DeviceMemoryBase mem) override;
 
   bool AllowsAsynchronousDeallocation() const override { return true; }
@@ -47,6 +50,10 @@ class XlaAllocator : public se::DeviceMemoryAllocator {
   void PopulateDeviceMemory(const std::vector<se::DeviceMemoryBase> &device_buffers,
                             const std::vector<int64_t> &allocation_indices);
+  stream_executor::port::StatusOr<stream_executor::Stream *> GetStream(
+      int device_ordinal) override {
+    UNIMPLEMENTED();
+  };
 
  private:
   DeviceBufferAllocator *allocator_;
...
@@ -137,10 +137,11 @@ std::shared_ptr<Executable> XlaGraphCompiler::BuildExecutable(
   xla::ExecutableBuildOptions build_options;
   build_options.set_device_ordinal(this->device_ordinal_);
   build_options.set_result_layout(xla_output_shape);
-  MOLA_CHECK_AND_ASSIGN(auto executable,
+  MOLA_CHECK_AND_ASSIGN(auto executables,
                         client->Compile(computation, argument_layouts, build_options));
-  return std::make_shared<XlaExecutable>(builder_->name(), this->device_, xla_input_shapes, xla_output_shape,
-                                         std::move(executable));
+  CHECK(executables.size() == 1);
+  return std::make_shared<XlaExecutable>(builder_->name(), this->device_, xla_input_shapes,
+                                         xla_output_shape, std::move(executables.at(0)));
 }
 
 void XlaGraphCompiler::BuildEntryParameters(const std::vector<Parameter> &entry_params,
...