Commit 73603a0a authored by Shenghang Tsai, committed by GitHub

upgrade XRT XLA to TF 2.3.0 (#3531)

* compile tf 2.3.0 with gcc 7.3

* fix oneflow eigen

* minor fix

* fix include

* update protobuf if xla is on

* update path of tf proto generated cpp files

* fix path in script

* add .clangd to git ignore

* update xla ifs

* update scripts

* update path in script for clangd

* add gitignore

* add cmake flag XRT_TF_URL

* rm comment

* check in changes

* bash tricks to enable gcc 7.3

* use arg to control tuna

* bumpversion

* fix build wheel

* use real path

* add dir for cpu

* fix unwanted yum update cublas

* uncomment all

* rm suffix of wheelhouse_dir

* add log info
Co-authored-by: tsai <caishenghang@1f-dev.kbaeegfb1x0ubnoznzequyxzve.bx.internal.cloudapp.net>
Co-authored-by: tsai <caishenghang@oneflow.org>
Former-commit-id: da12e8db
Parent 7f8aae21
@@ -21,3 +21,5 @@ wheelhouse*
 /oneflow/python/__export_symbols__.py
 /oneflow/python/compatibility.py
 /oneflow/python/framework/sysconfig_gen.py
+.clangd
+compile_commands.json
@@ -214,6 +214,7 @@ include_directories(${ONEFLOW_INCLUDE_SRC_DIRS})
 if(WITH_XLA)
   list(APPEND oneflow_third_party_dependencies tensorflow_copy_libs_to_destination)
+  list(APPEND oneflow_third_party_dependencies tensorflow_symlink_headers)
   list(APPEND oneflow_third_party_libs ${TENSORFLOW_XLA_LIBRARIES})
 endif()
...
@@ -4,8 +4,8 @@ set(EIGEN_INCLUDE_DIR ${THIRD_PARTY_DIR}/eigen/include/eigen3)
 set(EIGEN_INSTALL_DIR ${THIRD_PARTY_DIR}/eigen)
 if(WITH_XLA)
-  #set(EIGEN_URL "https://storage.googleapis.com/mirror.tensorflow.org/bitbucket.org/eigen/eigen/get/8071cda5714d.tar.gz")
-  set(EIGEN_URL "https://bitbucket.org/eigen/eigen/get/8071cda5714d.tar.gz")
+  #set(EIGEN_URL "https://storage.googleapis.com/mirror.tensorflow.org/gitlab.com/libeigen/eigen/-/archive/386d809bde475c65b7940f290efe80e6a05878c4/eigen-386d809bde475c65b7940f290efe80e6a05878c4.tar.gz")
+  set(EIGEN_URL "https://gitlab.com/libeigen/eigen/-/archive/386d809bde475c65b7940f290efe80e6a05878c4/eigen-386d809bde475c65b7940f290efe80e6a05878c4.tar.gz")
 else()
   set(EIGEN_URL ${THIRD_PARTY_SUBMODULE_DIR}/eigen/src/eigen)
 endif()
...
@@ -6,7 +6,7 @@ set(PROTOBUF_BINARY_DIR ${THIRD_PARTY_DIR}/protobuf/bin)
 set(PROTOBUF_SRC_DIR ${CMAKE_CURRENT_BINARY_DIR}/protobuf/src/protobuf/src)
 if(WITH_XLA)
-  set(PROTOBUF_URL "https://storage.googleapis.com/mirror.tensorflow.org/github.com/protocolbuffers/protobuf/archive/310ba5ee72661c081129eb878c1bbcec936b20f0.tar.gz")
+  set(PROTOBUF_URL "https://github.com/protocolbuffers/protobuf/archive/v3.9.2.zip")
 else()
   set(PROTOBUF_URL ${THIRD_PARTY_SUBMODULE_DIR}/protobuf/src/protobuf)
 endif()
...
@@ -36,8 +36,7 @@ set(TENSORFLOW_INSTALL_DIR ${THIRD_PARTY_DIR}/tensorflow)
 set(PATCHES_DIR ${PROJECT_SOURCE_DIR}/oneflow/xrt/patches)
 set(TENSORFLOW_JIT_DIR ${TENSORFLOW_SRCS_DIR}/tensorflow/compiler/jit)
-set(TENSORFLOW_GEN_DIR ${TENSORFLOW_SRCS_DIR}/bazel-out/${TENSORFLOW_GENFILE_DIR}/genfiles)
+set(TENSORFLOW_GEN_DIR ${TENSORFLOW_SRCS_DIR}/bazel-out/${TENSORFLOW_GENFILE_DIR}/bin)
 set(TENSORFLOW_EXTERNAL_DIR ${TENSORFLOW_SRCS_DIR}/bazel-tensorflow/external)
 set(THIRD_ABSL_DIR ${TENSORFLOW_EXTERNAL_DIR}/com_google_absl)
 set(THIRD_PROTOBUF_DIR ${TENSORFLOW_EXTERNAL_DIR}/com_google_protobuf/src)
@@ -54,36 +53,68 @@ list(APPEND TENSORFLOW_XLA_INCLUDE_DIR
   ${THIRD_SNAPPY_DIR}
   ${THIRD_RE2_DIR}
 )
-include_directories(${TENSORFLOW_XLA_INCLUDE_DIR})
+list(APPEND TENSORFLOW_XLA_INCLUDE_INSTALL_DIR
+  "${TENSORFLOW_INSTALL_DIR}/include/tensorflow_inc"
+  "${TENSORFLOW_INSTALL_DIR}/include/tensorflow_gen"
+  "${TENSORFLOW_INSTALL_DIR}/include/absl"
+  "${TENSORFLOW_INSTALL_DIR}/include/protobuf"
+  "${TENSORFLOW_INSTALL_DIR}/include/boringssl"
+  "${TENSORFLOW_INSTALL_DIR}/include/snappy"
+  "${TENSORFLOW_INSTALL_DIR}/include/re2"
+)
 list(APPEND TENSORFLOW_XLA_LIBRARIES libtensorflow_framework.so.1)
 list(APPEND TENSORFLOW_XLA_LIBRARIES libxla_core.so)
 link_directories(${TENSORFLOW_INSTALL_DIR}/lib)
+if(NOT XRT_TF_URL)
+  set(XRT_TF_URL https://github.com/Oneflow-Inc/tensorflow/archive/1f_dep_v2.3.0r4.zip)
+endif()
 if (THIRD_PARTY)
 ExternalProject_Add(${TENSORFLOW_PROJECT}
   PREFIX ${TENSORFLOW_SOURCES_DIR}
-  GIT_REPOSITORY ${TENSORFLOW_GIT_URL}
-  GIT_TAG ${TENSORFLOW_GIT_TAG}
+  URL ${XRT_TF_URL}
   CONFIGURE_COMMAND ""
   BUILD_COMMAND cd ${TENSORFLOW_SRCS_DIR} &&
-    bazel build ${TENSORFLOW_BUILD_CMD} -j 20 //tensorflow/compiler/jit/xla_lib:libxla_core.so
+    bazel build ${TENSORFLOW_BUILD_CMD} -j HOST_CPUS //tensorflow/compiler/jit/xla_lib:libxla_core.so
   INSTALL_COMMAND ""
 )
-set(TENSORFLOW_XLA_FRAMEWORK_LIB ${TENSORFLOW_SRCS_DIR}/bazel-bin/tensorflow/libtensorflow_framework.so.1)
+set(TENSORFLOW_XLA_FRAMEWORK_LIB ${TENSORFLOW_SRCS_DIR}/bazel-bin/tensorflow/libtensorflow_framework.so.2)
 set(TENSORFLOW_XLA_CORE_LIB ${TENSORFLOW_SRCS_DIR}/bazel-bin/tensorflow/compiler/jit/xla_lib/libxla_core.so)
 add_custom_target(tensorflow_create_library_dir
   COMMAND ${CMAKE_COMMAND} -E make_directory ${TENSORFLOW_INSTALL_DIR}/lib
   DEPENDS ${TENSORFLOW_PROJECT})
 add_custom_target(tensorflow_copy_libs_to_destination
   COMMAND ${CMAKE_COMMAND} -E copy_if_different
     ${TENSORFLOW_XLA_FRAMEWORK_LIB} ${TENSORFLOW_XLA_CORE_LIB} ${TENSORFLOW_INSTALL_DIR}/lib
   COMMAND ${CMAKE_COMMAND} -E create_symlink
-    ${TENSORFLOW_INSTALL_DIR}/lib/libtensorflow_framework.so.1
+    ${TENSORFLOW_INSTALL_DIR}/lib/libtensorflow_framework.so.2
     ${TENSORFLOW_INSTALL_DIR}/lib/libtensorflow_framework.so
   DEPENDS tensorflow_create_library_dir)
+add_custom_target(tensorflow_create_include_dir
+  COMMAND ${CMAKE_COMMAND} -E make_directory ${TENSORFLOW_INSTALL_DIR}/include
+  DEPENDS ${TENSORFLOW_PROJECT})
+add_custom_target(tensorflow_symlink_headers
+  DEPENDS tensorflow_create_include_dir)
+foreach(src_dst_pair IN ZIP_LISTS TENSORFLOW_XLA_INCLUDE_DIR TENSORFLOW_XLA_INCLUDE_INSTALL_DIR)
+  set(src ${src_dst_pair_0})
+  set(dst ${src_dst_pair_1})
+  add_custom_command(TARGET tensorflow_symlink_headers
+    COMMAND ${CMAKE_COMMAND} -E create_symlink
+      ${src}
+      ${dst}
+  )
+endforeach()
 endif(THIRD_PARTY)
+include_directories(${TENSORFLOW_XLA_INCLUDE_INSTALL_DIR})
 endif(WITH_XLA)
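
The header symlinks above are paired up with foreach(... IN ZIP_LISTS ...), which walks the two lists in lockstep (this needs CMake 3.17 or newer). The new XRT_TF_URL cache variable makes the pinned TensorFlow fork relocatable; a minimal sketch of overriding it at configure time, where the mirror URL is a hypothetical placeholder for any archive with the same layout as the default:

    # hypothetical mirror; defaults to the Oneflow-Inc/tensorflow 1f_dep_v2.3.0r4 archive
    cmake -DTHIRD_PARTY=ON -DWITH_XLA=ON \
          -DXRT_TF_URL=https://example.com/mirror/tensorflow-1f_dep_v2.3.0r4.zip \
          $ONEFLOW_SRC_DIR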
 ARG from
+ARG use_tuna_yum=1
+ARG pip_args="-i https://pypi.tuna.tsinghua.edu.cn/simple"
 FROM ${from}
 LABEL maintainer="OneFlow Maintainers"
@@ -13,9 +15,11 @@ ENV LD_LIBRARY_PATH /usr/local/lib64:/usr/local/lib
 ENV PKG_CONFIG_PATH /usr/local/lib/pkgconfig
 
 # use tuna mirror
-COPY docker/package/manylinux/CentOS-Base.repo /etc/yum.repos.d/CentOS-Base.repo
-RUN yum makecache
+COPY docker/package/manylinux/CentOS-Base.repo /tmp/CentOS-Base.repo
+RUN if [ "${use_tuna_yum}" = "1" ]; then mv /tmp/CentOS-Base.repo /etc/yum.repos.d/ && yum makecache ; fi
+
+# in 10.1, the cuda yum repo will update cublas to 10.2 and break the build
+RUN yum-config-manager --disable cuda
 
 ARG MANYLINUX_SHA=f5da004
 RUN yum -y install unzip && curl -L -o manylinux.zip https://github.com/pypa/manylinux/archive/${MANYLINUX_SHA}.zip && unzip manylinux.zip -d tmp && cp -r tmp/*/docker/build_scripts /build_scripts && bash build_scripts/build.sh && rm -r build_scripts tmp manylinux.zip
@@ -25,10 +29,10 @@ ENV SSL_CERT_FILE=/opt/_internal/certs.pem
 RUN yum-config-manager --add-repo https://yum.repos.intel.com/setup/intelproducts.repo && \
     rpm --import https://yum.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB && \
     yum update -y && yum install -y epel-release && \
-    yum install -y intel-mkl-64bit-2020.0-088 nasm swig rdma-core-devel
+    yum -y install centos-release-scl && \
+    yum install -y intel-mkl-64bit-2020.0-088 nasm swig rdma-core-devel devtoolset-7-gcc*
 
-ENV TUNA_INDEX="-i https://pypi.tuna.tsinghua.edu.cn/simple"
-RUN /opt/python/cp35-cp35m/bin/pip install $TUNA_INDEX -U cmake && ln -s /opt/_internal/cpython-3.5.9/bin/cmake /usr/bin/cmake
+RUN /opt/python/cp35-cp35m/bin/pip install $pip_args -U cmake && ln -s /opt/_internal/cpython-3.5.9/bin/cmake /usr/bin/cmake
 
 # overwrite patchelf to fix "maximum size exceed" problem
 RUN mkdir -p /tmp && curl -L -o 0.11.zip https://github.com/NixOS/patchelf/archive/0.11.zip && unzip 0.11.zip && cd patchelf-0.11 && sed -i 's/32/64/g' src/patchelf.cc && ./bootstrap.sh && ./configure && make -j`nproc` && make install && cd .. && rm -rf patchelf-0.11 0.11.zip
@@ -40,4 +44,10 @@ RUN /opt/python/cp35-cp35m/bin/pip install $TUNA_INDEX -r /tmp/dev-requirements.
     && /opt/python/cp38-cp38/bin/pip install $TUNA_INDEX -r /tmp/dev-requirements.txt --user \
     && rm /tmp/dev-requirements.txt
 
+RUN curl -L https://github.com/bazelbuild/bazel/releases/download/3.4.1/bazel-3.4.1-linux-x86_64 -o /usr/local/bin/bazel \
+    && chmod +x /usr/local/bin/bazel \
+    && bazel
+
+RUN echo "source scl_source enable devtoolset-7" >> ~/.bashrc
+
 CMD ["/oneflow-src/docker/package/manylinux/build_wheel.sh"]
@@ -48,6 +48,14 @@ fi
 cd $ONEFLOW_SRC_DIR
 
+# TF requires py3 to build
+export PATH=/opt/python/cp37-cp37m/bin:$PATH
+python --version
+gcc --version
+
+# specify a mounted dir as bazel cache dir
+export TEST_TMPDIR=$CACHE_DIR/bazel_cache
+
 THIRD_PARTY_BUILD_DIR=$CACHE_DIR/build-third-party
 THIRD_PARTY_INSTALL_DIR=$CACHE_DIR/build-third-party-install
 COMMON_CMAKE_ARGS="-DCMAKE_BUILD_TYPE=Release -DBUILD_RDMA=ON -DTHIRD_PARTY_DIR=$THIRD_PARTY_INSTALL_DIR"
@@ -86,9 +94,10 @@ do
     rm -rf $ONEFLOW_BUILD_DIR/python_scripts/oneflow/*.so
     rm -rf $ONEFLOW_SRC_DIR/build/bdist.linux-x86_64
     rm -rf $ONEFLOW_SRC_DIR/build/lib
-    cmake -DTHIRD_PARTY=OFF -DONEFLOW=ON\
+    cmake -DTHIRD_PARTY=OFF -DONEFLOW=ON \
+        -DCMAKE_EXPORT_COMPILE_COMMANDS=1 \
         $COMMON_CMAKE_ARGS \
-        -DPython3_ROOT_DIR=$PY_ROOT \
+        -DPython3_EXECUTABLE=${PY_BIN} \
        $EXTRA_ONEFLOW_CMAKE_ARGS \
        $ONEFLOW_SRC_DIR
     cmake --build . -j `nproc`
...
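
The TEST_TMPDIR export works because bazel, when that variable is set, places its output user root under it, so the TensorFlow build cache lands in the mounted cache dir and survives container restarts. A quick check, assuming CACHE_DIR is set as in the script above:

    export TEST_TMPDIR=$CACHE_DIR/bazel_cache
    bazel info output_base  # should print a path under $CACHE_DIR/bazel_cache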
set -ex
ONEFLOW_SRC_DIR=${ONEFLOW_SRC_DIR:-${PWD}}
wheelhouse_dir=${ONEFLOW_SRC_DIR}/wheelhouse-xla
# TF requires py3 to build
PY_ROOT=/opt/python/cp37-cp37m
PY_BIN=${PY_ROOT}/bin
export PATH=$PY_BIN:$PATH
python --version
source scl_source enable devtoolset-7
cache_dir=$ONEFLOW_SRC_DIR/manylinux2014-build-cache-cuda-10.2-xla
cache_dir=$ONEFLOW_SRC_DIR/manylinux2014-build-cache-cuda-11.0-xla
export TEST_TMPDIR=$cache_dir/bazel_cache
gcc --version
bash docker/package/manylinux/build_wheel.sh \
--python3.6 \
--cache-dir $cache_dir \
--house-dir $wheelhouse_dir \
-DCMAKE_EXPORT_COMPILE_COMMANDS=1 \
-DWITH_XLA=ON
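
For reference, a sketch of invoking the helper above; the script path is an assumption (its file name is not visible in this view), and it must run inside the manylinux image built earlier so that /opt/python and devtoolset-7 exist:

    # assumed path for the new helper script
    docker run --rm -v `pwd`:`pwd` -w `pwd` oneflow:rel-manylinux2014-cuda-11.0 \
        bash -l docker/package/manylinux/build_wheel_xla.sh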
set -ex
docker run --rm -it \
-v `pwd`:`pwd` \
-w `pwd` oneflow:rel-manylinux2014-cuda-11.0 bash
 set -ex
-wheelhouse_dir=/oneflow-src/wheelhouse
+wheelhouse_dir=`pwd`/wheelhouse
 package_name=oneflow
+tuna_build_args=""
+tuna_build_args="--build-arg use_tuna_yum=0 --build-arg pip_args="""
 
 function release() {
     set -ex
     docker_tag=oneflow:rel-manylinux2014-cuda-$1
@@ -12,22 +15,41 @@ function release() {
     else
         cudnn_version=7
     fi
-    docker build --build-arg from=nvidia/cuda:$1-cudnn${cudnn_version}-devel-centos7 -f docker/package/manylinux/Dockerfile -t $docker_tag .
-    docker run --rm -it -v `pwd`:/oneflow-src -w /oneflow-src $docker_tag \
-        /oneflow-src/docker/package/manylinux/build_wheel.sh --cache-dir /oneflow-src/manylinux2014-build-cache-cuda-$1 \
-        --house-dir $wheelhouse_dir \
+    docker build --build-arg from=nvidia/cuda:$1-cudnn${cudnn_version}-devel-centos7 \
+        ${tuna_build_args} \
+        -f docker/package/manylinux/Dockerfile -t $docker_tag .
+    docker run --rm -it -v `pwd`:`pwd` -w `pwd` $docker_tag \
+        docker/package/manylinux/build_wheel.sh --cache-dir `pwd`/manylinux2014-build-cache-cuda-$1 \
+        --house-dir ${wheelhouse_dir} \
         --package-name ${package_name}_cu`echo $1 | tr -d .`
 }
 
 function release_cpu() {
-    docker run --rm -it -v `pwd`:/oneflow-src -w /oneflow-src oneflow:rel-manylinux2014-cuda-10.2 \
-        /oneflow-src/docker/package/manylinux/build_wheel.sh --cache-dir /oneflow-src/manylinux2014-build-cache-cpu \
-        --house-dir $wheelhouse_dir \
+    docker run --rm -it -v `pwd`:`pwd` -w `pwd` oneflow:rel-manylinux2014-cuda-10.2 \
+        docker/package/manylinux/build_wheel.sh --cache-dir `pwd`/manylinux2014-build-cache-cpu \
+        --house-dir ${wheelhouse_dir} \
         -DBUILD_CUDA=OFF \
         --package-name "${package_name}_cpu"
 }
 
-release_cpu
+function release_xla() {
+    set -ex
+    docker_tag=oneflow:rel-manylinux2014-cuda-$1
+    if [ "$1" == "11.0" ]; then
+        cudnn_version=8
+    else
+        cudnn_version=7
+    fi
+    docker build --build-arg from=nvidia/cuda:$1-cudnn${cudnn_version}-devel-centos7 \
+        ${tuna_build_args} \
+        -f docker/package/manylinux/Dockerfile -t $docker_tag .
+    docker run --rm -it -v `pwd`:`pwd` -w `pwd` $docker_tag \
+        bash -l docker/package/manylinux/build_wheel.sh --cache-dir `pwd`/manylinux2014-build-cache-cuda-$1-xla \
+        --house-dir ${wheelhouse_dir} \
+        --package-name ${package_name}_cu`echo $1 | tr -d .`_xla \
+        -DWITH_XLA=ON
+}
+
 release 11.0
 release 10.2
 release 10.1
@@ -35,3 +57,11 @@ release 10.0
 release 9.2
 release 9.1
 release 9.0
+release_cpu
+release_xla 11.0
+release_xla 10.2
+release_xla 10.1
+release_xla 10.0
+# failed to build XLA with CUDA 9.X
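
To rebuild a single XLA variant instead of the whole matrix, the command release_xla runs can be issued directly; a sketch for CUDA 10.2 using the same paths as the script above:

    docker run --rm -it -v `pwd`:`pwd` -w `pwd` oneflow:rel-manylinux2014-cuda-10.2 \
        bash -l docker/package/manylinux/build_wheel.sh \
        --cache-dir `pwd`/manylinux2014-build-cache-cuda-10.2-xla \
        --house-dir `pwd`/wheelhouse \
        --package-name oneflow_cu102_xla \
        -DWITH_XLA=ON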
@@ -87,7 +87,9 @@ def compare_with_tensorflow_rmsprop(
         gradients = tape.gradient(loss, var)
         opt.apply_gradients(zip([gradients], [var]))
 
-    assert np.allclose(x.flatten(), var.numpy().flatten(), rtol=1e-3, atol=1e-3,)
+    assert np.allclose(x.flatten(), var.numpy().flatten(), rtol=1e-3, atol=1e-3,), (
+        x.flatten() - var.numpy().flatten()
+    )
 
 def compare_with_tensorflow_adam(
...
@@ -13,4 +13,4 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 """
-__version__ = "0.1.10"
+__version__ = "0.1.11b1"
@@ -38,7 +38,8 @@ XlaAllocator::XlaAllocator(const se::Platform *platform, DeviceBufferAllocator *
 XlaAllocator::~XlaAllocator() {}
 
 xla::StatusOr<se::OwningDeviceMemory> XlaAllocator::Allocate(int device_ordinal, uint64 size,
-                                                             bool retry_on_failure) {
+                                                             bool retry_on_failure,
+                                                             int64 /*memory_space*/) {
   se::DeviceMemoryBase memory_base;
   if (allocate_index_ < populated_buffers_.size()
       && populated_buffers_[allocate_index_].populated) {
...
@@ -16,6 +16,7 @@ limitations under the License.
 #ifndef ONEFLOW_XRT_XLA_XLA_ALLOCATOR_H_
 #define ONEFLOW_XRT_XLA_XLA_ALLOCATOR_H_
 
+#include "oneflow/core/common/util.h"
 #include "oneflow/xrt/xla/memory/device_buffer_allocator.h"
 #include "tensorflow/compiler/xla/statusor.h"
@@ -28,14 +29,16 @@ namespace mola {
 
 namespace se = tensorflow::se;
 using uint64 = tensorflow::uint64;
+using int64 = tensorflow::int64;
 
 class XlaAllocator : public se::DeviceMemoryAllocator {
  public:
   explicit XlaAllocator(const se::Platform *platform, DeviceBufferAllocator *allocator);
   virtual ~XlaAllocator();
 
+  using se::DeviceMemoryAllocator::Allocate;
   xla::StatusOr<se::OwningDeviceMemory> Allocate(int device_ordinal, uint64 size,
-                                                 bool retry_on_failure) override;
+                                                 bool retry_on_failure,
+                                                 int64 /*memory_space*/) override;
   tensorflow::Status Deallocate(int device_ordinal, se::DeviceMemoryBase mem) override;
 
   bool AllowsAsynchronousDeallocation() const override { return true; }
@@ -47,6 +50,10 @@ class XlaAllocator : public se::DeviceMemoryAllocator {
   void PopulateDeviceMemory(const std::vector<se::DeviceMemoryBase> &device_buffers,
                             const std::vector<int64_t> &allocation_indices);
+  stream_executor::port::StatusOr<stream_executor::Stream *> GetStream(
+      int device_ordinal) override {
+    UNIMPLEMENTED();
+  };
 
  private:
   DeviceBufferAllocator *allocator_;
...
@@ -137,10 +137,11 @@ std::shared_ptr<Executable> XlaGraphCompiler::BuildExecutable(
   xla::ExecutableBuildOptions build_options;
   build_options.set_device_ordinal(this->device_ordinal_);
   build_options.set_result_layout(xla_output_shape);
-  MOLA_CHECK_AND_ASSIGN(auto executable,
+  MOLA_CHECK_AND_ASSIGN(auto executables,
                         client->Compile(computation, argument_layouts, build_options));
-  return std::make_shared<XlaExecutable>(builder_->name(), this->device_, xla_input_shapes, xla_output_shape,
-                                         std::move(executable));
+  CHECK(executables.size() == 1);
+  return std::make_shared<XlaExecutable>(builder_->name(), this->device_, xla_input_shapes,
+                                         xla_output_shape, std::move(executables.at(0)));
 }
 
 void XlaGraphCompiler::BuildEntryParameters(const std::vector<Parameter> &entry_params,
...