未验证 提交 cbfd43e4 编写于 作者: R risemeup1 提交者: GitHub

apply gcc12 to gpups (#52960)

* apply gcc12 to gpups

* apply gcc12 to gpups

* apply gcc12 to gpups

* apply gcc12 to gpups

* apply gcc12 to gpups

* apply gcc12 to gpups

* apply gcc12 to gpups

* apply gcc12 to gpups

* apply gcc12 to gpups

* test

* test

* apply gcc12 to gpups

* apply_gcc12_to_gpups

* fix compiler bug

* fix compiler bug

* test

* fix dangling-pointer compiler

* fix dangling-pointer compiler

* fix dangling-pointer compiler

* apply_gcc12_to_gpups

* apply gcc12 to gpups

* Update cuda_streams_py.cc
上级 328195d7
......@@ -48,7 +48,7 @@ PD_DECLARE_KERNEL(sum_grad, GPU, ALL_LAYOUT);
TEST(Benchmark, EagerScaleCUDA) {
eager_test::InitEnv(paddle::platform::CUDAPlace());
for (const std::string& mode : {"Accuracy", "WarmUp", "Performance"}) {
for (const std::string mode : {"Accuracy", "WarmUp", "Performance"}) {
paddle::framework::DDim ddim = phi::make_ddim({2, 4, 4, 4});
paddle::Tensor tensor = CreateTensorWithValue(ddim,
paddle::platform::CUDAPlace(),
......@@ -89,7 +89,7 @@ TEST(Benchmark, EagerMatmulCUDA) {
paddle::platform::CUDAPlace place;
eager_test::InitEnv(place);
for (const std::string& mode : {"Accuracy", "WarmUp", "Performance"}) {
for (const std::string mode : {"Accuracy", "WarmUp", "Performance"}) {
paddle::framework::DDim ddimX = phi::make_ddim({2, 2});
paddle::Tensor X = CreateTensorWithValue(ddimX,
paddle::platform::CUDAPlace(),
......@@ -143,7 +143,7 @@ TEST(Benchmark, EagerIntermediateMatmulCUDA) {
tracer->SetExpectedPlace(place);
paddle::imperative::SetCurrentTracer(tracer);
for (const std::string& mode : {"Accuracy", "WarmUp", "Performance"}) {
for (const std::string mode : {"Accuracy", "WarmUp", "Performance"}) {
paddle::framework::DDim ddimX = phi::make_ddim({2, 2});
paddle::Tensor X = CreateTensorWithValue(ddimX,
paddle::platform::CUDAPlace(),
......@@ -197,7 +197,7 @@ TEST(Benchmark, EagerIntermediateMLPCUDA) {
tracer->SetExpectedPlace(place);
paddle::imperative::SetCurrentTracer(tracer);
for (const std::string& mode : {"Accuracy", "WarmUp", "Performance"}) {
for (const std::string mode : {"Accuracy", "WarmUp", "Performance"}) {
paddle::framework::DDim ddimX = phi::make_ddim({MLP_M, MLP_N});
paddle::Tensor X = CreateTensorWithValue(ddimX,
paddle::platform::CUDAPlace(),
......
......@@ -53,7 +53,7 @@ TEST(Benchmark, FluidScaleCUDA) {
platform::CUDAPlace place;
eager_test::InitEnv(place);
for (const std::string& mode : {"Accuracy", "WarmUp", "Performance"}) {
for (const std::string mode : {"Accuracy", "WarmUp", "Performance"}) {
std::shared_ptr<imperative::VarBase> X(new imperative::VarBase(true, "X"));
X->SetOverridedStopGradient(false);
......@@ -108,7 +108,7 @@ TEST(Benchmark, FluidMatmulCUDA) {
platform::CUDAPlace place;
eager_test::InitEnv(place);
for (const std::string& mode : {"Accuracy", "WarmUp", "Performance"}) {
for (const std::string mode : {"Accuracy", "WarmUp", "Performance"}) {
std::shared_ptr<imperative::VarBase> X(new imperative::VarBase(true, "X"));
X->SetOverridedStopGradient(false);
std::shared_ptr<imperative::VarBase> Y(new imperative::VarBase(true, "Y"));
......@@ -176,7 +176,7 @@ TEST(Benchmark, FluidMLPCUDA) {
platform::CUDAPlace place;
eager_test::InitEnv(place);
for (const std::string& mode : {"Accuracy", "WarmUp", "Performance"}) {
for (const std::string mode : {"Accuracy", "WarmUp", "Performance"}) {
paddle::platform::DeviceContextPool& pool =
paddle::platform::DeviceContextPool::Instance();
auto* dev_ctx = dynamic_cast<phi::GPUContext*>(pool.Get(place));
......
......@@ -760,6 +760,10 @@ struct FeaturePushValue {
int mf_dim;
float mf_g[0];
__device__ __forceinline__ FeaturePushValue() = default;
__device__ __forceinline__ FeaturePushValue(const FeaturePushValue&) =
default;
__device__ __forceinline__ FeaturePushValue
operator+(const FeaturePushValue& a) const {
FeaturePushValue out;
......
......@@ -257,16 +257,16 @@ void BindCudaStream(py::module *m_ptr) {
"Priority should be 1(high) or 2(normal) "));
}
auto stream_flag = phi::CUDAStream::StreamFlag::kStreamNonBlocking;
if (place == nullptr) {
int curr_device_id = platform::GetCurrentDeviceId();
auto place_tmp = platform::CUDAPlace(curr_device_id);
place = &place_tmp;
}
auto stream_flag = phi::CUDAStream::StreamFlag::kStreamNonBlocking;
new (&self) phi::CUDAStream(place_tmp, priority - 2, stream_flag);
} else {
// seting priority 1(high) and 2(normal) correspond to the actual
// cuda stream priority -1 and 0.
new (&self) phi::CUDAStream(*place, priority - 2, stream_flag);
}
#else
PADDLE_THROW(platform::errors::Unavailable(
"Class CUDAStream can only be initialized on the GPU platform."));
......
# An image for building paddle binaries
# Use cuda devel base image for both cpu and gpu environment
# When you modify it, please be aware of cudnn-runtime version
FROM <baseimg>
MAINTAINER PaddlePaddle Authors <paddle-dev@baidu.com>
# Build-time switches; each ENV falls back to ON when its build arg is unset
ARG WITH_GPU
ARG WITH_AVX
ENV WITH_GPU=${WITH_GPU:-ON}
ENV WITH_AVX=${WITH_AVX:-ON}
ENV DEBIAN_FRONTEND=noninteractive
<setcuda>
ENV HOME /root
# Add bash enhancements
COPY paddle/scripts/docker/root/ /root/
# Open /tmp to all users (mode 777)
RUN chmod 777 /tmp
# Remove apt key 7fa2af80 and every file under sources.list.d, then fetch the
# key published in NVIDIA's ubuntu2004 CUDA repository
RUN apt-key del 7fa2af80
RUN rm /etc/apt/sources.list.d/*
RUN apt-key adv --fetch-keys https://developer.download.nvidia.cn/compute/cuda/repos/ubuntu2004/x86_64/3bf863cc.pub
# Add the deadsnakes PPA, refresh the package index and install the base
# command-line tools and development libraries
RUN apt-get update --allow-unauthenticated && \
apt-get install -y software-properties-common && add-apt-repository ppa:deadsnakes/ppa && \
apt-get update && \
apt-get install -y curl wget vim git unzip unrar tar xz-utils libssl-dev bzip2 gzip \
coreutils ntp language-pack-zh-hans libsm6 libxext6 libxrender-dev libgl1-mesa-glx \
bison graphviz libjpeg-dev zlib1g-dev automake locales swig net-tools libtool kmod
<install_cpu_package>
# Replace the distro gcc/g++ with gcc 12.1 and install TensorRT, patchelf and cuDNN.
# All helper scripts are copied to /build_scripts and removed again at the end.
WORKDIR /usr/bin
COPY tools/dockerfile/build_scripts /build_scripts
RUN bash /build_scripts/install_trt.sh
# Older versions of patchelf limited the size of the files being processed; this was fixed in
# https://github.com/NixOS/patchelf/commit/ba2695a8110abbc8cc6baf0eea819922ee5007fa
# so install a newer version here.
RUN bash /build_scripts/install_patchelf.sh
RUN bash /build_scripts/install_gcc.sh gcc121
# Keep the original compilers as *.bak (WORKDIR is /usr/bin), then point both
# /usr/local/bin and /usr/bin at the gcc 12.1 binaries
RUN cp gcc gcc.bak && cp g++ g++.bak && rm gcc && rm g++
RUN ln -s /usr/local/gcc-12.1/bin/gcc /usr/local/bin/gcc
RUN ln -s /usr/local/gcc-12.1/bin/g++ /usr/local/bin/g++
RUN ln -s /usr/local/gcc-12.1/bin/gcc /usr/bin/gcc
RUN ln -s /usr/local/gcc-12.1/bin/g++ /usr/bin/g++
ENV PATH=/usr/local/gcc-12.1/bin:$PATH
RUN bash /build_scripts/install_cudnn.sh cudnn841
ENV CUDNN_VERSION=8.4.1
#RUN bash /build_scripts/install_nccl2.sh
# Clean up the helper scripts; the path must match the COPY destination above
RUN rm -rf /build_scripts
# Install cmake: download the prebuilt 3.18.0 Linux x86_64 tarball into /home,
# unpack it, delete the archive and put its bin directory first on PATH
WORKDIR /home
RUN wget -q https://cmake.org/files/v3.18/cmake-3.18.0-Linux-x86_64.tar.gz && tar -zxvf cmake-3.18.0-Linux-x86_64.tar.gz && rm cmake-3.18.0-Linux-x86_64.tar.gz
ENV PATH=/home/cmake-3.18.0-Linux-x86_64/bin:$PATH
# Install Python 3.7/3.8/3.9 (interpreter, headers, distutils) and make both
# `python` and `python3` resolve to python3.7.
# Every continuation keeps a space before the backslash so package names cannot
# fuse with the next line, and every install passes -y so the build never waits
# for confirmation.
RUN apt-get update && \
    apt-get install -y python3.7 python3.7-dev python3.7-distutils \
    python3.8 python3.8-dev python3.8-distutils \
    python3.9 python3.9-dev python3.9-distutils && \
    apt-get install -y python-is-python3 && \
    rm /usr/bin/python && ln -s /usr/bin/python3.7 /usr/bin/python && \
    rm /usr/bin/python3 && ln -s /usr/bin/python3.7 /usr/bin/python3
# Download the setuptools source archive into /home and unpack it
WORKDIR /home
RUN wget https://files.pythonhosted.org/packages/a7/e0/30642b9c2df516506d40b563b0cbd080c49c6b3f11a70b4c7a670f13a78b/setuptools-50.3.2.zip && apt-get -y install unzip && unzip setuptools-50.3.2.zip
# Build and install setuptools once per interpreter
WORKDIR /home/setuptools-50.3.2
RUN python3.9 setup.py build && python3.9 setup.py install && \
python3.8 setup.py build && python3.8 setup.py install && \
python3.7 setup.py build && python3.7 setup.py install
# Download and unpack the pip source archive
WORKDIR /home
RUN wget https://files.pythonhosted.org/packages/28/af/2c76c8aa46ccdf7578b83d97a11a2d1858794d4be4a1610ade0d30182e8b/pip-20.0.1.tar.gz && tar -zxvf pip-20.0.1.tar.gz
# Relative WORKDIR: resolved against the previous WORKDIR (/home)
WORKDIR pip-20.0.1
# Install pip once per interpreter
RUN python3.9 setup.py install && \
python3.8 setup.py install && \
python3.7 setup.py install
# Delete the downloaded archives and the unpacked source trees
WORKDIR /home
RUN rm setuptools-50.3.2.zip pip-20.0.1.tar.gz && \
rm -r setuptools-50.3.2 pip-20.0.1
# Make the unversioned pip and pip3 commands point at the python3.7 copy
RUN rm /usr/local/bin/pip && ln -s /usr/local/bin/pip3.7 /usr/local/bin/pip && \
rm /usr/local/bin/pip3 && ln -s /usr/local/bin/pip3.7 /usr/local/bin/pip3
# Build binutils 2.33.1 from source and delete the sources afterwards.
# Remove this step once apt-get provides binutils 2.27 or newer.
RUN wget -q https://ftp.gnu.org/gnu/binutils/binutils-2.33.1.tar.gz && \
tar -xzf binutils-2.33.1.tar.gz && \
cd binutils-2.33.1 && \
./configure && make -j && make install && cd .. && rm -rf binutils-2.33.1 binutils-2.33.1.tar.gz
# Install Go and glide: stream the Go tarball straight into /usr/local and
# create the GOPATH directory layout under /root/gopath
RUN wget --no-check-certificate -qO- https://paddle-ci.gz.bcebos.com/go1.17.2.linux-amd64.tar.gz | \
tar -xz -C /usr/local && \
mkdir /root/gopath && \
mkdir /root/gopath/bin && \
mkdir /root/gopath/src
ENV GOROOT=/usr/local/go GOPATH=/root/gopath
# PATH must be set in a separate ENV instruction from GOROOT; otherwise docker build cannot resolve ${GOROOT} here.
ENV PATH=${PATH}:${GOROOT}/bin:${GOPATH}/bin
# Install glide by piping its installer script into sh
RUN curl -s -q https://glide.sh/get | sh
# Store git credentials so passwords do not have to be retyped
RUN git config --global credential.helper store
# Fix locales to en_US.UTF-8
RUN localedef -i en_US -f UTF-8 en_US.UTF-8
# Install pinned ipython and ipykernel, plus wheel, for each interpreter
RUN pip3.7 --no-cache-dir install ipython==5.3.0 && \
pip3.7 --no-cache-dir install ipykernel==4.6.0 wheel && \
pip3.8 --no-cache-dir install ipython==5.3.0 && \
pip3.8 --no-cache-dir install ipykernel==4.6.0 wheel && \
pip3.9 --no-cache-dir install ipython==5.3.0 && \
pip3.9 --no-cache-dir install ipykernel==4.6.0 wheel
# For the docstring checker
RUN pip3.7 --no-cache-dir install pytest astroid isort && \
pip3.8 --no-cache-dir install pytest astroid isort && \
pip3.9 --no-cache-dir install pytest astroid isort
# For pre-commit: upgrade pip itself for each interpreter first
RUN pip3.7 --no-cache-dir install --upgrade pip && \
pip3.8 --no-cache-dir install --upgrade pip && \
pip3.9 --no-cache-dir install --upgrade pip
# Then install pre-commit together with the pinned pylint, cpplint and clang-format
RUN pip3.7 --no-cache-dir install pre-commit==2.17.0 pylint==2.12.0 && \
pip3.8 --no-cache-dir install pre-commit==2.17.0 pylint==2.12.0 && \
pip3.9 --no-cache-dir install pre-commit==2.17.0 pylint==2.12.0 && \
pip3.7 --no-cache-dir install cpplint==1.6.0 clang-format==13.0.0 && \
pip3.8 --no-cache-dir install cpplint==1.6.0 clang-format==13.0.0 && \
pip3.9 --no-cache-dir install cpplint==1.6.0 clang-format==13.0.0
# Copy the repository's Python requirements file and install it for each interpreter
COPY ./python/requirements.txt /root/
RUN pip3.7 --no-cache-dir install -r /root/requirements.txt && \
pip3.8 --no-cache-dir install -r /root/requirements.txt && \
pip3.9 --no-cache-dir install -r /root/requirements.txt
# Configure OpenSSH server. c.f. https://docs.docker.com/engine/examples/running_ssh_service
# (The sshd setup and the CMD below are currently commented out.)
#RUN mkdir /var/run/sshd && echo 'root:root' | chpasswd && sed -ri 's/^PermitRootLogin\s+.*/PermitRootLogin yes/' /etc/ssh/sshd_config && sed -ri 's/UsePAM yes/#UsePAM yes/g' /etc/ssh/sshd_config
#CMD source ~/.bashrc
# ccache 3.7.9: build from source into its own prefix, symlink the binary into
# /usr/local/bin, then delete the sources
RUN wget https://paddle-ci.gz.bcebos.com/ccache-3.7.9.tar.gz && \
tar xf ccache-3.7.9.tar.gz && mkdir /usr/local/ccache-3.7.9 && cd ccache-3.7.9 && \
./configure -prefix=/usr/local/ccache-3.7.9 && \
make -j8 && make install && \
ln -s /usr/local/ccache-3.7.9/bin/ccache /usr/local/bin/ccache && \
cd ../ && rm -rf ccache-3.7.9 ccache-3.7.9.tar.gz
# clang+llvm 3.8.0: unpack the prebuilt tree and copy it into /usr/local;
# cp -n leaves files that already exist there untouched
RUN wget https://paddle-ci.cdn.bcebos.com/clang+llvm-3.8.0-x86_64-linux-gnu-ubuntu-16.04.tar.xz && \
tar xf clang+llvm-3.8.0-x86_64-linux-gnu-ubuntu-16.04.tar.xz && cd clang+llvm-3.8.0-x86_64-linux-gnu-ubuntu-16.04 && \
cp -rn * /usr/local && cd .. && rm -rf clang+llvm-3.8.0-x86_64-linux-gnu-ubuntu-16.04 && rm -rf clang+llvm-3.8.0-x86_64-linux-gnu-ubuntu-16.04.tar.xz
# Declare port 22 for containers built from this image
EXPOSE 22
......@@ -71,4 +71,18 @@ elif [ "$1" == "gcc122" ]; then
ln -s /usr/local/gcc-12.2/lib64/libgfortran.so.5 ${lib_so_5} && \
ln -s /usr/local/gcc-12.2/lib64/libstdc++.so.6 ${lib_so_6} && \
cp /usr/local/gcc-12.2/lib64/libstdc++.so.6.0.30 ${lib_path}
elif [ "$1" == "gcc121" ]; then
  # Build gcc 12.1.0 from the mirrored source tarball and install it under
  # /usr/local/gcc-12.1.
  wget -q --no-proxy https://paddle-ci.gz.bcebos.com/gcc-12.1.0.tar.gz
  # Configure and build out of tree in temp_gcc121; the search-path variables
  # are unset first so the build does not pick up host paths.
  tar -xzf gcc-12.1.0.tar.gz && \
  cd gcc-12.1.0 && \
  unset LIBRARY_PATH CPATH C_INCLUDE_PATH PKG_CONFIG_PATH CPLUS_INCLUDE_PATH INCLUDE && \
  ./contrib/download_prerequisites && \
  cd .. && mkdir temp_gcc121 && cd temp_gcc121 && \
  ../gcc-12.1.0/configure --prefix=/usr/local/gcc-12.1 --enable-checking=release --enable-languages=c,c++ --disable-multilib && \
  make -j8 && make install
  # Remove the build directory created above (temp_gcc121, not temp_gcc122),
  # the source tree and the tarball.
  cd .. && rm -rf temp_gcc121 gcc-12.1.0 gcc-12.1.0.tar.gz
  # Back up the existing libstdc++ link, then point the library paths at the
  # gcc 12.1 runtime libraries.
  cp ${lib_so_6} ${lib_so_6}.bak && rm -f ${lib_so_6} &&
  ln -s /usr/local/gcc-12.1/lib64/libgfortran.so.5 ${lib_so_5} && \
  ln -s /usr/local/gcc-12.1/lib64/libstdc++.so.6 ${lib_so_6} && \
  cp /usr/local/gcc-12.1/lib64/libstdc++.so.6.0.30 ${lib_path}
fi
......@@ -41,6 +41,7 @@ elif [ "$VERSION" == "12.0" ]; then
libnccl-*
exit 0
fi
DEB="nccl-local-repo-ubuntu2004-2.16.5-cuda12.0_1.0-1_amd64.deb"
elif [ "$VERSION" == "9.0" ]; then
DEB="nccl-repo-ubuntu1604-2.3.7-ga-cuda9.0_1-1_amd64.deb"
else
......
......@@ -147,8 +147,8 @@ function make_ce_framework_dockcerfile(){
function make_unbuntu18_cu117_dockerfile(){
dockerfile_name="Dockerfile.cuda117_cudnn8_gcc82_ubuntu18_coverage"
sed "s#<baseimg>#nvidia/cuda:11.7.0-cudnn8-devel-ubuntu18.04#g" ./Dockerfile.ubuntu18 >${dockerfile_name}
sed -i "s#<setcuda>#ENV LD_LIBRARY_PATH=/usr/local/cuda-11.7/targets/x86_64-linux/lib:\$LD_LIBRARY_PATH #g" ${dockerfile_name}
sed "s#<baseimg>#nvidia/cuda:12.0.1-cudnn8-devel-ubuntu20.04#g" ./Dockerfile.ubuntu20 >${dockerfile_name}
sed -i "s#<setcuda>#ENV LD_LIBRARY_PATH=/usr/local/cuda-12.0/targets/x86_64-linux/lib:\$LD_LIBRARY_PATH #g" ${dockerfile_name}
sed -i 's#<install_cpu_package>##g' ${dockerfile_name}
sed -i "7i ENV TZ=Asia/Beijing" ${dockerfile_name}
sed -i "8i RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone" ${dockerfile_name}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册