未验证 提交 860f6077 编写于 作者: S sneaxiy 提交者: GitHub

fix broadcast kernel (#46158)

上级 e468e93c
...@@ -70,11 +70,12 @@ namespace platform { ...@@ -70,11 +70,12 @@ namespace platform {
* *
*/ */
#define CUDA_KERNEL_LOOP_TYPE(i, num, index_type) \ #define CUDA_KERNEL_LOOP_TYPE(i, num, index_type) \
int64_t __index__ = \ int64_t __index__ = \
static_cast<int64_t>(blockIdx.x) * blockDim.x + threadIdx.x; \ static_cast<int64_t>(blockIdx.x) * blockDim.x + threadIdx.x; \
for (index_type i = __index__; __index__ < (num); \ int64_t __stride__ = static_cast<int64_t>(blockDim.x) * gridDim.x; \
__index__ += blockDim.x * gridDim.x, i = __index__) for (index_type i = __index__; __index__ < (num); \
__index__ += __stride__, i = __index__)
class CublasHandleHolder { class CublasHandleHolder {
public: public:
......
...@@ -70,8 +70,9 @@ namespace platform { ...@@ -70,8 +70,9 @@ namespace platform {
#define CUDA_KERNEL_LOOP_TYPE(i, num, index_type) \ #define CUDA_KERNEL_LOOP_TYPE(i, num, index_type) \
int64_t __index__ = \ int64_t __index__ = \
static_cast<int64_t>(hipBlockIdx_x) * hipBlockDim_x + hipThreadIdx_x; \ static_cast<int64_t>(hipBlockIdx_x) * hipBlockDim_x + hipThreadIdx_x; \
int64_t __stride__ = static_cast<int64_t>(hipBlockDim_x) * hipGridDim_x; \
for (index_type i = __index__; __index__ < (num); \ for (index_type i = __index__; __index__ < (num); \
__index__ += hipBlockDim_x * hipGridDim_x, i = __index__) __index__ += __stride__, i = __index__)
class CublasHandleHolder { class CublasHandleHolder {
public: public:
......
...@@ -62,11 +62,12 @@ namespace gpu { ...@@ -62,11 +62,12 @@ namespace gpu {
* *
*/ */
#define CUDA_KERNEL_LOOP_TYPE(i, num, index_type) \ #define CUDA_KERNEL_LOOP_TYPE(i, num, index_type) \
int64_t __index__ = \ int64_t __index__ = \
static_cast<int64_t>(blockIdx.x) * blockDim.x + threadIdx.x; \ static_cast<int64_t>(blockIdx.x) * blockDim.x + threadIdx.x; \
for (index_type i = __index__; __index__ < (num); \ int64_t __stride__ = static_cast<int64_t>(blockDim.x) * gridDim.x; \
__index__ += blockDim.x * gridDim.x, i = __index__) for (index_type i = __index__; __index__ < (num); \
__index__ += __stride__, i = __index__)
} // namespace gpu } // namespace gpu
} // namespace backends } // namespace backends
......
...@@ -65,8 +65,9 @@ namespace gpu { ...@@ -65,8 +65,9 @@ namespace gpu {
#define CUDA_KERNEL_LOOP_TYPE(i, num, index_type) \ #define CUDA_KERNEL_LOOP_TYPE(i, num, index_type) \
int64_t __index__ = \ int64_t __index__ = \
static_cast<int64_t>(hipBlockIdx_x) * hipBlockDim_x + hipThreadIdx_x; \ static_cast<int64_t>(hipBlockIdx_x) * hipBlockDim_x + hipThreadIdx_x; \
int64_t __stride__ = static_cast<int64_t>(hipBlockDim_x) * hipGridDim_x; \
for (index_type i = __index__; __index__ < (num); \ for (index_type i = __index__; __index__ < (num); \
__index__ += hipBlockDim_x * hipGridDim_x, i = __index__) __index__ += __stride__, i = __index__)
} // namespace gpu } // namespace gpu
} // namespace backends } // namespace backends
......
...@@ -468,6 +468,397 @@ void LaunchBroadcastKernel( ...@@ -468,6 +468,397 @@ void LaunchBroadcastKernel(
func); func);
} }
#ifndef PADDLE_WITH_XPU_KP
// Translates a flat element offset expressed in the `src_strides` layout into
// the matching flat offset in the `dst_strides` layout.
//
// For every dimension k the per-dimension coordinate is recovered by dividing
// the remaining offset by src_strides[k + 1]; that coordinate is then
// re-accumulated with dst_strides[k + 1]. A dimension whose two neighbouring
// destination strides are equal (dst_strides[k] == dst_strides[k + 1])
// contributes nothing to the result. The stride arrays built by ShapeToStride
// in this file are suffix products, so that equality means the destination
// extent of dimension k is 1, i.e. the dimension is broadcast.
//
// src_idx:     flat offset in the source layout.
// src_strides: strides of the source layout; entries [1, rank] are read.
// dst_strides: strides of the destination layout; entries [0, rank] are read.
// rank:        number of dimensions to walk.
// Returns the flat offset in the destination layout.
HOSTDEVICE static int64_t ConvertSrcIdxToDstIdx(
    int64_t src_idx,
    const phi::Array<int64_t, phi::DDim::kMaxRank + 1> &src_strides,
    const phi::Array<int64_t, phi::DDim::kMaxRank + 1> &dst_strides,
    int rank) {
  int64_t dst_idx = 0;
  for (int k = 0; k < rank; ++k) {
    // Coordinate along dimension k, then strip it from the remaining offset.
    const int64_t local_idx = src_idx / src_strides[k + 1];
    src_idx -= local_idx * src_strides[k + 1];
    // Skip dimensions that do not advance the destination offset.
    if (dst_strides[k] != dst_strides[k + 1]) {
      dst_idx += local_idx * dst_strides[k + 1];
    }
  }
  return dst_idx;
}
// Fills `*out` with input elements for the output positions starting at `idx`.
//
// - IsBoundary == true: exactly `n` elements are gathered one by one through
//   ConvertSrcIdxToDstIdx (the `need_broadcast` flag is not consulted).
// - IsBoundary == false and !need_broadcast: a single vectorized phi::Load of
//   VecSize elements from `in + idx`.
// - IsBoundary == false and need_broadcast: VecSize elements are gathered one
//   by one through ConvertSrcIdxToDstIdx (`n` is not used).
template <typename T, int VecSize, bool IsBoundary>
HOSTDEVICE static void ReadVecDataWithInt64Index(
    const T *in,
    int64_t idx,
    bool need_broadcast,
    const phi::Array<int64_t, phi::DDim::kMaxRank + 1> &src_strides,
    const phi::Array<int64_t, phi::DDim::kMaxRank + 1> &dst_strides,
    int rank,
    int n,
    phi::AlignedVector<T, VecSize> *out) {
  // Fast path: contiguous, non-broadcast, full vector.
  if (!IsBoundary && !need_broadcast) {
    phi::Load<T, VecSize>(in + idx, out);
    return;
  }

  // Tail path: only `n` valid elements remain.
  if (IsBoundary) {
    for (int lane = 0; lane < n; ++lane) {
      (*out)[lane] = in[ConvertSrcIdxToDstIdx(
          idx + lane, src_strides, dst_strides, rank)];
    }
    return;
  }

  // Broadcast path: translate every lane's offset individually.
#pragma unroll
  for (int lane = 0; lane < VecSize; ++lane) {
    (*out)[lane] = in[ConvertSrcIdxToDstIdx(
        idx + lane, src_strides, dst_strides, rank)];
  }
}
// Applies `functor` to lane `i` of each of the NumIns input vectors and
// returns the result as OutT.
// This primary template only declares Run; the definitions in this file come
// from the partial specializations for NumIns = 0, 1, 2 and 3.
template <typename InT,
typename OutT,
typename Functor,
int VecSize,
int NumIns>
struct ApplyFunctorWithInt64IndexHelper {
// ins_vec: pointer to the first of the per-input vectors.
// functor: callable to invoke.
// i:       lane index inside each vector.
HOSTDEVICE static OutT Run(const phi::AlignedVector<InT, VecSize> *ins_vec,
Functor functor,
int i);
};
// Specialization for functors that take no input: `ins_vec` and `i` are
// accepted for interface uniformity but are not read.
template <typename InT, typename OutT, typename Functor, int VecSize>
struct ApplyFunctorWithInt64IndexHelper<InT, OutT, Functor, VecSize, 0> {
  HOSTDEVICE static OutT Run(const phi::AlignedVector<InT, VecSize> *ins_vec,
                             Functor functor,
                             int i) {
    OutT result = static_cast<OutT>(functor());
    return result;
  }
};
// Specialization for unary functors: feeds lane `i` of the single input
// vector to `functor` and casts the result to OutT.
template <typename InT, typename OutT, typename Functor, int VecSize>
struct ApplyFunctorWithInt64IndexHelper<InT, OutT, Functor, VecSize, 1> {
  HOSTDEVICE static OutT Run(const phi::AlignedVector<InT, VecSize> *ins_vec,
                             Functor functor,
                             int i) {
    const phi::AlignedVector<InT, VecSize> &arg0 = ins_vec[0];
    return static_cast<OutT>(functor(arg0[i]));
  }
};
// Specialization for binary functors: feeds lane `i` of the first and second
// input vectors to `functor` and casts the result to OutT.
template <typename InT, typename OutT, typename Functor, int VecSize>
struct ApplyFunctorWithInt64IndexHelper<InT, OutT, Functor, VecSize, 2> {
  HOSTDEVICE static OutT Run(const phi::AlignedVector<InT, VecSize> *ins_vec,
                             Functor functor,
                             int i) {
    const phi::AlignedVector<InT, VecSize> &arg0 = ins_vec[0];
    const phi::AlignedVector<InT, VecSize> &arg1 = ins_vec[1];
    return static_cast<OutT>(functor(arg0[i], arg1[i]));
  }
};
// Specialization for ternary functors: feeds lane `i` of the three input
// vectors, in order, to `functor` and casts the result to OutT.
template <typename InT, typename OutT, typename Functor, int VecSize>
struct ApplyFunctorWithInt64IndexHelper<InT, OutT, Functor, VecSize, 3> {
  HOSTDEVICE static OutT Run(const phi::AlignedVector<InT, VecSize> *ins_vec,
                             Functor functor,
                             int i) {
    const phi::AlignedVector<InT, VecSize> &arg0 = ins_vec[0];
    const phi::AlignedVector<InT, VecSize> &arg1 = ins_vec[1];
    const phi::AlignedVector<InT, VecSize> &arg2 = ins_vec[2];
    return static_cast<OutT>(functor(arg0[i], arg1[i], arg2[i]));
  }
};
// Compile-time max(N, 1). Used to size arrays so that they always hold at
// least one element, even when N is zero.
template <int N>
struct MaxWithOne {
  static constexpr int kValue = (N < 1) ? 1 : N;
};
// Elementwise broadcast kernel that does all offset arithmetic in int64_t.
//
// Launch layout: 1-D grid of 1-D blocks. Every thread handles VecSize
// consecutive output elements per iteration and advances by
// blockDim.x * gridDim.x * VecSize (a grid-stride loop), so any grid size
// covers the whole output.
//
// ins:             input pointers, one per operand (NumIns of them are read).
// out:             output pointer; written with vectorized stores of VecSize
//                  elements for full chunks and scalar stores for the tail.
// ins_strides:     per-input stride arrays.
// out_strides:     output stride array; out_strides[0] is the element count.
// need_broadcasts: per-input flag selecting the gather path over a plain
//                  vectorized load.
// rank:            number of dimensions described by the stride arrays.
// functor:         elementwise operation applied to the loaded inputs.
template <typename InT,
          typename OutT,
          typename Functor,
          int VecSize,
          int NumIns>
__global__ void BroadcastKernelWithInt64Index(
    phi::Array<const InT *, MaxWithOne<NumIns>::kValue> ins,
    OutT *out,
    phi::Array<phi::Array<int64_t, phi::DDim::kMaxRank + 1>,
               MaxWithOne<NumIns>::kValue> ins_strides,
    phi::Array<int64_t, phi::DDim::kMaxRank + 1> out_strides,
    phi::Array<bool, MaxWithOne<NumIns>::kValue> need_broadcasts,
    int rank,
    Functor functor) {
  using Apply =
      ApplyFunctorWithInt64IndexHelper<InT, OutT, Functor, VecSize, NumIns>;

  const int64_t total = out_strides[0];
  // Last offset at which a full VecSize-wide chunk still fits.
  const int64_t last_full_start = total - VecSize;
  const int64_t step = static_cast<int64_t>(blockDim.x) * gridDim.x * VecSize;
  int64_t offset =
      (static_cast<int64_t>(blockIdx.x) * blockDim.x + threadIdx.x) * VecSize;

  phi::Array<phi::AlignedVector<InT, VecSize>, MaxWithOne<NumIns>::kValue>
      loaded;
  phi::AlignedVector<OutT, VecSize> results;

  // Full chunks: vectorized read / compute / write.
  while (offset <= last_full_start) {
#pragma unroll
    for (int k = 0; k < NumIns; ++k) {
      ReadVecDataWithInt64Index<InT, VecSize, false>(ins[k],
                                                     offset,
                                                     need_broadcasts[k],
                                                     out_strides,
                                                     ins_strides[k],
                                                     rank,
                                                     VecSize,
                                                     &loaded[k]);
    }
#pragma unroll
    for (int lane = 0; lane < VecSize; ++lane) {
      results[lane] = Apply::Run(loaded.Get(), functor, lane);
    }
    phi::Store<OutT, VecSize>(results, out + offset);
    offset += step;
  }

  // Nothing left for this thread.
  if (offset >= total) {
    return;
  }

  // Tail chunk: fewer than VecSize elements remain, so `int` is wide enough.
  const int remain = static_cast<int>(total - offset);
#pragma unroll
  for (int k = 0; k < NumIns; ++k) {
    ReadVecDataWithInt64Index<InT, VecSize, true>(ins[k],
                                                  offset,
                                                  need_broadcasts[k],
                                                  out_strides,
                                                  ins_strides[k],
                                                  rank,
                                                  remain,
                                                  &loaded[k]);
  }
  for (int lane = 0; lane < remain; ++lane) {
    out[offset + lane] = Apply::Run(loaded.Get(), functor, lane);
  }
}
// Generic fallback of the int64-index broadcast launcher.
// Run never launches anything: it unconditionally raises a PermissionDenied
// error. The working implementation in this file is the partial
// specialization for NumOuts == 1.
template <typename InT,
typename OutT,
typename Functor,
int Arity,
int NumOuts,
int VecSize>
struct LaunchBroadcastKernelWithInt64IndexHelper {
// All parameters are ignored; the call always throws.
static void Run(const KPDevice &ctx,
const std::vector<const DenseTensor *> &ins,
std::vector<DenseTensor *> *outs,
int axis,
Functor functor) {
PADDLE_THROW(phi::errors::PermissionDenied(
"Unreachable code branch. This may be a bug."));
}
};
// Single-output specialization of the int64-index broadcast launcher.
//
// Run derives the broadcast shape of all inputs, converts the shapes to
// stride arrays and launches BroadcastKernelWithInt64Index on ctx.stream().
template <typename InT, typename OutT, typename Functor, int Arity, int VecSize>
struct LaunchBroadcastKernelWithInt64IndexHelper<InT,
                                                 OutT,
                                                 Functor,
                                                 Arity,
                                                 /*NumOuts=*/1,
                                                 VecSize> {
  // ctx:     device context used for output allocation, launch configuration
  //          and the stream.
  // ins:     input tensors; the first `Arity` entries are read.
  // outs:    output tensors; only (*outs)[0] is used and it is allocated here.
  // axis:    broadcast alignment axis, forwarded to CalculateBroadcastDims
  //          (which requires axis >= 0).
  // functor: elementwise operation handed to the kernel.
  static void Run(const KPDevice &ctx,
                  const std::vector<const DenseTensor *> &ins,
                  std::vector<DenseTensor *> *outs,
                  int axis,
                  Functor functor) {
    phi::Array<const InT *, MaxWithOne<Arity>::kValue> ins_ptrs;
    for (int i = 0; i < Arity; ++i) {
      ins_ptrs[i] = ins[i]->data<InT>();
    }
    auto *out_tensor = (*outs)[0];
    auto *out_ptr = ctx.Alloc<OutT>(out_tensor);

    phi::Array<phi::Array<int64_t, phi::DDim::kMaxRank>,
               MaxWithOne<Arity>::kValue>
        ins_expand_dims;
    phi::Array<int64_t, phi::DDim::kMaxRank> broadcast_out_dims;
    // Default to the output rank. With Arity == 0 neither branch below
    // assigns `rank`, and it was previously read uninitialized; with this
    // default the output shape alone drives the launch in that case.
    int rank = out_tensor->dims().size();
    if (Arity == 1) {
      rank = ins[0]->dims().size();
      for (int i = 0; i < rank; ++i) {
        broadcast_out_dims[i] = ins[0]->dims()[i];
      }
      ins_expand_dims[0] = broadcast_out_dims;
    } else if (Arity >= 2) {
      CalculateBroadcastDims(ins[0]->dims().Get(),
                             ins[1]->dims().Get(),
                             ins[0]->dims().size(),
                             ins[1]->dims().size(),
                             axis,
                             ins_expand_dims[0].GetMutable(),
                             ins_expand_dims[1].GetMutable(),
                             broadcast_out_dims.GetMutable(),
                             &rank);
      // Fold every further input into the running broadcast shape. The rank
      // must not grow here, because the expanded dims of earlier inputs were
      // already computed for the current rank.
      for (int i = 2; i < Arity; ++i) {
        auto tmp_dims = broadcast_out_dims;
        phi::Array<int64_t, phi::DDim::kMaxRank> tmp_expand_dims;
        int tmp_rank;
        PADDLE_ENFORCE_GE(rank,
                          ins[i]->dims().size(),
                          phi::errors::InvalidArgument(
                              "Unsupported reverse broadcast when the input "
                              "tensor number is larger than 2."));
        CalculateBroadcastDims(tmp_dims.Get(),
                               ins[i]->dims().Get(),
                               rank,
                               ins[i]->dims().size(),
                               axis,
                               tmp_expand_dims.GetMutable(),
                               ins_expand_dims[i].GetMutable(),
                               broadcast_out_dims.GetMutable(),
                               &tmp_rank);
        PADDLE_ENFORCE_EQ(rank,
                          tmp_rank,
                          phi::errors::InvalidArgument(
                              "Wrong broadcast algorithm. This may be a bug."));
      }
    }

    phi::Array<phi::Array<int64_t, phi::DDim::kMaxRank + 1>,
               MaxWithOne<Arity>::kValue>
        ins_strides;
    phi::Array<bool, MaxWithOne<Arity>::kValue> need_broadcasts;
    phi::Array<int64_t, phi::DDim::kMaxRank + 1> out_strides;
    const auto &out_dims = out_tensor->dims();
    // Use the output tensor's own dims when it has at least `rank` of them,
    // otherwise fall back to the computed broadcast dims.
    if (rank <= out_dims.size()) {
      out_strides = ShapeToStride(out_dims.Get(), rank);
    } else {
      out_strides = ShapeToStride(broadcast_out_dims.Get(), rank);
    }
    for (int i = 0; i < Arity; ++i) {
      ins_strides[i] = ShapeToStride(ins_expand_dims[i].Get(), rank);
      // An input needs the gather path whenever any of its rank + 1 strides
      // differs from the output's.
      need_broadcasts[i] =
          !IsSameShape(out_strides.Get(), ins_strides[i].Get(), rank + 1);
    }

    // strides[0] is the product of all dims, i.e. the element count.
    int64_t numel = out_strides[0];
    auto gpu_config =
        phi::backends::gpu::GetGpuLaunchConfig1D(ctx, numel, VecSize);
    BroadcastKernelWithInt64Index<InT, OutT, Functor, VecSize, Arity>
        <<<gpu_config.block_per_grid,
           gpu_config.thread_per_block,
           0,
           ctx.stream()>>>(ins_ptrs,
                           out_ptr,
                           ins_strides,
                           out_strides,
                           need_broadcasts,
                           rank,
                           functor);
  }

 private:
  // Computes rank-aligned shapes for two operands and their broadcast shape.
  //
  // x_dims / y_dims:    operand shapes with nx / ny dimensions.
  // axis:               must be >= 0. Ignored when nx == ny; when nx > ny,
  //                     y's dimension j is aligned with x's dimension
  //                     axis + j.
  // x_out_dims,
  // y_out_dims:         receive each operand's shape expanded to *length
  //                     dimensions (positions not covered by y are set to 1).
  // broadcast_out_dims: receives the per-dimension maximum.
  // length:             receives the resulting rank, max(nx, ny).
  //
  // Raises InvalidArgument when axis < 0, when aligned dims differ and
  // neither is 1, or when a y dimension at index >= nx - axis is not 1.
  // When nx < ny the operands are swapped and the function recurses once.
  static void CalculateBroadcastDims(const int64_t *x_dims,
                                     const int64_t *y_dims,
                                     int nx,
                                     int ny,
                                     int axis,
                                     int64_t *x_out_dims,
                                     int64_t *y_out_dims,
                                     int64_t *broadcast_out_dims,
                                     int *length) {
    PADDLE_ENFORCE_GE(
        axis, 0, phi::errors::InvalidArgument("Invalid axis value: %d", axis));
    if (nx == ny) {
      *length = nx;
      for (int i = 0; i < nx; ++i) {
        if (x_dims[i] != y_dims[i]) {
          PADDLE_ENFORCE_EQ(
              x_dims[i] == 1 || y_dims[i] == 1,
              true,
              phi::errors::InvalidArgument("Cannot broadcast input shape where "
                                           "x_dims[%d] = %d, y_dims[%d] = %d.",
                                           i,
                                           x_dims[i],
                                           i,
                                           y_dims[i]));
        }
        broadcast_out_dims[i] = std::max(x_dims[i], y_dims[i]);
        x_out_dims[i] = x_dims[i];
        y_out_dims[i] = y_dims[i];
      }
    } else if (nx > ny) {
      *length = nx;
      // y dimensions that would fall past the end of x must all be 1.
      for (int i = nx - axis; i < ny; ++i) {
        PADDLE_ENFORCE_EQ(
            y_dims[i],
            1,
            phi::errors::InvalidArgument(
                "The trailing Y.shape[%d] should be 1 but got %d.",
                i,
                y_dims[i]));
      }
      for (int i = 0; i < nx; ++i) {
        if (i >= axis && i - axis < ny) {
          // Position covered by both operands.
          if (x_dims[i] != y_dims[i - axis]) {
            PADDLE_ENFORCE_EQ(x_dims[i] == 1 || y_dims[i - axis] == 1,
                              true,
                              phi::errors::InvalidArgument(
                                  "Cannot broadcast input shape where "
                                  "x_dims[%d] = %d, y_dims[%d] = %d.",
                                  i,
                                  x_dims[i],
                                  i - axis,
                                  y_dims[i - axis]));
          }
          broadcast_out_dims[i] = std::max(x_dims[i], y_dims[i - axis]);
          x_out_dims[i] = x_dims[i];
          y_out_dims[i] = y_dims[i - axis];
        } else {
          // Position covered by x only: y is padded with 1.
          broadcast_out_dims[i] = x_dims[i];
          x_out_dims[i] = x_dims[i];
          y_out_dims[i] = 1;
        }
      }
    } else {
      CalculateBroadcastDims(y_dims,
                             x_dims,
                             ny,
                             nx,
                             axis,
                             y_out_dims,
                             x_out_dims,
                             broadcast_out_dims,
                             length);
    }
  }

  // Returns true when the first `rank` entries of x and y are equal.
  static bool IsSameShape(const int64_t *x, const int64_t *y, int rank) {
    for (int i = 0; i < rank; ++i) {
      if (x[i] != y[i]) return false;
    }
    return true;
  }

  // Converts a shape of `rank` dims into suffix-product strides:
  // strides[rank] = 1 and strides[i] = strides[i + 1] * arr[i], so strides[0]
  // is the total element count. Entries past index `rank` are left unset.
  static phi::Array<int64_t, phi::DDim::kMaxRank + 1> ShapeToStride(
      const int64_t *arr, int rank) {
    phi::Array<int64_t, phi::DDim::kMaxRank + 1> strides;
    strides[rank] = 1;
    for (int i = rank - 1; i >= 0; --i) {
      strides[i] = strides[i + 1] * arr[i];
    }
    return strides;
  }
};
#endif
template <ElementwiseType ET, template <ElementwiseType ET,
typename InT, typename InT,
typename OutT, typename OutT,
...@@ -509,6 +900,63 @@ void BroadcastKernelForDifferentVecSize( ...@@ -509,6 +900,63 @@ void BroadcastKernelForDifferentVecSize(
outs->size(), outs->size(),
NumOuts)); NumOuts));
#ifndef PADDLE_WITH_XPU_KP
constexpr bool kEnabledInt64IndexKernel = (NumOuts == 1 && kArity <= 3);
bool use_int64_index_kernel =
kEnabledInt64IndexKernel &&
(*outs)[0]->numel() >= std::numeric_limits<int32_t>::max();
if (use_int64_index_kernel) {
int vec_size = GetVecsize<InT, OutT>(ins, outs);
switch (vec_size) {
case VecSizeL: {
LaunchBroadcastKernelWithInt64IndexHelper<InT,
OutT,
Functor,
kArity,
NumOuts,
VecSizeL>::Run(ctx,
ins,
outs,
axis,
func);
break;
}
case VecSizeM: {
LaunchBroadcastKernelWithInt64IndexHelper<InT,
OutT,
Functor,
kArity,
NumOuts,
VecSizeM>::Run(ctx,
ins,
outs,
axis,
func);
break;
}
case VecSizeS: {
LaunchBroadcastKernelWithInt64IndexHelper<InT,
OutT,
Functor,
kArity,
NumOuts,
VecSizeS>::Run(ctx,
ins,
outs,
axis,
func);
break;
}
default: {
PADDLE_THROW(phi::errors::Unimplemented(
"Unsupported vectorized size: %d!", vec_size));
break;
}
}
return;
}
#endif
// mergedim and get vec_size // mergedim and get vec_size
const auto merge_dims = DimensionsTransform(ins, (*outs)[0]->dims(), axis); const auto merge_dims = DimensionsTransform(ins, (*outs)[0]->dims(), axis);
phi::Array<kps::details::BroadcastConfig, kArity> configs; phi::Array<kps::details::BroadcastConfig, kArity> configs;
......
...@@ -92,7 +92,7 @@ struct BinaryOperation { ...@@ -92,7 +92,7 @@ struct BinaryOperation {
std::vector<DenseTensor*> outs{output}; std::vector<DenseTensor*> outs{output};
paddle::operators:: paddle::operators::
LaunchElementwiseCudaKernel<ElementwiseType::kBinary, T, T>( LaunchElementwiseCudaKernel<ElementwiseType::kBinary, T, T>(
dev_ctx, ins, &outs, -1, BinaryFunctor<T>()); dev_ctx, ins, &outs, 0, BinaryFunctor<T>());
} }
}; };
......
...@@ -101,8 +101,13 @@ RUN curl -s -q https://glide.sh/get | sh ...@@ -101,8 +101,13 @@ RUN curl -s -q https://glide.sh/get | sh
# Downgrade TensorRT # Downgrade TensorRT
COPY tools/dockerfile/build_scripts /build_scripts COPY tools/dockerfile/build_scripts /build_scripts
RUN bash /build_scripts/install_nccl2.sh RUN bash /build_scripts/install_nccl2.sh
RUN rm -rf /build_scripts
# Older versions of patchelf limited the size of the files they could process; this was fixed upstream in the commit below.
# https://github.com/NixOS/patchelf/commit/ba2695a8110abbc8cc6baf0eea819922ee5007fa
# So install a newer version here.
RUN bash /build_scripts/install_patchelf.sh
RUN rm -rf /build_scripts
# git credential to skip password typing # git credential to skip password typing
RUN git config --global credential.helper store RUN git config --global credential.helper store
...@@ -143,13 +148,6 @@ RUN wget -q https://launchpad.net/ubuntu/+archive/primary/+sourcefiles/binutils/ ...@@ -143,13 +148,6 @@ RUN wget -q https://launchpad.net/ubuntu/+archive/primary/+sourcefiles/binutils/
RUN apt-get install libprotobuf-dev -y RUN apt-get install libprotobuf-dev -y
# Older versions of patchelf limited the size of the files being processed and were fixed in this pr.
# https://github.com/NixOS/patchelf/commit/ba2695a8110abbc8cc6baf0eea819922ee5007fa
# So install a newer version here.
RUN wget -q https://paddle-ci.cdn.bcebos.com/patchelf_0.10-2_amd64.deb && \
dpkg -i patchelf_0.10-2_amd64.deb && \
rm -rf patchelf_0.10-2_amd64.deb
# Configure OpenSSH server. c.f. https://docs.docker.com/engine/examples/running_ssh_service # Configure OpenSSH server. c.f. https://docs.docker.com/engine/examples/running_ssh_service
RUN mkdir /var/run/sshd && echo 'root:root' | chpasswd && sed -ri 's/^PermitRootLogin\s+.*/PermitRootLogin yes/' /etc/ssh/sshd_config && sed -ri 's/UsePAM yes/#UsePAM yes/g' /etc/ssh/sshd_config RUN mkdir /var/run/sshd && echo 'root:root' | chpasswd && sed -ri 's/^PermitRootLogin\s+.*/PermitRootLogin yes/' /etc/ssh/sshd_config && sed -ri 's/UsePAM yes/#UsePAM yes/g' /etc/ssh/sshd_config
CMD source ~/.bashrc CMD source ~/.bashrc
......
...@@ -28,6 +28,10 @@ RUN apt-get update && \ ...@@ -28,6 +28,10 @@ RUN apt-get update && \
# Downgrade gcc&&g++ # Downgrade gcc&&g++
WORKDIR /usr/bin WORKDIR /usr/bin
COPY tools/dockerfile/build_scripts /build_scripts COPY tools/dockerfile/build_scripts /build_scripts
# Older versions of patchelf limited the size of the files they could process; this was fixed upstream in the commit below.
# https://github.com/NixOS/patchelf/commit/ba2695a8110abbc8cc6baf0eea819922ee5007fa
# So install a newer version here.
RUN bash /build_scripts/install_patchelf.sh
RUN bash /build_scripts/install_gcc.sh gcc82 && rm -rf /build_scripts RUN bash /build_scripts/install_gcc.sh gcc82 && rm -rf /build_scripts
RUN cp gcc gcc.bak && cp g++ g++.bak && rm gcc && rm g++ RUN cp gcc gcc.bak && cp g++ g++.bak && rm gcc && rm g++
RUN ln -s /usr/local/gcc-8.2/bin/gcc /usr/local/bin/gcc RUN ln -s /usr/local/gcc-8.2/bin/gcc /usr/local/bin/gcc
...@@ -99,14 +103,6 @@ RUN pip3.7 --no-cache-dir install pylint pytest astroid isort ...@@ -99,14 +103,6 @@ RUN pip3.7 --no-cache-dir install pylint pytest astroid isort
COPY ./python/requirements.txt /root/ COPY ./python/requirements.txt /root/
RUN pip3.7 --no-cache-dir install -r /root/requirements.txt RUN pip3.7 --no-cache-dir install -r /root/requirements.txt
# Older versions of patchelf limited the size of the files being processed and were fixed in this pr.
# https://github.com/NixOS/patchelf/commit/ba2695a8110abbc8cc6baf0eea819922ee5007fa
# So install a newer version here.
RUN wget -q https://paddle-ci.cdn.bcebos.com/patchelf_0.10-2_amd64.deb && \
dpkg -i patchelf_0.10-2_amd64.deb && \
rm -rf patchelf_0.10-2_amd64.deb
# Configure OpenSSH server. c.f. https://docs.docker.com/engine/examples/running_ssh_service # Configure OpenSSH server. c.f. https://docs.docker.com/engine/examples/running_ssh_service
#RUN mkdir /var/run/sshd && echo 'root:root' | chpasswd && sed -ri 's/^PermitRootLogin\s+.*/PermitRootLogin yes/' /etc/ssh/sshd_config && sed -ri 's/UsePAM yes/#UsePAM yes/g' /etc/ssh/sshd_config #RUN mkdir /var/run/sshd && echo 'root:root' | chpasswd && sed -ri 's/^PermitRootLogin\s+.*/PermitRootLogin yes/' /etc/ssh/sshd_config && sed -ri 's/UsePAM yes/#UsePAM yes/g' /etc/ssh/sshd_config
#CMD source ~/.bashrc #CMD source ~/.bashrc
......
...@@ -143,9 +143,14 @@ RUN curl -s -q https://glide.sh/get | sh ...@@ -143,9 +143,14 @@ RUN curl -s -q https://glide.sh/get | sh
# See https://github.com/PaddlePaddle/Paddle/issues/10129 for details. # See https://github.com/PaddlePaddle/Paddle/issues/10129 for details.
# Downgrade TensorRT # Downgrade TensorRT
# Older versions of patchelf limited the size of the files they could process; this was fixed upstream in the commit below.
# https://github.com/NixOS/patchelf/commit/ba2695a8110abbc8cc6baf0eea819922ee5007fa
# So install a newer version here.
COPY tools/dockerfile/build_scripts /build_scripts COPY tools/dockerfile/build_scripts /build_scripts
RUN bash /build_scripts/install_trt.sh && \ RUN bash /build_scripts/install_trt.sh && \
bash /build_scripts/install_nccl2.sh bash /build_scripts/install_nccl2.sh && \
bash /build_scripts/install_patchelf.sh
RUN rm -rf /build_scripts RUN rm -rf /build_scripts
# git credential to skip password typing # git credential to skip password typing
...@@ -236,13 +241,6 @@ RUN wget -q https://launchpad.net/ubuntu/+archive/primary/+sourcefiles/binutils/ ...@@ -236,13 +241,6 @@ RUN wget -q https://launchpad.net/ubuntu/+archive/primary/+sourcefiles/binutils/
RUN apt-get install libprotobuf-dev -y RUN apt-get install libprotobuf-dev -y
# Older versions of patchelf limited the size of the files being processed and were fixed in this pr.
# https://github.com/NixOS/patchelf/commit/ba2695a8110abbc8cc6baf0eea819922ee5007fa
# So install a newer version here.
RUN wget -q https://paddle-ci.cdn.bcebos.com/patchelf_0.10-2_amd64.deb && \
dpkg -i patchelf_0.10-2_amd64.deb && \
rm -rf patchelf_0.10-2_amd64.deb
# Configure OpenSSH server. c.f. https://docs.docker.com/engine/examples/running_ssh_service # Configure OpenSSH server. c.f. https://docs.docker.com/engine/examples/running_ssh_service
RUN mkdir /var/run/sshd && echo 'root:root' | chpasswd && sed -ri 's/^PermitRootLogin\s+.*/PermitRootLogin yes/' /etc/ssh/sshd_config && sed -ri 's/UsePAM yes/#UsePAM yes/g' /etc/ssh/sshd_config RUN mkdir /var/run/sshd && echo 'root:root' | chpasswd && sed -ri 's/^PermitRootLogin\s+.*/PermitRootLogin yes/' /etc/ssh/sshd_config && sed -ri 's/UsePAM yes/#UsePAM yes/g' /etc/ssh/sshd_config
CMD source ~/.bashrc CMD source ~/.bashrc
......
...@@ -35,6 +35,10 @@ RUN apt-get update --allow-unauthenticated && \ ...@@ -35,6 +35,10 @@ RUN apt-get update --allow-unauthenticated && \
WORKDIR /usr/bin WORKDIR /usr/bin
COPY tools/dockerfile/build_scripts /build_scripts COPY tools/dockerfile/build_scripts /build_scripts
RUN bash /build_scripts/install_trt.sh RUN bash /build_scripts/install_trt.sh
# Older versions of patchelf limited the size of the files they could process; this was fixed upstream in the commit below.
# https://github.com/NixOS/patchelf/commit/ba2695a8110abbc8cc6baf0eea819922ee5007fa
# So install a newer version here.
RUN bash /build_scripts/install_patchelf.sh
RUN bash /build_scripts/install_gcc.sh gcc82 && rm -rf /build_scripts RUN bash /build_scripts/install_gcc.sh gcc82 && rm -rf /build_scripts
RUN cp gcc gcc.bak && cp g++ g++.bak && rm gcc && rm g++ RUN cp gcc gcc.bak && cp g++ g++.bak && rm gcc && rm g++
RUN ln -s /usr/local/gcc-8.2/bin/gcc /usr/local/bin/gcc RUN ln -s /usr/local/gcc-8.2/bin/gcc /usr/local/bin/gcc
...@@ -151,14 +155,6 @@ RUN pip3.6 --no-cache-dir install -r /root/requirements.txt && \ ...@@ -151,14 +155,6 @@ RUN pip3.6 --no-cache-dir install -r /root/requirements.txt && \
pip3.8 --no-cache-dir install -r /root/requirements.txt && \ pip3.8 --no-cache-dir install -r /root/requirements.txt && \
pip3.9 --no-cache-dir install -r /root/requirements.txt pip3.9 --no-cache-dir install -r /root/requirements.txt
# Older versions of patchelf limited the size of the files being processed and were fixed in this pr.
# https://github.com/NixOS/patchelf/commit/ba2695a8110abbc8cc6baf0eea819922ee5007fa
# So install a newer version here.
RUN wget -q https://paddle-ci.cdn.bcebos.com/patchelf_0.10-2_amd64.deb && \
dpkg -i patchelf_0.10-2_amd64.deb && \
rm -rf patchelf_0.10-2_amd64.deb
# Configure OpenSSH server. c.f. https://docs.docker.com/engine/examples/running_ssh_service # Configure OpenSSH server. c.f. https://docs.docker.com/engine/examples/running_ssh_service
#RUN mkdir /var/run/sshd && echo 'root:root' | chpasswd && sed -ri 's/^PermitRootLogin\s+.*/PermitRootLogin yes/' /etc/ssh/sshd_config && sed -ri 's/UsePAM yes/#UsePAM yes/g' /etc/ssh/sshd_config #RUN mkdir /var/run/sshd && echo 'root:root' | chpasswd && sed -ri 's/^PermitRootLogin\s+.*/PermitRootLogin yes/' /etc/ssh/sshd_config && sed -ri 's/UsePAM yes/#UsePAM yes/g' /etc/ssh/sshd_config
#CMD source ~/.bashrc #CMD source ~/.bashrc
......
...@@ -106,7 +106,7 @@ export SSL_CERT_FILE=/opt/_internal/certs.pem ...@@ -106,7 +106,7 @@ export SSL_CERT_FILE=/opt/_internal/certs.pem
# tar -xzf patchelf-0.9njs2.tar.gz # tar -xzf patchelf-0.9njs2.tar.gz
# (cd patchelf-0.9njs2 && ./configure && make && make install) # (cd patchelf-0.9njs2 && ./configure && make && make install)
# rm -rf patchelf-0.9njs2.tar.gz patchelf-0.9njs2 # rm -rf patchelf-0.9njs2.tar.gz patchelf-0.9njs2
yum install -y patchelf sh "$MY_DIR/install_patchelf.sh"
# Install latest pypi release of auditwheel # Install latest pypi release of auditwheel
#LD_LIBRARY_PATH="${ORIGINAL_LD_LIBRARY_PATH}:$(dirname ${PY35_BIN})/lib" $PY35_BIN/pip install auditwheel #LD_LIBRARY_PATH="${ORIGINAL_LD_LIBRARY_PATH}:$(dirname ${PY35_BIN})/lib" $PY35_BIN/pip install auditwheel
......
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Abort on the first failing command.
set -e

# Scratch checkout directory, relative to the current working directory.
PATCHELF_SRC_DIR=patchelf_tmp

# Remove leftovers from a previous run, then fetch the 0.15.0 sources.
rm -rf "$PATCHELF_SRC_DIR"
git clone -b 0.15.0 https://github.com/NixOS/patchelf "$PATCHELF_SRC_DIR"

# Build and install from inside the checkout. The subshell keeps the
# directory change local, so the cleanup below runs from the original
# directory.
(
  cd "$PATCHELF_SRC_DIR"
  ./bootstrap.sh
  ./configure
  make
  make install
)

rm -rf "$PATCHELF_SRC_DIR"
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册