提交 a906a361,作者:Yihua Xu

Add the macro for NVCC (test=develop)

上级 d91740ac
...@@ -149,11 +149,20 @@ class StackKernel : public framework::OpKernel<T> { ...@@ -149,11 +149,20 @@ class StackKernel : public framework::OpKernel<T> {
for (auto i = axis; i < dim.size(); ++i) post *= dim[i]; for (auto i = axis; i < dim.size(); ++i) post *= dim[i];
#ifdef __NVCC__ #ifdef __NVCC__
int total_num = pre * n * post;
auto &dev_ctx = ctx.template device_context<DeviceContext>();
thrust::device_vector<const T *> device_x_vec(x_datas); thrust::device_vector<const T *> device_x_vec(x_datas);
auto x_data_arr = device_x_vec.data().get(); auto x_data_arr = device_x_vec.data().get();
StackFunctorForRange(dev_ctx, x_data_arr, y_data, total_num, n, post);
// Wait() must be called because device_x_vec may be destructed before
// kernel ends
dev_ctx.Wait();
#else #else
auto x_data_arr = x_datas.data(); auto x_data_arr = x_datas.data();
#endif
size_t x_offset = 0; size_t x_offset = 0;
size_t y_offset = 0; size_t y_offset = 0;
for (int i = 0; i < pre; i++) { for (int i = 0; i < pre; i++) {
...@@ -164,10 +173,6 @@ class StackKernel : public framework::OpKernel<T> { ...@@ -164,10 +173,6 @@ class StackKernel : public framework::OpKernel<T> {
} }
x_offset += post; x_offset += post;
} }
#ifdef __NVCC__
// Wait() must be called because device_x_vec may be destructed before
// kernel ends
dev_ctx.Wait();
#endif #endif
} }
}; };
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册