提交 96604fda 编写于 作者: Q Qiao Longfei

fix gpu data

test=develop
上级 238b24bf
@@ -361,9 +361,10 @@ class AdamOpKernel : public framework::OpKernel<T> {
 if (lazy_mode) {
   std::vector<int64_t> id_vector;
   size_t row_count = grad_merge.rows().size();
+  std::vector<int64_t> cpu_rows(grad_merge.rows());
   for (size_t row_index = 0; row_index < row_count; ++row_index) {
     for (size_t offset = 0; offset < row_numel; ++offset) {
-      size_t i = rows[row_index] * row_numel + offset;
+      size_t i = cpu_rows[row_index] * row_numel + offset;
       id_vector.push_back(i);
     }
   }
......
@@ -128,7 +128,7 @@ struct ForRangeIn<CUDADeviceContext> {
   int grid_size = (range_.size() + num_threads - 1) / num_threads;
   ForRangeInElemwiseOp<<<grid_size, block_size, 0, dev_ctx_.stream()>>>(
-      func, range_.data(), range_size);
+      func, range_.CUDAData(dev_ctx_.GetPlace()), range_size);
 }
 const CUDADeviceContext& dev_ctx_;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册