diff --git a/paddle/framework/mixed_vector_test.cu b/paddle/framework/mixed_vector_test.cu index 6adad6c12c350cf284700579533040eec5cbb095..a037cc3b9900c4a9ad88d530e19b525a8bd2cb53 100644 --- a/paddle/framework/mixed_vector_test.cu +++ b/paddle/framework/mixed_vector_test.cu @@ -11,3 +11,81 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include + +#include "glog/logging.h" +#include "gtest/gtest.h" +#include "paddle/framework/mixed_vector.h" +#include "paddle/platform/gpu_info.h" + +template +using vec = paddle::framework::Vector; + +TEST(mixed_vector, CPU_VECTOR) { + vec tmp; + for (int i = 0; i < 10; ++i) { + tmp.push_back(i); + } + ASSERT_EQ(tmp.size(), 10); + vec tmp2; + tmp2 = tmp; + ASSERT_EQ(tmp2.size(), 10); + for (int i = 0; i < 10; ++i) { + ASSERT_EQ(tmp2[i], i); + ASSERT_EQ(tmp2[i], tmp[i]); + } + int cnt = 0; + for (auto& t : tmp2) { + ASSERT_EQ(t, cnt); + ++cnt; + } +} + +static __global__ void multiply_10(int* ptr) { + for (int i = 0; i < 10; ++i) { + ptr[i] *= 10; + } +} + +cudaStream_t GetCUDAStream(paddle::platform::CUDAPlace place) { + return reinterpret_cast( + paddle::platform::DeviceContextPool::Instance().Get(place)) + ->stream(); +} + +TEST(mixed_vector, GPU_VECTOR) { + vec tmp; + for (int i = 0; i < 10; ++i) { + tmp.push_back(i); + } + ASSERT_EQ(tmp.size(), 10); + paddle::platform::CUDAPlace gpu(0); + + multiply_10<<<1, 1, 0, GetCUDAStream(gpu)>>>(tmp.MutableData(gpu)); + + for (int i = 0; i < 10; ++i) { + ASSERT_EQ(tmp[i], i * 10); + } +} + +TEST(mixed_vector, MultiGPU) { + if (paddle::platform::GetCUDADeviceCount() < 2) { + LOG(WARNING) << "Skip mixed_vector.MultiGPU since there are not multiple " + "GPUs in your machine."; + return; + } + + vec tmp; + for (int i = 0; i < 10; ++i) { + tmp.push_back(i); + } + ASSERT_EQ(tmp.size(), 10); + paddle::platform::CUDAPlace gpu0(0); + multiply_10<<<1, 1, 0, GetCUDAStream(gpu0)>>>(tmp.MutableData(gpu0)); + paddle::platform::CUDAPlace gpu1(1); + multiply_10<<<1, 1, 0, GetCUDAStream(gpu1)>>>(tmp.MutableData(gpu1)); + + for (int i = 0; i < 10; ++i) { + ASSERT_EQ(tmp[i], i * 100); + } +}