提交 e7eeb19f 编写于 作者: Q qiaolongfei

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into...

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into parallel-executor-support-prefetch
......@@ -22,7 +22,12 @@ endif()
cc_test(eigen_test SRCS eigen_test.cc DEPS tensor)
nv_test(mixed_vector_test SRCS mixed_vector_test.cu DEPS place memory device_context tensor)
if(WITH_GPU)
nv_test(mixed_vector_test SRCS mixed_vector_test.cc mixed_vector_test.cu DEPS place memory device_context tensor)
else()
cc_test(mixed_vector_test SRCS mixed_vector_test.cc DEPS place memory device_context tensor)
endif()
cc_library(lod_tensor SRCS lod_tensor.cc DEPS ddim place tensor framework_proto recordio)
cc_test(lod_tensor_test SRCS lod_tensor_test.cc DEPS lod_tensor memory)
nv_test(lod_tensor_gpu_test SRCS lod_tensor_test.cu DEPS lod_tensor)
......
......@@ -16,6 +16,7 @@
#include <algorithm>
#include <initializer_list>
#include <memory>
#include <vector>
#include "paddle/fluid/framework/tensor.h"
......@@ -386,13 +387,14 @@ template <typename T>
class CPUVector : public std::vector<T, std::allocator<T>> {
public:
CPUVector() : std::vector<T>() {}
CPUVector(size_t count, const T &value = T())
CPUVector(size_t count, const T &value = T()) // NOLINT
: std::vector<T>(count, value) {}
CPUVector(std::initializer_list<T> init) : std::vector<T>(init) {}
CPUVector(const std::vector<T> &other) : std::vector<T>(other) {}
explicit CPUVector(const CPUVector<T> &other) : std::vector<T>(other) {}
CPUVector(const std::vector<T> &other) : std::vector<T>(other) {} // NOLINT
CPUVector(const CPUVector<T> &other) : std::vector<T>(other) {}
CPUVector(CPUVector<T> &&other) : std::vector<T>(std::move(other)) {}
CPUVector(std::vector<T> &&other) : std::vector<T>(std::move(other)) {}
CPUVector(std::vector<T> &&other) // NOLINT
: std::vector<T>(std::move(other)) {}
CPUVector &operator=(const CPUVector &other) {
this->assign(other.begin(), other.end());
return *this;
......@@ -410,8 +412,6 @@ class CPUVector : public std::vector<T, std::allocator<T>> {
return os;
}
void resize(size_t size) { this->resize(size); }
T &operator[](size_t id) { return this->at(id); }
const T &operator[](size_t id) const { return this->at(id); }
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <memory>
#include "glog/logging.h"
#include "gtest/gtest.h"
#include "paddle/fluid/framework/mixed_vector.h"
template <typename T>
using vec = paddle::framework::Vector<T>;
TEST(mixed_vector, CPU_VECTOR) {
vec<int> tmp;
for (int i = 0; i < 10; ++i) {
tmp.push_back(i);
}
ASSERT_EQ(tmp.size(), 10UL);
vec<int> tmp2;
tmp2 = tmp;
ASSERT_EQ(tmp2.size(), 10UL);
for (int i = 0; i < 10; ++i) {
ASSERT_EQ(tmp2[i], i);
ASSERT_EQ(tmp2[i], tmp[i]);
}
int cnt = 0;
for (auto& t : tmp2) {
ASSERT_EQ(t, cnt);
++cnt;
}
}
TEST(mixed_vector, InitWithCount) {
paddle::framework::Vector<int> vec(10, 10);
for (int i = 0; i < 10; ++i) {
ASSERT_EQ(vec[i], 10);
}
}
TEST(mixed_vector, ForEach) {
vec<int> tmp;
for (auto& v : tmp) {
VLOG(3) << v;
}
}
TEST(mixed_vector, Reserve) {
paddle::framework::Vector<int> vec;
vec.reserve(1);
vec.push_back(0);
vec.push_back(0);
vec.push_back(0);
}
TEST(mixed_vector, Resize) {
paddle::framework::Vector<int> vec;
vec.resize(1);
vec.push_back(0);
vec.push_back(0);
vec.push_back(0);
}
......@@ -11,7 +11,9 @@
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <cuda_runtime.h>
#include <memory>
#include "glog/logging.h"
#include "gtest/gtest.h"
......@@ -21,26 +23,6 @@
template <typename T>
using vec = paddle::framework::Vector<T>;
TEST(mixed_vector, CPU_VECTOR) {
vec<int> tmp;
for (int i = 0; i < 10; ++i) {
tmp.push_back(i);
}
ASSERT_EQ(tmp.size(), 10UL);
vec<int> tmp2;
tmp2 = tmp;
ASSERT_EQ(tmp2.size(), 10UL);
for (int i = 0; i < 10; ++i) {
ASSERT_EQ(tmp2[i], i);
ASSERT_EQ(tmp2[i], tmp[i]);
}
int cnt = 0;
for (auto& t : tmp2) {
ASSERT_EQ(t, cnt);
++cnt;
}
}
static __global__ void multiply_10(int* ptr) {
for (int i = 0; i < 10; ++i) {
ptr[i] *= 10;
......@@ -91,24 +73,3 @@ TEST(mixed_vector, MultiGPU) {
ASSERT_EQ(tmp[i], i * 100);
}
}
TEST(mixed_vector, InitWithCount) {
paddle::framework::Vector<int> vec(10, 10);
for (int i = 0; i < 10; ++i) {
ASSERT_EQ(vec[i], 10);
}
}
TEST(mixed_vector, ForEach) {
vec<int> tmp;
for (auto& v : tmp) {
}
}
TEST(mixed_vector, Reserve) {
paddle::framework::Vector<int> vec;
vec.reserve(1);
vec.push_back(0);
vec.push_back(0);
vec.push_back(0);
}
......@@ -107,44 +107,24 @@ class TestMNIST(TestParallelExecutorBase):
label = np.ones(shape=[32, 1], dtype='int64')
return img, label
# simple_fc
def check_simple_fc_convergence(self, use_cuda, use_reduce=False):
def _compare_reduce_and_allreduce(self, model, use_cuda, random_data=True):
if use_cuda and not core.is_compiled_with_cuda():
return
self.check_network_convergence(simple_fc_net, use_cuda=use_cuda)
self.check_network_convergence(
simple_fc_net, use_cuda=use_cuda, allow_op_delay=True)
img, label = self._init_data()
model, use_cuda=use_cuda, use_reduce=True)
self.check_network_convergence(
simple_fc_net,
feed_dict={"image": img,
"label": label},
use_cuda=use_cuda,
use_reduce=use_reduce)
model, use_cuda=use_cuda, allow_op_delay=True, use_reduce=True)
def check_simple_fc_convergence_with_Reduce(self, use_cuda):
if use_cuda and not core.is_compiled_with_cuda():
return
self.check_network_convergence(
simple_fc_net, use_cuda=use_cuda, use_reduce=True)
self.check_network_convergence(
simple_fc_net,
use_cuda=use_cuda,
allow_op_delay=True,
use_reduce=True)
img, label = self._init_data()
img, label = self._init_data(random_data)
all_reduce_first_loss, all_reduce_last_loss = self.check_network_convergence(
simple_fc_net,
model,
feed_dict={"image": img,
"label": label},
use_cuda=use_cuda,
use_reduce=False)
reduce_first_loss, reduce_last_loss = self.check_network_convergence(
simple_fc_net,
model,
feed_dict={"image": img,
"label": label},
use_cuda=use_cuda,
......@@ -153,7 +133,24 @@ class TestMNIST(TestParallelExecutorBase):
for loss in zip(all_reduce_first_loss, reduce_first_loss):
self.assertAlmostEquals(loss[0], loss[1], delta=1e-6)
for loss in zip(all_reduce_last_loss, reduce_last_loss):
self.assertAlmostEquals(loss[0], loss[1], delta=1e-6)
self.assertAlmostEquals(loss[0], loss[1], delta=1e-4)
# simple_fc
def check_simple_fc_convergence(self, use_cuda, use_reduce=False):
if use_cuda and not core.is_compiled_with_cuda():
return
self.check_network_convergence(simple_fc_net, use_cuda=use_cuda)
self.check_network_convergence(
simple_fc_net, use_cuda=use_cuda, allow_op_delay=True)
img, label = self._init_data()
self.check_network_convergence(
simple_fc_net,
feed_dict={"image": img,
"label": label},
use_cuda=use_cuda,
use_reduce=use_reduce)
def test_simple_fc(self):
# use_cuda
......@@ -162,8 +159,8 @@ class TestMNIST(TestParallelExecutorBase):
def test_simple_fc_with_new_strategy(self):
# use_cuda, use_reduce
self.check_simple_fc_convergence_with_Reduce(True)
self.check_simple_fc_convergence_with_Reduce(False)
self._compare_reduce_and_allreduce(simple_fc_net, True)
self._compare_reduce_and_allreduce(simple_fc_net, False)
def check_simple_fc_parallel_accuracy(self, use_cuda):
if use_cuda and not core.is_compiled_with_cuda():
......@@ -209,39 +206,13 @@ class TestMNIST(TestParallelExecutorBase):
"label": label},
use_cuda=use_cuda)
def check_batchnorm_fc_convergence_use_reduce(self, use_cuda):
if use_cuda and not core.is_compiled_with_cuda():
return
self.check_network_convergence(
fc_with_batchnorm, use_cuda=use_cuda, use_reduce=True)
img, label = self._init_data()
all_reduce_first_loss, all_reduce_last_loss = self.check_network_convergence(
fc_with_batchnorm,
feed_dict={"image": img,
"label": label},
use_cuda=use_cuda,
use_reduce=False)
reduce_first_loss, reduce_last_loss = self.check_network_convergence(
fc_with_batchnorm,
feed_dict={"image": img,
"label": label},
use_cuda=use_cuda,
use_reduce=True)
for loss in zip(all_reduce_first_loss, reduce_first_loss):
self.assertAlmostEquals(loss[0], loss[1], delta=1e-6)
for loss in zip(all_reduce_last_loss, reduce_last_loss):
self.assertAlmostEquals(loss[0], loss[1], delta=1e-4)
def test_batchnorm_fc(self):
self.check_batchnorm_fc_convergence(True)
self.check_batchnorm_fc_convergence(False)
def test_batchnorm_fc_with_new_strategy(self):
self.check_batchnorm_fc_convergence_use_reduce(True)
self.check_batchnorm_fc_convergence_use_reduce(False)
self._compare_reduce_and_allreduce(fc_with_batchnorm, True)
self._compare_reduce_and_allreduce(fc_with_batchnorm, False)
if __name__ == '__main__':
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册