提交 8f2486ca 编写于 作者: C chenweihang

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into squeeze_op

# Get the latest git tag.
set(PADDLE_VERSION $ENV{PADDLE_VERSION})
set(tmp_version "HEAD")
set(TAG_VERSION_REGEX "[0-9]+\\.[0-9]+\\.[0-9]+(\\.(a|b|rc)\\.[0-9]+)?")
set(COMMIT_VERSION_REGEX "[0-9a-f]+[0-9a-f]+[0-9a-f]+[0-9a-f]+[0-9a-f]+")
while ("${PADDLE_VERSION}" STREQUAL "")
execute_process(
COMMAND ${GIT_EXECUTABLE} describe --tags --abbrev=0 ${tmp_version}
COMMAND ${GIT_EXECUTABLE} describe --tags --abbrev=0 --always ${tmp_version}
WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}
OUTPUT_VARIABLE GIT_TAG_NAME
RESULT_VARIABLE GIT_RESULT
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
if (NOT ${GIT_RESULT})
# Check the tag is a correct version
if (${GIT_TAG_NAME} MATCHES "v[0-9]+\\.[0-9]+\\.[0-9]+(\\.(a|b|rc)\\.[0-9]+)?")
if (${GIT_TAG_NAME} MATCHES "${COMMIT_VERSION_REGEX}")
# if no tag was found, set PADDLE_VERSION to latest
set(PADDLE_VERSION "latest")
elseif (${GIT_TAG_NAME} MATCHES "v${TAG_VERSION_REGEX}")
string(REPLACE "v" "" PADDLE_VERSION ${GIT_TAG_NAME})
else() # otherwise, get the previous git tag name.
set(tmp_version "${GIT_TAG_NAME}~1")
......
......@@ -21,8 +21,8 @@ namespace framework {
// a static local variable is already being initialized.
// https://stackoverflow.com/questions/11711920/how-to-implement-multithread-safe-singleton-in-c11-without-using-mutex
OpInfoMap& OpInfoMap::Instance() {
static OpInfoMap* g_op_info_map = new OpInfoMap();
return *g_op_info_map;
static OpInfoMap g_op_info_map;
return g_op_info_map;
}
} // namespace framework
} // namespace paddle
......@@ -19,8 +19,9 @@ namespace paddle {
namespace memory {
namespace detail {
BuddyAllocator::BuddyAllocator(SystemAllocator* system_allocator,
size_t min_chunk_size, size_t max_chunk_size)
BuddyAllocator::BuddyAllocator(
std::unique_ptr<SystemAllocator> system_allocator, size_t min_chunk_size,
size_t max_chunk_size)
: min_chunk_size_(min_chunk_size),
max_chunk_size_(max_chunk_size),
cache_(system_allocator->UseGpu()),
......
......@@ -14,6 +14,7 @@ limitations under the License. */
#pragma once
#include <memory>
#include <mutex> // NOLINT
#include <set>
#include <tuple>
......@@ -32,8 +33,8 @@ namespace detail {
class BuddyAllocator {
public:
BuddyAllocator(SystemAllocator* system_allocator, size_t min_chunk_size,
size_t max_chunk_size);
BuddyAllocator(std::unique_ptr<SystemAllocator> system_allocator,
size_t min_chunk_size, size_t max_chunk_size);
~BuddyAllocator();
......@@ -103,7 +104,7 @@ class BuddyAllocator {
private:
/*! Allocate CPU/GPU memory from system */
SystemAllocator* system_allocator_;
std::unique_ptr<SystemAllocator> system_allocator_;
std::mutex mutex_;
};
......
......@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <vector>
#include "paddle/fluid/memory/malloc.h"
#include "glog/logging.h"
......@@ -34,12 +36,15 @@ namespace memory {
using BuddyAllocator = detail::BuddyAllocator;
BuddyAllocator* GetCPUBuddyAllocator() {
static std::once_flag init_flag;
static detail::BuddyAllocator* a = nullptr;
if (a == nullptr) {
a = new detail::BuddyAllocator(new detail::CPUAllocator,
platform::CpuMinChunkSize(),
platform::CpuMaxChunkSize());
}
std::call_once(init_flag, []() {
a = new detail::BuddyAllocator(
std::unique_ptr<detail::SystemAllocator>(new detail::CPUAllocator),
platform::CpuMinChunkSize(), platform::CpuMaxChunkSize());
});
return a;
}
......@@ -68,19 +73,22 @@ size_t Used<platform::CPUPlace>(platform::CPUPlace place) {
#ifdef PADDLE_WITH_CUDA
BuddyAllocator* GetGPUBuddyAllocator(int gpu_id) {
static BuddyAllocator** as = NULL;
if (as == NULL) {
static std::once_flag init_flag;
static detail::BuddyAllocator** a_arr = nullptr;
std::call_once(init_flag, [gpu_id]() {
int gpu_num = platform::GetCUDADeviceCount();
as = new BuddyAllocator*[gpu_num];
for (int gpu = 0; gpu < gpu_num; gpu++) {
as[gpu] = nullptr;
}
}
platform::SetDeviceId(gpu_id);
if (!as[gpu_id]) {
as[gpu_id] = new BuddyAllocator(new detail::GPUAllocator(gpu_id),
platform::GpuMinChunkSize(),
platform::GpuMaxChunkSize());
PADDLE_ENFORCE(gpu_id < gpu_num, "gpu_id:%d should < gpu_num:%d", gpu_id,
gpu_num);
a_arr = new BuddyAllocator*[gpu_num];
for (int i = 0; i < gpu_num; i++) {
a_arr[i] = nullptr;
platform::SetDeviceId(i);
a_arr[i] = new BuddyAllocator(
std::unique_ptr<detail::SystemAllocator>(new detail::GPUAllocator(i)),
platform::GpuMinChunkSize(), platform::GpuMaxChunkSize());
VLOG(10) << "\n\nNOTE: each GPU device use "
<< FLAGS_fraction_of_gpu_memory_to_use * 100
<< "% of GPU memory.\n"
......@@ -88,7 +96,10 @@ BuddyAllocator* GetGPUBuddyAllocator(int gpu_id) {
<< "FLAGS_fraction_of_gpu_memory_to_use"
<< "' to change the fraction of GPU usage.\n\n";
}
return as[gpu_id];
});
platform::SetDeviceId(gpu_id);
return a_arr[gpu_id];
}
template <>
......@@ -125,12 +136,16 @@ void Free<platform::CUDAPlace>(platform::CUDAPlace place, void* p) {
}
BuddyAllocator* GetCUDAPinnedBuddyAllocator() {
static BuddyAllocator* ba = NULL;
if (ba == NULL) {
ba = new BuddyAllocator(new detail::CUDAPinnedAllocator,
static std::once_flag init_flag;
static BuddyAllocator* ba = nullptr;
std::call_once(init_flag, []() {
ba = new BuddyAllocator(std::unique_ptr<detail::SystemAllocator>(
new detail::CUDAPinnedAllocator),
platform::CUDAPinnedMinChunkSize(),
platform::CUDAPinnedMaxChunkSize());
}
});
return ba;
}
......
......@@ -205,10 +205,11 @@ class ConditionalBlockGradInferShape : public framework::InferShapeBase {
context->SetOutputsDim(framework::GradVarName("Params"),
context->GetInputsDim("Params"));
}
PADDLE_ENFORCE(context->HasOutputs(framework::GradVarName("X")));
if (context->HasOutputs(framework::GradVarName("X"))) {
context->SetOutputsDim(framework::GradVarName("X"),
context->GetInputsDim("X"));
}
}
};
class ConditionalBlockGradMaker : public framework::SingleGradOpDescMaker {
......
......@@ -44,8 +44,10 @@ class MergeLoDTensorOp : public framework::OperatorBase {
scope.FindVar(Output("Out"))->GetMutable<framework::LoDTensor>();
auto level = static_cast<size_t>(Attr<int>("level"));
auto &mask_dim = mask.dims();
PADDLE_ENFORCE(in_true.numel() || in_false.numel(),
"Input(InTrue) or Input(InFalse) should be initialized.");
auto &mask_dim = mask.dims();
std::unique_ptr<framework::LoDTensor> cpu_mask{new framework::LoDTensor()};
if (platform::is_cpu_place(mask.place())) {
cpu_mask->ShareDataWith(mask);
......@@ -59,19 +61,27 @@ class MergeLoDTensorOp : public framework::OperatorBase {
}
auto *mask_data = cpu_mask->data<bool>();
int rank = in_true.dims().size();
platform::Place place = in_true.place();
std::type_index data_type = in_true.type();
framework::DDim in_true_dims =
framework::slice_ddim(in_true.dims(), 1, rank);
platform::Place place = dev_place;
int64_t batch_size = in_true.dims()[0] + in_false.dims()[0];
auto in_true_dim_vec = framework::vectorize(in_true_dims);
in_true_dim_vec.insert(in_true_dim_vec.begin(), batch_size);
std::type_index data_type =
in_true.IsInitialized() ? in_true.type() : in_false.type();
int rank;
framework::DDim in_dims;
if (in_true.IsInitialized()) {
rank = in_true.dims().size();
in_dims = framework::slice_ddim(in_true.dims(), 1, rank);
} else {
rank = in_false.dims().size();
in_dims = framework::slice_ddim(in_false.dims(), 1, rank);
}
auto in_dim_vec = framework::vectorize(in_dims);
in_dim_vec.insert(in_dim_vec.begin(), batch_size);
framework::DDim out_dims = framework::make_ddim(in_true_dim_vec);
framework::DDim out_dims = framework::make_ddim(in_dim_vec);
out->Resize(out_dims);
out->mutable_data(place, data_type);
auto *out_lod = out->mutable_lod();
......
......@@ -50,14 +50,14 @@ class SqueezeOpInferShape : public framework::InferShapeBase {
static framework::DDim GetOutputShape(const std::vector<int> squeeze_dims,
const framework::DDim &in_dims) {
int num_squeeze_dims = squeeze_dims.size();
int num_squeeze_dims = static_cast<int>(squeeze_dims.size());
int cnt_squeezed_dims = 0;
bool should_squeeze[9] = {false};
// Determines number of dimensions of output tensor after squeeze.
// Mark and count the dimensions need to be squeezed
if (num_squeeze_dims == 0) {
for (int idx = 0; idx < in_dims.size(); ++idx) {
for (int idx = 0; idx < static_cast<int>(in_dims.size()); ++idx) {
if (in_dims[idx] == 1) {
should_squeeze[idx] = true;
++cnt_squeezed_dims;
......@@ -84,7 +84,8 @@ class SqueezeOpInferShape : public framework::InferShapeBase {
// Make output dimensions
std::vector<int64_t> output_shape(in_dims.size() - cnt_squeezed_dims, 0);
for (int in_idx = 0, out_idx = 0; in_idx < in_dims.size(); ++in_idx) {
for (int in_idx = 0, out_idx = 0; in_idx < static_cast<int>(in_dims.size());
++in_idx) {
if (!should_squeeze[in_idx]) {
output_shape[out_idx++] = in_dims[in_idx];
}
......@@ -151,6 +152,8 @@ class SqueezeOpMaker : public framework::OpProtoAndCheckerMaker {
Case 2:
Given
X.shape = (1, 3, 1, 5)
and
axes = []
we get:
Out.shape = (3, 5)
)DOC");
......
......@@ -14,10 +14,11 @@
import paddle
import paddle.fluid.layers as layers
from paddle.fluid.framework import Program, program_guard, default_main_program, default_startup_program
from paddle.fluid.framework import Program, program_guard
from paddle.fluid.executor import Executor
from paddle.fluid.optimizer import MomentumOptimizer
import paddle.fluid.core as core
import paddle.fluid as fluid
import unittest
import numpy as np
......@@ -31,14 +32,13 @@ class TestMNISTIfElseOp(unittest.TestCase):
label = layers.data(name='y', shape=[1], dtype='int64')
limit = layers.fill_constant_batch_size_like(
input=label, dtype='int64', shape=[1], value=5.0)
limit = layers.fill_constant(shape=[1], dtype='int64', value=5)
cond = layers.less_than(x=label, y=limit)
true_image, false_image = layers.split_lod_tensor(
input=image, mask=cond)
true_out = layers.create_tensor(dtype='float32')
true_cond = layers.ConditionalBlock([true_image])
true_cond = layers.ConditionalBlock([cond])
with true_cond.block():
hidden = layers.fc(input=true_image, size=100, act='tanh')
......@@ -46,7 +46,7 @@ class TestMNISTIfElseOp(unittest.TestCase):
layers.assign(input=prob, output=true_out)
false_out = layers.create_tensor(dtype='float32')
false_cond = layers.ConditionalBlock([false_image])
false_cond = layers.ConditionalBlock([cond])
with false_cond.block():
hidden = layers.fc(input=false_image, size=200, act='tanh')
......@@ -64,7 +64,7 @@ class TestMNISTIfElseOp(unittest.TestCase):
train_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.mnist.train(), buf_size=8192),
batch_size=200)
batch_size=10)
place = core.CPUPlace()
exe = Executor(place)
......@@ -94,8 +94,7 @@ class TestMNISTIfElseOp(unittest.TestCase):
label = layers.data(name='y', shape=[1], dtype='int64')
limit = layers.fill_constant_batch_size_like(
input=label, dtype='int64', shape=[1], value=5.0)
limit = layers.fill_constant(shape=[1], dtype='int64', value=5)
cond = layers.less_than(x=label, y=limit)
ie = layers.IfElse(cond)
......@@ -125,7 +124,7 @@ class TestMNISTIfElseOp(unittest.TestCase):
place = core.CPUPlace()
exe = Executor(place)
exe.run(kwargs['startup_program'])
exe.run(startup_prog)
PASS_NUM = 100
for pass_id in range(PASS_NUM):
for data in train_reader():
......@@ -133,7 +132,7 @@ class TestMNISTIfElseOp(unittest.TestCase):
y_data = np.array(map(lambda x: x[1], data)).astype("int64")
y_data = y_data.reshape((y_data.shape[0], 1))
outs = exe.run(kwargs['main_program'],
outs = exe.run(prog,
feed={'x': x_data,
'y': y_data},
fetch_list=[avg_loss])
......@@ -143,6 +142,67 @@ class TestMNISTIfElseOp(unittest.TestCase):
self.assertFalse(True)
class TestIfElse(unittest.TestCase):
def set_test_case(self):
# condiction is: self.data < self.cond_value
self.cond_value = 0.5
self.data = np.random.rand(25, 1).astype(np.float32)
def compare_ifelse_op_and_numpy(self, place):
self.set_test_case()
prog = Program()
startup_prog = Program()
with program_guard(prog, startup_prog):
src = layers.data(name='data', shape=[1], dtype='float32')
cond = layers.fill_constant(
[1], dtype='float32', value=self.cond_value)
ifcond = layers.less_than(x=src, y=cond)
ie = layers.IfElse(ifcond)
with ie.true_block():
true_target = ie.input(src)
ie.output(true_target)
with ie.false_block():
false_target = ie.input(src)
ie.output(false_target)
if_out = ie()
out = layers.reduce_sum(if_out)
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
fetch_list = [out]
o1, = exe.run(fluid.default_main_program(),
feed={'data': self.data},
fetch_list=[out])
o2 = np.sum(self.data)
self.assertTrue(
np.allclose(
o1, o2, atol=1e-8),
"IfElse result : " + str(o1) + "\n Numpy result :" + str(o2))
def test_cpu(self):
self.compare_ifelse_op_and_numpy(fluid.CPUPlace())
def test_cuda(self):
if not core.is_compiled_with_cuda():
return
self.compare_ifelse_op_and_numpy(fluid.CUDAPlace(0))
class TestIfElseTrueBranch(TestIfElse):
def set_test_case(self):
# condiction is: self.data < self.cond_value
self.cond_value = 10.
self.data = np.random.rand(25, 1).astype(np.float32)
class TestIfElseFalseBranch(TestIfElse):
def set_test_case(self):
# condiction is: self.data < self.cond_value
self.cond_value = -10.
self.data = np.random.rand(25, 1).astype(np.float32)
if __name__ == '__main__':
# temp disable if else unittest since it could be buggy.
exit(0)
unittest.main()
from setuptools import setup, Distribution, Extension
import subprocess
import shutil
import os
import re
import shutil
class BinaryDistribution(Distribution):
def has_ext_modules(foo):
return True
MAJOR = 0
MINOR = 14
PATCH = 0
RC = 0
ISTAGED = False
......@@ -22,14 +19,47 @@ def git_commit():
git_commit = 'Unknown'
return git_commit
def _get_version_detail(idx):
assert idx < 3, "vesion info consists of %(major)d.%(minor)d.%(patch)d, \
so detail index must less than 3"
if re.match('@TAG_VERSION_REGEX@', '@PADDLE_VERSION@'):
version_details = '@PADDLE_VERSION@'.split('.')
if len(version_details) == 3:
return version_details[idx]
return 0
def get_major():
return int(_get_version_detail(0))
def get_minor():
return int(_get_version_detail(1))
def get_patch():
return str(_get_version_detail(2))
def is_taged():
try:
cmd = ['git', 'describe', '--exact-match', '--tags']
git_tag = subprocess.Popen(cmd, stdout = subprocess.PIPE).communicate()[0].strip()
except:
return False
if git_tag.replace('v', '') == '@PADDLE_VERSION@':
return True
else:
return False
def write_version_py(filename='paddle/version.py'):
cnt = '''
# THIS FILE IS GENERATED FROM PADDLEPADDLE SETUP.PY
#
full_version = '%(major)d.%(minor)d.%(patch)d'
full_version = '%(major)d.%(minor)d.%(patch)s'
major = '%(major)d'
minor = '%(minor)d'
patch = '%(patch)d'
patch = '%(patch)s'
rc = '%(rc)d'
istaged = %(istaged)s
commit = '%(commit)s'
......@@ -51,13 +81,13 @@ def mkl():
commit = git_commit()
with open(filename, 'w') as f:
f.write(cnt % {
'major': MAJOR,
'minor': MINOR,
'patch': PATCH,
'major': get_major(),
'minor': get_minor(),
'patch': get_patch(),
'rc': RC,
'version': '${PADDLE_VERSION}',
'commit': commit,
'istaged': ISTAGED,
'istaged': is_taged(),
'with_mkl': '@WITH_MKL@'})
write_version_py(filename='@PADDLE_BINARY_DIR@/python/paddle/version.py')
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册