diff --git a/cmake/version.cmake b/cmake/version.cmake index cde650128a068faf32f4abfff5cdfdeb656d8577..79b8e8ac496250d85427b77fbd6a9924a962a15b 100644 --- a/cmake/version.cmake +++ b/cmake/version.cmake @@ -1,16 +1,21 @@ # Get the latest git tag. set(PADDLE_VERSION $ENV{PADDLE_VERSION}) set(tmp_version "HEAD") +set(TAG_VERSION_REGEX "[0-9]+\\.[0-9]+\\.[0-9]+(\\.(a|b|rc)\\.[0-9]+)?") +set(COMMIT_VERSION_REGEX "[0-9a-f]+[0-9a-f]+[0-9a-f]+[0-9a-f]+[0-9a-f]+") while ("${PADDLE_VERSION}" STREQUAL "") execute_process( - COMMAND ${GIT_EXECUTABLE} describe --tags --abbrev=0 ${tmp_version} + COMMAND ${GIT_EXECUTABLE} describe --tags --abbrev=0 --always ${tmp_version} WORKING_DIRECTORY ${PADDLE_SOURCE_DIR} OUTPUT_VARIABLE GIT_TAG_NAME RESULT_VARIABLE GIT_RESULT ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) if (NOT ${GIT_RESULT}) # Check the tag is a correct version - if (${GIT_TAG_NAME} MATCHES "v[0-9]+\\.[0-9]+\\.[0-9]+(\\.(a|b|rc)\\.[0-9]+)?") + if (${GIT_TAG_NAME} MATCHES "${COMMIT_VERSION_REGEX}") + # if no tag was found, set PADDLE_VERSION to latest + set(PADDLE_VERSION "latest") + elseif (${GIT_TAG_NAME} MATCHES "v${TAG_VERSION_REGEX}") string(REPLACE "v" "" PADDLE_VERSION ${GIT_TAG_NAME}) else() # otherwise, get the previous git tag name. set(tmp_version "${GIT_TAG_NAME}~1") diff --git a/paddle/fluid/framework/op_info.cc b/paddle/fluid/framework/op_info.cc index f1261dee0319440995951d1bee145404186a8ad4..af75baa5c4b98f7d092834c05eb57e9c7e131b29 100644 --- a/paddle/fluid/framework/op_info.cc +++ b/paddle/fluid/framework/op_info.cc @@ -21,8 +21,8 @@ namespace framework { // a static local variable is already being initialized. // https://stackoverflow.com/questions/11711920/how-to-implement-multithread-safe-singleton-in-c11-without-using-mutex OpInfoMap& OpInfoMap::Instance() { - static OpInfoMap* g_op_info_map = new OpInfoMap(); - return *g_op_info_map; + static OpInfoMap g_op_info_map; + return g_op_info_map; } } // namespace framework } // namespace paddle diff --git a/paddle/fluid/memory/detail/buddy_allocator.cc b/paddle/fluid/memory/detail/buddy_allocator.cc index 4194ba197948b47003863196efdac1c08a7ae4f6..01a8501dd4abe73cbc71dc4c08734cae66df08ef 100644 --- a/paddle/fluid/memory/detail/buddy_allocator.cc +++ b/paddle/fluid/memory/detail/buddy_allocator.cc @@ -19,8 +19,9 @@ namespace paddle { namespace memory { namespace detail { -BuddyAllocator::BuddyAllocator(SystemAllocator* system_allocator, - size_t min_chunk_size, size_t max_chunk_size) +BuddyAllocator::BuddyAllocator( + std::unique_ptr system_allocator, size_t min_chunk_size, + size_t max_chunk_size) : min_chunk_size_(min_chunk_size), max_chunk_size_(max_chunk_size), cache_(system_allocator->UseGpu()), diff --git a/paddle/fluid/memory/detail/buddy_allocator.h b/paddle/fluid/memory/detail/buddy_allocator.h index 2f39d774d6fb6a2bc37877eb2f8b90bebd3cda28..f0c83efc23ce39c4fc89296d672e1e55751851bf 100644 --- a/paddle/fluid/memory/detail/buddy_allocator.h +++ b/paddle/fluid/memory/detail/buddy_allocator.h @@ -14,6 +14,7 @@ limitations under the License. */ #pragma once +#include #include // NOLINT #include #include @@ -32,8 +33,8 @@ namespace detail { class BuddyAllocator { public: - BuddyAllocator(SystemAllocator* system_allocator, size_t min_chunk_size, - size_t max_chunk_size); + BuddyAllocator(std::unique_ptr system_allocator, + size_t min_chunk_size, size_t max_chunk_size); ~BuddyAllocator(); @@ -103,7 +104,7 @@ class BuddyAllocator { private: /*! Allocate CPU/GPU memory from system */ - SystemAllocator* system_allocator_; + std::unique_ptr system_allocator_; std::mutex mutex_; }; diff --git a/paddle/fluid/memory/malloc.cc b/paddle/fluid/memory/malloc.cc index bd98ed81899440a46415d30b6d74fec2dac4c155..7c800b3c164049244770ceb2070b177d8307e85e 100644 --- a/paddle/fluid/memory/malloc.cc +++ b/paddle/fluid/memory/malloc.cc @@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include + #include "paddle/fluid/memory/malloc.h" #include "glog/logging.h" @@ -34,12 +36,15 @@ namespace memory { using BuddyAllocator = detail::BuddyAllocator; BuddyAllocator* GetCPUBuddyAllocator() { + static std::once_flag init_flag; static detail::BuddyAllocator* a = nullptr; - if (a == nullptr) { - a = new detail::BuddyAllocator(new detail::CPUAllocator, - platform::CpuMinChunkSize(), - platform::CpuMaxChunkSize()); - } + + std::call_once(init_flag, []() { + a = new detail::BuddyAllocator( + std::unique_ptr(new detail::CPUAllocator), + platform::CpuMinChunkSize(), platform::CpuMaxChunkSize()); + }); + return a; } @@ -68,27 +73,33 @@ size_t Used(platform::CPUPlace place) { #ifdef PADDLE_WITH_CUDA BuddyAllocator* GetGPUBuddyAllocator(int gpu_id) { - static BuddyAllocator** as = NULL; - if (as == NULL) { + static std::once_flag init_flag; + static detail::BuddyAllocator** a_arr = nullptr; + + std::call_once(init_flag, [gpu_id]() { int gpu_num = platform::GetCUDADeviceCount(); - as = new BuddyAllocator*[gpu_num]; - for (int gpu = 0; gpu < gpu_num; gpu++) { - as[gpu] = nullptr; + PADDLE_ENFORCE(gpu_id < gpu_num, "gpu_id:%d should < gpu_num:%d", gpu_id, + gpu_num); + + a_arr = new BuddyAllocator*[gpu_num]; + for (int i = 0; i < gpu_num; i++) { + a_arr[i] = nullptr; + platform::SetDeviceId(i); + a_arr[i] = new BuddyAllocator( + std::unique_ptr(new detail::GPUAllocator(i)), + platform::GpuMinChunkSize(), platform::GpuMaxChunkSize()); + + VLOG(10) << "\n\nNOTE: each GPU device use " + << FLAGS_fraction_of_gpu_memory_to_use * 100 + << "% of GPU memory.\n" + << "You can set GFlags environment variable '" + << "FLAGS_fraction_of_gpu_memory_to_use" + << "' to change the fraction of GPU usage.\n\n"; } - } + }); + platform::SetDeviceId(gpu_id); - if (!as[gpu_id]) { - as[gpu_id] = new BuddyAllocator(new detail::GPUAllocator(gpu_id), - platform::GpuMinChunkSize(), - platform::GpuMaxChunkSize()); - VLOG(10) << "\n\nNOTE: each GPU device use " - << FLAGS_fraction_of_gpu_memory_to_use * 100 - << "% of GPU memory.\n" - << "You can set GFlags environment variable '" - << "FLAGS_fraction_of_gpu_memory_to_use" - << "' to change the fraction of GPU usage.\n\n"; - } - return as[gpu_id]; + return a_arr[gpu_id]; } template <> @@ -125,12 +136,16 @@ void Free(platform::CUDAPlace place, void* p) { } BuddyAllocator* GetCUDAPinnedBuddyAllocator() { - static BuddyAllocator* ba = NULL; - if (ba == NULL) { - ba = new BuddyAllocator(new detail::CUDAPinnedAllocator, + static std::once_flag init_flag; + static BuddyAllocator* ba = nullptr; + + std::call_once(init_flag, []() { + ba = new BuddyAllocator(std::unique_ptr( + new detail::CUDAPinnedAllocator), platform::CUDAPinnedMinChunkSize(), platform::CUDAPinnedMaxChunkSize()); - } + }); + return ba; } diff --git a/paddle/fluid/operators/conditional_block_op.cc b/paddle/fluid/operators/conditional_block_op.cc index 8cc1d94260baccfe28d213b7e021956819e2e79e..580fde753816c30b188b8a99cc63fcbafde64e25 100644 --- a/paddle/fluid/operators/conditional_block_op.cc +++ b/paddle/fluid/operators/conditional_block_op.cc @@ -205,9 +205,10 @@ class ConditionalBlockGradInferShape : public framework::InferShapeBase { context->SetOutputsDim(framework::GradVarName("Params"), context->GetInputsDim("Params")); } - PADDLE_ENFORCE(context->HasOutputs(framework::GradVarName("X"))); - context->SetOutputsDim(framework::GradVarName("X"), - context->GetInputsDim("X")); + if (context->HasOutputs(framework::GradVarName("X"))) { + context->SetOutputsDim(framework::GradVarName("X"), + context->GetInputsDim("X")); + } } }; diff --git a/paddle/fluid/operators/merge_lod_tensor_op.cc b/paddle/fluid/operators/merge_lod_tensor_op.cc index a16861b3b77fc980ab932b9d88859b38ec36108b..2dc1467b0d4816d5cc0535eb62e936cf342a241c 100644 --- a/paddle/fluid/operators/merge_lod_tensor_op.cc +++ b/paddle/fluid/operators/merge_lod_tensor_op.cc @@ -44,8 +44,10 @@ class MergeLoDTensorOp : public framework::OperatorBase { scope.FindVar(Output("Out"))->GetMutable(); auto level = static_cast(Attr("level")); - auto &mask_dim = mask.dims(); + PADDLE_ENFORCE(in_true.numel() || in_false.numel(), + "Input(InTrue) or Input(InFalse) should be initialized."); + auto &mask_dim = mask.dims(); std::unique_ptr cpu_mask{new framework::LoDTensor()}; if (platform::is_cpu_place(mask.place())) { cpu_mask->ShareDataWith(mask); @@ -59,19 +61,27 @@ class MergeLoDTensorOp : public framework::OperatorBase { } auto *mask_data = cpu_mask->data(); - int rank = in_true.dims().size(); - platform::Place place = in_true.place(); - std::type_index data_type = in_true.type(); - framework::DDim in_true_dims = - framework::slice_ddim(in_true.dims(), 1, rank); - + platform::Place place = dev_place; int64_t batch_size = in_true.dims()[0] + in_false.dims()[0]; - auto in_true_dim_vec = framework::vectorize(in_true_dims); - in_true_dim_vec.insert(in_true_dim_vec.begin(), batch_size); + std::type_index data_type = + in_true.IsInitialized() ? in_true.type() : in_false.type(); + int rank; + framework::DDim in_dims; + if (in_true.IsInitialized()) { + rank = in_true.dims().size(); + in_dims = framework::slice_ddim(in_true.dims(), 1, rank); + } else { + rank = in_false.dims().size(); + in_dims = framework::slice_ddim(in_false.dims(), 1, rank); + } + + auto in_dim_vec = framework::vectorize(in_dims); + in_dim_vec.insert(in_dim_vec.begin(), batch_size); - framework::DDim out_dims = framework::make_ddim(in_true_dim_vec); + framework::DDim out_dims = framework::make_ddim(in_dim_vec); out->Resize(out_dims); + out->mutable_data(place, data_type); auto *out_lod = out->mutable_lod(); diff --git a/paddle/fluid/operators/squeeze_op.cc b/paddle/fluid/operators/squeeze_op.cc index 07555e98c3118b47153eb8dc84ebc74d4408c2a6..1b656ea138ea9623e4ee45313342a3d1bd9e2b6f 100644 --- a/paddle/fluid/operators/squeeze_op.cc +++ b/paddle/fluid/operators/squeeze_op.cc @@ -50,14 +50,14 @@ class SqueezeOpInferShape : public framework::InferShapeBase { static framework::DDim GetOutputShape(const std::vector squeeze_dims, const framework::DDim &in_dims) { - int num_squeeze_dims = squeeze_dims.size(); + int num_squeeze_dims = static_cast(squeeze_dims.size()); int cnt_squeezed_dims = 0; bool should_squeeze[9] = {false}; // Determines number of dimensions of output tensor after squeeze. // Mark and count the dimensions need to be squeezed if (num_squeeze_dims == 0) { - for (int idx = 0; idx < in_dims.size(); ++idx) { + for (int idx = 0; idx < static_cast(in_dims.size()); ++idx) { if (in_dims[idx] == 1) { should_squeeze[idx] = true; ++cnt_squeezed_dims; @@ -84,7 +84,8 @@ class SqueezeOpInferShape : public framework::InferShapeBase { // Make output dimensions std::vector output_shape(in_dims.size() - cnt_squeezed_dims, 0); - for (int in_idx = 0, out_idx = 0; in_idx < in_dims.size(); ++in_idx) { + for (int in_idx = 0, out_idx = 0; in_idx < static_cast(in_dims.size()); + ++in_idx) { if (!should_squeeze[in_idx]) { output_shape[out_idx++] = in_dims[in_idx]; } @@ -151,6 +152,8 @@ class SqueezeOpMaker : public framework::OpProtoAndCheckerMaker { Case 2: Given X.shape = (1, 3, 1, 5) + and + axes = [] we get: Out.shape = (3, 5) )DOC"); diff --git a/python/paddle/fluid/tests/test_mnist_if_else_op.py b/python/paddle/fluid/tests/test_if_else_op.py similarity index 66% rename from python/paddle/fluid/tests/test_mnist_if_else_op.py rename to python/paddle/fluid/tests/test_if_else_op.py index d34f52db5ffc889f17513d034ad2c99f696b0cdf..1b58925599de62510ea9048f5210bb0b7e49f933 100644 --- a/python/paddle/fluid/tests/test_mnist_if_else_op.py +++ b/python/paddle/fluid/tests/test_if_else_op.py @@ -14,10 +14,11 @@ import paddle import paddle.fluid.layers as layers -from paddle.fluid.framework import Program, program_guard, default_main_program, default_startup_program +from paddle.fluid.framework import Program, program_guard from paddle.fluid.executor import Executor from paddle.fluid.optimizer import MomentumOptimizer import paddle.fluid.core as core +import paddle.fluid as fluid import unittest import numpy as np @@ -31,14 +32,13 @@ class TestMNISTIfElseOp(unittest.TestCase): label = layers.data(name='y', shape=[1], dtype='int64') - limit = layers.fill_constant_batch_size_like( - input=label, dtype='int64', shape=[1], value=5.0) + limit = layers.fill_constant(shape=[1], dtype='int64', value=5) cond = layers.less_than(x=label, y=limit) true_image, false_image = layers.split_lod_tensor( input=image, mask=cond) true_out = layers.create_tensor(dtype='float32') - true_cond = layers.ConditionalBlock([true_image]) + true_cond = layers.ConditionalBlock([cond]) with true_cond.block(): hidden = layers.fc(input=true_image, size=100, act='tanh') @@ -46,7 +46,7 @@ class TestMNISTIfElseOp(unittest.TestCase): layers.assign(input=prob, output=true_out) false_out = layers.create_tensor(dtype='float32') - false_cond = layers.ConditionalBlock([false_image]) + false_cond = layers.ConditionalBlock([cond]) with false_cond.block(): hidden = layers.fc(input=false_image, size=200, act='tanh') @@ -64,7 +64,7 @@ class TestMNISTIfElseOp(unittest.TestCase): train_reader = paddle.batch( paddle.reader.shuffle( paddle.dataset.mnist.train(), buf_size=8192), - batch_size=200) + batch_size=10) place = core.CPUPlace() exe = Executor(place) @@ -94,8 +94,7 @@ class TestMNISTIfElseOp(unittest.TestCase): label = layers.data(name='y', shape=[1], dtype='int64') - limit = layers.fill_constant_batch_size_like( - input=label, dtype='int64', shape=[1], value=5.0) + limit = layers.fill_constant(shape=[1], dtype='int64', value=5) cond = layers.less_than(x=label, y=limit) ie = layers.IfElse(cond) @@ -125,7 +124,7 @@ class TestMNISTIfElseOp(unittest.TestCase): place = core.CPUPlace() exe = Executor(place) - exe.run(kwargs['startup_program']) + exe.run(startup_prog) PASS_NUM = 100 for pass_id in range(PASS_NUM): for data in train_reader(): @@ -133,7 +132,7 @@ class TestMNISTIfElseOp(unittest.TestCase): y_data = np.array(map(lambda x: x[1], data)).astype("int64") y_data = y_data.reshape((y_data.shape[0], 1)) - outs = exe.run(kwargs['main_program'], + outs = exe.run(prog, feed={'x': x_data, 'y': y_data}, fetch_list=[avg_loss]) @@ -143,6 +142,67 @@ class TestMNISTIfElseOp(unittest.TestCase): self.assertFalse(True) +class TestIfElse(unittest.TestCase): + def set_test_case(self): + # condiction is: self.data < self.cond_value + self.cond_value = 0.5 + self.data = np.random.rand(25, 1).astype(np.float32) + + def compare_ifelse_op_and_numpy(self, place): + self.set_test_case() + + prog = Program() + startup_prog = Program() + with program_guard(prog, startup_prog): + src = layers.data(name='data', shape=[1], dtype='float32') + cond = layers.fill_constant( + [1], dtype='float32', value=self.cond_value) + ifcond = layers.less_than(x=src, y=cond) + ie = layers.IfElse(ifcond) + with ie.true_block(): + true_target = ie.input(src) + ie.output(true_target) + + with ie.false_block(): + false_target = ie.input(src) + ie.output(false_target) + if_out = ie() + out = layers.reduce_sum(if_out) + + exe = fluid.Executor(place) + exe.run(fluid.default_startup_program()) + fetch_list = [out] + o1, = exe.run(fluid.default_main_program(), + feed={'data': self.data}, + fetch_list=[out]) + o2 = np.sum(self.data) + self.assertTrue( + np.allclose( + o1, o2, atol=1e-8), + "IfElse result : " + str(o1) + "\n Numpy result :" + str(o2)) + + def test_cpu(self): + self.compare_ifelse_op_and_numpy(fluid.CPUPlace()) + + def test_cuda(self): + if not core.is_compiled_with_cuda(): + return + self.compare_ifelse_op_and_numpy(fluid.CUDAPlace(0)) + + +class TestIfElseTrueBranch(TestIfElse): + def set_test_case(self): + # condiction is: self.data < self.cond_value + self.cond_value = 10. + self.data = np.random.rand(25, 1).astype(np.float32) + + +class TestIfElseFalseBranch(TestIfElse): + def set_test_case(self): + # condiction is: self.data < self.cond_value + self.cond_value = -10. + self.data = np.random.rand(25, 1).astype(np.float32) + + if __name__ == '__main__': - # temp disable if else unittest since it could be buggy. - exit(0) + unittest.main() diff --git a/python/setup.py.in b/python/setup.py.in index 5506443733650631fe045be3f701a41519352e8d..a0cb39070bf7a89e3ea4cb1d31f54f919d6ff74e 100644 --- a/python/setup.py.in +++ b/python/setup.py.in @@ -1,16 +1,13 @@ from setuptools import setup, Distribution, Extension import subprocess -import shutil import os +import re +import shutil class BinaryDistribution(Distribution): def has_ext_modules(foo): return True -MAJOR = 0 -MINOR = 14 -PATCH = 0 RC = 0 -ISTAGED = False @@ -22,14 +19,47 @@ def git_commit(): git_commit = 'Unknown' return git_commit +def _get_version_detail(idx): + assert idx < 3, "vesion info consists of %(major)d.%(minor)d.%(patch)d, \ + so detail index must less than 3" + + if re.match('@TAG_VERSION_REGEX@', '@PADDLE_VERSION@'): + version_details = '@PADDLE_VERSION@'.split('.') + + if len(version_details) == 3: + return version_details[idx] + + return 0 + +def get_major(): + return int(_get_version_detail(0)) + +def get_minor(): + return int(_get_version_detail(1)) + +def get_patch(): + return str(_get_version_detail(2)) + +def is_taged(): + try: + cmd = ['git', 'describe', '--exact-match', '--tags'] + git_tag = subprocess.Popen(cmd, stdout = subprocess.PIPE).communicate()[0].strip() + except: + return False + + if git_tag.replace('v', '') == '@PADDLE_VERSION@': + return True + else: + return False + def write_version_py(filename='paddle/version.py'): cnt = ''' # THIS FILE IS GENERATED FROM PADDLEPADDLE SETUP.PY # -full_version = '%(major)d.%(minor)d.%(patch)d' +full_version = '%(major)d.%(minor)d.%(patch)s' major = '%(major)d' minor = '%(minor)d' -patch = '%(patch)d' +patch = '%(patch)s' rc = '%(rc)d' istaged = %(istaged)s commit = '%(commit)s' @@ -51,13 +81,13 @@ def mkl(): commit = git_commit() with open(filename, 'w') as f: f.write(cnt % { - 'major': MAJOR, - 'minor': MINOR, - 'patch': PATCH, + 'major': get_major(), + 'minor': get_minor(), + 'patch': get_patch(), 'rc': RC, 'version': '${PADDLE_VERSION}', 'commit': commit, - 'istaged': ISTAGED, + 'istaged': is_taged(), 'with_mkl': '@WITH_MKL@'}) write_version_py(filename='@PADDLE_BINARY_DIR@/python/paddle/version.py')