diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec index 3ef317bb7a1c25c5738342f34ae7994b0184a7de..dd172ff9c97814c089ddb2e5bf729880cf0c9cdb 100644 --- a/paddle/fluid/API.spec +++ b/paddle/fluid/API.spec @@ -336,6 +336,7 @@ paddle.fluid.contrib.BeamSearchDecoder.decode ArgSpec(args=['self'], varargs=Non paddle.fluid.contrib.BeamSearchDecoder.early_stop ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None) paddle.fluid.contrib.BeamSearchDecoder.read_array ArgSpec(args=['self', 'init', 'is_ids', 'is_scores'], varargs=None, keywords=None, defaults=(False, False)) paddle.fluid.contrib.BeamSearchDecoder.update_array ArgSpec(args=['self', 'array', 'value'], varargs=None, keywords=None, defaults=None) +paddle.fluid.contrib.memory_usage ArgSpec(args=['program', 'batch_size'], varargs=None, keywords=None, defaults=None) paddle.fluid.transpiler.DistributeTranspiler.__init__ ArgSpec(args=['self', 'config'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.transpiler.DistributeTranspiler.create_splited_vars ArgSpec(args=['self', 'source_var', 'block', 'tag'], varargs=None, keywords=None, defaults=None) paddle.fluid.transpiler.DistributeTranspiler.get_pserver_program ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None) diff --git a/python/paddle/fluid/contrib/__init__.py b/python/paddle/fluid/contrib/__init__.py index 12cd5d918e93181c6b7e328e6aee4ad941b0a0da..9de9e9504510baec9aefb47f91793c364450795a 100644 --- a/python/paddle/fluid/contrib/__init__.py +++ b/python/paddle/fluid/contrib/__init__.py @@ -14,5 +14,7 @@ import decoder from decoder import * +import memory_usage_calc +from memory_usage_calc import * -__all__ = decoder.__all__ +__all__ = decoder.__all__ + memory_usage_calc.__all__ diff --git a/python/paddle/fluid/contrib/memory_usage_calc.py b/python/paddle/fluid/contrib/memory_usage_calc.py new file mode 100644 index 0000000000000000000000000000000000000000..5da846edb63c28efd791fdfac4046cfa56c24181 --- /dev/null 
def memory_usage(program, batch_size):
    """
    Estimate the memory usage of a program for a given input batch size.

    For every variable in the program's global block, the element count
    (each ``-1`` dimension is replaced by ``batch_size``) is multiplied by
    the per-element size found in ``dtype_to_size``. The sum is converted
    to B / KB / MB and then padded by 5% (lower bound) and 10% (upper
    bound).

    Args:
        program(Program): The current Program.
        batch_size(int): The current input data batch_size.

    Returns:
        tuple: ``(min_total_memory, max_total_memory, unit_str)``
            min_total_memory(float): the estimated memory usage lower bound.
            max_total_memory(float): the estimated memory usage upper bound.
            unit_str(string): the unit of both bounds ("B", "KB" or "MB").

    Raises:
        TypeError: if ``program`` is not a ``Program``.
        ValueError: if ``batch_size`` is not positive.
        KeyError: if a variable's dtype has no entry in ``dtype_to_size``.

    Examples:

        >>> import paddle.fluid as fluid
        >>> lower_usage, upper_usage, unit = fluid.contrib.memory_usage(
        ...     fluid.default_main_program(), batch_size=10)
        >>> print("memory usage is about %.3f - %.3f %s" %
        ...       (lower_usage, upper_usage, unit))

    """

    # Parameters check
    if not isinstance(program, Program):
        raise TypeError(
            "Calculating Memory Usage requires Program as its Parameter. "
            "But you passed in %s" % type(program))
    if batch_size <= 0:
        raise ValueError("The batch size need to be positive.")

    # Walk every variable of the first (global) block and accumulate bytes.
    # total_memory starts as a float so the unit conversion below is always
    # a true division.
    total_memory = 0.0
    # .values() is available on both Python 2 and Python 3 mappings.
    for var in program.global_block().vars.values():
        data_count = 1
        for dim in var.shape:
            # A dimension of -1 stands for the batch dimension.
            data_count *= batch_size if dim == -1 else dim
        var_memory = data_count * dtype_to_size[var.dtype]
        if DEBUG:
            # Single-argument parenthesised print parses identically as a
            # Python 2 print statement and a Python 3 function call.
            print("%s memory usage: %d" % (var.name, var_memory))
        total_memory += var_memory
    if DEBUG:
        print("total memory usage: %.2f" % (total_memory))

    # Convert to an appropriate unit (at most two steps: B -> KB -> MB)
    unit_str = "B"
    if total_memory > 1024:
        total_memory /= 1024
        unit_str = "KB"
        if total_memory > 1024:
            total_memory /= 1024
            unit_str = "MB"

    # Append extra memory consumption (5% - 10%)
    min_total_memory = total_memory * 1.05
    max_total_memory = total_memory * 1.1

    return min_total_memory, max_total_memory, unit_str
def train_simulator(test_batch_size=10):
    """Build a tiny network and estimate its memory usage.

    The network is one ``fc`` layer (13 inputs, 1 output) with a
    squared-error cost, its mean, and an SGD optimizer, all added to the
    current default main program. ``fluid.contrib.memory_usage`` is then
    called on that program.

    Args:
        test_batch_size(int): batch size passed to ``memory_usage``.

    Returns:
        tuple: ``(lower_usage, upper_usage, unit)`` exactly as returned by
            ``fluid.contrib.memory_usage``.

    Raises:
        ValueError: if ``test_batch_size`` is not positive.
    """
    if test_batch_size <= 0:
        raise ValueError("batch_size should be a positive integeral value, "
                         "but got batch_size={}".format(test_batch_size))

    x = fluid.layers.data(name='x', shape=[13], dtype='float32')
    y_predict = fluid.layers.fc(input=x, size=1, act=None)
    y = fluid.layers.data(name='y', shape=[1], dtype='float32')

    cost = fluid.layers.square_error_cost(input=y_predict, label=y)
    avg_cost = fluid.layers.mean(cost)

    sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001)
    sgd_optimizer.minimize(avg_cost)

    # Calculate memory usage in current network config
    lower_usage, upper_usage, unit = fluid.contrib.memory_usage(
        fluid.default_main_program(), batch_size=test_batch_size)

    print("memory usage is about %.3f - %.3f %s" %
          (lower_usage, upper_usage, unit))

    # Hand the estimate back so callers can assert on it.
    return lower_usage, upper_usage, unit


class TestMemoryUsage(unittest.TestCase):
    """Checks ``fluid.contrib.memory_usage`` at three batch sizes."""

    def check_usage(self, batch_size):
        """Run the simulator in a fresh program/scope and check the result.

        NOTE(review): the test names suggest a particular unit per batch
        size, but the exact unit depends on every variable the framework
        adds to the program, so only invariants of the result are asserted.
        """
        with self.program_scope_guard():
            lower_usage, upper_usage, unit = train_simulator(
                test_batch_size=batch_size)
        self.assertIn(unit, ("B", "KB", "MB"))
        self.assertGreater(lower_usage, 0.0)
        # The bounds are the same total scaled by 1.05 and 1.1.
        self.assertLess(lower_usage, upper_usage)

    def test_with_unit_B(self):
        self.check_usage(10)

    def test_with_unit_KB(self):
        self.check_usage(1000)

    def test_with_unit_MB(self):
        self.check_usage(100000)

    @contextlib.contextmanager
    def program_scope_guard(self):
        """Context manager giving each test its own programs and scope."""
        prog = fluid.Program()
        startup_prog = fluid.Program()
        scope = fluid.core.Scope()
        with fluid.scope_guard(scope):
            with fluid.program_guard(prog, startup_prog):
                yield


if __name__ == '__main__':
    unittest.main()