Merge pull request #7384 from dzhwinter/feature/sync_wait

Feature/sync wait

Merge pull request #7384 from dzhwinter/feature/sync_wait
Feature/sync wait
95c0c126 · ranqiu92 · GitHub · 9867a379 · 92eb247f · 95c0c126
隐藏空白更改
内联并排

Showing 2 changed files with 14 additions and 3 deletions

paddle/framework/operator.cc +13 -2

python/paddle/v2/fluid/__init__.py +1 -1

未找到文件。
--- a/paddle/framework/operator.cc
+++ b/paddle/framework/operator.cc
@@ -11,6 +11,7 @@ distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
+#include <gflags/gflags.h>
 #include <glog/logging.h>

 #include <algorithm>
@@ -21,6 +22,10 @@ limitations under the License. */
 #include "paddle/framework/shape_inference.h"
 #include "paddle/framework/var_type.h"

+DEFINE_bool(op_sync, false,
+            "Default cuda is asynchronous device, set to True will "
+            "force op run in synchronous mode.");
+
 namespace paddle {
 namespace framework {

@@ -542,8 +547,14 @@ void OperatorWithKernel::Run(const Scope& scope,

  auto kernel_iter = kernels.find(expected_kernel_key);

-  kernel_iter->second->Compute(ExecutionContext(
-      *this, new_scope, *pool.Get(expected_kernel_key.place_)));
+  auto* new_dev_ctx = pool.Get(expected_kernel_key.place_);
+  kernel_iter->second->Compute(
+      ExecutionContext(*this, new_scope, *new_dev_ctx));
+
+  /*For profiling/benchmark only*/
+  if (FLAGS_op_sync) {
+    new_dev_ctx->Wait();
+  }
 }

 proto::DataType OperatorWithKernel::IndicateDataType(

--- a/python/paddle/v2/fluid/__init__.py
+++ b/python/paddle/v2/fluid/__init__.py
@@ -58,7 +58,7 @@ def __bootstrap__():

    read_env_flags = ['use_pinned_memory', 'check_nan_inf']
    if core.is_compile_gpu():
-        read_env_flags.append('fraction_of_gpu_memory_to_use')
+        read_env_flags += ['fraction_of_gpu_memory_to_use', 'op_sync']
    core.init_gflags([sys.argv[0]] +
                     ["--tryfromenv=" + ",".join(read_env_flags)])
    core.init_glog(sys.argv[0])