未验证 提交 41b8cf0b 编写于 作者: Z Zhen Wang 提交者: GitHub

Merge pull request #16162 from wzzju/fix_nan_static_quant

Fix NaN bugs for static quantization strategy (mutil-cards train). 
......@@ -16,6 +16,7 @@
#include <algorithm>
#include <deque>
#include <iterator>
#include <memory>
#include <stack>
#include <string>
#include <unordered_map>
......@@ -263,6 +264,10 @@ void InplacePass::WithdrawModify(const NodeSwapQueue& nodes,
void InplacePass::TryInplaceOpInputOutput(ir::Node* op,
ir::Graph* graph) const {
VLOG(4) << "Try to inplace op " << op->Name();
// FIXME(liuwei1031): Graph is not aware of the existence of BlockDescs and
// ProgramDescs.
// The operations related to BlockDesc or ProgramDesc should perform on Graph
// or Node directly!
PADDLE_ENFORCE(op->Op() != nullptr && op->Op()->Block() != nullptr,
"op_desc is nullptr");
// some pre-requirments need to meet if the op want to inplaced.
......
......@@ -24,6 +24,7 @@
#include <sstream>
#include <string>
#include <type_traits>
#include <unordered_set>
#include <vector>
#include "gflags/gflags.h"
#include "paddle/fluid/framework/data_type.h"
......@@ -191,6 +192,10 @@ void MemoryOptimizePass::SubGraphOptimize(OpDesc* op_desc) const {
// immediately to make the subblock variable reuse strategy take
// effect. Because it is a single op in graph. No need to
// update the ir nodes.
// FIXME(liuwei1031): Graph is not aware of the existence of
// BlockDescs and ProgramDescs.
// The operations related to BlockDesc or ProgramDesc should perform
// on Graph or Node directly!
sub_op_desc->Rename(var->Name(), cache->Name());
if (sub_op_desc->Block() != nullptr &&
sub_op_desc->Block()->HasVar(var->Name())) {
......
......@@ -200,7 +200,12 @@ class Graph {
// WARN: After a series of passes, the current graph can be quite
// different from OriginProgram. Caller shouldn't assume much from
// the returned OriginProgram.
const ProgramDesc &OriginProgram() const { return program_; }
const ProgramDesc &OriginProgram() const {
LOG(WARNING) << "WARN: After a series of passes, the current graph can be "
"quite different from OriginProgram. So, please avoid "
"using the `OriginProgram()` method!";
return program_;
}
// This method takes ownership of `node`.
ir::Node *AddNode(ir::Node *node) {
......
......@@ -224,12 +224,10 @@ class CompiledProgram(object):
self._build_strategy.trainers_endpoints = tps
self._persistable_vars = []
for block_id in range(self._program_desc.num_blocks()):
bdesc = self._program_desc.block(block_id)
self._persistable_vars.extend([
cpt.to_text(v.name()) for v in bdesc.all_vars()
if v.persistable() and v.type() != core.VarDesc.VarType.RAW
])
for node in self._graph.nodes():
if node.is_var() and node.var() is not None and node.var().persistable() and \
node.var().type() != core.VarDesc.VarType.RAW:
self._persistable_vars.append(cpt.to_text(node.name()))
places = list(map(_place_obj, self._places))
......
......@@ -12,6 +12,7 @@
# see the license for the specific language governing permissions and
# limitations under the license.
import os
import unittest
import random
import numpy as np
......@@ -25,6 +26,9 @@ from paddle.fluid.contrib.slim.quantization import ConvertToInt8Pass
from paddle.fluid.contrib.slim.quantization import TransformForMobilePass
from paddle.fluid import core
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
os.environ["CPU_NUM"] = "1"
def linear_fc(num):
data = fluid.layers.data(name='image', shape=[1, 32, 32], dtype='float32')
......@@ -249,7 +253,11 @@ class TestQuantizationFreezePass(unittest.TestCase):
marked_nodes.add(op)
test_graph.draw('.', 'test' + dev_name + quant_type, marked_nodes)
quantized_main_program = main_graph.to_program()
build_strategy = fluid.BuildStrategy()
build_strategy.memory_optimize = False
build_strategy.enable_inplace = False
binary = fluid.CompiledProgram(main_graph.graph).with_data_parallel(
loss_name=loss.name, build_strategy=build_strategy)
quantized_test_program = test_graph.to_program()
iters = 5
batch_size = 8
......@@ -264,7 +272,7 @@ class TestQuantizationFreezePass(unittest.TestCase):
with fluid.scope_guard(scope):
for _ in range(iters):
data = next(train_reader())
loss_v = exe.run(program=quantized_main_program,
loss_v = exe.run(binary,
feed=feeder.feed(data),
fetch_list=[loss])
if not for_ci:
......
......@@ -2245,10 +2245,10 @@ class IrGraph(object):
Notes: the `graph` cannot contain a circle.
Returns:
set(IrNode): nodes in topology order.
list(IrNode): nodes in topology order.
"""
ordered_nodes = core.topology_sort(self.graph)
return {IrNode(n) for n in ordered_nodes}
return [IrNode(n) for n in ordered_nodes]
def build_adjacency_list(self):
"""
......@@ -2316,7 +2316,7 @@ class IrGraph(object):
"""
Convert the graph into a Program.
Notes: When the graph includes backward operator nodes, the
WARN: When the graph includes backward operator nodes, the
conversion process may be failed. Usually, this function is
only used to convert a test graph.
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册