Unverified · Commit 41b8cf0b authored by Zhen Wang, committed by GitHub

Merge pull request #16162 from wzzju/fix_nan_static_quant

Fix NaN bugs for the static quantization strategy (multi-card training).
@@ -16,6 +16,7 @@
 #include <algorithm>
 #include <deque>
 #include <iterator>
+#include <memory>
 #include <stack>
 #include <string>
 #include <unordered_map>
@@ -263,6 +264,10 @@ void InplacePass::WithdrawModify(const NodeSwapQueue& nodes,
 void InplacePass::TryInplaceOpInputOutput(ir::Node* op,
                                           ir::Graph* graph) const {
   VLOG(4) << "Try to inplace op " << op->Name();
+  // FIXME(liuwei1031): Graph is not aware of the existence of BlockDescs and
+  // ProgramDescs.
+  // The operations related to BlockDesc or ProgramDesc should perform on Graph
+  // or Node directly!
   PADDLE_ENFORCE(op->Op() != nullptr && op->Op()->Block() != nullptr,
                  "op_desc is nullptr");
   // Some prerequisites need to be met if the op is to be inplaced.
...
@@ -24,6 +24,7 @@
 #include <sstream>
 #include <string>
 #include <type_traits>
+#include <unordered_set>
 #include <vector>
 #include "gflags/gflags.h"
 #include "paddle/fluid/framework/data_type.h"
@@ -191,6 +192,10 @@ void MemoryOptimizePass::SubGraphOptimize(OpDesc* op_desc) const {
           // immediately to make the subblock variable reuse strategy take
           // effect. Because it is a single op in graph. No need to
           // update the ir nodes.
+          // FIXME(liuwei1031): Graph is not aware of the existence of
+          // BlockDescs and ProgramDescs.
+          // The operations related to BlockDesc or ProgramDesc should perform
+          // on Graph or Node directly!
           sub_op_desc->Rename(var->Name(), cache->Name());
           if (sub_op_desc->Block() != nullptr &&
               sub_op_desc->Block()->HasVar(var->Name())) {
...
@@ -200,7 +200,12 @@ class Graph {
   // WARN: After a series of passes, the current graph can be quite
   // different from OriginProgram. Caller shouldn't assume much from
   // the returned OriginProgram.
-  const ProgramDesc &OriginProgram() const { return program_; }
+  const ProgramDesc &OriginProgram() const {
+    LOG(WARNING) << "WARN: After a series of passes, the current graph can be "
+                    "quite different from OriginProgram. So, please avoid "
+                    "using the `OriginProgram()` method!";
+    return program_;
+  }

   // This method takes ownership of `node`.
   ir::Node *AddNode(ir::Node *node) {
...
@@ -224,12 +224,10 @@ class CompiledProgram(object):
             self._build_strategy.trainers_endpoints = tps

         self._persistable_vars = []
-        for block_id in range(self._program_desc.num_blocks()):
-            bdesc = self._program_desc.block(block_id)
-            self._persistable_vars.extend([
-                cpt.to_text(v.name()) for v in bdesc.all_vars()
-                if v.persistable() and v.type() != core.VarDesc.VarType.RAW
-            ])
+        for node in self._graph.nodes():
+            if node.is_var() and node.var() is not None and node.var().persistable() and \
+                    node.var().type() != core.VarDesc.VarType.RAW:
+                self._persistable_vars.append(cpt.to_text(node.name()))

         places = list(map(_place_obj, self._places))
...
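The change above switches persistable-variable collection from walking the blocks of the original ProgramDesc to walking the graph's nodes, presumably so that persistable variables inserted by IR passes (such as the quantization transforms this PR is concerned with), which never appear in the original blocks, are also registered. A minimal sketch of the same collection logic, assuming the PaddlePaddle 1.x `fluid` API; `collect_persistable_vars` is an illustrative helper, not part of this PR:

```python
import paddle.fluid as fluid
from paddle.fluid import core, compat as cpt

def collect_persistable_vars(graph):
    # Walk every node of a core.Graph, as the new CompiledProgram code does,
    # instead of iterating the blocks of the original ProgramDesc.
    names = set()
    for node in graph.nodes():
        if node.is_var() and node.var() is not None \
                and node.var().persistable() \
                and node.var().type() != core.VarDesc.VarType.RAW:
            names.add(cpt.to_text(node.name()))
    return names

main = fluid.Program()
with fluid.program_guard(main):
    x = fluid.layers.data(name='x', shape=[4], dtype='float32')
    y = fluid.layers.fc(input=x, size=2)

# The fc weight and bias are persistable, so their names should be printed.
print(collect_persistable_vars(core.Graph(main.desc)))
```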
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import os
 import unittest
 import random
 import numpy as np
@@ -25,6 +26,9 @@ from paddle.fluid.contrib.slim.quantization import ConvertToInt8Pass
 from paddle.fluid.contrib.slim.quantization import TransformForMobilePass
 from paddle.fluid import core
+
+os.environ["CUDA_VISIBLE_DEVICES"] = "0"
+os.environ["CPU_NUM"] = "1"


 def linear_fc(num):
     data = fluid.layers.data(name='image', shape=[1, 32, 32], dtype='float32')
@@ -249,7 +253,11 @@ class TestQuantizationFreezePass(unittest.TestCase):
                 marked_nodes.add(op)
             test_graph.draw('.', 'test' + dev_name + quant_type, marked_nodes)

-        quantized_main_program = main_graph.to_program()
+        build_strategy = fluid.BuildStrategy()
+        build_strategy.memory_optimize = False
+        build_strategy.enable_inplace = False
+        binary = fluid.CompiledProgram(main_graph.graph).with_data_parallel(
+            loss_name=loss.name, build_strategy=build_strategy)
         quantized_test_program = test_graph.to_program()
         iters = 5
         batch_size = 8
@@ -264,7 +272,7 @@ class TestQuantizationFreezePass(unittest.TestCase):
         with fluid.scope_guard(scope):
             for _ in range(iters):
                 data = next(train_reader())
-                loss_v = exe.run(program=quantized_main_program,
+                loss_v = exe.run(binary,
                                  feed=feeder.feed(data),
                                  fetch_list=[loss])
                 if not for_ci:
...
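With these changes the test trains through a `fluid.CompiledProgram` built from the transformed graph, with the memory-optimize and inplace passes explicitly disabled so they cannot rewrite or reuse the variables the quantization passes inserted. A standalone sketch of that execution pattern under the fluid 1.x API, with a plain fc network standing in for the quantized graph (the network and names here are illustrative, not from the PR):

```python
import os
import numpy as np
import paddle.fluid as fluid

os.environ["CPU_NUM"] = "1"  # with_data_parallel on CPU reads this

main = fluid.Program()
startup = fluid.Program()
with fluid.program_guard(main, startup):
    img = fluid.layers.data(name='image', shape=[1, 32, 32], dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    pred = fluid.layers.fc(input=img, size=10, act='softmax')
    loss = fluid.layers.mean(
        fluid.layers.cross_entropy(input=pred, label=label))

exe = fluid.Executor(fluid.CPUPlace())
exe.run(startup)

build_strategy = fluid.BuildStrategy()
build_strategy.memory_optimize = False  # keep pass-inserted vars intact
build_strategy.enable_inplace = False   # no in-place rewriting of them
binary = fluid.CompiledProgram(main).with_data_parallel(
    loss_name=loss.name, build_strategy=build_strategy)

feed = {'image': np.random.random((8, 1, 32, 32)).astype('float32'),
        'label': np.random.randint(0, 10, (8, 1)).astype('int64')}
loss_v = exe.run(binary, feed=feed, fetch_list=[loss])
print(loss_v)
```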
@@ -2245,10 +2245,10 @@ class IrGraph(object):
         Notes: the `graph` cannot contain a cycle.

         Returns:
-            set(IrNode): nodes in topology order.
+            list(IrNode): nodes in topology order.
         """
         ordered_nodes = core.topology_sort(self.graph)
-        return {IrNode(n) for n in ordered_nodes}
+        return [IrNode(n) for n in ordered_nodes]

     def build_adjacency_list(self):
         """
@@ -2316,7 +2316,7 @@
         """
         Convert the graph into a Program.

-        Notes: When the graph includes backward operator nodes, the
+        WARN: When the graph includes backward operator nodes, the
         conversion process may fail. Usually, this function is
         only used to convert a test graph.
...
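Returning a `list` instead of a `set` from `topology_sort` makes traversal deterministic: a Python set has no defined iteration order, so passes walking the "sorted" nodes could visit them in a different order on every run. A small sketch of iterating operators in topological order, assuming the fluid 1.x `IrGraph` API:

```python
import paddle.fluid as fluid
from paddle.fluid import core
from paddle.fluid.framework import IrGraph

main = fluid.Program()
with fluid.program_guard(main):
    x = fluid.layers.data(name='x', shape=[4], dtype='float32')
    y = fluid.layers.fc(input=x, size=2)

graph = IrGraph(core.Graph(main.desc), for_test=True)
# topology_sort() now returns list(IrNode), so this order is reproducible.
for node in graph.topology_sort():
    if node.is_op():
        print(node.name())  # e.g. mul, elementwise_add
```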