Quantize transpiler

1d0353f1 · Dang Qingqing · 6e2e0ec8 · 1d0353f1 · 1d0353f1 · 1d0353f1
4 changed files
--- a/doc/fluid/design/quantization/fixed_point_quantization.md
+++ b/doc/fluid/design/quantization/fixed_point_quantization.md
@@ -13,7 +13,7 @@ $$ r = min(max(x, a), b)$$
 $$ s = \frac{b - a}{n - 1} $$
 $$ q = \left \lfloor \frac{r - a}{s} \right \rceil $$

-where, $x$ is the float value to be quantized, $[a, b]$ is the quantization range, $a$ is the minimum value and $b$ is the maximal value. $\left \lfloor \right \rceil$  denotes rounding to the nearest integer. If the quantization level is $k$, $n$ is $2^k$, for example, $k$ is 8 and $n$ is 256. $q$ is the quantized integer. 
+where $x$ is the float value to be quantized, $[a, b]$ is the quantization range, $a$ is the minimum value and $b$ is the maximum value. $\left \lfloor \right \rceil$ denotes rounding to the nearest integer. If the quantization level is $k$, then $n$ is $2^{k - 1}$; for example, if $k$ is 8, then $n$ is 128. $q$ is the quantized integer.


 The quantization we applied is parameterized by the number of quantization levels and maximum absolute value:
@@ -21,7 +21,7 @@ The quantization we applied is parameterized by the number of quantization level
 $$ M  = max(abs(x))  $$
 $$ q = \left \lfloor \frac{x}{M} * (n - 1) \right \rceil $$

-where, $x$ is the float value to be quantized, $M$ is maximum absolute value. $\left \lfloor \right \rceil$ denotes rounding to the nearest integer.  For 8 bit quantization, $n=2^{8}=256$. $q$ is the quantized integer. 
+where $x$ is the float value to be quantized and $M$ is the maximum absolute value. $\left \lfloor \right \rceil$ denotes rounding to the nearest integer. For 8-bit quantization, $n=2^{8 - 1}=128$. $q$ is the quantized integer.


 Whether *min-max* quantization or *max-abs* quantization is used, both can also be represented as:

--- a/python/paddle/fluid/__init__.py
+++ b/python/paddle/fluid/__init__.py
@@ -47,7 +47,7 @@ from .param_attr import ParamAttr, WeightNormParamAttr
 from .data_feeder import DataFeeder
 from .core import LoDTensor, LoDTensorArray, CPUPlace, CUDAPlace, CUDAPinnedPlace, Scope
 from .transpiler import DistributeTranspiler, InferenceTranspiler, \
-    memory_optimize, release_memory, DistributeTranspilerConfig
+    memory_optimize, release_memory, DistributeTranspilerConfig, QuantizeTranspiler
 from .lod_tensor import create_lod_tensor, create_random_int_lodtensor
 from . import clip
 from . import profiler

--- a/python/paddle/fluid/tests/CMakeLists.txt
+++ b/python/paddle/fluid/tests/CMakeLists.txt
@@ -8,3 +8,4 @@ endforeach()
 add_subdirectory(unittests)
 add_subdirectory(book)
 add_subdirectory(book_memory_optimization)
+add_subdirectory(transpiler)
--- a/python/paddle/fluid/transpiler/__init__.py
+++ b/python/paddle/fluid/transpiler/__init__.py
@@ -16,10 +16,12 @@ from __future__ import print_function

 from .distribute_transpiler import DistributeTranspiler, DistributeTranspilerConfig
 from .inference_transpiler import InferenceTranspiler
+from .quantize_transpiler import QuantizeTranspiler
 from .memory_optimization_transpiler import memory_optimize, release_memory
 from .ps_dispatcher import HashName, RoundRobin

 __all__ = [
    "DistributeTranspiler", "InferenceTranspiler", "memory_optimize",
-    "release_memory", "HashName", "RoundRobin", "DistributeTranspilerConfig"
+    "release_memory", "HashName", "RoundRobin", "DistributeTranspilerConfig",
+    "QuantizeTranspiler"
 ]