From 1d0353f16242e981942f08ba749e12e71d1c1ab0 Mon Sep 17 00:00:00 2001
From: Dang Qingqing <dangqingqing@baidu.com>
Date: Wed, 5 Sep 2018 10:03:11 +0800
Subject: [PATCH] Quantize transpiler

---
 doc/fluid/design/quantization/fixed_point_quantization.md | 4 ++--
 python/paddle/fluid/__init__.py                           | 2 +-
 python/paddle/fluid/tests/CMakeLists.txt                  | 1 +
 python/paddle/fluid/transpiler/__init__.py                | 4 +++-
 4 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/doc/fluid/design/quantization/fixed_point_quantization.md b/doc/fluid/design/quantization/fixed_point_quantization.md
index 085352fc56..a098e7b6d9 100644
--- a/doc/fluid/design/quantization/fixed_point_quantization.md
+++ b/doc/fluid/design/quantization/fixed_point_quantization.md
@@ -13,7 +13,7 @@ $$ r = min(max(x, a), b)$$
 $$ s = \frac{b - a}{n - 1} $$
 $$ q = \left \lfloor \frac{r - a}{s} \right \rceil $$
 
-where, $x$ is the float value to be quantized, $[a, b]$ is the quantization range, $a$ is the minimum value and $b$ is the maximal value. $\left \lfloor \right \rceil$  denotes rounding to the nearest integer. If the quantization level is $k$, $n$ is $2^k$, for example, $k$ is 8 and $n$ is 256. $q$ is the quantized integer. 
+where $x$ is the float value to be quantized, $[a, b]$ is the quantization range, $a$ is the minimum value and $b$ is the maximum value. $\left \lfloor \right \rceil$ denotes rounding to the nearest integer. If the quantization level is $k$, then $n$ is $2^{k - 1}$; for example, if $k$ is 8, $n$ is 128. $q$ is the quantized integer.
 
 
 The quantization we applied is parameterized by the number of quantization levels and maximum absolute value:
@@ -21,7 +21,7 @@ The quantization we applied is parameterized by the number of quantization level
 $$ M  = max(abs(x))  $$
 $$ q = \left \lfloor \frac{x}{M} * (n - 1) \right \rceil $$
 
-where, $x$ is the float value to be quantized, $M$ is maximum absolute value. $\left \lfloor \right \rceil$ denotes rounding to the nearest integer.  For 8 bit quantization, $n=2^{8}=256$. $q$ is the quantized integer. 
+where $x$ is the float value to be quantized and $M$ is the maximum absolute value. $\left \lfloor \right \rceil$ denotes rounding to the nearest integer. For 8-bit quantization, $n=2^{8 - 1}=128$. $q$ is the quantized integer.
 
 
 Wether the *min-max* quantization or *max-abs* quantization, they also can be represent:
diff --git a/python/paddle/fluid/__init__.py b/python/paddle/fluid/__init__.py
index 9aac3c7fc1..c2cf7dd84e 100644
--- a/python/paddle/fluid/__init__.py
+++ b/python/paddle/fluid/__init__.py
@@ -47,7 +47,7 @@ from .param_attr import ParamAttr, WeightNormParamAttr
 from .data_feeder import DataFeeder
 from .core import LoDTensor, LoDTensorArray, CPUPlace, CUDAPlace, CUDAPinnedPlace, Scope
 from .transpiler import DistributeTranspiler, InferenceTranspiler, \
-    memory_optimize, release_memory, DistributeTranspilerConfig
+    memory_optimize, release_memory, DistributeTranspilerConfig, QuantizeTranspiler
 from .lod_tensor import create_lod_tensor, create_random_int_lodtensor
 from . import clip
 from . import profiler
diff --git a/python/paddle/fluid/tests/CMakeLists.txt b/python/paddle/fluid/tests/CMakeLists.txt
index d24417bbac..32447dc11e 100644
--- a/python/paddle/fluid/tests/CMakeLists.txt
+++ b/python/paddle/fluid/tests/CMakeLists.txt
@@ -8,3 +8,4 @@ endforeach()
 add_subdirectory(unittests)
 add_subdirectory(book)
 add_subdirectory(book_memory_optimization)
+add_subdirectory(transpiler)
diff --git a/python/paddle/fluid/transpiler/__init__.py b/python/paddle/fluid/transpiler/__init__.py
index 8429e2fd7c..16199890d6 100644
--- a/python/paddle/fluid/transpiler/__init__.py
+++ b/python/paddle/fluid/transpiler/__init__.py
@@ -16,10 +16,12 @@ from __future__ import print_function
 
 from .distribute_transpiler import DistributeTranspiler, DistributeTranspilerConfig
 from .inference_transpiler import InferenceTranspiler
+from .quantize_transpiler import QuantizeTranspiler
 from .memory_optimization_transpiler import memory_optimize, release_memory
 from .ps_dispatcher import HashName, RoundRobin
 
 __all__ = [
     "DistributeTranspiler", "InferenceTranspiler", "memory_optimize",
-    "release_memory", "HashName", "RoundRobin", "DistributeTranspilerConfig"
+    "release_memory", "HashName", "RoundRobin", "DistributeTranspilerConfig",
+    "QuantizeTranspiler"
 ]
-- 
GitLab