Merge remote-tracking branch 'origin/develop' into feature/TD-10501

92605701 · dapan1121 · ec47eea8 · b5c96535 · 92605701 · 92605701
41 changed file
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -201,7 +201,7 @@ pipeline {
  stages {
      stage('pre_build'){
          agent{label 'master'}
-          when {
+          when{
                changeRequest()
          }
          steps {
@@ -255,6 +255,7 @@ pipeline {
               expression{
                return skipbuild.trim() == '2'
              }
+              not{ expression { env.CHANGE_BRANCH =~ /docs\// }}
            }
          }
      parallel {

--- a/deps/lz4/inc/lz4.h
+++ b/deps/lz4/inc/lz4.h
 /*
 *  LZ4 - Fast LZ compression algorithm
 *  Header File
- *  Copyright (C) 2011-2017, Yann Collet.
+ *  Copyright (C) 2011-present, Yann Collet.

   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)

@@ -46,24 +46,31 @@ extern "C" {
 /**
  Introduction

-  LZ4 is lossless compression algorithm, providing compression speed at 400 MB/s per core,
+  LZ4 is lossless compression algorithm, providing compression speed >500 MB/s per core,
  scalable with multi-cores CPU. It features an extremely fast decoder, with speed in
  multiple GB/s per core, typically reaching RAM speed limits on multi-core systems.

  The LZ4 compression library provides in-memory compression and decompression functions.
+  It gives full buffer control to user.
  Compression can be done in:
    - a single step (described as Simple Functions)
    - a single step, reusing a context (described in Advanced Functions)
    - unbounded multiple steps (described as Streaming compression)

-  lz4.h provides block compression functions. It gives full buffer control to user.
-  Decompressing an lz4-compressed block also requires metadata (such as compressed size).
-  Each application is free to encode such metadata in whichever way it wants.
+  lz4.h generates and decodes LZ4-compressed blocks (doc/lz4_Block_format.md).
+  Decompressing such a compressed block requires additional metadata.
+  Exact metadata depends on exact decompression function.
+  For the typical case of LZ4_decompress_safe(),
+  metadata includes block's compressed size, and maximum bound of decompressed size.
+  Each application is free to encode and pass such metadata in whichever way it wants.

-  An additional format, called LZ4 frame specification (doc/lz4_Frame_format.md),
-  take care of encoding standard metadata alongside LZ4-compressed blocks.
-  If your application requires interoperability, it's recommended to use it.
-  A library is provided to take care of it, see lz4frame.h.
+  lz4.h only handle blocks, it can not generate Frames.
+
+  Blocks are different from Frames (doc/lz4_Frame_format.md).
+  Frames bundle both blocks and metadata in a specified manner.
+  Embedding metadata is required for compressed data to be self-contained and portable.
+  Frame format is delivered through a companion API, declared in lz4frame.h.
+  The `lz4` CLI can only manage frames.
 */

 /*^***************************************************************
@@ -72,27 +79,28 @@ extern "C" {
 /*
 *  LZ4_DLL_EXPORT :
 *  Enable exporting of functions when building a Windows DLL
-*  LZ4LIB_API :
+*  LZ4LIB_VISIBILITY :
 *  Control library symbols visibility.
 */
-
-#include <stdint.h>
-
+#ifndef LZ4LIB_VISIBILITY
+#  if defined(__GNUC__) && (__GNUC__ >= 4)
+#    define LZ4LIB_VISIBILITY __attribute__ ((visibility ("default")))
+#  else
+#    define LZ4LIB_VISIBILITY
+#  endif
+#endif
 #if defined(LZ4_DLL_EXPORT) && (LZ4_DLL_EXPORT==1)
-#  define LZ4LIB_API __declspec(dllexport)
+#  define LZ4LIB_API __declspec(dllexport) LZ4LIB_VISIBILITY
 #elif defined(LZ4_DLL_IMPORT) && (LZ4_DLL_IMPORT==1)
-#  define LZ4LIB_API __declspec(dllimport) /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
-#elif defined(__GNUC__) && (__GNUC__ >= 4)
-#  define LZ4LIB_API __attribute__ ((__visibility__ ("default")))
+#  define LZ4LIB_API __declspec(dllimport) LZ4LIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
 #else
-#  define LZ4LIB_API
+#  define LZ4LIB_API LZ4LIB_VISIBILITY
 #endif

-
 /*------   Version   ------*/
 #define LZ4_VERSION_MAJOR    1    /* for breaking interface changes  */
-#define LZ4_VERSION_MINOR    8    /* for new (non-breaking) interface capabilities */
-#define LZ4_VERSION_RELEASE  0    /* for tweaks, bug-fixes, or development */
+#define LZ4_VERSION_MINOR    9    /* for new (non-breaking) interface capabilities */
+#define LZ4_VERSION_RELEASE  3    /* for tweaks, bug-fixes, or development */

 #define LZ4_VERSION_NUMBER (LZ4_VERSION_MAJOR *100*100 + LZ4_VERSION_MINOR *100 + LZ4_VERSION_RELEASE)

@@ -101,8 +109,8 @@ extern "C" {
 #define LZ4_EXPAND_AND_QUOTE(str) LZ4_QUOTE(str)
 #define LZ4_VERSION_STRING LZ4_EXPAND_AND_QUOTE(LZ4_LIB_VERSION)

-LZ4LIB_API int LZ4_versionNumber (void);  /**< library version number; to be used when checking dll version */
-LZ4LIB_API const char* LZ4_versionString (void);   /**< library version string; to be used when checking dll version */
+LZ4LIB_API int LZ4_versionNumber (void);  /**< library version number; useful to check dll version */
+LZ4LIB_API const char* LZ4_versionString (void);   /**< library version string; useful to check dll version */


 /*-************************************
@@ -111,42 +119,49 @@ LZ4LIB_API const char* LZ4_versionString (void);   /**< library version string;
 /*!
 * LZ4_MEMORY_USAGE :
 * Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
- * Increasing memory usage improves compression ratio
- * Reduced memory usage can improve speed, due to cache effect
+ * Increasing memory usage improves compression ratio.
+ * Reduced memory usage may improve speed, thanks to better cache locality.
 * Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache
 */
 #ifndef LZ4_MEMORY_USAGE
 # define LZ4_MEMORY_USAGE 14
 #endif

+
 /*-************************************
 *  Simple Functions
 **************************************/
 /*! LZ4_compress_default() :
-    Compresses 'sourceSize' bytes from buffer 'source'
-    into already allocated 'dest' buffer of size 'maxDestSize'.
-    Compression is guaranteed to succeed if 'maxDestSize' >= LZ4_compressBound(sourceSize).
-    It also runs faster, so it's a recommended setting.
-    If the function cannot compress 'source' into a more limited 'dest' budget,
-    compression stops *immediately*, and the function result is zero.
-    As a consequence, 'dest' content is not valid.
-    This function never writes outside 'dest' buffer, nor read outside 'source' buffer.
-        sourceSize  : Max supported value is LZ4_MAX_INPUT_VALUE
-        maxDestSize : full or partial size of buffer 'dest' (which must be already allocated)
-        return : the number of bytes written into buffer 'dest' (necessarily <= maxOutputSize)
-              or 0 if compression fails */
-LZ4LIB_API int LZ4_compress_default(const char* source, char* dest, int sourceSize, int maxDestSize);
+ *  Compresses 'srcSize' bytes from buffer 'src'
+ *  into already allocated 'dst' buffer of size 'dstCapacity'.
+ *  Compression is guaranteed to succeed if 'dstCapacity' >= LZ4_compressBound(srcSize).
+ *  It also runs faster, so it's a recommended setting.
+ *  If the function cannot compress 'src' into a more limited 'dst' budget,
+ *  compression stops *immediately*, and the function result is zero.
+ *  In which case, 'dst' content is undefined (invalid).
+ *      srcSize : max supported value is LZ4_MAX_INPUT_SIZE.
+ *      dstCapacity : size of buffer 'dst' (which must be already allocated)
+ *     @return  : the number of bytes written into buffer 'dst' (necessarily <= dstCapacity)
+ *                or 0 if compression fails
+ * Note : This function is protected against buffer overflow scenarios (never writes outside 'dst' buffer, nor read outside 'source' buffer).
+ */
+LZ4LIB_API int LZ4_compress_default(const char* src, char* dst, int srcSize, int dstCapacity);

 /*! LZ4_decompress_safe() :
-    compressedSize : is the precise full size of the compressed block.
-    maxDecompressedSize : is the size of destination buffer, which must be already allocated.
-    return : the number of bytes decompressed into destination buffer (necessarily <= maxDecompressedSize)
-             If destination buffer is not large enough, decoding will stop and output an error code (<0).
-             If the source stream is detected malformed, the function will stop decoding and return a negative result.
-             This function is protected against buffer overflow exploits, including malicious data packets.
-             It never writes outside output buffer, nor reads outside input buffer.
-*/
-LZ4LIB_API int LZ4_decompress_safe (const char* source, char* dest, int compressedSize, int maxDecompressedSize);
+ *  compressedSize : is the exact complete size of the compressed block.
+ *  dstCapacity : is the size of destination buffer (which must be already allocated), presumed an upper bound of decompressed size.
+ * @return : the number of bytes decompressed into destination buffer (necessarily <= dstCapacity)
+ *           If destination buffer is not large enough, decoding will stop and output an error code (negative value).
+ *           If the source stream is detected malformed, the function will stop decoding and return a negative result.
+ * Note 1 : This function is protected against malicious data packets :
+ *          it will never writes outside 'dst' buffer, nor read outside 'source' buffer,
+ *          even if the compressed block is maliciously modified to order the decoder to do these actions.
+ *          In such case, the decoder stops immediately, and considers the compressed block malformed.
+ * Note 2 : compressedSize and dstCapacity must be provided to the function, the compressed block does not contain them.
+ *          The implementation is free to send / store / derive this information in whichever way is most beneficial.
+ *          If there is a need for a different format which bundles together both compressed data and its metadata, consider looking at lz4frame.h instead.
+ */
+LZ4LIB_API int LZ4_decompress_safe (const char* src, char* dst, int compressedSize, int dstCapacity);


 /*-************************************
@@ -155,80 +170,99 @@ LZ4LIB_API int LZ4_decompress_safe (const char* source, char* dest, int compress
 #define LZ4_MAX_INPUT_SIZE        0x7E000000   /* 2 113 929 216 bytes */
 #define LZ4_COMPRESSBOUND(isize)  ((unsigned)(isize) > (unsigned)LZ4_MAX_INPUT_SIZE ? 0 : (isize) + ((isize)/255) + 16)

-/*!
-LZ4_compressBound() :
+/*! LZ4_compressBound() :
    Provides the maximum size that LZ4 compression may output in a "worst case" scenario (input data not compressible)
    This function is primarily useful for memory allocation purposes (destination buffer size).
    Macro LZ4_COMPRESSBOUND() is also provided for compilation-time evaluation (stack memory allocation for example).
-    Note that LZ4_compress_default() compress faster when dest buffer size is >= LZ4_compressBound(srcSize)
+    Note that LZ4_compress_default() compresses faster when dstCapacity is >= LZ4_compressBound(srcSize)
        inputSize  : max supported value is LZ4_MAX_INPUT_SIZE
        return : maximum output size in a "worst case" scenario
-              or 0, if input size is too large ( > LZ4_MAX_INPUT_SIZE)
+              or 0, if input size is incorrect (too large or negative)
 */
 LZ4LIB_API int LZ4_compressBound(int inputSize);

-/*!
-LZ4_compress_fast() :
-    Same as LZ4_compress_default(), but allows to select an "acceleration" factor.
+/*! LZ4_compress_fast() :
+    Same as LZ4_compress_default(), but allows selection of "acceleration" factor.
    The larger the acceleration value, the faster the algorithm, but also the lesser the compression.
    It's a trade-off. It can be fine tuned, with each successive value providing roughly +~3% to speed.
    An acceleration value of "1" is the same as regular LZ4_compress_default()
-    Values <= 0 will be replaced by ACCELERATION_DEFAULT (see lz4.c), which is 1.
+    Values <= 0 will be replaced by LZ4_ACCELERATION_DEFAULT (currently == 1, see lz4.c).
+    Values > LZ4_ACCELERATION_MAX will be replaced by LZ4_ACCELERATION_MAX (currently == 65537, see lz4.c).
 */
-LZ4LIB_API int LZ4_compress_fast (const char* source, char* dest, int sourceSize, int maxDestSize, int acceleration);
+LZ4LIB_API int LZ4_compress_fast (const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);


-/*!
-LZ4_compress_fast_extState() :
-    Same compression function, just using an externally allocated memory space to store compression state.
-    Use LZ4_sizeofState() to know how much memory must be allocated,
-    and allocate it on 8-bytes boundaries (using malloc() typically).
-    Then, provide it as 'void* state' to compression function.
-*/
+/*! LZ4_compress_fast_extState() :
+ *  Same as LZ4_compress_fast(), using an externally allocated memory space for its state.
+ *  Use LZ4_sizeofState() to know how much memory must be allocated,
+ *  and allocate it on 8-bytes boundaries (using `malloc()` typically).
+ *  Then, provide this buffer as `void* state` to compression function.
+ */
 LZ4LIB_API int LZ4_sizeofState(void);
-LZ4LIB_API int LZ4_compress_fast_extState (void* state, const char* source, char* dest, int inputSize, int maxDestSize, int acceleration);
-
+LZ4LIB_API int LZ4_compress_fast_extState (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);

-/*!
-LZ4_compress_destSize() :
-    Reverse the logic, by compressing as much data as possible from 'source' buffer
-    into already allocated buffer 'dest' of size 'targetDestSize'.
-    This function either compresses the entire 'source' content into 'dest' if it's large enough,
-    or fill 'dest' buffer completely with as much data as possible from 'source'.
-        *sourceSizePtr : will be modified to indicate how many bytes where read from 'source' to fill 'dest'.
-                         New value is necessarily <= old value.
-        return : Nb bytes written into 'dest' (necessarily <= targetDestSize)
-              or 0 if compression fails
-*/
-LZ4LIB_API int LZ4_compress_destSize (const char* source, char* dest, int* sourceSizePtr, int targetDestSize);

+/*! LZ4_compress_destSize() :
+ *  Reverse the logic : compresses as much data as possible from 'src' buffer
+ *  into already allocated buffer 'dst', of size >= 'targetDestSize'.
+ *  This function either compresses the entire 'src' content into 'dst' if it's large enough,
+ *  or fill 'dst' buffer completely with as much data as possible from 'src'.
+ *  note: acceleration parameter is fixed to "default".
+ *
+ * *srcSizePtr : will be modified to indicate how many bytes where read from 'src' to fill 'dst'.
+ *               New value is necessarily <= input value.
+ * @return : Nb bytes written into 'dst' (necessarily <= targetDestSize)
+ *           or 0 if compression fails.
+ *
+ * Note : from v1.8.2 to v1.9.1, this function had a bug (fixed un v1.9.2+):
+ *        the produced compressed content could, in specific circumstances,
+ *        require to be decompressed into a destination buffer larger
+ *        by at least 1 byte than the content to decompress.
+ *        If an application uses `LZ4_compress_destSize()`,
+ *        it's highly recommended to update liblz4 to v1.9.2 or better.
+ *        If this can't be done or ensured,
+ *        the receiving decompression function should provide
+ *        a dstCapacity which is > decompressedSize, by at least 1 byte.
+ *        See https://github.com/lz4/lz4/issues/859 for details
+ */
+LZ4LIB_API int LZ4_compress_destSize (const char* src, char* dst, int* srcSizePtr, int targetDstSize);

-/*!
-LZ4_decompress_fast() :
-    originalSize : is the original and therefore uncompressed size
-    return : the number of bytes read from the source buffer (in other words, the compressed size)
-             If the source stream is detected malformed, the function will stop decoding and return a negative result.
-             Destination buffer must be already allocated. Its size must be a minimum of 'originalSize' bytes.
-    note : This function fully respect memory boundaries for properly formed compressed data.
-           It is a bit faster than LZ4_decompress_safe().
-           However, it does not provide any protection against intentionally modified data stream (malicious input).
-           Use this function in trusted environment only (data to decode comes from a trusted source).
-*/
-LZ4LIB_API int LZ4_decompress_fast (const char* source, char* dest, int originalSize);

-/*!
-LZ4_decompress_safe_partial() :
-    This function decompress a compressed block of size 'compressedSize' at position 'source'
-    into destination buffer 'dest' of size 'maxDecompressedSize'.
-    The function tries to stop decompressing operation as soon as 'targetOutputSize' has been reached,
-    reducing decompression time.
-    return : the number of bytes decoded in the destination buffer (necessarily <= maxDecompressedSize)
-       Note : this number can be < 'targetOutputSize' should the compressed block to decode be smaller.
-             Always control how many bytes were decoded.
-             If the source stream is detected malformed, the function will stop decoding and return a negative result.
-             This function never writes outside of output buffer, and never reads outside of input buffer. It is therefore protected against malicious data packets
-*/
-LZ4LIB_API int LZ4_decompress_safe_partial (const char* source, char* dest, int compressedSize, int targetOutputSize, int maxDecompressedSize);
+/*! LZ4_decompress_safe_partial() :
+ *  Decompress an LZ4 compressed block, of size 'srcSize' at position 'src',
+ *  into destination buffer 'dst' of size 'dstCapacity'.
+ *  Up to 'targetOutputSize' bytes will be decoded.
+ *  The function stops decoding on reaching this objective.
+ *  This can be useful to boost performance
+ *  whenever only the beginning of a block is required.
+ *
+ * @return : the number of bytes decoded in `dst` (necessarily <= targetOutputSize)
+ *           If source stream is detected malformed, function returns a negative result.
+ *
+ *  Note 1 : @return can be < targetOutputSize, if compressed block contains less data.
+ *
+ *  Note 2 : targetOutputSize must be <= dstCapacity
+ *
+ *  Note 3 : this function effectively stops decoding on reaching targetOutputSize,
+ *           so dstCapacity is kind of redundant.
+ *           This is because in older versions of this function,
+ *           decoding operation would still write complete sequences.
+ *           Therefore, there was no guarantee that it would stop writing at exactly targetOutputSize,
+ *           it could write more bytes, though only up to dstCapacity.
+ *           Some "margin" used to be required for this operation to work properly.
+ *           Thankfully, this is no longer necessary.
+ *           The function nonetheless keeps the same signature, in an effort to preserve API compatibility.
+ *
+ *  Note 4 : If srcSize is the exact size of the block,
+ *           then targetOutputSize can be any value,
+ *           including larger than the block's decompressed size.
+ *           The function will, at most, generate block's decompressed size.
+ *
+ *  Note 5 : If srcSize is _larger_ than block's compressed size,
+ *           then targetOutputSize **MUST** be <= block's decompressed size.
+ *           Otherwise, *silent corruption will occur*.
+ */
+LZ4LIB_API int LZ4_decompress_safe_partial (const char* src, char* dst, int srcSize, int targetOutputSize, int dstCapacity);


 /*-*********************************************
@@ -236,241 +270,503 @@ LZ4LIB_API int LZ4_decompress_safe_partial (const char* source, char* dest, int
 ***********************************************/
 typedef union LZ4_stream_u LZ4_stream_t;  /* incomplete type (defined later) */

-/*! LZ4_createStream() and LZ4_freeStream() :
- *  LZ4_createStream() will allocate and initialize an `LZ4_stream_t` structure.
- *  LZ4_freeStream() releases its memory.
- */
 LZ4LIB_API LZ4_stream_t* LZ4_createStream(void);
 LZ4LIB_API int           LZ4_freeStream (LZ4_stream_t* streamPtr);

-/*! LZ4_resetStream() :
- *  An LZ4_stream_t structure can be allocated once and re-used multiple times.
- *  Use this function to start compressing a new stream.
+/*! LZ4_resetStream_fast() : v1.9.0+
+ *  Use this to prepare an LZ4_stream_t for a new chain of dependent blocks
+ *  (e.g., LZ4_compress_fast_continue()).
+ *
+ *  An LZ4_stream_t must be initialized once before usage.
+ *  This is automatically done when created by LZ4_createStream().
+ *  However, should the LZ4_stream_t be simply declared on stack (for example),
+ *  it's necessary to initialize it first, using LZ4_initStream().
+ *
+ *  After init, start any new stream with LZ4_resetStream_fast().
+ *  A same LZ4_stream_t can be re-used multiple times consecutively
+ *  and compress multiple streams,
+ *  provided that it starts each new stream with LZ4_resetStream_fast().
+ *
+ *  LZ4_resetStream_fast() is much faster than LZ4_initStream(),
+ *  but is not compatible with memory regions containing garbage data.
+ *
+ *  Note: it's only useful to call LZ4_resetStream_fast()
+ *        in the context of streaming compression.
+ *        The *extState* functions perform their own resets.
+ *        Invoking LZ4_resetStream_fast() before is redundant, and even counterproductive.
 */
-LZ4LIB_API void LZ4_resetStream (LZ4_stream_t* streamPtr);
+LZ4LIB_API void LZ4_resetStream_fast (LZ4_stream_t* streamPtr);

 /*! LZ4_loadDict() :
- *  Use this function to load a static dictionary into LZ4_stream_t.
- *  Any previous data will be forgotten, only 'dictionary' will remain in memory.
+ *  Use this function to reference a static dictionary into LZ4_stream_t.
+ *  The dictionary must remain available during compression.
+ *  LZ4_loadDict() triggers a reset, so any previous data will be forgotten.
+ *  The same dictionary will have to be loaded on decompression side for successful decoding.
+ *  Dictionary are useful for better compression of small data (KB range).
+ *  While LZ4 accept any input as dictionary,
+ *  results are generally better when using Zstandard's Dictionary Builder.
 *  Loading a size of 0 is allowed, and is the same as reset.
- * @return : dictionary size, in bytes (necessarily <= 64 KB)
+ * @return : loaded dictionary size, in bytes (necessarily <= 64 KB)
 */
 LZ4LIB_API int LZ4_loadDict (LZ4_stream_t* streamPtr, const char* dictionary, int dictSize);

 /*! LZ4_compress_fast_continue() :
- *  Compress content into 'src' using data from previously compressed blocks, improving compression ratio.
+ *  Compress 'src' content using data from previously compressed blocks, for better compression ratio.
 * 'dst' buffer must be already allocated.
 *  If dstCapacity >= LZ4_compressBound(srcSize), compression is guaranteed to succeed, and runs faster.
 *
- *  Important : Up to 64KB of previously compressed data is assumed to remain present and unmodified in memory !
- *  Special 1 : If input buffer is a double-buffer, it can have any size, including < 64 KB.
- *  Special 2 : If input buffer is a ring-buffer, it can have any size, including < 64 KB.
- *
 * @return : size of compressed block
- *           or 0 if there is an error (typically, compressed data cannot fit into 'dst')
- *  After an error, the stream status is invalid, it can only be reset or freed.
+ *           or 0 if there is an error (typically, cannot fit into 'dst').
+ *
+ *  Note 1 : Each invocation to LZ4_compress_fast_continue() generates a new block.
+ *           Each block has precise boundaries.
+ *           Each block must be decompressed separately, calling LZ4_decompress_*() with relevant metadata.
+ *           It's not possible to append blocks together and expect a single invocation of LZ4_decompress_*() to decompress them together.
+ *
+ *  Note 2 : The previous 64KB of source data is __assumed__ to remain present, unmodified, at same address in memory !
+ *
+ *  Note 3 : When input is structured as a double-buffer, each buffer can have any size, including < 64 KB.
+ *           Make sure that buffers are separated, by at least one byte.
+ *           This construction ensures that each block only depends on previous block.
+ *
+ *  Note 4 : If input buffer is a ring-buffer, it can have any size, including < 64 KB.
+ *
+ *  Note 5 : After an error, the stream status is undefined (invalid), it can only be reset or freed.
 */
 LZ4LIB_API int LZ4_compress_fast_continue (LZ4_stream_t* streamPtr, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);

 /*! LZ4_saveDict() :
- *  If previously compressed data block is not guaranteed to remain available at its current memory location,
+ *  If last 64KB data cannot be guaranteed to remain available at its current memory location,
 *  save it into a safer place (char* safeBuffer).
- *  Note : it's not necessary to call LZ4_loadDict() after LZ4_saveDict(), dictionary is immediately usable.
- *  @return : saved dictionary size in bytes (necessarily <= dictSize), or 0 if error.
+ *  This is schematically equivalent to a memcpy() followed by LZ4_loadDict(),
+ *  but is much faster, because LZ4_saveDict() doesn't need to rebuild tables.
+ * @return : saved dictionary size in bytes (necessarily <= maxDictSize), or 0 if error.
 */
-LZ4LIB_API int LZ4_saveDict (LZ4_stream_t* streamPtr, char* safeBuffer, int dictSize);
+LZ4LIB_API int LZ4_saveDict (LZ4_stream_t* streamPtr, char* safeBuffer, int maxDictSize);


 /*-**********************************************
 *  Streaming Decompression Functions
 *  Bufferless synchronous API
 ************************************************/
-typedef union LZ4_streamDecode_u LZ4_streamDecode_t;   /* incomplete type (defined later) */
+typedef union LZ4_streamDecode_u LZ4_streamDecode_t;   /* tracking context */

 /*! LZ4_createStreamDecode() and LZ4_freeStreamDecode() :
- *  creation / destruction of streaming decompression tracking structure.
- *  A tracking structure can be re-used multiple times sequentially. */
+ *  creation / destruction of streaming decompression tracking context.
+ *  A tracking context can be re-used multiple times.
+ */
 LZ4LIB_API LZ4_streamDecode_t* LZ4_createStreamDecode(void);
 LZ4LIB_API int                 LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream);

 /*! LZ4_setStreamDecode() :
- *  An LZ4_streamDecode_t structure can be allocated once and re-used multiple times.
+ *  An LZ4_streamDecode_t context can be allocated once and re-used multiple times.
 *  Use this function to start decompression of a new stream of blocks.
- *  A dictionary can optionnally be set. Use NULL or size 0 for a simple reset order.
+ *  A dictionary can optionally be set. Use NULL or size 0 for a reset order.
+ *  Dictionary is presumed stable : it must remain accessible and unmodified during next decompression.
 * @return : 1 if OK, 0 if error
 */
 LZ4LIB_API int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize);

+/*! LZ4_decoderRingBufferSize() : v1.8.2+
+ *  Note : in a ring buffer scenario (optional),
+ *  blocks are presumed decompressed next to each other
+ *  up to the moment there is not enough remaining space for next block (remainingSize < maxBlockSize),
+ *  at which stage it resumes from beginning of ring buffer.
+ *  When setting such a ring buffer for streaming decompression,
+ *  provides the minimum size of this ring buffer
+ *  to be compatible with any source respecting maxBlockSize condition.
+ * @return : minimum ring buffer size,
+ *           or 0 if there is an error (invalid maxBlockSize).
+ */
+LZ4LIB_API int LZ4_decoderRingBufferSize(int maxBlockSize);
+#define LZ4_DECODER_RING_BUFFER_SIZE(maxBlockSize) (65536 + 14 + (maxBlockSize))  /* for static allocation; maxBlockSize presumed valid */
+
 /*! LZ4_decompress_*_continue() :
 *  These decoding functions allow decompression of consecutive blocks in "streaming" mode.
 *  A block is an unsplittable entity, it must be presented entirely to a decompression function.
- *  Decompression functions only accept one block at a time.
- *  Previously decoded blocks *must* remain available at the memory position where they were decoded (up to 64 KB).
- *
- *  Special : if application sets a ring buffer for decompression, it must respect one of the following conditions :
- *  - Exactly same size as encoding buffer, with same update rule (block boundaries at same positions)
- *    In which case, the decoding & encoding ring buffer can have any size, including very small ones ( < 64 KB).
- *  - Larger than encoding buffer, by a minimum of maxBlockSize more bytes.
- *    maxBlockSize is implementation dependent. It's the maximum size of any single block.
+ *  Decompression functions only accepts one block at a time.
+ *  The last 64KB of previously decoded data *must* remain available and unmodified at the memory position where they were decoded.
+ *  If less than 64KB of data has been decoded, all the data must be present.
+ *
+ *  Special : if decompression side sets a ring buffer, it must respect one of the following conditions :
+ *  - Decompression buffer size is _at least_ LZ4_decoderRingBufferSize(maxBlockSize).
+ *    maxBlockSize is the maximum size of any single block. It can have any value > 16 bytes.
+ *    In which case, encoding and decoding buffers do not need to be synchronized.
+ *    Actually, data can be produced by any source compliant with LZ4 format specification, and respecting maxBlockSize.
+ *  - Synchronized mode :
+ *    Decompression buffer size is _exactly_ the same as compression buffer size,
+ *    and follows exactly same update rule (block boundaries at same positions),
+ *    and decoding function is provided with exact decompressed size of each block (exception for last block of the stream),
+ *    _then_ decoding & encoding ring buffer can have any size, including small ones ( < 64 KB).
+ *  - Decompression buffer is larger than encoding buffer, by a minimum of maxBlockSize more bytes.
 *    In which case, encoding and decoding buffers do not need to be synchronized,
 *    and encoding ring buffer can have any size, including small ones ( < 64 KB).
- *  - _At least_ 64 KB + 8 bytes + maxBlockSize.
- *    In which case, encoding and decoding buffers do not need to be synchronized,
- *    and encoding ring buffer can have any size, including larger than decoding buffer.
- *  Whenever these conditions are not possible, save the last 64KB of decoded data into a safe buffer,
- *  and indicate where it is saved using LZ4_setStreamDecode() before decompressing next block.
+ *
+ *  Whenever these conditions are not possible,
+ *  save the last 64KB of decoded data into a safe buffer where it can't be modified during decompression,
+ *  then indicate where this data is saved using LZ4_setStreamDecode(), before decompressing next block.
 */
-LZ4LIB_API int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxDecompressedSize);
-LZ4LIB_API int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int originalSize);
+LZ4LIB_API int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* src, char* dst, int srcSize, int dstCapacity);


 /*! LZ4_decompress_*_usingDict() :
 *  These decoding functions work the same as
 *  a combination of LZ4_setStreamDecode() followed by LZ4_decompress_*_continue()
 *  They are stand-alone, and don't need an LZ4_streamDecode_t structure.
+ *  Dictionary is presumed stable : it must remain accessible and unmodified during decompression.
+ *  Performance tip : Decompression speed can be substantially increased
+ *                    when dst == dictStart + dictSize.
 */
-LZ4LIB_API int LZ4_decompress_safe_usingDict (const char* source, char* dest, int compressedSize, int maxDecompressedSize, const char* dictStart, int dictSize);
-LZ4LIB_API int LZ4_decompress_fast_usingDict (const char* source, char* dest, int originalSize, const char* dictStart, int dictSize);
+LZ4LIB_API int LZ4_decompress_safe_usingDict (const char* src, char* dst, int srcSize, int dstCapcity, const char* dictStart, int dictSize);
+
+#endif /* LZ4_H_2983827168210 */


-/*^**********************************************
+/*^*************************************
 * !!!!!!   STATIC LINKING ONLY   !!!!!!
- ***********************************************/
-/*-************************************
- *  Private definitions
- **************************************
- * Do not use these definitions.
- * They are exposed to allow static allocation of `LZ4_stream_t` and `LZ4_streamDecode_t`.
- * Using these definitions will expose code to API and/or ABI break in future versions of the library.
- **************************************/
+ ***************************************/
+
+/*-****************************************************************************
+ * Experimental section
+ *
+ * Symbols declared in this section must be considered unstable. Their
+ * signatures or semantics may change, or they may be removed altogether in the
+ * future. They are therefore only safe to depend on when the caller is
+ * statically linked against the library.
+ *
+ * To protect against unsafe usage, not only are the declarations guarded,
+ * the definitions are hidden by default
+ * when building LZ4 as a shared/dynamic library.
+ *
+ * In order to access these declarations,
+ * define LZ4_STATIC_LINKING_ONLY in your application
+ * before including LZ4's headers.
+ *
+ * In order to make their implementations accessible dynamically, you must
+ * define LZ4_PUBLISH_STATIC_FUNCTIONS when building the LZ4 library.
+ ******************************************************************************/
+
+#ifdef LZ4_STATIC_LINKING_ONLY
+
+#ifndef LZ4_STATIC_3504398509
+#define LZ4_STATIC_3504398509
+
+#ifdef LZ4_PUBLISH_STATIC_FUNCTIONS
+#define LZ4LIB_STATIC_API LZ4LIB_API
+#else
+#define LZ4LIB_STATIC_API
+#endif
+
+
+/*! LZ4_compress_fast_extState_fastReset() :
+ *  A variant of LZ4_compress_fast_extState().
+ *
+ *  Using this variant avoids an expensive initialization step.
+ *  It is only safe to call if the state buffer is known to be correctly initialized already
+ *  (see above comment on LZ4_resetStream_fast() for a definition of "correctly initialized").
+ *  From a high level, the difference is that
+ *  this function initializes the provided state with a call to something like LZ4_resetStream_fast()
+ *  while LZ4_compress_fast_extState() starts with a call to LZ4_resetStream().
+ */
+LZ4LIB_STATIC_API int LZ4_compress_fast_extState_fastReset (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
+
+/*! LZ4_attach_dictionary() :
+ *  This is an experimental API that allows
+ *  efficient use of a static dictionary many times.
+ *
+ *  Rather than re-loading the dictionary buffer into a working context before
+ *  each compression, or copying a pre-loaded dictionary's LZ4_stream_t into a
+ *  working LZ4_stream_t, this function introduces a no-copy setup mechanism,
+ *  in which the working stream references the dictionary stream in-place.
+ *
+ *  Several assumptions are made about the state of the dictionary stream.
+ *  Currently, only streams which have been prepared by LZ4_loadDict() should
+ *  be expected to work.
+ *
+ *  Alternatively, the provided dictionaryStream may be NULL,
+ *  in which case any existing dictionary stream is unset.
+ *
+ *  If a dictionary is provided, it replaces any pre-existing stream history.
+ *  The dictionary contents are the only history that can be referenced and
+ *  logically immediately precede the data compressed in the first subsequent
+ *  compression call.
+ *
+ *  The dictionary will only remain attached to the working stream through the
+ *  first compression call, at the end of which it is cleared. The dictionary
+ *  stream (and source buffer) must remain in-place / accessible / unchanged
+ *  through the completion of the first compression call on the stream.
+ */
+LZ4LIB_STATIC_API void LZ4_attach_dictionary(LZ4_stream_t* workingStream, const LZ4_stream_t* dictionaryStream);
+
+
+/*! In-place compression and decompression
+ *
+ * It's possible to have input and output sharing the same buffer,
+ * for highly contrained memory environments.
+ * In both cases, it requires input to lay at the end of the buffer,
+ * and decompression to start at beginning of the buffer.
+ * Buffer size must feature some margin, hence be larger than final size.
+ *
+ * |<------------------------buffer--------------------------------->|
+ *                             |<-----------compressed data--------->|
+ * |<-----------decompressed size------------------>|
+ *                                                  |<----margin---->|
+ *
+ * This technique is more useful for decompression,
+ * since decompressed size is typically larger,
+ * and margin is short.
+ *
+ * In-place decompression will work inside any buffer
+ * which size is >= LZ4_DECOMPRESS_INPLACE_BUFFER_SIZE(decompressedSize).
+ * This presumes that decompressedSize > compressedSize.
+ * Otherwise, it means compression actually expanded data,
+ * and it would be more efficient to store such data with a flag indicating it's not compressed.
+ * This can happen when data is not compressible (already compressed, or encrypted).
+ *
+ * For in-place compression, margin is larger, as it must be able to cope with both
+ * history preservation, requiring input data to remain unmodified up to LZ4_DISTANCE_MAX,
+ * and data expansion, which can happen when input is not compressible.
+ * As a consequence, buffer size requirements are much higher,
+ * and memory savings offered by in-place compression are more limited.
+ *
+ * There are ways to limit this cost for compression :
+ * - Reduce history size, by modifying LZ4_DISTANCE_MAX.
+ *   Note that it is a compile-time constant, so all compressions will apply this limit.
+ *   Lower values will reduce compression ratio, except when input_size < LZ4_DISTANCE_MAX,
+ *   so it's a reasonable trick when inputs are known to be small.
+ * - Require the compressor to deliver a "maximum compressed size".
+ *   This is the `dstCapacity` parameter in `LZ4_compress*()`.
+ *   When this size is < LZ4_COMPRESSBOUND(inputSize), then compression can fail,
+ *   in which case, the return code will be 0 (zero).
+ *   The caller must be ready for these cases to happen,
+ *   and typically design a backup scheme to send data uncompressed.
+ * The combination of both techniques can significantly reduce
+ * the amount of margin required for in-place compression.
+ *
+ * In-place compression can work in any buffer
+ * which size is >= (maxCompressedSize)
+ * with maxCompressedSize == LZ4_COMPRESSBOUND(srcSize) for guaranteed compression success.
+ * LZ4_COMPRESS_INPLACE_BUFFER_SIZE() depends on both maxCompressedSize and LZ4_DISTANCE_MAX,
+ * so it's possible to reduce memory requirements by playing with them.
+ */
+
+#define LZ4_DECOMPRESS_INPLACE_MARGIN(compressedSize)          (((compressedSize) >> 8) + 32)
+#define LZ4_DECOMPRESS_INPLACE_BUFFER_SIZE(decompressedSize)   ((decompressedSize) + LZ4_DECOMPRESS_INPLACE_MARGIN(decompressedSize))  /**< note: presumes that compressedSize < decompressedSize. note2: margin is overestimated a bit, since it could use compressedSize instead */
+
+#ifndef LZ4_DISTANCE_MAX   /* history window size; can be user-defined at compile time */
+#  define LZ4_DISTANCE_MAX 65535   /* set to maximum value by default */
+#endif
+
+#define LZ4_COMPRESS_INPLACE_MARGIN                           (LZ4_DISTANCE_MAX + 32)   /* LZ4_DISTANCE_MAX can be safely replaced by srcSize when it's smaller */
+#define LZ4_COMPRESS_INPLACE_BUFFER_SIZE(maxCompressedSize)   ((maxCompressedSize) + LZ4_COMPRESS_INPLACE_MARGIN)  /**< maxCompressedSize is generally LZ4_COMPRESSBOUND(inputSize), but can be set to any lower value, with the risk that compression can fail (return code 0(zero)) */
+
+#endif   /* LZ4_STATIC_3504398509 */
+#endif   /* LZ4_STATIC_LINKING_ONLY */
+
+
+
+#ifndef LZ4_H_98237428734687
+#define LZ4_H_98237428734687
+
+/*-************************************************************
+ *  Private Definitions
+ **************************************************************
+ * Do not use these definitions directly.
+ * They are only exposed to allow static allocation of `LZ4_stream_t` and `LZ4_streamDecode_t`.
+ * Accessing members will expose user code to API and/or ABI break in future versions of the library.
+ **************************************************************/
 #define LZ4_HASHLOG   (LZ4_MEMORY_USAGE-2)
 #define LZ4_HASHTABLESIZE (1 << LZ4_MEMORY_USAGE)
 #define LZ4_HASH_SIZE_U32 (1 << LZ4_HASHLOG)       /* required as macro for static allocation */

 #if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
-#include <stdint.h>
-
-typedef struct {
-    uint32_t hashTable[LZ4_HASH_SIZE_U32];
-    uint32_t currentOffset;
-    uint32_t initCheck;
-    const uint8_t* dictionary;
-    uint8_t* bufferStart;   /* obsolete, used for slideInputBuffer */
-    uint32_t dictSize;
-} LZ4_stream_t_internal;
-
-typedef struct {
-    const uint8_t* externalDict;
-    size_t extDictSize;
-    const uint8_t* prefixEnd;
-    size_t prefixSize;
-} LZ4_streamDecode_t_internal;
-
+# include <stdint.h>
+  typedef  int8_t  LZ4_i8;
+  typedef uint8_t  LZ4_byte;
+  typedef uint16_t LZ4_u16;
+  typedef uint32_t LZ4_u32;
 #else
+  typedef   signed char  LZ4_i8;
+  typedef unsigned char  LZ4_byte;
+  typedef unsigned short LZ4_u16;
+  typedef unsigned int   LZ4_u32;
+#endif

-typedef struct {
-    unsigned int hashTable[LZ4_HASH_SIZE_U32];
-    unsigned int currentOffset;
-    unsigned int initCheck;
-    const unsigned char* dictionary;
-    unsigned char* bufferStart;   /* obsolete, used for slideInputBuffer */
-    unsigned int dictSize;
-} LZ4_stream_t_internal;
+typedef struct LZ4_stream_t_internal LZ4_stream_t_internal;
+struct LZ4_stream_t_internal {
+    LZ4_u32 hashTable[LZ4_HASH_SIZE_U32];
+    LZ4_u32 currentOffset;
+    LZ4_u32 tableType;
+    const LZ4_byte* dictionary;
+    const LZ4_stream_t_internal* dictCtx;
+    LZ4_u32 dictSize;
+};

 typedef struct {
-    const unsigned char* externalDict;
+    const LZ4_byte* externalDict;
    size_t extDictSize;
-    const unsigned char* prefixEnd;
+    const LZ4_byte* prefixEnd;
    size_t prefixSize;
 } LZ4_streamDecode_t_internal;

-#endif

-/*!
- * LZ4_stream_t :
- * information structure to track an LZ4 stream.
- * init this structure before first use.
- * note : only use in association with static linking !
- *        this definition is not API/ABI safe,
- *        it may change in a future version !
+/*! LZ4_stream_t :
+ *  Do not use below internal definitions directly !
+ *  Declare or allocate an LZ4_stream_t instead.
+ *  LZ4_stream_t can also be created using LZ4_createStream(), which is recommended.
+ *  The structure definition can be convenient for static allocation
+ *  (on stack, or as part of larger structure).
+ *  Init this structure with LZ4_initStream() before first use.
+ *  note : only use this definition in association with static linking !
+ *  this definition is not API/ABI safe, and may change in future versions.
 */
-#define LZ4_STREAMSIZE_U64 ((1 << (LZ4_MEMORY_USAGE-3)) + 4)
-#define LZ4_STREAMSIZE     (LZ4_STREAMSIZE_U64 * sizeof(uint64_t))
+#define LZ4_STREAMSIZE       16416  /* static size, for inter-version compatibility */
+#define LZ4_STREAMSIZE_VOIDP (LZ4_STREAMSIZE / sizeof(void*))
 union LZ4_stream_u {
-    uint64_t table[LZ4_STREAMSIZE_U64];
+    void* table[LZ4_STREAMSIZE_VOIDP];
    LZ4_stream_t_internal internal_donotuse;
-} ;  /* previously typedef'd to LZ4_stream_t */
+}; /* previously typedef'd to LZ4_stream_t */


-/*!
- * LZ4_streamDecode_t :
+/*! LZ4_initStream() : v1.9.0+
+ *  An LZ4_stream_t structure must be initialized at least once.
+ *  This is automatically done when invoking LZ4_createStream(),
+ *  but it's not when the structure is simply declared on stack (for example).
+ *
+ *  Use LZ4_initStream() to properly initialize a newly declared LZ4_stream_t.
+ *  It can also initialize any arbitrary buffer of sufficient size,
+ *  and will @return a pointer of proper type upon initialization.
+ *
+ *  Note : initialization fails if size and alignment conditions are not respected.
+ *         In which case, the function will @return NULL.
+ *  Note2: An LZ4_stream_t structure guarantees correct alignment and size.
+ *  Note3: Before v1.9.0, use LZ4_resetStream() instead
+ */
+LZ4LIB_API LZ4_stream_t* LZ4_initStream (void* buffer, size_t size);
+
+
+/*! LZ4_streamDecode_t :
 *  information structure to track an LZ4 stream during decompression.
- * init this structure  using LZ4_setStreamDecode (or memset()) before first use
+ *  init this structure  using LZ4_setStreamDecode() before first use.
 *  note : only use in association with static linking !
 *         this definition is not API/ABI safe,
 *         and may change in a future version !
 */
-#define LZ4_STREAMDECODESIZE_U64  4
-#define LZ4_STREAMDECODESIZE     (LZ4_STREAMDECODESIZE_U64 * sizeof(uint64_t))
+#define LZ4_STREAMDECODESIZE_U64 (4 + ((sizeof(void*)==16) ? 2 : 0) /*AS-400*/ )
+#define LZ4_STREAMDECODESIZE     (LZ4_STREAMDECODESIZE_U64 * sizeof(unsigned long long))
 union LZ4_streamDecode_u {
-    uint64_t table[LZ4_STREAMDECODESIZE_U64];
+    unsigned long long table[LZ4_STREAMDECODESIZE_U64];
    LZ4_streamDecode_t_internal internal_donotuse;
 } ;   /* previously typedef'd to LZ4_streamDecode_t */


+
 /*-************************************
 *  Obsolete Functions
 **************************************/

 /*! Deprecation warnings
-   Should deprecation warnings be a problem,
-   it is generally possible to disable them,
-   typically with -Wno-deprecated-declarations for gcc
-   or _CRT_SECURE_NO_WARNINGS in Visual.
-   Otherwise, it's also possible to define LZ4_DISABLE_DEPRECATE_WARNINGS */
+ *
+ *  Deprecated functions make the compiler generate a warning when invoked.
+ *  This is meant to invite users to update their source code.
+ *  Should deprecation warnings be a problem, it is generally possible to disable them,
+ *  typically with -Wno-deprecated-declarations for gcc
+ *  or _CRT_SECURE_NO_WARNINGS in Visual.
+ *
+ *  Another method is to define LZ4_DISABLE_DEPRECATE_WARNINGS
+ *  before including the header file.
+ */
 #ifdef LZ4_DISABLE_DEPRECATE_WARNINGS
 #  define LZ4_DEPRECATED(message)   /* disable deprecation warnings */
 #else
-#  define LZ4_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
-#  if defined(__clang__) /* clang doesn't handle mixed C++11 and CNU attributes */
-#    define LZ4_DEPRECATED(message) __attribute__((deprecated(message)))
-#  elif defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
+#  if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
 #    define LZ4_DEPRECATED(message) [[deprecated(message)]]
-#  elif (LZ4_GCC_VERSION >= 405)
-#    define LZ4_DEPRECATED(message) __attribute__((deprecated(message)))
-#  elif (LZ4_GCC_VERSION >= 301)
-#    define LZ4_DEPRECATED(message) __attribute__((deprecated))
 #  elif defined(_MSC_VER)
 #    define LZ4_DEPRECATED(message) __declspec(deprecated(message))
+#  elif defined(__clang__) || (defined(__GNUC__) && (__GNUC__ * 10 + __GNUC_MINOR__ >= 45))
+#    define LZ4_DEPRECATED(message) __attribute__((deprecated(message)))
+#  elif defined(__GNUC__) && (__GNUC__ * 10 + __GNUC_MINOR__ >= 31)
+#    define LZ4_DEPRECATED(message) __attribute__((deprecated))
 #  else
-#    pragma message("WARNING: You need to implement LZ4_DEPRECATED for this compiler")
-#    define LZ4_DEPRECATED(message)
+#    pragma message("WARNING: LZ4_DEPRECATED needs custom implementation for this compiler")
+#    define LZ4_DEPRECATED(message)   /* disabled */
 #  endif
 #endif /* LZ4_DISABLE_DEPRECATE_WARNINGS */

-/* Obsolete compression functions */
-LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_default() instead") int LZ4_compress               (const char* source, char* dest, int sourceSize);
-LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_default() instead") int LZ4_compress_limitedOutput (const char* source, char* dest, int sourceSize, int maxOutputSize);
-LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") int LZ4_compress_withState               (void* state, const char* source, char* dest, int inputSize);
-LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") int LZ4_compress_limitedOutput_withState (void* state, const char* source, char* dest, int inputSize, int maxOutputSize);
-LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") int LZ4_compress_continue                (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize);
-LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") int LZ4_compress_limitedOutput_continue  (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize, int maxOutputSize);
+/*! Obsolete compression functions (since v1.7.3) */
+LZ4_DEPRECATED("use LZ4_compress_default() instead")       LZ4LIB_API int LZ4_compress               (const char* src, char* dest, int srcSize);
+LZ4_DEPRECATED("use LZ4_compress_default() instead")       LZ4LIB_API int LZ4_compress_limitedOutput (const char* src, char* dest, int srcSize, int maxOutputSize);
+LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") LZ4LIB_API int LZ4_compress_withState               (void* state, const char* source, char* dest, int inputSize);
+LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") LZ4LIB_API int LZ4_compress_limitedOutput_withState (void* state, const char* source, char* dest, int inputSize, int maxOutputSize);
+LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") LZ4LIB_API int LZ4_compress_continue                (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize);
+LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") LZ4LIB_API int LZ4_compress_limitedOutput_continue  (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize, int maxOutputSize);

-/* Obsolete decompression functions */
-LZ4LIB_API LZ4_DEPRECATED("use LZ4_decompress_fast() instead") int LZ4_uncompress (const char* source, char* dest, int outputSize);
-LZ4LIB_API LZ4_DEPRECATED("use LZ4_decompress_safe() instead") int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize);
+/*! Obsolete decompression functions (since v1.8.0) */
+LZ4_DEPRECATED("use LZ4_decompress_fast() instead") LZ4LIB_API int LZ4_uncompress (const char* source, char* dest, int outputSize);
+LZ4_DEPRECATED("use LZ4_decompress_safe() instead") LZ4LIB_API int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize);
+
+/* Obsolete streaming functions (since v1.7.0)
+ * degraded functionality; do not use!
+ *
+ * In order to perform streaming compression, these functions depended on data
+ * that is no longer tracked in the state. They have been preserved as well as
+ * possible: using them will still produce a correct output. However, they don't
+ * actually retain any history between compression calls. The compression ratio
+ * achieved will therefore be no better than compressing each chunk
+ * independently.
+ */
+LZ4_DEPRECATED("Use LZ4_createStream() instead") LZ4LIB_API void* LZ4_create (char* inputBuffer);
+LZ4_DEPRECATED("Use LZ4_createStream() instead") LZ4LIB_API int   LZ4_sizeofStreamState(void);
+LZ4_DEPRECATED("Use LZ4_resetStream() instead")  LZ4LIB_API int   LZ4_resetStreamState(void* state, char* inputBuffer);
+LZ4_DEPRECATED("Use LZ4_saveDict() instead")     LZ4LIB_API char* LZ4_slideInputBuffer (void* state);
+
+/*! Obsolete streaming decoding functions (since v1.7.0) */
+LZ4_DEPRECATED("use LZ4_decompress_safe_usingDict() instead") LZ4LIB_API int LZ4_decompress_safe_withPrefix64k (const char* src, char* dst, int compressedSize, int maxDstSize);
+LZ4_DEPRECATED("use LZ4_decompress_fast_usingDict() instead") LZ4LIB_API int LZ4_decompress_fast_withPrefix64k (const char* src, char* dst, int originalSize);
+
+/*! Obsolete LZ4_decompress_fast variants (since v1.9.0) :
+ *  These functions used to be faster than LZ4_decompress_safe(),
+ *  but this is no longer the case. They are now slower.
+ *  This is because LZ4_decompress_fast() doesn't know the input size,
+ *  and therefore must progress more cautiously into the input buffer to not read beyond the end of block.
+ *  On top of that `LZ4_decompress_fast()` is not protected vs malformed or malicious inputs, making it a security liability.
+ *  As a consequence, LZ4_decompress_fast() is strongly discouraged, and deprecated.
+ *
+ *  The last remaining LZ4_decompress_fast() specificity is that
+ *  it can decompress a block without knowing its compressed size.
+ *  Such functionality can be achieved in a more secure manner
+ *  by employing LZ4_decompress_safe_partial().
+ *
+ *  Parameters:
+ *  originalSize : is the uncompressed size to regenerate.
+ *                 `dst` must be already allocated, its size must be >= 'originalSize' bytes.
+ * @return : number of bytes read from source buffer (== compressed size).
+ *           The function expects to finish at block's end exactly.
+ *           If the source stream is detected malformed, the function stops decoding and returns a negative result.
+ *  note : LZ4_decompress_fast*() requires originalSize. Thanks to this information, it never writes past the output buffer.
+ *         However, since it doesn't know its 'src' size, it may read an unknown amount of input, past input buffer bounds.
+ *         Also, since match offsets are not validated, match reads from 'src' may underflow too.
+ *         These issues never happen if input (compressed) data is correct.
+ *         But they may happen if input data is invalid (error or intentional tampering).
+ *         As a consequence, use these functions in trusted environments with trusted data **only**.
+ */
+LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe() instead")
+LZ4LIB_API int LZ4_decompress_fast (const char* src, char* dst, int originalSize);
+LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe_continue() instead")
+LZ4LIB_API int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* src, char* dst, int originalSize);
+LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe_usingDict() instead")
+LZ4LIB_API int LZ4_decompress_fast_usingDict (const char* src, char* dst, int originalSize, const char* dictStart, int dictSize);

-/* Obsolete streaming functions; use new streaming interface whenever possible */
-LZ4LIB_API LZ4_DEPRECATED("use LZ4_createStream() instead") void* LZ4_create (char* inputBuffer);
-LZ4LIB_API LZ4_DEPRECATED("use LZ4_createStream() instead") int   LZ4_sizeofStreamState(void);
-LZ4LIB_API LZ4_DEPRECATED("use LZ4_resetStream() instead")  int   LZ4_resetStreamState(void* state, char* inputBuffer);
-LZ4LIB_API LZ4_DEPRECATED("use LZ4_saveDict() instead")     char* LZ4_slideInputBuffer (void* state);
+/*! LZ4_resetStream() :
+ *  An LZ4_stream_t structure must be initialized at least once.
+ *  This is done with LZ4_initStream(), or LZ4_resetStream().
+ *  Consider switching to LZ4_initStream(),
+ *  invoking LZ4_resetStream() will trigger deprecation warnings in the future.
+ */
+LZ4LIB_API void LZ4_resetStream (LZ4_stream_t* streamPtr);

-/* Obsolete streaming decoding functions */
-LZ4LIB_API LZ4_DEPRECATED("use LZ4_decompress_safe_usingDict() instead") int LZ4_decompress_safe_withPrefix64k (const char* src, char* dst, int compressedSize, int maxDstSize);
-LZ4LIB_API LZ4_DEPRECATED("use LZ4_decompress_fast_usingDict() instead") int LZ4_decompress_fast_withPrefix64k (const char* src, char* dst, int originalSize);

-#endif /* LZ4_H_2983827168210 */
+#endif /* LZ4_H_98237428734687 */


 #if defined (__cplusplus)

--- a/deps/lz4/src/lz4.c
+++ b/deps/lz4/src/lz4.c
 /*
   LZ4 - Fast LZ compression algorithm
-   Copyright (C) 2011-2017, Yann Collet.
+   Copyright (C) 2011-present, Yann Collet.

   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)

@@ -32,7 +32,6 @@
    - LZ4 source repository : https://github.com/lz4/lz4
 */

-
 /*-************************************
 *  Tuning parameters
 **************************************/
@@ -46,10 +45,16 @@
 #endif

 /*
- * ACCELERATION_DEFAULT :
+ * LZ4_ACCELERATION_DEFAULT :
 * Select "acceleration" for LZ4_compress_fast() when parameter value <= 0
 */
-#define ACCELERATION_DEFAULT 1
+#define LZ4_ACCELERATION_DEFAULT 1
+/*
+ * LZ4_ACCELERATION_MAX :
+ * Any "acceleration" value higher than this threshold
+ * get treated as LZ4_ACCELERATION_MAX instead (fix #876)
+ */
+#define LZ4_ACCELERATION_MAX 65537


 /*-************************************
@@ -69,9 +74,11 @@
 * Prefer these methods in priority order (0 > 1 > 2)
 */
 #ifndef LZ4_FORCE_MEMORY_ACCESS   /* can be defined externally */
-#  if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
+#  if defined(__GNUC__) && \
+  ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) \
+  || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
 #    define LZ4_FORCE_MEMORY_ACCESS 2
-#  elif defined(__INTEL_COMPILER) || defined(__GNUC__)
+#  elif (defined(__INTEL_COMPILER) && !defined(_WIN32)) || defined(__GNUC__)
 #    define LZ4_FORCE_MEMORY_ACCESS 1
 #  endif
 #endif
@@ -80,14 +87,33 @@
 * LZ4_FORCE_SW_BITCOUNT
 * Define this parameter if your target system or compiler does not support hardware bit count
 */
-#if defined(_MSC_VER) && defined(_WIN32_WCE)   /* Visual Studio for Windows CE does not support Hardware bit count */
+#if defined(_MSC_VER) && defined(_WIN32_WCE)   /* Visual Studio for WinCE doesn't support Hardware bit count */
+#  undef  LZ4_FORCE_SW_BITCOUNT  /* avoid double def */
 #  define LZ4_FORCE_SW_BITCOUNT
 #endif


+
 /*-************************************
 *  Dependency
 **************************************/
+/*
+ * LZ4_SRC_INCLUDED:
+ * Amalgamation flag, whether lz4.c is included
+ */
+#ifndef LZ4_SRC_INCLUDED
+#  define LZ4_SRC_INCLUDED 1
+#endif
+
+#ifndef LZ4_STATIC_LINKING_ONLY
+#define LZ4_STATIC_LINKING_ONLY
+#endif
+
+#ifndef LZ4_DISABLE_DEPRECATE_WARNINGS
+#define LZ4_DISABLE_DEPRECATE_WARNINGS /* due to LZ4_decompress_safe_withPrefix64k */
+#endif
+
+#define LZ4_STATIC_LINKING_ONLY  /* LZ4_DISTANCE_MAX */
 #include "lz4.h"
 /* see also "memory routines" below */

@@ -95,10 +121,9 @@
 /*-************************************
 *  Compiler Options
 **************************************/
-#ifdef _MSC_VER    /* Visual Studio */
-#  include <intrin.h>
+#if defined(_MSC_VER) && (_MSC_VER >= 1400)  /* Visual Studio 2005+ */
+#  include <intrin.h>               /* only present in VS2005+ */
 #  pragma warning(disable : 4127)   /* disable: C4127: conditional expression is constant */
-#  pragma warning(disable : 4293)        /* disable: C4293: too large shift (32-bits) */
 #endif  /* _MSC_VER */

 #ifndef LZ4_FORCE_INLINE
@@ -117,29 +142,135 @@
 #  endif  /* _MSC_VER */
 #endif /* LZ4_FORCE_INLINE */

+/* LZ4_FORCE_O2 and LZ4_FORCE_INLINE
+ * gcc on ppc64le generates an unrolled SIMDized loop for LZ4_wildCopy8,
+ * together with a simple 8-byte copy loop as a fall-back path.
+ * However, this optimization hurts the decompression speed by >30%,
+ * because the execution does not go to the optimized loop
+ * for typical compressible data, and all of the preamble checks
+ * before going to the fall-back path become useless overhead.
+ * This optimization happens only with the -O3 flag, and -O2 generates
+ * a simple 8-byte copy loop.
+ * With gcc on ppc64le, all of the LZ4_decompress_* and LZ4_wildCopy8
+ * functions are annotated with __attribute__((optimize("O2"))),
+ * and also LZ4_wildCopy8 is forcibly inlined, so that the O2 attribute
+ * of LZ4_wildCopy8 does not affect the compression speed.
+ */
+#if defined(__PPC64__) && defined(__LITTLE_ENDIAN__) && defined(__GNUC__) && !defined(__clang__)
+#  define LZ4_FORCE_O2  __attribute__((optimize("O2")))
+#  undef LZ4_FORCE_INLINE
+#  define LZ4_FORCE_INLINE  static __inline __attribute__((optimize("O2"),always_inline))
+#else
+#  define LZ4_FORCE_O2
+#endif
+
 #if (defined(__GNUC__) && (__GNUC__ >= 3)) || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) || defined(__clang__)
 #  define expect(expr,value)    (__builtin_expect ((expr),(value)) )
 #else
 #  define expect(expr,value)    (expr)
 #endif

+#ifndef likely
 #define likely(expr)     expect((expr) != 0, 1)
+#endif
+#ifndef unlikely
 #define unlikely(expr)   expect((expr) != 0, 0)
+#endif
+
+/* Should the alignment test prove unreliable, for some reason,
+ * it can be disabled by setting LZ4_ALIGN_TEST to 0 */
+#ifndef LZ4_ALIGN_TEST  /* can be externally provided */
+# define LZ4_ALIGN_TEST 1
+#endif


 /*-************************************
 *  Memory routines
 **************************************/
-#include <stdlib.h>   /* malloc, calloc, free */
-#define ALLOCATOR(n,s) calloc(n,s)
-#define FREEMEM        free
+#ifdef LZ4_USER_MEMORY_FUNCTIONS
+/* memory management functions can be customized by user project.
+ * Below functions must exist somewhere in the Project
+ * and be available at link time */
+void* LZ4_malloc(size_t s);
+void* LZ4_calloc(size_t n, size_t s);
+void  LZ4_free(void* p);
+# define ALLOC(s)          LZ4_malloc(s)
+# define ALLOC_AND_ZERO(s) LZ4_calloc(1,s)
+# define FREEMEM(p)        LZ4_free(p)
+#else
+# include <stdlib.h>   /* malloc, calloc, free */
+# define ALLOC(s)          malloc(s)
+# define ALLOC_AND_ZERO(s) calloc(1,s)
+# define FREEMEM(p)        free(p)
+#endif
+
 #include <string.h>   /* memset, memcpy */
-#define MEM_INIT       memset
+#define MEM_INIT(p,v,s)   memset((p),(v),(s))
+
+
+/*-************************************
+*  Common Constants
+**************************************/
+#define MINMATCH 4
+
+#define WILDCOPYLENGTH 8
+#define LASTLITERALS   5   /* see ../doc/lz4_Block_format.md#parsing-restrictions */
+#define MFLIMIT       12   /* see ../doc/lz4_Block_format.md#parsing-restrictions */
+#define MATCH_SAFEGUARD_DISTANCE  ((2*WILDCOPYLENGTH) - MINMATCH)   /* ensure it's possible to write 2 x wildcopyLength without overflowing output buffer */
+#define FASTLOOP_SAFE_DISTANCE 64
+static const int LZ4_minLength = (MFLIMIT+1);
+
+#define KB *(1 <<10)
+#define MB *(1 <<20)
+#define GB *(1U<<30)
+
+#define LZ4_DISTANCE_ABSOLUTE_MAX 65535
+#if (LZ4_DISTANCE_MAX > LZ4_DISTANCE_ABSOLUTE_MAX)   /* max supported by LZ4 format */
+#  error "LZ4_DISTANCE_MAX is too big : must be <= 65535"
+#endif
+
+#define ML_BITS  4
+#define ML_MASK  ((1U<<ML_BITS)-1)
+#define RUN_BITS (8-ML_BITS)
+#define RUN_MASK ((1U<<RUN_BITS)-1)
+
+
+/*-************************************
+*  Error detection
+**************************************/
+#if defined(LZ4_DEBUG) && (LZ4_DEBUG>=1)
+#  include <assert.h>
+#else
+#  ifndef assert
+#    define assert(condition) ((void)0)
+#  endif
+#endif
+
+#define LZ4_STATIC_ASSERT(c)   { enum { LZ4_static_assert = 1/(int)(!!(c)) }; }   /* use after variable declarations */
+
+#if defined(LZ4_DEBUG) && (LZ4_DEBUG>=2)
+#  include <stdio.h>
+   static int g_debuglog_enable = 1;
+#  define DEBUGLOG(l, ...) {                          \
+        if ((g_debuglog_enable) && (l<=LZ4_DEBUG)) {  \
+            fprintf(stderr, __FILE__ ": ");           \
+            fprintf(stderr, __VA_ARGS__);             \
+            fprintf(stderr, " \n");                   \
+    }   }
+#else
+#  define DEBUGLOG(l, ...) {}    /* disabled */
+#endif
+
+static int LZ4_isAligned(const void* ptr, size_t alignment)
+{
+    return ((size_t)ptr & (alignment -1)) == 0;
+}


 /*-************************************
-*  Basic Types
+*  Types
 **************************************/
+#include <limits.h>
 #if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
 # include <stdint.h>
  typedef  uint8_t BYTE;
@@ -149,11 +280,14 @@
  typedef uint64_t U64;
  typedef uintptr_t uptrval;
 #else
+# if UINT_MAX != 4294967295UL
+#   error "LZ4 code (when not C++ or C99) assumes that sizeof(int) == 4"
+# endif
  typedef unsigned char       BYTE;
  typedef unsigned short      U16;
  typedef unsigned int        U32;
  typedef   signed int        S32;
-  typedef uint64_t            U64;
+  typedef unsigned long long  U64;
  typedef size_t              uptrval;   /* generally true, except OpenVMS-64 */
 #endif

@@ -163,9 +297,31 @@
  typedef size_t reg_t;   /* 32-bits in x32 mode */
 #endif

+typedef enum {
+    notLimited = 0,
+    limitedOutput = 1,
+    fillOutput = 2
+} limitedOutput_directive;
+
+
 /*-************************************
 *  Reading and writing into memory
 **************************************/
+
+/**
+ * LZ4 relies on memcpy with a constant size being inlined. In freestanding
+ * environments, the compiler can't assume the implementation of memcpy() is
+ * standard compliant, so it can't apply its specialized memcpy() inlining
+ * logic. When possible, use __builtin_memcpy() to tell the compiler to analyze
+ * memcpy() as if it were standard compliant, so it can inline it in freestanding
+ * environments. This is needed when decompressing the Linux Kernel, for example.
+ */
+#if defined(__GNUC__) && (__GNUC__ >= 4)
+#define LZ4_memcpy(dst, src, size) __builtin_memcpy(dst, src, size)
+#else
+#define LZ4_memcpy(dst, src, size) memcpy(dst, src, size)
+#endif
+
 static unsigned LZ4_isLittleEndian(void)
 {
    const union { U32 u; BYTE c[4]; } one = { 1 };   /* don't use static : performance detrimental */
@@ -196,31 +352,31 @@ static reg_t LZ4_read_ARCH(const void* ptr) { return ((const unalign*)ptr)->uArc
 static void LZ4_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; }
 static void LZ4_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; }

-#else  /* safe and portable access through memcpy() */
+#else  /* safe and portable access using memcpy() */

 static U16 LZ4_read16(const void* memPtr)
 {
-    U16 val; memcpy(&val, memPtr, sizeof(val)); return val;
+    U16 val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val;
 }

 static U32 LZ4_read32(const void* memPtr)
 {
-    U32 val; memcpy(&val, memPtr, sizeof(val)); return val;
+    U32 val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val;
 }

 static reg_t LZ4_read_ARCH(const void* memPtr)
 {
-    reg_t val; memcpy(&val, memPtr, sizeof(val)); return val;
+    reg_t val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val;
 }

 static void LZ4_write16(void* memPtr, U16 value)
 {
-    memcpy(memPtr, &value, sizeof(value));
+    LZ4_memcpy(memPtr, &value, sizeof(value));
 }

 static void LZ4_write32(void* memPtr, U32 value)
 {
-    memcpy(memPtr, &value, sizeof(value));
+    LZ4_memcpy(memPtr, &value, sizeof(value));
 }

 #endif /* LZ4_FORCE_MEMORY_ACCESS */
@@ -247,130 +403,216 @@ static void LZ4_writeLE16(void* memPtr, U16 value)
    }
 }

-static void LZ4_copy8(void* dst, const void* src)
-{
-    memcpy(dst,src,8);
-}
-
 /* customized variant of memcpy, which can overwrite up to 8 bytes beyond dstEnd */
-static void LZ4_wildCopy(void* dstPtr, const void* srcPtr, void* dstEnd)
+LZ4_FORCE_INLINE
+void LZ4_wildCopy8(void* dstPtr, const void* srcPtr, void* dstEnd)
 {
    BYTE* d = (BYTE*)dstPtr;
    const BYTE* s = (const BYTE*)srcPtr;
    BYTE* const e = (BYTE*)dstEnd;

-    do { LZ4_copy8(d,s); d+=8; s+=8; } while (d<e);
+    do { LZ4_memcpy(d,s,8); d+=8; s+=8; } while (d<e);
 }

+static const unsigned inc32table[8] = {0, 1, 2,  1,  0,  4, 4, 4};
+static const int      dec64table[8] = {0, 0, 0, -1, -4,  1, 2, 3};

-/*-************************************
-*  Common Constants
-**************************************/
-#define MINMATCH 4

-#define WILDCOPYLENGTH 8
-#define LASTLITERALS 5
-#define MFLIMIT (WILDCOPYLENGTH+MINMATCH)
-static const int LZ4_minLength = (MFLIMIT+1);
+#ifndef LZ4_FAST_DEC_LOOP
+#  if defined __i386__ || defined _M_IX86 || defined __x86_64__ || defined _M_X64
+#    define LZ4_FAST_DEC_LOOP 1
+#  elif defined(__aarch64__) && !defined(__clang__)
+     /* On aarch64, we disable this optimization for clang because on certain
+      * mobile chipsets, performance is reduced with clang. For information
+      * refer to https://github.com/lz4/lz4/pull/707 */
+#    define LZ4_FAST_DEC_LOOP 1
+#  else
+#    define LZ4_FAST_DEC_LOOP 0
+#  endif
+#endif

-#define KB *(1 <<10)
-#define MB *(1 <<20)
-#define GB *(1U<<30)
+#if LZ4_FAST_DEC_LOOP

-#define MAXD_LOG 16
-#define MAX_DISTANCE ((1 << MAXD_LOG) - 1)
+LZ4_FORCE_INLINE void
+LZ4_memcpy_using_offset_base(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset)
+{
+    assert(srcPtr + offset == dstPtr);
+    if (offset < 8) {
+        LZ4_write32(dstPtr, 0);   /* silence an msan warning when offset==0 */
+        dstPtr[0] = srcPtr[0];
+        dstPtr[1] = srcPtr[1];
+        dstPtr[2] = srcPtr[2];
+        dstPtr[3] = srcPtr[3];
+        srcPtr += inc32table[offset];
+        LZ4_memcpy(dstPtr+4, srcPtr, 4);
+        srcPtr -= dec64table[offset];
+        dstPtr += 8;
+    } else {
+        LZ4_memcpy(dstPtr, srcPtr, 8);
+        dstPtr += 8;
+        srcPtr += 8;
+    }

-#define ML_BITS  4
-#define ML_MASK  ((1U<<ML_BITS)-1)
-#define RUN_BITS (8-ML_BITS)
-#define RUN_MASK ((1U<<RUN_BITS)-1)
+    LZ4_wildCopy8(dstPtr, srcPtr, dstEnd);
+}

+/* customized variant of memcpy, which can overwrite up to 32 bytes beyond dstEnd
+ * this version copies two times 16 bytes (instead of one time 32 bytes)
+ * because it must be compatible with offsets >= 16. */
+LZ4_FORCE_INLINE void
+LZ4_wildCopy32(void* dstPtr, const void* srcPtr, void* dstEnd)
+{
+    BYTE* d = (BYTE*)dstPtr;
+    const BYTE* s = (const BYTE*)srcPtr;
+    BYTE* const e = (BYTE*)dstEnd;

-/*-************************************
-*  Error detection
-**************************************/
-#define LZ4_STATIC_ASSERT(c)   { enum { LZ4_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
+    do { LZ4_memcpy(d,s,16); LZ4_memcpy(d+16,s+16,16); d+=32; s+=32; } while (d<e);
+}

-#if defined(LZ4_DEBUG) && (LZ4_DEBUG>=2)
-#  include <stdio.h>
-#  define DEBUGLOG(l, ...) {                          \
-                if (l<=LZ4_DEBUG) {                   \
-                    fprintf(stderr, __FILE__ ": ");   \
-                    fprintf(stderr, __VA_ARGS__);     \
-                    fprintf(stderr, " \n");           \
-            }   }
-#else
-#  define DEBUGLOG(l, ...)      {}    /* disabled */
+/* LZ4_memcpy_using_offset()  presumes :
+ * - dstEnd >= dstPtr + MINMATCH
+ * - there is at least 8 bytes available to write after dstEnd */
+LZ4_FORCE_INLINE void
+LZ4_memcpy_using_offset(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset)
+{
+    BYTE v[8];
+
+    assert(dstEnd >= dstPtr + MINMATCH);
+
+    switch(offset) {
+    case 1:
+        MEM_INIT(v, *srcPtr, 8);
+        break;
+    case 2:
+        LZ4_memcpy(v, srcPtr, 2);
+        LZ4_memcpy(&v[2], srcPtr, 2);
+        LZ4_memcpy(&v[4], v, 4);
+        break;
+    case 4:
+        LZ4_memcpy(v, srcPtr, 4);
+        LZ4_memcpy(&v[4], srcPtr, 4);
+        break;
+    default:
+        LZ4_memcpy_using_offset_base(dstPtr, srcPtr, dstEnd, offset);
+        return;
+    }
+
+    LZ4_memcpy(dstPtr, v, 8);
+    dstPtr += 8;
+    while (dstPtr < dstEnd) {
+        LZ4_memcpy(dstPtr, v, 8);
+        dstPtr += 8;
+    }
+}
 #endif


 /*-************************************
 *  Common functions
 **************************************/
-static unsigned LZ4_NbCommonBytes (register reg_t val)
+static unsigned LZ4_NbCommonBytes (reg_t val)
 {
+    assert(val != 0);
    if (LZ4_isLittleEndian()) {
-        if (sizeof(val)==8) {
-#       if defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT)
-            uint64_t r = 0;
-            _BitScanForward64( &r, (U64)val );
-            return (int)(r>>3);
-#       elif (defined(__clang__) || (defined(__GNUC__) && (__GNUC__>=3))) && !defined(LZ4_FORCE_SW_BITCOUNT)
-            return (__builtin_ctzll((U64)val) >> 3);
+        if (sizeof(val) == 8) {
+#       if defined(_MSC_VER) && (_MSC_VER >= 1800) && defined(_M_AMD64) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            /* x64 CPUS without BMI support interpret `TZCNT` as `REP BSF` */
+            return (unsigned)_tzcnt_u64(val) >> 3;
+#       elif defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            unsigned long r = 0;
+            _BitScanForward64(&r, (U64)val);
+            return (unsigned)r >> 3;
+#       elif (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \
+                            ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \
+                                        !defined(LZ4_FORCE_SW_BITCOUNT)
+            return (unsigned)__builtin_ctzll((U64)val) >> 3;
 #       else
-            static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 };
-            return DeBruijnBytePos[((U64)((val & -(int64_t)val) * 0x0218A392CDABBD3FULL)) >> 58];
+            const U64 m = 0x0101010101010101ULL;
+            val ^= val - 1;
+            return (unsigned)(((U64)((val & (m - 1)) * m)) >> 56);
 #       endif
        } else /* 32 bits */ {
-#       if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
-            uint64_t r;
-            _BitScanForward( &r, (U32)val );
-            return (int)(r>>3);
-#       elif (defined(__clang__) || (defined(__GNUC__) && (__GNUC__>=3))) && !defined(LZ4_FORCE_SW_BITCOUNT)
-            return (__builtin_ctz((U32)val) >> 3);
+#       if defined(_MSC_VER) && (_MSC_VER >= 1400) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            unsigned long r;
+            _BitScanForward(&r, (U32)val);
+            return (unsigned)r >> 3;
+#       elif (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \
+                            ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \
+                        !defined(__TINYC__) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            return (unsigned)__builtin_ctz((U32)val) >> 3;
 #       else
-            static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 };
-            return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
+            const U32 m = 0x01010101;
+            return (unsigned)((((val - 1) ^ val) & (m - 1)) * m) >> 24;
 #       endif
        }
    } else   /* Big Endian CPU */ {
        if (sizeof(val)==8) {
-#       if defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT)
-            uint64_t r = 0;
-            _BitScanReverse64( &r, val );
-            return (unsigned)(r>>3);
-#       elif (defined(__clang__) || (defined(__GNUC__) && (__GNUC__>=3))) && !defined(LZ4_FORCE_SW_BITCOUNT)
-            return (__builtin_clzll((U64)val) >> 3);
+#       if (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \
+                            ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \
+                        !defined(__TINYC__) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            return (unsigned)__builtin_clzll((U64)val) >> 3;
 #       else
+#if 1
+            /* this method is probably faster,
+             * but adds a 128 bytes lookup table */
+            static const unsigned char ctz7_tab[128] = {
+                7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+                4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+                5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+                4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+                6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+                4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+                5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+                4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+            };
+            U64 const mask = 0x0101010101010101ULL;
+            U64 const t = (((val >> 8) - mask) | val) & mask;
+            return ctz7_tab[(t * 0x0080402010080402ULL) >> 57];
+#else
+            /* this method doesn't consume memory space like the previous one,
+             * but it contains several branches,
+             * that may end up slowing execution */
+            static const U32 by32 = sizeof(val)*4;  /* 32 on 64 bits (goal), 16 on 32 bits.
+            Just to avoid some static analyzer complaining about shift by 32 on 32-bits target.
+            Note that this code path is never triggered in 32-bits mode. */
            unsigned r;
-            if (!(val>>32)) { r=4; } else { r=0; val>>=32; }
+            if (!(val>>by32)) { r=4; } else { r=0; val>>=by32; }
            if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
            r += (!val);
            return r;
+#endif
 #       endif
        } else /* 32 bits */ {
-#       if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
-            uint64_t r = 0;
-            _BitScanReverse( &r, (uint64_t)val );
-            return (unsigned)(r>>3);
-#       elif (defined(__clang__) || (defined(__GNUC__) && (__GNUC__>=3))) && !defined(LZ4_FORCE_SW_BITCOUNT)
-            return (__builtin_clz((U32)val) >> 3);
+#       if (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \
+                            ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \
+                                        !defined(LZ4_FORCE_SW_BITCOUNT)
+            return (unsigned)__builtin_clz((U32)val) >> 3;
 #       else
-            unsigned r;
-            if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
-            r += (!val);
-            return r;
+            val >>= 8;
+            val = ((((val + 0x00FFFF00) | 0x00FFFFFF) + val) |
+              (val + 0x00FF0000)) >> 24;
+            return (unsigned)val ^ 3;
 #       endif
        }
    }
 }

+
 #define STEPSIZE sizeof(reg_t)
-static unsigned LZ4_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit)
+LZ4_FORCE_INLINE
+unsigned LZ4_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit)
 {
    const BYTE* const pStart = pIn;

-    while (likely(pIn<pInLimit-(STEPSIZE-1))) {
+    if (likely(pIn < pInLimit-(STEPSIZE-1))) {
+        reg_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn);
+        if (!diff) {
+            pIn+=STEPSIZE; pMatch+=STEPSIZE;
+        } else {
+            return LZ4_NbCommonBytes(diff);
+    }   }
+
+    while (likely(pIn < pInLimit-(STEPSIZE-1))) {
        reg_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn);
        if (!diff) { pIn+=STEPSIZE; pMatch+=STEPSIZE; continue; }
        pIn += LZ4_NbCommonBytes(diff);
@@ -395,15 +637,34 @@ static const U32 LZ4_skipTrigger = 6;  /* Increase this value ==> compression ru
 /*-************************************
 *  Local Structures and types
 **************************************/
-typedef enum { notLimited = 0, limitedOutput = 1 } limitedOutput_directive;
-typedef enum { byPtr, byU32, byU16 } tableType_t;
-
-typedef enum { noDict = 0, withPrefix64k, usingExtDict } dict_directive;
+typedef enum { clearedTable = 0, byPtr, byU32, byU16 } tableType_t;
+
+/**
+ * This enum distinguishes several different modes of accessing previous
+ * content in the stream.
+ *
+ * - noDict        : There is no preceding content.
+ * - withPrefix64k : Table entries up to ctx->dictSize before the current blob
+ *                   blob being compressed are valid and refer to the preceding
+ *                   content (of length ctx->dictSize), which is available
+ *                   contiguously preceding in memory the content currently
+ *                   being compressed.
+ * - usingExtDict  : Like withPrefix64k, but the preceding content is somewhere
+ *                   else in memory, starting at ctx->dictionary with length
+ *                   ctx->dictSize.
+ * - usingDictCtx  : Like usingExtDict, but everything concerning the preceding
+ *                   content is in a separate context, pointed to by
+ *                   ctx->dictCtx. ctx->dictionary, ctx->dictSize, and table
+ *                   entries in the current context that refer to positions
+ *                   preceding the beginning of the current compression are
+ *                   ignored. Instead, ctx->dictCtx->dictionary and ctx->dictCtx
+ *                   ->dictSize describe the location and size of the preceding
+ *                   content, and matches are found by looking in the ctx
+ *                   ->dictCtx->hashTable.
+ */
+typedef enum { noDict = 0, withPrefix64k, usingExtDict, usingDictCtx } dict_directive;
 typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive;

-typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive;
-typedef enum { full = 0, partial = 1 } earlyEnd_directive;
-

 /*-************************************
 *  Local Utils
@@ -411,13 +672,30 @@ typedef enum { full = 0, partial = 1 } earlyEnd_directive;
 int LZ4_versionNumber (void) { return LZ4_VERSION_NUMBER; }
 const char* LZ4_versionString(void) { return LZ4_VERSION_STRING; }
 int LZ4_compressBound(int isize)  { return LZ4_COMPRESSBOUND(isize); }
-int LZ4_sizeofState() { return LZ4_STREAMSIZE; }
+int LZ4_sizeofState(void) { return LZ4_STREAMSIZE; }
+
+
+/*-************************************
+*  Internal Definitions used in Tests
+**************************************/
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* dest, int srcSize);
+
+int LZ4_decompress_safe_forceExtDict(const char* source, char* dest,
+                                     int compressedSize, int maxOutputSize,
+                                     const void* dictStart, size_t dictSize);

+#if defined (__cplusplus)
+}
+#endif

 /*-******************************
 *  Compression functions
 ********************************/
-static U32 LZ4_hash4(U32 sequence, tableType_t const tableType)
+LZ4_FORCE_INLINE U32 LZ4_hash4(U32 sequence, tableType_t const tableType)
 {
    if (tableType == byU16)
        return ((sequence * 2654435761U) >> ((MINMATCH*8)-(LZ4_HASHLOG+1)));
@@ -425,15 +703,16 @@ static U32 LZ4_hash4(U32 sequence, tableType_t const tableType)
        return ((sequence * 2654435761U) >> ((MINMATCH*8)-LZ4_HASHLOG));
 }

-static U32 LZ4_hash5(U64 sequence, tableType_t const tableType)
+LZ4_FORCE_INLINE U32 LZ4_hash5(U64 sequence, tableType_t const tableType)
 {
-    static const U64 prime5bytes = 889523592379ULL;
-    static const U64 prime8bytes = 11400714785074694791ULL;
    const U32 hashLog = (tableType == byU16) ? LZ4_HASHLOG+1 : LZ4_HASHLOG;
-    if (LZ4_isLittleEndian())
+    if (LZ4_isLittleEndian()) {
+        const U64 prime5bytes = 889523592379ULL;
        return (U32)(((sequence << 24) * prime5bytes) >> (64 - hashLog));
-    else
+    } else {
+        const U64 prime8bytes = 11400714785074694791ULL;
        return (U32)(((sequence >> 24) * prime8bytes) >> (64 - hashLog));
+    }
 }

 LZ4_FORCE_INLINE U32 LZ4_hashPosition(const void* const p, tableType_t const tableType)
@@ -442,10 +721,37 @@ LZ4_FORCE_INLINE U32 LZ4_hashPosition(const void* const p, tableType_t const tab
    return LZ4_hash4(LZ4_read32(p), tableType);
 }

-static void LZ4_putPositionOnHash(const BYTE* p, U32 h, void* tableBase, tableType_t const tableType, const BYTE* srcBase)
+LZ4_FORCE_INLINE void LZ4_clearHash(U32 h, void* tableBase, tableType_t const tableType)
+{
+    switch (tableType)
+    {
+    default: /* fallthrough */
+    case clearedTable: { /* illegal! */ assert(0); return; }
+    case byPtr: { const BYTE** hashTable = (const BYTE**)tableBase; hashTable[h] = NULL; return; }
+    case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = 0; return; }
+    case byU16: { U16* hashTable = (U16*) tableBase; hashTable[h] = 0; return; }
+    }
+}
+
+LZ4_FORCE_INLINE void LZ4_putIndexOnHash(U32 idx, U32 h, void* tableBase, tableType_t const tableType)
 {
    switch (tableType)
    {
+    default: /* fallthrough */
+    case clearedTable: /* fallthrough */
+    case byPtr: { /* illegal! */ assert(0); return; }
+    case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = idx; return; }
+    case byU16: { U16* hashTable = (U16*) tableBase; assert(idx < 65536); hashTable[h] = (U16)idx; return; }
+    }
+}
+
+LZ4_FORCE_INLINE void LZ4_putPositionOnHash(const BYTE* p, U32 h,
+                                  void* tableBase, tableType_t const tableType,
+                            const BYTE* srcBase)
+{
+    switch (tableType)
+    {
+    case clearedTable: { /* illegal! */ assert(0); return; }
    case byPtr: { const BYTE** hashTable = (const BYTE**)tableBase; hashTable[h] = p; return; }
    case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = (U32)(p-srcBase); return; }
    case byU16: { U16* hashTable = (U16*) tableBase; hashTable[h] = (U16)(p-srcBase); return; }
@@ -458,70 +764,160 @@ LZ4_FORCE_INLINE void LZ4_putPosition(const BYTE* p, void* tableBase, tableType_
    LZ4_putPositionOnHash(p, h, tableBase, tableType, srcBase);
 }

-static const BYTE* LZ4_getPositionOnHash(U32 h, void* tableBase, tableType_t tableType, const BYTE* srcBase)
+/* LZ4_getIndexOnHash() :
+ * Index of match position registered in hash table.
+ * hash position must be calculated by using base+index, or dictBase+index.
+ * Assumption 1 : only valid if tableType == byU32 or byU16.
+ * Assumption 2 : h is presumed valid (within limits of hash table)
+ */
+LZ4_FORCE_INLINE U32 LZ4_getIndexOnHash(U32 h, const void* tableBase, tableType_t tableType)
+{
+    LZ4_STATIC_ASSERT(LZ4_MEMORY_USAGE > 2);
+    if (tableType == byU32) {
+        const U32* const hashTable = (const U32*) tableBase;
+        assert(h < (1U << (LZ4_MEMORY_USAGE-2)));
+        return hashTable[h];
+    }
+    if (tableType == byU16) {
+        const U16* const hashTable = (const U16*) tableBase;
+        assert(h < (1U << (LZ4_MEMORY_USAGE-1)));
+        return hashTable[h];
+    }
+    assert(0); return 0;  /* forbidden case */
+}
+
+static const BYTE* LZ4_getPositionOnHash(U32 h, const void* tableBase, tableType_t tableType, const BYTE* srcBase)
 {
-    if (tableType == byPtr) { const BYTE** hashTable = (const BYTE**) tableBase; return hashTable[h]; }
-    if (tableType == byU32) { const U32* const hashTable = (U32*) tableBase; return hashTable[h] + srcBase; }
-    { const U16* const hashTable = (U16*) tableBase; return hashTable[h] + srcBase; }   /* default, to ensure a return */
+    if (tableType == byPtr) { const BYTE* const* hashTable = (const BYTE* const*) tableBase; return hashTable[h]; }
+    if (tableType == byU32) { const U32* const hashTable = (const U32*) tableBase; return hashTable[h] + srcBase; }
+    { const U16* const hashTable = (const U16*) tableBase; return hashTable[h] + srcBase; }   /* default, to ensure a return */
 }

-LZ4_FORCE_INLINE const BYTE* LZ4_getPosition(const BYTE* p, void* tableBase, tableType_t tableType, const BYTE* srcBase)
+LZ4_FORCE_INLINE const BYTE*
+LZ4_getPosition(const BYTE* p,
+                const void* tableBase, tableType_t tableType,
+                const BYTE* srcBase)
 {
    U32 const h = LZ4_hashPosition(p, tableType);
    return LZ4_getPositionOnHash(h, tableBase, tableType, srcBase);
 }

+LZ4_FORCE_INLINE void
+LZ4_prepareTable(LZ4_stream_t_internal* const cctx,
+           const int inputSize,
+           const tableType_t tableType) {
+    /* If the table hasn't been used, it's guaranteed to be zeroed out, and is
+     * therefore safe to use no matter what mode we're in. Otherwise, we figure
+     * out if it's safe to leave as is or whether it needs to be reset.
+     */
+    if ((tableType_t)cctx->tableType != clearedTable) {
+        assert(inputSize >= 0);
+        if ((tableType_t)cctx->tableType != tableType
+          || ((tableType == byU16) && cctx->currentOffset + (unsigned)inputSize >= 0xFFFFU)
+          || ((tableType == byU32) && cctx->currentOffset > 1 GB)
+          || tableType == byPtr
+          || inputSize >= 4 KB)
+        {
+            DEBUGLOG(4, "LZ4_prepareTable: Resetting table in %p", cctx);
+            MEM_INIT(cctx->hashTable, 0, LZ4_HASHTABLESIZE);
+            cctx->currentOffset = 0;
+            cctx->tableType = (U32)clearedTable;
+        } else {
+            DEBUGLOG(4, "LZ4_prepareTable: Re-use hash table (no reset)");
+        }
+    }
+
+    /* Adding a gap, so all previous entries are > LZ4_DISTANCE_MAX back, is faster
+     * than compressing without a gap. However, compressing with
+     * currentOffset == 0 is faster still, so we preserve that case.
+     */
+    if (cctx->currentOffset != 0 && tableType == byU32) {
+        DEBUGLOG(5, "LZ4_prepareTable: adding 64KB to currentOffset");
+        cctx->currentOffset += 64 KB;
+    }
+
+    /* Finally, clear history */
+    cctx->dictCtx = NULL;
+    cctx->dictionary = NULL;
+    cctx->dictSize = 0;
+}

 /** LZ4_compress_generic() :
-    inlined, to ensure branches are decided at compilation time */
-LZ4_FORCE_INLINE int LZ4_compress_generic(
+ *  inlined, to ensure branches are decided at compilation time.
+ *  Presumed already validated at this stage:
+ *  - source != NULL
+ *  - inputSize > 0
+ */
+LZ4_FORCE_INLINE int LZ4_compress_generic_validated(
                 LZ4_stream_t_internal* const cctx,
                 const char* const source,
                 char* const dest,
                 const int inputSize,
+                 int *inputConsumed, /* only written when outputDirective == fillOutput */
                 const int maxOutputSize,
-                 const limitedOutput_directive outputLimited,
+                 const limitedOutput_directive outputDirective,
                 const tableType_t tableType,
-                 const dict_directive dict,
+                 const dict_directive dictDirective,
                 const dictIssue_directive dictIssue,
-                 const U32 acceleration)
+                 const int acceleration)
 {
+    int result;
    const BYTE* ip = (const BYTE*) source;
-    const BYTE* base;
+
+    U32 const startIndex = cctx->currentOffset;
+    const BYTE* base = (const BYTE*) source - startIndex;
    const BYTE* lowLimit;
-    const BYTE* const lowRefLimit = ip - cctx->dictSize;
-    const BYTE* const dictionary = cctx->dictionary;
-    const BYTE* const dictEnd = dictionary + cctx->dictSize;
-    const ptrdiff_t dictDelta = dictEnd - (const BYTE*)source;
+
+    const LZ4_stream_t_internal* dictCtx = (const LZ4_stream_t_internal*) cctx->dictCtx;
+    const BYTE* const dictionary =
+        dictDirective == usingDictCtx ? dictCtx->dictionary : cctx->dictionary;
+    const U32 dictSize =
+        dictDirective == usingDictCtx ? dictCtx->dictSize : cctx->dictSize;
+    const U32 dictDelta = (dictDirective == usingDictCtx) ? startIndex - dictCtx->currentOffset : 0;   /* make indexes in dictCtx comparable with index in current context */
+
+    int const maybe_extMem = (dictDirective == usingExtDict) || (dictDirective == usingDictCtx);
+    U32 const prefixIdxLimit = startIndex - dictSize;   /* used when dictDirective == dictSmall */
+    const BYTE* const dictEnd = dictionary ? dictionary + dictSize : dictionary;
    const BYTE* anchor = (const BYTE*) source;
    const BYTE* const iend = ip + inputSize;
-    const BYTE* const mflimit = iend - MFLIMIT;
+    const BYTE* const mflimitPlusOne = iend - MFLIMIT + 1;
    const BYTE* const matchlimit = iend - LASTLITERALS;

+    /* the dictCtx currentOffset is indexed on the start of the dictionary,
+     * while a dictionary in the current context precedes the currentOffset */
+    const BYTE* dictBase = !dictionary ? NULL : (dictDirective == usingDictCtx) ?
+                            dictionary + dictSize - dictCtx->currentOffset :
+                            dictionary + dictSize - startIndex;
+
    BYTE* op = (BYTE*) dest;
    BYTE* const olimit = op + maxOutputSize;

+    U32 offset = 0;
    U32 forwardH;

-    /* Init conditions */
-    if ((U32)inputSize > (U32)LZ4_MAX_INPUT_SIZE) return 0;   /* Unsupported inputSize, too large (or negative) */
-    switch(dict)
-    {
-    case noDict:
-    default:
-        base = (const BYTE*)source;
-        lowLimit = (const BYTE*)source;
-        break;
-    case withPrefix64k:
-        base = (const BYTE*)source - cctx->currentOffset;
-        lowLimit = (const BYTE*)source - cctx->dictSize;
-        break;
-    case usingExtDict:
-        base = (const BYTE*)source - cctx->currentOffset;
-        lowLimit = (const BYTE*)source;
-        break;
+    DEBUGLOG(5, "LZ4_compress_generic_validated: srcSize=%i, tableType=%u", inputSize, tableType);
+    assert(ip != NULL);
+    /* If init conditions are not met, we don't have to mark stream
+     * as having dirty context, since no action was taken yet */
+    if (outputDirective == fillOutput && maxOutputSize < 1) { return 0; } /* Impossible to store anything */
+    if ((tableType == byU16) && (inputSize>=LZ4_64Klimit)) { return 0; }  /* Size too large (not within 64K limit) */
+    if (tableType==byPtr) assert(dictDirective==noDict);      /* only supported use case with byPtr */
+    assert(acceleration >= 1);
+
+    lowLimit = (const BYTE*)source - (dictDirective == withPrefix64k ? dictSize : 0);
+
+    /* Update context state */
+    if (dictDirective == usingDictCtx) {
+        /* Subsequent linked blocks can't use the dictionary. */
+        /* Instead, they use the block we just compressed. */
+        cctx->dictCtx = NULL;
+        cctx->dictSize = (U32)inputSize;
+    } else {
+        cctx->dictSize += (U32)inputSize;
    }
-    if ((tableType == byU16) && (inputSize>=LZ4_64Klimit)) return 0;   /* Size too large (not within 64K limit) */
+    cctx->currentOffset += (U32)inputSize;
+    cctx->tableType = (U32)tableType;
+
    if (inputSize<LZ4_minLength) goto _last_literals;        /* Input too small, no compression (all literals) */

    /* First Byte */
@@ -530,50 +926,112 @@ LZ4_FORCE_INLINE int LZ4_compress_generic(

    /* Main Loop */
    for ( ; ; ) {
-        ptrdiff_t refDelta = 0;
        const BYTE* match;
        BYTE* token;
+        const BYTE* filledIp;

        /* Find a match */
-        {   const BYTE* forwardIp = ip;
-            unsigned step = 1;
-            unsigned searchMatchNb = acceleration << LZ4_skipTrigger;
+        if (tableType == byPtr) {
+            const BYTE* forwardIp = ip;
+            int step = 1;
+            int searchMatchNb = acceleration << LZ4_skipTrigger;
            do {
                U32 const h = forwardH;
                ip = forwardIp;
                forwardIp += step;
                step = (searchMatchNb++ >> LZ4_skipTrigger);

-                if (unlikely(forwardIp > mflimit)) goto _last_literals;
+                if (unlikely(forwardIp > mflimitPlusOne)) goto _last_literals;
+                assert(ip < mflimitPlusOne);

                match = LZ4_getPositionOnHash(h, cctx->hashTable, tableType, base);
-                if (dict==usingExtDict) {
-                    if (match < (const BYTE*)source) {
-                        refDelta = dictDelta;
+                forwardH = LZ4_hashPosition(forwardIp, tableType);
+                LZ4_putPositionOnHash(ip, h, cctx->hashTable, tableType, base);
+
+            } while ( (match+LZ4_DISTANCE_MAX < ip)
+                   || (LZ4_read32(match) != LZ4_read32(ip)) );
+
+        } else {   /* byU32, byU16 */
+
+            const BYTE* forwardIp = ip;
+            int step = 1;
+            int searchMatchNb = acceleration << LZ4_skipTrigger;
+            do {
+                U32 const h = forwardH;
+                U32 const current = (U32)(forwardIp - base);
+                U32 matchIndex = LZ4_getIndexOnHash(h, cctx->hashTable, tableType);
+                assert(matchIndex <= current);
+                assert(forwardIp - base < (ptrdiff_t)(2 GB - 1));
+                ip = forwardIp;
+                forwardIp += step;
+                step = (searchMatchNb++ >> LZ4_skipTrigger);
+
+                if (unlikely(forwardIp > mflimitPlusOne)) goto _last_literals;
+                assert(ip < mflimitPlusOne);
+
+                if (dictDirective == usingDictCtx) {
+                    if (matchIndex < startIndex) {
+                        /* there was no match, try the dictionary */
+                        assert(tableType == byU32);
+                        matchIndex = LZ4_getIndexOnHash(h, dictCtx->hashTable, byU32);
+                        match = dictBase + matchIndex;
+                        matchIndex += dictDelta;   /* make dictCtx index comparable with current context */
                        lowLimit = dictionary;
                    } else {
-                        refDelta = 0;
+                        match = base + matchIndex;
                        lowLimit = (const BYTE*)source;
-                }   }
+                    }
+                } else if (dictDirective==usingExtDict) {
+                    if (matchIndex < startIndex) {
+                        DEBUGLOG(7, "extDict candidate: matchIndex=%5u  <  startIndex=%5u", matchIndex, startIndex);
+                        assert(startIndex - matchIndex >= MINMATCH);
+                        match = dictBase + matchIndex;
+                        lowLimit = dictionary;
+                    } else {
+                        match = base + matchIndex;
+                        lowLimit = (const BYTE*)source;
+                    }
+                } else {   /* single continuous memory segment */
+                    match = base + matchIndex;
+                }
                forwardH = LZ4_hashPosition(forwardIp, tableType);
-                LZ4_putPositionOnHash(ip, h, cctx->hashTable, tableType, base);
+                LZ4_putIndexOnHash(current, h, cctx->hashTable, tableType);
+
+                DEBUGLOG(7, "candidate at pos=%u  (offset=%u \n", matchIndex, current - matchIndex);
+                if ((dictIssue == dictSmall) && (matchIndex < prefixIdxLimit)) { continue; }    /* match outside of valid area */
+                assert(matchIndex < current);
+                if ( ((tableType != byU16) || (LZ4_DISTANCE_MAX < LZ4_DISTANCE_ABSOLUTE_MAX))
+                  && (matchIndex+LZ4_DISTANCE_MAX < current)) {
+                    continue;
+                } /* too far */
+                assert((current - matchIndex) <= LZ4_DISTANCE_MAX);  /* match now expected within distance */
+
+                if (LZ4_read32(match) == LZ4_read32(ip)) {
+                    if (maybe_extMem) offset = current - matchIndex;
+                    break;   /* match found */
+                }

-            } while ( ((dictIssue==dictSmall) ? (match < lowRefLimit) : 0)
-                || ((tableType==byU16) ? 0 : (match + MAX_DISTANCE < ip))
-                || (LZ4_read32(match+refDelta) != LZ4_read32(ip)) );
+            } while(1);
        }

        /* Catch up */
-        while (((ip>anchor) & (match+refDelta > lowLimit)) && (unlikely(ip[-1]==match[refDelta-1]))) { ip--; match--; }
+        filledIp = ip;
+        while (((ip>anchor) & (match > lowLimit)) && (unlikely(ip[-1]==match[-1]))) { ip--; match--; }

        /* Encode Literals */
        {   unsigned const litLength = (unsigned)(ip - anchor);
            token = op++;
-            if ((outputLimited) &&  /* Check output buffer overflow */
-                (unlikely(op + litLength + (2 + 1 + LASTLITERALS) + (litLength/255) > olimit)))
-                return 0;
+            if ((outputDirective == limitedOutput) &&  /* Check output buffer overflow */
+                (unlikely(op + litLength + (2 + 1 + LASTLITERALS) + (litLength/255) > olimit)) ) {
+                return 0;   /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */
+            }
+            if ((outputDirective == fillOutput) &&
+                (unlikely(op + (litLength+240)/255 /* litlen */ + litLength /* literals */ + 2 /* offset */ + 1 /* token */ + MFLIMIT - MINMATCH /* min last literals so last match is <= end - MFLIMIT */ > olimit))) {
+                op--;
+                goto _last_literals;
+            }
            if (litLength >= RUN_MASK) {
-                int len = (int)litLength-RUN_MASK;
+                int len = (int)(litLength - RUN_MASK);
                *token = (RUN_MASK<<ML_BITS);
                for(; len >= 255 ; len-=255) *op++ = 255;
                *op++ = (BYTE)len;
@@ -581,37 +1039,87 @@ LZ4_FORCE_INLINE int LZ4_compress_generic(
            else *token = (BYTE)(litLength<<ML_BITS);

            /* Copy Literals */
-            LZ4_wildCopy(op, anchor, op+litLength);
+            LZ4_wildCopy8(op, anchor, op+litLength);
            op+=litLength;
+            DEBUGLOG(6, "seq.start:%i, literals=%u, match.start:%i",
+                        (int)(anchor-(const BYTE*)source), litLength, (int)(ip-(const BYTE*)source));
        }

 _next_match:
+        /* at this stage, the following variables must be correctly set :
+         * - ip : at start of LZ operation
+         * - match : at start of previous pattern occurence; can be within current prefix, or within extDict
+         * - offset : if maybe_ext_memSegment==1 (constant)
+         * - lowLimit : must be == dictionary to mean "match is within extDict"; must be == source otherwise
+         * - token and *token : position to write 4-bits for match length; higher 4-bits for literal length supposed already written
+         */
+
+        if ((outputDirective == fillOutput) &&
+            (op + 2 /* offset */ + 1 /* token */ + MFLIMIT - MINMATCH /* min last literals so last match is <= end - MFLIMIT */ > olimit)) {
+            /* the match was too close to the end, rewind and go to last literals */
+            op = token;
+            goto _last_literals;
+        }
+
        /* Encode Offset */
-        LZ4_writeLE16(op, (U16)(ip-match)); op+=2;
+        if (maybe_extMem) {   /* static test */
+            DEBUGLOG(6, "             with offset=%u  (ext if > %i)", offset, (int)(ip - (const BYTE*)source));
+            assert(offset <= LZ4_DISTANCE_MAX && offset > 0);
+            LZ4_writeLE16(op, (U16)offset); op+=2;
+        } else  {
+            DEBUGLOG(6, "             with offset=%u  (same segment)", (U32)(ip - match));
+            assert(ip-match <= LZ4_DISTANCE_MAX);
+            LZ4_writeLE16(op, (U16)(ip - match)); op+=2;
+        }

        /* Encode MatchLength */
        {   unsigned matchCode;

-            if ((dict==usingExtDict) && (lowLimit==dictionary)) {
-                const BYTE* limit;
-                match += refDelta;
-                limit = ip + (dictEnd-match);
+            if ( (dictDirective==usingExtDict || dictDirective==usingDictCtx)
+              && (lowLimit==dictionary) /* match within extDict */ ) {
+                const BYTE* limit = ip + (dictEnd-match);
+                assert(dictEnd > match);
                if (limit > matchlimit) limit = matchlimit;
                matchCode = LZ4_count(ip+MINMATCH, match+MINMATCH, limit);
-                ip += MINMATCH + matchCode;
+                ip += (size_t)matchCode + MINMATCH;
                if (ip==limit) {
-                    unsigned const more = LZ4_count(ip, (const BYTE*)source, matchlimit);
+                    unsigned const more = LZ4_count(limit, (const BYTE*)source, matchlimit);
                    matchCode += more;
                    ip += more;
                }
+                DEBUGLOG(6, "             with matchLength=%u starting in extDict", matchCode+MINMATCH);
            } else {
                matchCode = LZ4_count(ip+MINMATCH, match+MINMATCH, matchlimit);
-                ip += MINMATCH + matchCode;
+                ip += (size_t)matchCode + MINMATCH;
+                DEBUGLOG(6, "             with matchLength=%u", matchCode+MINMATCH);
            }

-            if ( outputLimited &&    /* Check output buffer overflow */
-                (unlikely(op + (1 + LASTLITERALS) + (matchCode>>8) > olimit)) )
-                return 0;
+            if ((outputDirective) &&    /* Check output buffer overflow */
+                (unlikely(op + (1 + LASTLITERALS) + (matchCode+240)/255 > olimit)) ) {
+                if (outputDirective == fillOutput) {
+                    /* Match description too long : reduce it */
+                    U32 newMatchCode = 15 /* in token */ - 1 /* to avoid needing a zero byte */ + ((U32)(olimit - op) - 1 - LASTLITERALS) * 255;
+                    ip -= matchCode - newMatchCode;
+                    assert(newMatchCode < matchCode);
+                    matchCode = newMatchCode;
+                    if (unlikely(ip <= filledIp)) {
+                        /* We have already filled up to filledIp so if ip ends up less than filledIp
+                         * we have positions in the hash table beyond the current position. This is
+                         * a problem if we reuse the hash table. So we have to remove these positions
+                         * from the hash table.
+                         */
+                        const BYTE* ptr;
+                        DEBUGLOG(5, "Clearing %u positions", (U32)(filledIp - ip));
+                        for (ptr = ip; ptr <= filledIp; ++ptr) {
+                            U32 const h = LZ4_hashPosition(ptr, tableType);
+                            LZ4_clearHash(h, cctx->hashTable, tableType);
+                        }
+                    }
+                } else {
+                    assert(outputDirective == limitedOutput);
+                    return 0;   /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */
+                }
+            }
            if (matchCode >= ML_MASK) {
                *token += ML_MASK;
                matchCode -= ML_MASK;
@@ -626,41 +1134,89 @@ _next_match:
            } else
                *token += (BYTE)(matchCode);
        }
+        /* Ensure we have enough space for the last literals. */
+        assert(!(outputDirective == fillOutput && op + 1 + LASTLITERALS > olimit));

        anchor = ip;

        /* Test end of chunk */
-        if (ip > mflimit) break;
+        if (ip >= mflimitPlusOne) break;

        /* Fill table */
        LZ4_putPosition(ip-2, cctx->hashTable, tableType, base);

        /* Test next position */
+        if (tableType == byPtr) {
+
            match = LZ4_getPosition(ip, cctx->hashTable, tableType, base);
-        if (dict==usingExtDict) {
-            if (match < (const BYTE*)source) {
-                refDelta = dictDelta;
-                lowLimit = dictionary;
-            } else {
-                refDelta = 0;
-                lowLimit = (const BYTE*)source;
-        }   }
            LZ4_putPosition(ip, cctx->hashTable, tableType, base);
-        if ( ((dictIssue==dictSmall) ? (match>=lowRefLimit) : 1)
-            && (match+MAX_DISTANCE>=ip)
-            && (LZ4_read32(match+refDelta)==LZ4_read32(ip)) )
+            if ( (match+LZ4_DISTANCE_MAX >= ip)
+              && (LZ4_read32(match) == LZ4_read32(ip)) )
            { token=op++; *token=0; goto _next_match; }

-        /* Prepare next loop */
-        forwardH = LZ4_hashPosition(++ip, tableType);
+        } else {   /* byU32, byU16 */
+
+            U32 const h = LZ4_hashPosition(ip, tableType);
+            U32 const current = (U32)(ip-base);
+            U32 matchIndex = LZ4_getIndexOnHash(h, cctx->hashTable, tableType);
+            assert(matchIndex < current);
+            if (dictDirective == usingDictCtx) {
+                if (matchIndex < startIndex) {
+                    /* there was no match, try the dictionary */
+                    matchIndex = LZ4_getIndexOnHash(h, dictCtx->hashTable, byU32);
+                    match = dictBase + matchIndex;
+                    lowLimit = dictionary;   /* required for match length counter */
+                    matchIndex += dictDelta;
+                } else {
+                    match = base + matchIndex;
+                    lowLimit = (const BYTE*)source;  /* required for match length counter */
                }
-
-_last_literals:
-    /* Encode Last Literals */
-    {   size_t const lastRun = (size_t)(iend - anchor);
-        if ( (outputLimited) &&  /* Check output buffer overflow */
-            ((op - (BYTE*)dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize) )
-            return 0;
+            } else if (dictDirective==usingExtDict) {
+                if (matchIndex < startIndex) {
+                    match = dictBase + matchIndex;
+                    lowLimit = dictionary;   /* required for match length counter */
+                } else {
+                    match = base + matchIndex;
+                    lowLimit = (const BYTE*)source;   /* required for match length counter */
+                }
+            } else {   /* single memory segment */
+                match = base + matchIndex;
+            }
+            LZ4_putIndexOnHash(current, h, cctx->hashTable, tableType);
+            assert(matchIndex < current);
+            if ( ((dictIssue==dictSmall) ? (matchIndex >= prefixIdxLimit) : 1)
+              && (((tableType==byU16) && (LZ4_DISTANCE_MAX == LZ4_DISTANCE_ABSOLUTE_MAX)) ? 1 : (matchIndex+LZ4_DISTANCE_MAX >= current))
+              && (LZ4_read32(match) == LZ4_read32(ip)) ) {
+                token=op++;
+                *token=0;
+                if (maybe_extMem) offset = current - matchIndex;
+                DEBUGLOG(6, "seq.start:%i, literals=%u, match.start:%i",
+                            (int)(anchor-(const BYTE*)source), 0, (int)(ip-(const BYTE*)source));
+                goto _next_match;
+            }
+        }
+
+        /* Prepare next loop */
+        forwardH = LZ4_hashPosition(++ip, tableType);
+
+    }
+
+_last_literals:
+    /* Encode Last Literals */
+    {   size_t lastRun = (size_t)(iend - anchor);
+        if ( (outputDirective) &&  /* Check output buffer overflow */
+            (op + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > olimit)) {
+            if (outputDirective == fillOutput) {
+                /* adapt lastRun to fill 'dst' */
+                assert(olimit >= op);
+                lastRun  = (size_t)(olimit-op) - 1/*token*/;
+                lastRun -= (lastRun + 256 - RUN_MASK) / 256;  /*additional length tokens*/
+            } else {
+                assert(outputDirective == limitedOutput);
+                return 0;   /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */
+            }
+        }
+        DEBUGLOG(6, "Final literal run : %i literals", (int)lastRun);
        if (lastRun >= RUN_MASK) {
            size_t accumulator = lastRun - RUN_MASK;
            *op++ = RUN_MASK << ML_BITS;
@@ -669,45 +1225,143 @@ _last_literals:
        } else {
            *op++ = (BYTE)(lastRun<<ML_BITS);
        }
-        memcpy(op, anchor, lastRun);
+        LZ4_memcpy(op, anchor, lastRun);
+        ip = anchor + lastRun;
        op += lastRun;
    }

-    /* End */
-    return (int) (((char*)op)-dest);
+    if (outputDirective == fillOutput) {
+        *inputConsumed = (int) (((const char*)ip)-source);
+    }
+    result = (int)(((char*)op) - dest);
+    assert(result > 0);
+    DEBUGLOG(5, "LZ4_compress_generic: compressed %i bytes into %i bytes", inputSize, result);
+    return result;
+}
+
+/** LZ4_compress_generic() :
+ *  inlined, to ensure branches are decided at compilation time;
+ *  takes care of src == (NULL, 0)
+ *  and forward the rest to LZ4_compress_generic_validated */
+LZ4_FORCE_INLINE int LZ4_compress_generic(
+                 LZ4_stream_t_internal* const cctx,
+                 const char* const src,
+                 char* const dst,
+                 const int srcSize,
+                 int *inputConsumed, /* only written when outputDirective == fillOutput */
+                 const int dstCapacity,
+                 const limitedOutput_directive outputDirective,
+                 const tableType_t tableType,
+                 const dict_directive dictDirective,
+                 const dictIssue_directive dictIssue,
+                 const int acceleration)
+{
+    DEBUGLOG(5, "LZ4_compress_generic: srcSize=%i, dstCapacity=%i",
+                srcSize, dstCapacity);
+
+    if ((U32)srcSize > (U32)LZ4_MAX_INPUT_SIZE) { return 0; }  /* Unsupported srcSize, too large (or negative) */
+    if (srcSize == 0) {   /* src == NULL supported if srcSize == 0 */
+        if (outputDirective != notLimited && dstCapacity <= 0) return 0;  /* no output, can't write anything */
+        DEBUGLOG(5, "Generating an empty block");
+        assert(outputDirective == notLimited || dstCapacity >= 1);
+        assert(dst != NULL);
+        dst[0] = 0;
+        if (outputDirective == fillOutput) {
+            assert (inputConsumed != NULL);
+            *inputConsumed = 0;
+        }
+        return 1;
+    }
+    assert(src != NULL);
+
+    return LZ4_compress_generic_validated(cctx, src, dst, srcSize,
+                inputConsumed, /* only written into if outputDirective == fillOutput */
+                dstCapacity, outputDirective,
+                tableType, dictDirective, dictIssue, acceleration);
 }


 int LZ4_compress_fast_extState(void* state, const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration)
 {
-    LZ4_stream_t_internal* ctx = &((LZ4_stream_t*)state)->internal_donotuse;
-    LZ4_resetStream((LZ4_stream_t*)state);
-    if (acceleration < 1) acceleration = ACCELERATION_DEFAULT;
-
+    LZ4_stream_t_internal* const ctx = & LZ4_initStream(state, sizeof(LZ4_stream_t)) -> internal_donotuse;
+    assert(ctx != NULL);
+    if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT;
+    if (acceleration > LZ4_ACCELERATION_MAX) acceleration = LZ4_ACCELERATION_MAX;
    if (maxOutputSize >= LZ4_compressBound(inputSize)) {
-        if (inputSize < LZ4_64Klimit)
-            return LZ4_compress_generic(ctx, source, dest, inputSize,             0,    notLimited,                        byU16, noDict, noDictIssue, acceleration);
-        else
-            return LZ4_compress_generic(ctx, source, dest, inputSize,             0,    notLimited, (sizeof(void*)==8) ? byU32 : byPtr, noDict, noDictIssue, acceleration);
+        if (inputSize < LZ4_64Klimit) {
+            return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, 0, notLimited, byU16, noDict, noDictIssue, acceleration);
        } else {
-        if (inputSize < LZ4_64Klimit)
-            return LZ4_compress_generic(ctx, source, dest, inputSize, maxOutputSize, limitedOutput,                        byU16, noDict, noDictIssue, acceleration);
-        else
-            return LZ4_compress_generic(ctx, source, dest, inputSize, maxOutputSize, limitedOutput, (sizeof(void*)==8) ? byU32 : byPtr, noDict, noDictIssue, acceleration);
+            const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)source > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
+            return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration);
+        }
+    } else {
+        if (inputSize < LZ4_64Klimit) {
+            return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, byU16, noDict, noDictIssue, acceleration);
+        } else {
+            const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)source > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
+            return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, noDict, noDictIssue, acceleration);
+        }
+    }
+}
+
+/**
+ * LZ4_compress_fast_extState_fastReset() :
+ * A variant of LZ4_compress_fast_extState().
+ *
+ * Using this variant avoids an expensive initialization step. It is only safe
+ * to call if the state buffer is known to be correctly initialized already
+ * (see comment in lz4.h on LZ4_resetStream_fast() for a definition of
+ * "correctly initialized").
+ */
+int LZ4_compress_fast_extState_fastReset(void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration)
+{
+    LZ4_stream_t_internal* ctx = &((LZ4_stream_t*)state)->internal_donotuse;
+    if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT;
+    if (acceleration > LZ4_ACCELERATION_MAX) acceleration = LZ4_ACCELERATION_MAX;
+
+    if (dstCapacity >= LZ4_compressBound(srcSize)) {
+        if (srcSize < LZ4_64Klimit) {
+            const tableType_t tableType = byU16;
+            LZ4_prepareTable(ctx, srcSize, tableType);
+            if (ctx->currentOffset) {
+                return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, dictSmall, acceleration);
+            } else {
+                return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration);
+            }
+        } else {
+            const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
+            LZ4_prepareTable(ctx, srcSize, tableType);
+            return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration);
+        }
+    } else {
+        if (srcSize < LZ4_64Klimit) {
+            const tableType_t tableType = byU16;
+            LZ4_prepareTable(ctx, srcSize, tableType);
+            if (ctx->currentOffset) {
+                return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, dictSmall, acceleration);
+            } else {
+                return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, noDictIssue, acceleration);
+            }
+        } else {
+            const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
+            LZ4_prepareTable(ctx, srcSize, tableType);
+            return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, noDictIssue, acceleration);
+        }
    }
 }


 int LZ4_compress_fast(const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration)
 {
+    int result;
 #if (LZ4_HEAPMODE)
-    void* ctxPtr = ALLOCATOR(1, sizeof(LZ4_stream_t));   /* malloc-calloc always properly aligned */
+    LZ4_stream_t* ctxPtr = ALLOC(sizeof(LZ4_stream_t));   /* malloc-calloc always properly aligned */
+    if (ctxPtr == NULL) return 0;
 #else
    LZ4_stream_t ctx;
-    void* const ctxPtr = &ctx;
+    LZ4_stream_t* const ctxPtr = &ctx;
 #endif
-
-    int const result = LZ4_compress_fast_extState(ctxPtr, source, dest, inputSize, maxOutputSize, acceleration);
+    result = LZ4_compress_fast_extState(ctxPtr, source, dest, inputSize, maxOutputSize, acceleration);

 #if (LZ4_HEAPMODE)
    FREEMEM(ctxPtr);
@@ -716,205 +1370,37 @@ int LZ4_compress_fast(const char* source, char* dest, int inputSize, int maxOutp
 }


-int LZ4_compress_default(const char* source, char* dest, int inputSize, int maxOutputSize)
-{
-    return LZ4_compress_fast(source, dest, inputSize, maxOutputSize, 1);
-}
-
-
-/* hidden debug function */
-/* strangely enough, gcc generates faster code when this function is uncommented, even if unused */
-int LZ4_compress_fast_force(const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration)
+int LZ4_compress_default(const char* src, char* dst, int srcSize, int maxOutputSize)
 {
-    LZ4_stream_t ctx;
-    LZ4_resetStream(&ctx);
-
-    if (inputSize < LZ4_64Klimit)
-        return LZ4_compress_generic(&ctx.internal_donotuse, source, dest, inputSize, maxOutputSize, limitedOutput, byU16,                        noDict, noDictIssue, acceleration);
-    else
-        return LZ4_compress_generic(&ctx.internal_donotuse, source, dest, inputSize, maxOutputSize, limitedOutput, sizeof(void*)==8 ? byU32 : byPtr, noDict, noDictIssue, acceleration);
-}
-
-
-/*-******************************
-*  *_destSize() variant
-********************************/
-
-static int LZ4_compress_destSize_generic(
-                       LZ4_stream_t_internal* const ctx,
-                 const char* const src,
-                       char* const dst,
-                       int*  const srcSizePtr,
-                 const int targetDstSize,
-                 const tableType_t tableType)
-{
-    const BYTE* ip = (const BYTE*) src;
-    const BYTE* base = (const BYTE*) src;
-    const BYTE* lowLimit = (const BYTE*) src;
-    const BYTE* anchor = ip;
-    const BYTE* const iend = ip + *srcSizePtr;
-    const BYTE* const mflimit = iend - MFLIMIT;
-    const BYTE* const matchlimit = iend - LASTLITERALS;
-
-    BYTE* op = (BYTE*) dst;
-    BYTE* const oend = op + targetDstSize;
-    BYTE* const oMaxLit = op + targetDstSize - 2 /* offset */ - 8 /* because 8+MINMATCH==MFLIMIT */ - 1 /* token */;
-    BYTE* const oMaxMatch = op + targetDstSize - (LASTLITERALS + 1 /* token */);
-    BYTE* const oMaxSeq = oMaxLit - 1 /* token */;
-
-    U32 forwardH;
-
-
-    /* Init conditions */
-    if (targetDstSize < 1) return 0;                                     /* Impossible to store anything */
-    if ((U32)*srcSizePtr > (U32)LZ4_MAX_INPUT_SIZE) return 0;            /* Unsupported input size, too large (or negative) */
-    if ((tableType == byU16) && (*srcSizePtr>=LZ4_64Klimit)) return 0;   /* Size too large (not within 64K limit) */
-    if (*srcSizePtr<LZ4_minLength) goto _last_literals;                  /* Input too small, no compression (all literals) */
-
-    /* First Byte */
-    *srcSizePtr = 0;
-    LZ4_putPosition(ip, ctx->hashTable, tableType, base);
-    ip++; forwardH = LZ4_hashPosition(ip, tableType);
-
-    /* Main Loop */
-    for ( ; ; ) {
-        const BYTE* match;
-        BYTE* token;
-
-        /* Find a match */
-        {   const BYTE* forwardIp = ip;
-            unsigned step = 1;
-            unsigned searchMatchNb = 1 << LZ4_skipTrigger;
-
-            do {
-                U32 h = forwardH;
-                ip = forwardIp;
-                forwardIp += step;
-                step = (searchMatchNb++ >> LZ4_skipTrigger);
-
-                if (unlikely(forwardIp > mflimit)) goto _last_literals;
-
-                match = LZ4_getPositionOnHash(h, ctx->hashTable, tableType, base);
-                forwardH = LZ4_hashPosition(forwardIp, tableType);
-                LZ4_putPositionOnHash(ip, h, ctx->hashTable, tableType, base);
-
-            } while ( ((tableType==byU16) ? 0 : (match + MAX_DISTANCE < ip))
-                || (LZ4_read32(match) != LZ4_read32(ip)) );
-        }
-
-        /* Catch up */
-        while ((ip>anchor) && (match > lowLimit) && (unlikely(ip[-1]==match[-1]))) { ip--; match--; }
-
-        /* Encode Literal length */
-        {   unsigned litLength = (unsigned)(ip - anchor);
-            token = op++;
-            if (op + ((litLength+240)/255) + litLength > oMaxLit) {
-                /* Not enough space for a last match */
-                op--;
-                goto _last_literals;
-            }
-            if (litLength>=RUN_MASK) {
-                unsigned len = litLength - RUN_MASK;
-                *token=(RUN_MASK<<ML_BITS);
-                for(; len >= 255 ; len-=255) *op++ = 255;
-                *op++ = (BYTE)len;
-            }
-            else *token = (BYTE)(litLength<<ML_BITS);
-
-            /* Copy Literals */
-            LZ4_wildCopy(op, anchor, op+litLength);
-            op += litLength;
-        }
-
-_next_match:
-        /* Encode Offset */
-        LZ4_writeLE16(op, (U16)(ip-match)); op+=2;
-
-        /* Encode MatchLength */
-        {   size_t matchLength = LZ4_count(ip+MINMATCH, match+MINMATCH, matchlimit);
-
-            if (op + ((matchLength+240)/255) > oMaxMatch) {
-                /* Match description too long : reduce it */
-                matchLength = (15-1) + (oMaxMatch-op) * 255;
-            }
-            ip += MINMATCH + matchLength;
-
-            if (matchLength>=ML_MASK) {
-                *token += ML_MASK;
-                matchLength -= ML_MASK;
-                while (matchLength >= 255) { matchLength-=255; *op++ = 255; }
-                *op++ = (BYTE)matchLength;
-            }
-            else *token += (BYTE)(matchLength);
-        }
-
-        anchor = ip;
-
-        /* Test end of block */
-        if (ip > mflimit) break;
-        if (op > oMaxSeq) break;
-
-        /* Fill table */
-        LZ4_putPosition(ip-2, ctx->hashTable, tableType, base);
-
-        /* Test next position */
-        match = LZ4_getPosition(ip, ctx->hashTable, tableType, base);
-        LZ4_putPosition(ip, ctx->hashTable, tableType, base);
-        if ( (match+MAX_DISTANCE>=ip)
-            && (LZ4_read32(match)==LZ4_read32(ip)) )
-        { token=op++; *token=0; goto _next_match; }
-
-        /* Prepare next loop */
-        forwardH = LZ4_hashPosition(++ip, tableType);
-    }
-
-_last_literals:
-    /* Encode Last Literals */
-    {   size_t lastRunSize = (size_t)(iend - anchor);
-        if (op + 1 /* token */ + ((lastRunSize+240)/255) /* litLength */ + lastRunSize /* literals */ > oend) {
-            /* adapt lastRunSize to fill 'dst' */
-            lastRunSize  = (oend-op) - 1;
-            lastRunSize -= (lastRunSize+240)/255;
-        }
-        ip = anchor + lastRunSize;
-
-        if (lastRunSize >= RUN_MASK) {
-            size_t accumulator = lastRunSize - RUN_MASK;
-            *op++ = RUN_MASK << ML_BITS;
-            for(; accumulator >= 255 ; accumulator-=255) *op++ = 255;
-            *op++ = (BYTE) accumulator;
-        } else {
-            *op++ = (BYTE)(lastRunSize<<ML_BITS);
-        }
-        memcpy(op, anchor, lastRunSize);
-        op += lastRunSize;
-    }
-
-    /* End */
-    *srcSizePtr = (int) (((const char*)ip)-src);
-    return (int) (((char*)op)-dst);
+    return LZ4_compress_fast(src, dst, srcSize, maxOutputSize, 1);
 }


+/* Note!: This function leaves the stream in an unclean/broken state!
+ * It is not safe to subsequently use the same state with a _fastReset() or
+ * _continue() call without resetting it. */
 static int LZ4_compress_destSize_extState (LZ4_stream_t* state, const char* src, char* dst, int* srcSizePtr, int targetDstSize)
 {
-    LZ4_resetStream(state);
+    void* const s = LZ4_initStream(state, sizeof (*state));
+    assert(s != NULL); (void)s;

    if (targetDstSize >= LZ4_compressBound(*srcSizePtr)) {  /* compression success is guaranteed */
        return LZ4_compress_fast_extState(state, src, dst, *srcSizePtr, targetDstSize, 1);
    } else {
-        if (*srcSizePtr < LZ4_64Klimit)
-            return LZ4_compress_destSize_generic(&state->internal_donotuse, src, dst, srcSizePtr, targetDstSize, byU16);
-        else
-            return LZ4_compress_destSize_generic(&state->internal_donotuse, src, dst, srcSizePtr, targetDstSize, sizeof(void*)==8 ? byU32 : byPtr);
-    }
+        if (*srcSizePtr < LZ4_64Klimit) {
+            return LZ4_compress_generic(&state->internal_donotuse, src, dst, *srcSizePtr, srcSizePtr, targetDstSize, fillOutput, byU16, noDict, noDictIssue, 1);
+        } else {
+            tableType_t const addrMode = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
+            return LZ4_compress_generic(&state->internal_donotuse, src, dst, *srcSizePtr, srcSizePtr, targetDstSize, fillOutput, addrMode, noDict, noDictIssue, 1);
+    }   }
 }


 int LZ4_compress_destSize(const char* src, char* dst, int* srcSizePtr, int targetDstSize)
 {
 #if (LZ4_HEAPMODE)
-    LZ4_stream_t* ctx = (LZ4_stream_t*)ALLOCATOR(1, sizeof(LZ4_stream_t));   /* malloc-calloc always properly aligned */
+    LZ4_stream_t* ctx = (LZ4_stream_t*)ALLOC(sizeof(LZ4_stream_t));   /* malloc-calloc always properly aligned */
+    if (ctx == NULL) return 0;
 #else
    LZ4_stream_t ctxBody;
    LZ4_stream_t* ctx = &ctxBody;
@@ -936,20 +1422,50 @@ int LZ4_compress_destSize(const char* src, char* dst, int* srcSizePtr, int targe

 LZ4_stream_t* LZ4_createStream(void)
 {
-    LZ4_stream_t* lz4s = (LZ4_stream_t*)ALLOCATOR(8, LZ4_STREAMSIZE_U64);
+    LZ4_stream_t* const lz4s = (LZ4_stream_t*)ALLOC(sizeof(LZ4_stream_t));
    LZ4_STATIC_ASSERT(LZ4_STREAMSIZE >= sizeof(LZ4_stream_t_internal));    /* A compilation error here means LZ4_STREAMSIZE is not large enough */
-    LZ4_resetStream(lz4s);
+    DEBUGLOG(4, "LZ4_createStream %p", lz4s);
+    if (lz4s == NULL) return NULL;
+    LZ4_initStream(lz4s, sizeof(*lz4s));
    return lz4s;
 }

+static size_t LZ4_stream_t_alignment(void)
+{
+#if LZ4_ALIGN_TEST
+    typedef struct { char c; LZ4_stream_t t; } t_a;
+    return sizeof(t_a) - sizeof(LZ4_stream_t);
+#else
+    return 1;  /* effectively disabled */
+#endif
+}
+
+LZ4_stream_t* LZ4_initStream (void* buffer, size_t size)
+{
+    DEBUGLOG(5, "LZ4_initStream");
+    if (buffer == NULL) { return NULL; }
+    if (size < sizeof(LZ4_stream_t)) { return NULL; }
+    if (!LZ4_isAligned(buffer, LZ4_stream_t_alignment())) return NULL;
+    MEM_INIT(buffer, 0, sizeof(LZ4_stream_t_internal));
+    return (LZ4_stream_t*)buffer;
+}
+
+/* resetStream is now deprecated,
+ * prefer initStream() which is more general */
 void LZ4_resetStream (LZ4_stream_t* LZ4_stream)
 {
-    MEM_INIT(LZ4_stream, 0, sizeof(LZ4_stream_t));
+    DEBUGLOG(5, "LZ4_resetStream (ctx:%p)", LZ4_stream);
+    MEM_INIT(LZ4_stream, 0, sizeof(LZ4_stream_t_internal));
+}
+
+void LZ4_resetStream_fast(LZ4_stream_t* ctx) {
+    LZ4_prepareTable(&(ctx->internal_donotuse), 0, byU32);
 }

 int LZ4_freeStream (LZ4_stream_t* LZ4_stream)
 {
    if (!LZ4_stream) return 0;   /* support free on NULL */
+    DEBUGLOG(5, "LZ4_freeStream %p", LZ4_stream);
    FREEMEM(LZ4_stream);
    return (0);
 }
@@ -959,43 +1475,82 @@ int LZ4_freeStream (LZ4_stream_t* LZ4_stream)
 int LZ4_loadDict (LZ4_stream_t* LZ4_dict, const char* dictionary, int dictSize)
 {
    LZ4_stream_t_internal* dict = &LZ4_dict->internal_donotuse;
+    const tableType_t tableType = byU32;
    const BYTE* p = (const BYTE*)dictionary;
    const BYTE* const dictEnd = p + dictSize;
    const BYTE* base;

-    if ((dict->initCheck) || (dict->currentOffset > 1 GB))  /* Uninitialized structure, or reuse overflow */
+    DEBUGLOG(4, "LZ4_loadDict (%i bytes from %p into %p)", dictSize, dictionary, LZ4_dict);
+
+    /* It's necessary to reset the context,
+     * and not just continue it with prepareTable()
+     * to avoid any risk of generating overflowing matchIndex
+     * when compressing using this dictionary */
    LZ4_resetStream(LZ4_dict);

+    /* We always increment the offset by 64 KB, since, if the dict is longer,
+     * we truncate it to the last 64k, and if it's shorter, we still want to
+     * advance by a whole window length so we can provide the guarantee that
+     * there are only valid offsets in the window, which allows an optimization
+     * in LZ4_compress_fast_continue() where it uses noDictIssue even when the
+     * dictionary isn't a full 64k. */
+    dict->currentOffset += 64 KB;
+
    if (dictSize < (int)HASH_UNIT) {
-        dict->dictionary = NULL;
-        dict->dictSize = 0;
        return 0;
    }

    if ((dictEnd - p) > 64 KB) p = dictEnd - 64 KB;
-    dict->currentOffset += 64 KB;
-    base = p - dict->currentOffset;
+    base = dictEnd - dict->currentOffset;
    dict->dictionary = p;
    dict->dictSize = (U32)(dictEnd - p);
-    dict->currentOffset += dict->dictSize;
+    dict->tableType = (U32)tableType;

    while (p <= dictEnd-HASH_UNIT) {
-        LZ4_putPosition(p, dict->hashTable, byU32, base);
+        LZ4_putPosition(p, dict->hashTable, tableType, base);
        p+=3;
    }

-    return dict->dictSize;
+    return (int)dict->dictSize;
+}
+
+void LZ4_attach_dictionary(LZ4_stream_t* workingStream, const LZ4_stream_t* dictionaryStream) {
+    const LZ4_stream_t_internal* dictCtx = dictionaryStream == NULL ? NULL :
+        &(dictionaryStream->internal_donotuse);
+
+    DEBUGLOG(4, "LZ4_attach_dictionary (%p, %p, size %u)",
+             workingStream, dictionaryStream,
+             dictCtx != NULL ? dictCtx->dictSize : 0);
+
+    if (dictCtx != NULL) {
+        /* If the current offset is zero, we will never look in the
+         * external dictionary context, since there is no value a table
+         * entry can take that indicate a miss. In that case, we need
+         * to bump the offset to something non-zero.
+         */
+        if (workingStream->internal_donotuse.currentOffset == 0) {
+            workingStream->internal_donotuse.currentOffset = 64 KB;
+        }
+
+        /* Don't actually attach an empty dictionary.
+         */
+        if (dictCtx->dictSize == 0) {
+            dictCtx = NULL;
+        }
+    }
+    workingStream->internal_donotuse.dictCtx = dictCtx;
 }


-static void LZ4_renormDictT(LZ4_stream_t_internal* LZ4_dict, const BYTE* src)
+static void LZ4_renormDictT(LZ4_stream_t_internal* LZ4_dict, int nextSize)
 {
-    if ((LZ4_dict->currentOffset > 0x80000000) ||
-        ((uptrval)LZ4_dict->currentOffset > (uptrval)src)) {   /* address space overflow */
+    assert(nextSize >= 0);
+    if (LZ4_dict->currentOffset + (unsigned)nextSize > 0x80000000) {   /* potential ptrdiff_t overflow (32-bits mode) */
        /* rescale hash table */
        U32 const delta = LZ4_dict->currentOffset - 64 KB;
        const BYTE* dictEnd = LZ4_dict->dictionary + LZ4_dict->dictSize;
        int i;
+        DEBUGLOG(4, "LZ4_renormDictT");
        for (i=0; i<LZ4_HASH_SIZE_U32; i++) {
            if (LZ4_dict->hashTable[i] < delta) LZ4_dict->hashTable[i]=0;
            else LZ4_dict->hashTable[i] -= delta;
@@ -1007,16 +1562,29 @@ static void LZ4_renormDictT(LZ4_stream_t_internal* LZ4_dict, const BYTE* src)
 }


-int LZ4_compress_fast_continue (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration)
+int LZ4_compress_fast_continue (LZ4_stream_t* LZ4_stream,
+                                const char* source, char* dest,
+                                int inputSize, int maxOutputSize,
+                                int acceleration)
 {
+    const tableType_t tableType = byU32;
    LZ4_stream_t_internal* streamPtr = &LZ4_stream->internal_donotuse;
-    const BYTE* const dictEnd = streamPtr->dictionary + streamPtr->dictSize;
+    const BYTE* dictEnd = streamPtr->dictionary + streamPtr->dictSize;
+
+    DEBUGLOG(5, "LZ4_compress_fast_continue (inputSize=%i)", inputSize);

-    const BYTE* smallest = (const BYTE*) source;
-    if (streamPtr->initCheck) return 0;   /* Uninitialized structure detected */
-    if ((streamPtr->dictSize>0) && (smallest>dictEnd)) smallest = dictEnd;
-    LZ4_renormDictT(streamPtr, smallest);
-    if (acceleration < 1) acceleration = ACCELERATION_DEFAULT;
+    LZ4_renormDictT(streamPtr, inputSize);   /* avoid index overflow */
+    if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT;
+    if (acceleration > LZ4_ACCELERATION_MAX) acceleration = LZ4_ACCELERATION_MAX;
+
+    /* invalidate tiny dictionaries */
+    if ( (streamPtr->dictSize-1 < 4-1)   /* intentional underflow */
+      && (dictEnd != (const BYTE*)source) ) {
+        DEBUGLOG(5, "LZ4_compress_fast_continue: dictSize(%u) at addr:%p is too small", streamPtr->dictSize, streamPtr->dictionary);
+        streamPtr->dictSize = 0;
+        streamPtr->dictionary = (const BYTE*)source;
+        dictEnd = (const BYTE*)source;
+    }

    /* Check overlapping input/dictionary space */
    {   const BYTE* sourceEnd = (const BYTE*) source + inputSize;
@@ -1030,46 +1598,61 @@ int LZ4_compress_fast_continue (LZ4_stream_t* LZ4_stream, const char* source, ch

    /* prefix mode : source data follows dictionary */
    if (dictEnd == (const BYTE*)source) {
-        int result;
        if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset))
-            result = LZ4_compress_generic(streamPtr, source, dest, inputSize, maxOutputSize, limitedOutput, byU32, withPrefix64k, dictSmall, acceleration);
+            return LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, withPrefix64k, dictSmall, acceleration);
        else
-            result = LZ4_compress_generic(streamPtr, source, dest, inputSize, maxOutputSize, limitedOutput, byU32, withPrefix64k, noDictIssue, acceleration);
-        streamPtr->dictSize += (U32)inputSize;
-        streamPtr->currentOffset += (U32)inputSize;
-        return result;
+            return LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, withPrefix64k, noDictIssue, acceleration);
    }

    /* external dictionary mode */
    {   int result;
-        if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset))
-            result = LZ4_compress_generic(streamPtr, source, dest, inputSize, maxOutputSize, limitedOutput, byU32, usingExtDict, dictSmall, acceleration);
-        else
-            result = LZ4_compress_generic(streamPtr, source, dest, inputSize, maxOutputSize, limitedOutput, byU32, usingExtDict, noDictIssue, acceleration);
+        if (streamPtr->dictCtx) {
+            /* We depend here on the fact that dictCtx'es (produced by
+             * LZ4_loadDict) guarantee that their tables contain no references
+             * to offsets between dictCtx->currentOffset - 64 KB and
+             * dictCtx->currentOffset - dictCtx->dictSize. This makes it safe
+             * to use noDictIssue even when the dict isn't a full 64 KB.
+             */
+            if (inputSize > 4 KB) {
+                /* For compressing large blobs, it is faster to pay the setup
+                 * cost to copy the dictionary's tables into the active context,
+                 * so that the compression loop is only looking into one table.
+                 */
+                LZ4_memcpy(streamPtr, streamPtr->dictCtx, sizeof(*streamPtr));
+                result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, noDictIssue, acceleration);
+            } else {
+                result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingDictCtx, noDictIssue, acceleration);
+            }
+        } else {
+            if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset)) {
+                result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, dictSmall, acceleration);
+            } else {
+                result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, noDictIssue, acceleration);
+            }
+        }
        streamPtr->dictionary = (const BYTE*)source;
        streamPtr->dictSize = (U32)inputSize;
-        streamPtr->currentOffset += (U32)inputSize;
        return result;
    }
 }


-/* Hidden debug function, to force external dictionary mode */
-int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* dest, int inputSize)
+/* Hidden debug function, to force-test external dictionary mode */
+int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* dest, int srcSize)
 {
    LZ4_stream_t_internal* streamPtr = &LZ4_dict->internal_donotuse;
    int result;
-    const BYTE* const dictEnd = streamPtr->dictionary + streamPtr->dictSize;

-    const BYTE* smallest = dictEnd;
-    if (smallest > (const BYTE*) source) smallest = (const BYTE*) source;
-    LZ4_renormDictT(streamPtr, smallest);
+    LZ4_renormDictT(streamPtr, srcSize);

-    result = LZ4_compress_generic(streamPtr, source, dest, inputSize, 0, notLimited, byU32, usingExtDict, noDictIssue, 1);
+    if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset)) {
+        result = LZ4_compress_generic(streamPtr, source, dest, srcSize, NULL, 0, notLimited, byU32, usingExtDict, dictSmall, 1);
+    } else {
+        result = LZ4_compress_generic(streamPtr, source, dest, srcSize, NULL, 0, notLimited, byU32, usingExtDict, noDictIssue, 1);
+    }

    streamPtr->dictionary = (const BYTE*)source;
-    streamPtr->dictSize = (U32)inputSize;
-    streamPtr->currentOffset += (U32)inputSize;
+    streamPtr->dictSize = (U32)srcSize;

    return result;
 }
@@ -1087,9 +1670,11 @@ int LZ4_saveDict (LZ4_stream_t* LZ4_dict, char* safeBuffer, int dictSize)
    LZ4_stream_t_internal* const dict = &LZ4_dict->internal_donotuse;
    const BYTE* const previousDictEnd = dict->dictionary + dict->dictSize;

-    if ((U32)dictSize > 64 KB) dictSize = 64 KB;   /* useless to define a dictionary > 64 KB */
-    if ((U32)dictSize > dict->dictSize) dictSize = dict->dictSize;
+    if ((U32)dictSize > 64 KB) { dictSize = 64 KB; } /* useless to define a dictionary > 64 KB */
+    if ((U32)dictSize > dict->dictSize) { dictSize = (int)dict->dictSize; }

+    if (safeBuffer == NULL) assert(dictSize == 0);
+    if (dictSize > 0)
        memmove(safeBuffer, previousDictEnd - dictSize, dictSize);

    dict->dictionary = (const BYTE*)safeBuffer;
@@ -1100,212 +1685,602 @@ int LZ4_saveDict (LZ4_stream_t* LZ4_dict, char* safeBuffer, int dictSize)



-/*-*****************************
-*  Decompression functions
-*******************************/
+/*-*******************************
+ *  Decompression functions
+ ********************************/
+
+typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive;
+typedef enum { decode_full_block = 0, partial_decode = 1 } earlyEnd_directive;
+
+#undef MIN
+#define MIN(a,b)    ( (a) < (b) ? (a) : (b) )
+
+/* Read the variable-length literal or match length.
+ *
+ * ip - pointer to use as input.
+ * lencheck - end ip.  Return an error if ip advances >= lencheck.
+ * loop_check - check ip >= lencheck in body of loop.  Returns loop_error if so.
+ * initial_check - check ip >= lencheck before start of loop.  Returns initial_error if so.
+ * error (output) - error code.  Should be set to 0 before call.
+ */
+typedef enum { loop_error = -2, initial_error = -1, ok = 0 } variable_length_error;
+LZ4_FORCE_INLINE unsigned
+read_variable_length(const BYTE**ip, const BYTE* lencheck,
+                     int loop_check, int initial_check,
+                     variable_length_error* error)
+{
+    U32 length = 0;
+    U32 s;
+    if (initial_check && unlikely((*ip) >= lencheck)) {    /* overflow detection */
+        *error = initial_error;
+        return length;
+    }
+    do {
+        s = **ip;
+        (*ip)++;
+        length += s;
+        if (loop_check && unlikely((*ip) >= lencheck)) {    /* overflow detection */
+            *error = loop_error;
+            return length;
+        }
+    } while (s==255);
+
+    return length;
+}
+
 /*! LZ4_decompress_generic() :
 *  This generic decompression function covers all use cases.
 *  It shall be instantiated several times, using different sets of directives.
 *  Note that it is important for performance that this function really get inlined,
 *  in order to remove useless branches during compilation optimization.
 */
-LZ4_FORCE_INLINE int LZ4_decompress_generic(
+LZ4_FORCE_INLINE int
+LZ4_decompress_generic(
                 const char* const src,
                 char* const dst,
                 int srcSize,
                 int outputSize,         /* If endOnInput==endOnInputSize, this value is `dstCapacity` */

-                 int endOnInput,         /* endOnOutputSize, endOnInputSize */
-                 int partialDecoding,    /* full, partial */
-                 int targetOutputSize,   /* only used if partialDecoding==partial */
-                 int dict,               /* noDict, withPrefix64k, usingExtDict */
-                 const BYTE* const lowPrefix,  /* == dst when no prefix */
+                 endCondition_directive endOnInput,   /* endOnOutputSize, endOnInputSize */
+                 earlyEnd_directive partialDecoding,  /* full, partial */
+                 dict_directive dict,                 /* noDict, withPrefix64k, usingExtDict */
+                 const BYTE* const lowPrefix,  /* always <= dst, == dst when no prefix */
                 const BYTE* const dictStart,  /* only if dict==usingExtDict */
                 const size_t dictSize         /* note : = 0 if noDict */
                 )
 {
-    const BYTE* ip = (const BYTE*) src;
+    if (src == NULL) { return -1; }
+
+    {   const BYTE* ip = (const BYTE*) src;
        const BYTE* const iend = ip + srcSize;

        BYTE* op = (BYTE*) dst;
        BYTE* const oend = op + outputSize;
        BYTE* cpy;
-    BYTE* oexit = op + targetOutputSize;

-    const BYTE* const dictEnd = (const BYTE*)dictStart + dictSize;
-    const unsigned dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4};
-    const int dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3};
+        const BYTE* const dictEnd = (dictStart == NULL) ? NULL : dictStart + dictSize;

        const int safeDecode = (endOnInput==endOnInputSize);
        const int checkOffset = ((safeDecode) && (dictSize < (int)(64 KB)));


-    /* Special cases */
-    if ((partialDecoding) && (oexit > oend-MFLIMIT)) oexit = oend-MFLIMIT;                        /* targetOutputSize too high => decode everything */
-    if ((endOnInput) && (unlikely(outputSize==0))) return ((srcSize==1) && (*ip==0)) ? 0 : -1;  /* Empty output buffer */
-    if ((!endOnInput) && (unlikely(outputSize==0))) return (*ip==0?1:-1);
+        /* Set up the "end" pointers for the shortcut. */
+        const BYTE* const shortiend = iend - (endOnInput ? 14 : 8) /*maxLL*/ - 2 /*offset*/;
+        const BYTE* const shortoend = oend - (endOnInput ? 14 : 8) /*maxLL*/ - 18 /*maxML*/;

-    /* Main Loop : decode sequences */
-    while (1) {
-        size_t length;
        const BYTE* match;
        size_t offset;
+        unsigned token;
+        size_t length;

-        /* get literal length */
-        unsigned const token = *ip++;
-        if ((length=(token>>ML_BITS)) == RUN_MASK) {
-            unsigned s;
-            do {
-                s = *ip++;
-                length += s;
-            } while ( likely(endOnInput ? ip<iend-RUN_MASK : 1) & (s==255) );
-            if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)(op))) goto _output_error;   /* overflow detection */
-            if ((safeDecode) && unlikely((uptrval)(ip)+length<(uptrval)(ip))) goto _output_error;   /* overflow detection */
+
+        DEBUGLOG(5, "LZ4_decompress_generic (srcSize:%i, dstSize:%i)", srcSize, outputSize);
+
+        /* Special cases */
+        assert(lowPrefix <= op);
+        if ((endOnInput) && (unlikely(outputSize==0))) {
+            /* Empty output buffer */
+            if (partialDecoding) return 0;
+            return ((srcSize==1) && (*ip==0)) ? 0 : -1;
+        }
+        if ((!endOnInput) && (unlikely(outputSize==0))) { return (*ip==0 ? 1 : -1); }
+        if ((endOnInput) && unlikely(srcSize==0)) { return -1; }
+
+	/* Currently the fast loop shows a regression on qualcomm arm chips. */
+#if LZ4_FAST_DEC_LOOP
+        if ((oend - op) < FASTLOOP_SAFE_DISTANCE) {
+            DEBUGLOG(6, "skip fast decode loop");
+            goto safe_decode;
+        }
+
+        /* Fast loop : decode sequences as long as output < iend-FASTLOOP_SAFE_DISTANCE */
+        while (1) {
+            /* Main fastloop assertion: We can always wildcopy FASTLOOP_SAFE_DISTANCE */
+            assert(oend - op >= FASTLOOP_SAFE_DISTANCE);
+            if (endOnInput) { assert(ip < iend); }
+            token = *ip++;
+            length = token >> ML_BITS;  /* literal length */
+
+            assert(!endOnInput || ip <= iend); /* ip < iend before the increment */
+
+            /* decode literal length */
+            if (length == RUN_MASK) {
+                variable_length_error error = ok;
+                length += read_variable_length(&ip, iend-RUN_MASK, (int)endOnInput, (int)endOnInput, &error);
+                if (error == initial_error) { goto _output_error; }
+                if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */
+                if ((safeDecode) && unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */
+
+                /* copy literals */
+                cpy = op+length;
+                LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH);
+                if (endOnInput) {  /* LZ4_decompress_safe() */
+                    if ((cpy>oend-32) || (ip+length>iend-32)) { goto safe_literal_copy; }
+                    LZ4_wildCopy32(op, ip, cpy);
+                } else {   /* LZ4_decompress_fast() */
+                    if (cpy>oend-8) { goto safe_literal_copy; }
+                    LZ4_wildCopy8(op, ip, cpy); /* LZ4_decompress_fast() cannot copy more than 8 bytes at a time :
+                                                 * it doesn't know input length, and only relies on end-of-block properties */
+                }
+                ip += length; op = cpy;
+            } else {
+                cpy = op+length;
+                if (endOnInput) {  /* LZ4_decompress_safe() */
+                    DEBUGLOG(7, "copy %u bytes in a 16-bytes stripe", (unsigned)length);
+                    /* We don't need to check oend, since we check it once for each loop below */
+                    if (ip > iend-(16 + 1/*max lit + offset + nextToken*/)) { goto safe_literal_copy; }
+                    /* Literals can only be 14, but hope compilers optimize if we copy by a register size */
+                    LZ4_memcpy(op, ip, 16);
+                } else {  /* LZ4_decompress_fast() */
+                    /* LZ4_decompress_fast() cannot copy more than 8 bytes at a time :
+                     * it doesn't know input length, and relies on end-of-block properties */
+                    LZ4_memcpy(op, ip, 8);
+                    if (length > 8) { LZ4_memcpy(op+8, ip+8, 8); }
+                }
+                ip += length; op = cpy;
+            }
+
+            /* get offset */
+            offset = LZ4_readLE16(ip); ip+=2;
+            match = op - offset;
+            assert(match <= op);
+
+            /* get matchlength */
+            length = token & ML_MASK;
+
+            if (length == ML_MASK) {
+                variable_length_error error = ok;
+                if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; } /* Error : offset outside buffers */
+                length += read_variable_length(&ip, iend - LASTLITERALS + 1, (int)endOnInput, 0, &error);
+                if (error != ok) { goto _output_error; }
+                if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)op)) { goto _output_error; } /* overflow detection */
+                length += MINMATCH;
+                if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) {
+                    goto safe_match_copy;
+                }
+            } else {
+                length += MINMATCH;
+                if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) {
+                    goto safe_match_copy;
+                }
+
+                /* Fastpath check: Avoids a branch in LZ4_wildCopy32 if true */
+                if ((dict == withPrefix64k) || (match >= lowPrefix)) {
+                    if (offset >= 8) {
+                        assert(match >= lowPrefix);
+                        assert(match <= op);
+                        assert(op + 18 <= oend);
+
+                        LZ4_memcpy(op, match, 8);
+                        LZ4_memcpy(op+8, match+8, 8);
+                        LZ4_memcpy(op+16, match+16, 2);
+                        op += length;
+                        continue;
+            }   }   }
+
+            if (checkOffset && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; } /* Error : offset outside buffers */
+            /* match starting within external dictionary */
+            if ((dict==usingExtDict) && (match < lowPrefix)) {
+                if (unlikely(op+length > oend-LASTLITERALS)) {
+                    if (partialDecoding) {
+                        DEBUGLOG(7, "partialDecoding: dictionary match, close to dstEnd");
+                        length = MIN(length, (size_t)(oend-op));
+                    } else {
+                        goto _output_error;  /* end-of-block condition violated */
+                }   }
+
+                if (length <= (size_t)(lowPrefix-match)) {
+                    /* match fits entirely within external dictionary : just copy */
+                    memmove(op, dictEnd - (lowPrefix-match), length);
+                    op += length;
+                } else {
+                    /* match stretches into both external dictionary and current block */
+                    size_t const copySize = (size_t)(lowPrefix - match);
+                    size_t const restSize = length - copySize;
+                    LZ4_memcpy(op, dictEnd - copySize, copySize);
+                    op += copySize;
+                    if (restSize > (size_t)(op - lowPrefix)) {  /* overlap copy */
+                        BYTE* const endOfMatch = op + restSize;
+                        const BYTE* copyFrom = lowPrefix;
+                        while (op < endOfMatch) { *op++ = *copyFrom++; }
+                    } else {
+                        LZ4_memcpy(op, lowPrefix, restSize);
+                        op += restSize;
+                }   }
+                continue;
+            }
+
+            /* copy match within block */
+            cpy = op + length;
+
+            assert((op <= oend) && (oend-op >= 32));
+            if (unlikely(offset<16)) {
+                LZ4_memcpy_using_offset(op, match, cpy, offset);
+            } else {
+                LZ4_wildCopy32(op, match, cpy);
+            }
+
+            op = cpy;   /* wildcopy correction */
+        }
+    safe_decode:
+#endif
+
+        /* Main Loop : decode remaining sequences where output < FASTLOOP_SAFE_DISTANCE */
+        while (1) {
+            token = *ip++;
+            length = token >> ML_BITS;  /* literal length */
+
+            assert(!endOnInput || ip <= iend); /* ip < iend before the increment */
+
+            /* A two-stage shortcut for the most common case:
+             * 1) If the literal length is 0..14, and there is enough space,
+             * enter the shortcut and copy 16 bytes on behalf of the literals
+             * (in the fast mode, only 8 bytes can be safely copied this way).
+             * 2) Further if the match length is 4..18, copy 18 bytes in a similar
+             * manner; but we ensure that there's enough space in the output for
+             * those 18 bytes earlier, upon entering the shortcut (in other words,
+             * there is a combined check for both stages).
+             */
+            if ( (endOnInput ? length != RUN_MASK : length <= 8)
+                /* strictly "less than" on input, to re-enter the loop with at least one byte */
+              && likely((endOnInput ? ip < shortiend : 1) & (op <= shortoend)) ) {
+                /* Copy the literals */
+                LZ4_memcpy(op, ip, endOnInput ? 16 : 8);
+                op += length; ip += length;
+
+                /* The second stage: prepare for match copying, decode full info.
+                 * If it doesn't work out, the info won't be wasted. */
+                length = token & ML_MASK; /* match length */
+                offset = LZ4_readLE16(ip); ip += 2;
+                match = op - offset;
+                assert(match <= op); /* check overflow */
+
+                /* Do not deal with overlapping matches. */
+                if ( (length != ML_MASK)
+                  && (offset >= 8)
+                  && (dict==withPrefix64k || match >= lowPrefix) ) {
+                    /* Copy the match. */
+                    LZ4_memcpy(op + 0, match + 0, 8);
+                    LZ4_memcpy(op + 8, match + 8, 8);
+                    LZ4_memcpy(op +16, match +16, 2);
+                    op += length + MINMATCH;
+                    /* Both stages worked, load the next token. */
+                    continue;
+                }
+
+                /* The second stage didn't work out, but the info is ready.
+                 * Propel it right to the point of match copying. */
+                goto _copy_match;
+            }
+
+            /* decode literal length */
+            if (length == RUN_MASK) {
+                variable_length_error error = ok;
+                length += read_variable_length(&ip, iend-RUN_MASK, (int)endOnInput, (int)endOnInput, &error);
+                if (error == initial_error) { goto _output_error; }
+                if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */
+                if ((safeDecode) && unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */
            }

            /* copy literals */
            cpy = op+length;
-        if ( ((endOnInput) && ((cpy>(partialDecoding?oexit:oend-MFLIMIT)) || (ip+length>iend-(2+1+LASTLITERALS))) )
+#if LZ4_FAST_DEC_LOOP
+        safe_literal_copy:
+#endif
+            LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH);
+            if ( ((endOnInput) && ((cpy>oend-MFLIMIT) || (ip+length>iend-(2+1+LASTLITERALS))) )
              || ((!endOnInput) && (cpy>oend-WILDCOPYLENGTH)) )
            {
+                /* We've either hit the input parsing restriction or the output parsing restriction.
+                 * In the normal scenario, decoding a full block, it must be the last sequence,
+                 * otherwise it's an error (invalid input or dimensions).
+                 * In partialDecoding scenario, it's necessary to ensure there is no buffer overflow.
+                 */
                if (partialDecoding) {
-                if (cpy > oend) goto _output_error;                           /* Error : write attempt beyond end of output buffer */
-                if ((endOnInput) && (ip+length > iend)) goto _output_error;   /* Error : read attempt beyond end of input buffer */
+                    /* Since we are partial decoding we may be in this block because of the output parsing
+                     * restriction, which is not valid since the output buffer is allowed to be undersized.
+                     */
+                    assert(endOnInput);
+                    DEBUGLOG(7, "partialDecoding: copying literals, close to input or output end")
+                    DEBUGLOG(7, "partialDecoding: literal length = %u", (unsigned)length);
+                    DEBUGLOG(7, "partialDecoding: remaining space in dstBuffer : %i", (int)(oend - op));
+                    DEBUGLOG(7, "partialDecoding: remaining space in srcBuffer : %i", (int)(iend - ip));
+                    /* Finishing in the middle of a literals segment,
+                     * due to lack of input.
+                     */
+                    if (ip+length > iend) {
+                        length = (size_t)(iend-ip);
+                        cpy = op + length;
+                    }
+                    /* Finishing in the middle of a literals segment,
+                     * due to lack of output space.
+                     */
+                    if (cpy > oend) {
+                        cpy = oend;
+                        assert(op<=oend);
+                        length = (size_t)(oend-op);
+                    }
                } else {
-                if ((!endOnInput) && (cpy != oend)) goto _output_error;       /* Error : block decoding must stop exactly there */
-                if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) goto _output_error;   /* Error : input must be consumed */
+                    /* We must be on the last sequence because of the parsing limitations so check
+                     * that we exactly regenerate the original size (must be exact when !endOnInput).
+                     */
+                    if ((!endOnInput) && (cpy != oend)) { goto _output_error; }
+                     /* We must be on the last sequence (or invalid) because of the parsing limitations
+                      * so check that we exactly consume the input and don't overrun the output buffer.
+                      */
+                    if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) {
+                        DEBUGLOG(6, "should have been last run of literals")
+                        DEBUGLOG(6, "ip(%p) + length(%i) = %p != iend (%p)", ip, (int)length, ip+length, iend);
+                        DEBUGLOG(6, "or cpy(%p) > oend(%p)", cpy, oend);
+                        goto _output_error;
                    }
-            memcpy(op, ip, length);
+                }
+                memmove(op, ip, length);  /* supports overlapping memory regions; only matters for in-place decompression scenarios */
                ip += length;
                op += length;
-            break;     /* Necessarily EOF, due to parsing restrictions */
+                /* Necessarily EOF when !partialDecoding.
+                 * When partialDecoding, it is EOF if we've either
+                 * filled the output buffer or
+                 * can't proceed with reading an offset for following match.
+                 */
+                if (!partialDecoding || (cpy == oend) || (ip >= (iend-2))) {
+                    break;
                }
-        LZ4_wildCopy(op, ip, cpy);
+            } else {
+                LZ4_wildCopy8(op, ip, cpy);   /* may overwrite up to WILDCOPYLENGTH beyond cpy */
                ip += length; op = cpy;
+            }

            /* get offset */
            offset = LZ4_readLE16(ip); ip+=2;
            match = op - offset;
-        if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) goto _output_error;   /* Error : offset outside buffers */
-        LZ4_write32(op, (U32)offset);   /* costs ~1%; silence an msan warning when offset==0 */

            /* get matchlength */
            length = token & ML_MASK;
+
+    _copy_match:
            if (length == ML_MASK) {
-            unsigned s;
-            do {
-                s = *ip++;
-                if ((endOnInput) && (ip > iend-LASTLITERALS)) goto _output_error;
-                length += s;
-            } while (s==255);
+              variable_length_error error = ok;
+              length += read_variable_length(&ip, iend - LASTLITERALS + 1, (int)endOnInput, 0, &error);
+              if (error != ok) goto _output_error;
                if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)op)) goto _output_error;   /* overflow detection */
            }
            length += MINMATCH;

-        /* check external dictionary */
+#if LZ4_FAST_DEC_LOOP
+        safe_match_copy:
+#endif
+            if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) goto _output_error;   /* Error : offset outside buffers */
+            /* match starting within external dictionary */
            if ((dict==usingExtDict) && (match < lowPrefix)) {
-            if (unlikely(op+length > oend-LASTLITERALS)) goto _output_error;   /* doesn't respect parsing restriction */
+                if (unlikely(op+length > oend-LASTLITERALS)) {
+                    if (partialDecoding) length = MIN(length, (size_t)(oend-op));
+                    else goto _output_error;   /* doesn't respect parsing restriction */
+                }

                if (length <= (size_t)(lowPrefix-match)) {
-                /* match can be copied as a single segment from external dictionary */
+                    /* match fits entirely within external dictionary : just copy */
                    memmove(op, dictEnd - (lowPrefix-match), length);
                    op += length;
                } else {
-                /* match encompass external dictionary and current block */
-                size_t const copySize = (size_t)(lowPrefix-match);
+                    /* match stretches into both external dictionary and current block */
+                    size_t const copySize = (size_t)(lowPrefix - match);
                    size_t const restSize = length - copySize;
-                memcpy(op, dictEnd - copySize, copySize);
+                    LZ4_memcpy(op, dictEnd - copySize, copySize);
                    op += copySize;
-                if (restSize > (size_t)(op-lowPrefix)) {  /* overlap copy */
+                    if (restSize > (size_t)(op - lowPrefix)) {  /* overlap copy */
                        BYTE* const endOfMatch = op + restSize;
                        const BYTE* copyFrom = lowPrefix;
                        while (op < endOfMatch) *op++ = *copyFrom++;
                    } else {
-                    memcpy(op, lowPrefix, restSize);
+                        LZ4_memcpy(op, lowPrefix, restSize);
                        op += restSize;
                }   }
                continue;
            }
+            assert(match >= lowPrefix);

            /* copy match within block */
            cpy = op + length;
+
+            /* partialDecoding : may end anywhere within the block */
+            assert(op<=oend);
+            if (partialDecoding && (cpy > oend-MATCH_SAFEGUARD_DISTANCE)) {
+                size_t const mlen = MIN(length, (size_t)(oend-op));
+                const BYTE* const matchEnd = match + mlen;
+                BYTE* const copyEnd = op + mlen;
+                if (matchEnd > op) {   /* overlap copy */
+                    while (op < copyEnd) { *op++ = *match++; }
+                } else {
+                    LZ4_memcpy(op, match, mlen);
+                }
+                op = copyEnd;
+                if (op == oend) { break; }
+                continue;
+            }
+
            if (unlikely(offset<8)) {
-            const int dec64 = dec64table[offset];
+                LZ4_write32(op, 0);   /* silence msan warning when offset==0 */
                op[0] = match[0];
                op[1] = match[1];
                op[2] = match[2];
                op[3] = match[3];
-            match += dec32table[offset];
-            memcpy(op+4, match, 4);
-            match -= dec64;
-        } else { LZ4_copy8(op, match); match+=8; }
+                match += inc32table[offset];
+                LZ4_memcpy(op+4, match, 4);
+                match -= dec64table[offset];
+            } else {
+                LZ4_memcpy(op, match, 8);
+                match += 8;
+            }
            op += 8;

-        if (unlikely(cpy>oend-12)) {
-            BYTE* const oCopyLimit = oend-(WILDCOPYLENGTH-1);
-            if (cpy > oend-LASTLITERALS) goto _output_error;    /* Error : last LASTLITERALS bytes must be literals (uncompressed) */
+            if (unlikely(cpy > oend-MATCH_SAFEGUARD_DISTANCE)) {
+                BYTE* const oCopyLimit = oend - (WILDCOPYLENGTH-1);
+                if (cpy > oend-LASTLITERALS) { goto _output_error; } /* Error : last LASTLITERALS bytes must be literals (uncompressed) */
                if (op < oCopyLimit) {
-                LZ4_wildCopy(op, match, oCopyLimit);
+                    LZ4_wildCopy8(op, match, oCopyLimit);
                    match += oCopyLimit - op;
                    op = oCopyLimit;
                }
-            while (op<cpy) *op++ = *match++;
+                while (op < cpy) { *op++ = *match++; }
            } else {
-            LZ4_copy8(op, match);
-            if (length>16) LZ4_wildCopy(op+8, match+8, cpy);
+                LZ4_memcpy(op, match, 8);
+                if (length > 16)  { LZ4_wildCopy8(op+8, match+8, cpy); }
            }
-        op=cpy;   /* correction */
+            op = cpy;   /* wildcopy correction */
        }

        /* end of decoding */
-    if (endOnInput)
+        if (endOnInput) {
+            DEBUGLOG(5, "decoded %i bytes", (int) (((char*)op)-dst));
           return (int) (((char*)op)-dst);     /* Nb of output bytes decoded */
-    else
+       } else {
           return (int) (((const char*)ip)-src);   /* Nb of input bytes read */
+       }

        /* Overflow error detected */
-_output_error:
+    _output_error:
        return (int) (-(((const char*)ip)-src))-1;
+    }
 }


+/*===== Instantiate the API decoding functions. =====*/
+
+LZ4_FORCE_O2
 int LZ4_decompress_safe(const char* source, char* dest, int compressedSize, int maxDecompressedSize)
 {
-    return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize, endOnInputSize, full, 0, noDict, (BYTE*)dest, NULL, 0);
+    return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize,
+                                  endOnInputSize, decode_full_block, noDict,
+                                  (BYTE*)dest, NULL, 0);
 }

-int LZ4_decompress_safe_partial(const char* source, char* dest, int compressedSize, int targetOutputSize, int maxDecompressedSize)
+LZ4_FORCE_O2
+int LZ4_decompress_safe_partial(const char* src, char* dst, int compressedSize, int targetOutputSize, int dstCapacity)
 {
-    return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize, endOnInputSize, partial, targetOutputSize, noDict, (BYTE*)dest, NULL, 0);
+    dstCapacity = MIN(targetOutputSize, dstCapacity);
+    return LZ4_decompress_generic(src, dst, compressedSize, dstCapacity,
+                                  endOnInputSize, partial_decode,
+                                  noDict, (BYTE*)dst, NULL, 0);
 }

+LZ4_FORCE_O2
 int LZ4_decompress_fast(const char* source, char* dest, int originalSize)
 {
-    return LZ4_decompress_generic(source, dest, 0, originalSize, endOnOutputSize, full, 0, withPrefix64k, (BYTE*)(dest - 64 KB), NULL, 64 KB);
+    return LZ4_decompress_generic(source, dest, 0, originalSize,
+                                  endOnOutputSize, decode_full_block, withPrefix64k,
+                                  (BYTE*)dest - 64 KB, NULL, 0);
+}
+
+/*===== Instantiate a few more decoding cases, used more than once. =====*/
+
+LZ4_FORCE_O2 /* Exported, an obsolete API function. */
+int LZ4_decompress_safe_withPrefix64k(const char* source, char* dest, int compressedSize, int maxOutputSize)
+{
+    return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
+                                  endOnInputSize, decode_full_block, withPrefix64k,
+                                  (BYTE*)dest - 64 KB, NULL, 0);
+}
+
+/* Another obsolete API function, paired with the previous one. */
+int LZ4_decompress_fast_withPrefix64k(const char* source, char* dest, int originalSize)
+{
+    /* LZ4_decompress_fast doesn't validate match offsets,
+     * and thus serves well with any prefixed dictionary. */
+    return LZ4_decompress_fast(source, dest, originalSize);
+}
+
+LZ4_FORCE_O2
+static int LZ4_decompress_safe_withSmallPrefix(const char* source, char* dest, int compressedSize, int maxOutputSize,
+                                               size_t prefixSize)
+{
+    return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
+                                  endOnInputSize, decode_full_block, noDict,
+                                  (BYTE*)dest-prefixSize, NULL, 0);
 }

+LZ4_FORCE_O2
+int LZ4_decompress_safe_forceExtDict(const char* source, char* dest,
+                                     int compressedSize, int maxOutputSize,
+                                     const void* dictStart, size_t dictSize)
+{
+    return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
+                                  endOnInputSize, decode_full_block, usingExtDict,
+                                  (BYTE*)dest, (const BYTE*)dictStart, dictSize);
+}
+
+LZ4_FORCE_O2
+static int LZ4_decompress_fast_extDict(const char* source, char* dest, int originalSize,
+                                       const void* dictStart, size_t dictSize)
+{
+    return LZ4_decompress_generic(source, dest, 0, originalSize,
+                                  endOnOutputSize, decode_full_block, usingExtDict,
+                                  (BYTE*)dest, (const BYTE*)dictStart, dictSize);
+}
+
+/* The "double dictionary" mode, for use with e.g. ring buffers: the first part
+ * of the dictionary is passed as prefix, and the second via dictStart + dictSize.
+ * These routines are used only once, in LZ4_decompress_*_continue().
+ */
+LZ4_FORCE_INLINE
+int LZ4_decompress_safe_doubleDict(const char* source, char* dest, int compressedSize, int maxOutputSize,
+                                   size_t prefixSize, const void* dictStart, size_t dictSize)
+{
+    return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
+                                  endOnInputSize, decode_full_block, usingExtDict,
+                                  (BYTE*)dest-prefixSize, (const BYTE*)dictStart, dictSize);
+}
+
+LZ4_FORCE_INLINE
+int LZ4_decompress_fast_doubleDict(const char* source, char* dest, int originalSize,
+                                   size_t prefixSize, const void* dictStart, size_t dictSize)
+{
+    return LZ4_decompress_generic(source, dest, 0, originalSize,
+                                  endOnOutputSize, decode_full_block, usingExtDict,
+                                  (BYTE*)dest-prefixSize, (const BYTE*)dictStart, dictSize);
+}

 /*===== streaming decompression functions =====*/

 LZ4_streamDecode_t* LZ4_createStreamDecode(void)
 {
-    LZ4_streamDecode_t* lz4s = (LZ4_streamDecode_t*) ALLOCATOR(1, sizeof(LZ4_streamDecode_t));
+    LZ4_streamDecode_t* lz4s = (LZ4_streamDecode_t*) ALLOC_AND_ZERO(sizeof(LZ4_streamDecode_t));
+    LZ4_STATIC_ASSERT(LZ4_STREAMDECODESIZE >= sizeof(LZ4_streamDecode_t_internal));    /* A compilation error here means LZ4_STREAMDECODESIZE is not large enough */
    return lz4s;
 }

 int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream)
 {
-    if (!LZ4_stream) return 0;   /* support free on NULL */
+    if (LZ4_stream == NULL) { return 0; }  /* support free on NULL */
    FREEMEM(LZ4_stream);
    return 0;
 }

-/*!
- * LZ4_setStreamDecode() :
+/*! LZ4_setStreamDecode() :
 *  Use this function to instruct where to find the dictionary.
 *  This function is not necessary if previous data is still available where it was decoded.
 *  Loading a size of 0 is allowed (same effect as no dictionary).
- * Return : 1 if OK, 0 if error
+ * @return : 1 if OK, 0 if error
 */
 int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize)
 {
@@ -1317,6 +2292,25 @@ int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dicti
    return 1;
 }

+/*! LZ4_decoderRingBufferSize() :
+ *  when setting a ring buffer for streaming decompression (optional scenario),
+ *  provides the minimum size of this ring buffer
+ *  to be compatible with any source respecting maxBlockSize condition.
+ *  Note : in a ring buffer scenario,
+ *  blocks are presumed decompressed next to each other.
+ *  When not enough space remains for next block (remainingSize < maxBlockSize),
+ *  decoding resumes from beginning of ring buffer.
+ * @return : minimum ring buffer size,
+ *           or 0 if there is an error (invalid maxBlockSize).
+ */
+int LZ4_decoderRingBufferSize(int maxBlockSize)
+{
+    if (maxBlockSize < 0) return 0;
+    if (maxBlockSize > LZ4_MAX_INPUT_SIZE) return 0;
+    if (maxBlockSize < 16) maxBlockSize = 16;
+    return LZ4_DECODER_RING_BUFFER_SIZE(maxBlockSize);
+}
+
 /*
 *_continue() :
    These decoding functions allow decompression of multiple blocks in "streaming" mode.
@@ -1324,52 +2318,75 @@ int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dicti
    If it's not possible, save the relevant part of decoded data into a safe buffer,
    and indicate where it stands using LZ4_setStreamDecode()
 */
+LZ4_FORCE_O2
 int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxOutputSize)
 {
    LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse;
    int result;

-    if (lz4sd->prefixEnd == (BYTE*)dest) {
-        result = LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
-                                        endOnInputSize, full, 0,
-                                        usingExtDict, lz4sd->prefixEnd - lz4sd->prefixSize, lz4sd->externalDict, lz4sd->extDictSize);
+    if (lz4sd->prefixSize == 0) {
+        /* The first call, no dictionary yet. */
+        assert(lz4sd->extDictSize == 0);
+        result = LZ4_decompress_safe(source, dest, compressedSize, maxOutputSize);
        if (result <= 0) return result;
-        lz4sd->prefixSize += result;
+        lz4sd->prefixSize = (size_t)result;
+        lz4sd->prefixEnd = (BYTE*)dest + result;
+    } else if (lz4sd->prefixEnd == (BYTE*)dest) {
+        /* They're rolling the current segment. */
+        if (lz4sd->prefixSize >= 64 KB - 1)
+            result = LZ4_decompress_safe_withPrefix64k(source, dest, compressedSize, maxOutputSize);
+        else if (lz4sd->extDictSize == 0)
+            result = LZ4_decompress_safe_withSmallPrefix(source, dest, compressedSize, maxOutputSize,
+                                                         lz4sd->prefixSize);
+        else
+            result = LZ4_decompress_safe_doubleDict(source, dest, compressedSize, maxOutputSize,
+                                                    lz4sd->prefixSize, lz4sd->externalDict, lz4sd->extDictSize);
+        if (result <= 0) return result;
+        lz4sd->prefixSize += (size_t)result;
        lz4sd->prefixEnd  += result;
    } else {
+        /* The buffer wraps around, or they're switching to another buffer. */
        lz4sd->extDictSize = lz4sd->prefixSize;
        lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize;
-        result = LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
-                                        endOnInputSize, full, 0,
-                                        usingExtDict, (BYTE*)dest, lz4sd->externalDict, lz4sd->extDictSize);
+        result = LZ4_decompress_safe_forceExtDict(source, dest, compressedSize, maxOutputSize,
+                                                  lz4sd->externalDict, lz4sd->extDictSize);
        if (result <= 0) return result;
-        lz4sd->prefixSize = result;
+        lz4sd->prefixSize = (size_t)result;
        lz4sd->prefixEnd  = (BYTE*)dest + result;
    }

    return result;
 }

+LZ4_FORCE_O2
 int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int originalSize)
 {
    LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse;
    int result;
+    assert(originalSize >= 0);

-    if (lz4sd->prefixEnd == (BYTE*)dest) {
-        result = LZ4_decompress_generic(source, dest, 0, originalSize,
-                                        endOnOutputSize, full, 0,
-                                        usingExtDict, lz4sd->prefixEnd - lz4sd->prefixSize, lz4sd->externalDict, lz4sd->extDictSize);
+    if (lz4sd->prefixSize == 0) {
+        assert(lz4sd->extDictSize == 0);
+        result = LZ4_decompress_fast(source, dest, originalSize);
+        if (result <= 0) return result;
+        lz4sd->prefixSize = (size_t)originalSize;
+        lz4sd->prefixEnd = (BYTE*)dest + originalSize;
+    } else if (lz4sd->prefixEnd == (BYTE*)dest) {
+        if (lz4sd->prefixSize >= 64 KB - 1 || lz4sd->extDictSize == 0)
+            result = LZ4_decompress_fast(source, dest, originalSize);
+        else
+            result = LZ4_decompress_fast_doubleDict(source, dest, originalSize,
+                                                    lz4sd->prefixSize, lz4sd->externalDict, lz4sd->extDictSize);
        if (result <= 0) return result;
-        lz4sd->prefixSize += originalSize;
+        lz4sd->prefixSize += (size_t)originalSize;
        lz4sd->prefixEnd  += originalSize;
    } else {
        lz4sd->extDictSize = lz4sd->prefixSize;
        lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize;
-        result = LZ4_decompress_generic(source, dest, 0, originalSize,
-                                        endOnOutputSize, full, 0,
-                                        usingExtDict, (BYTE*)dest, lz4sd->externalDict, lz4sd->extDictSize);
+        result = LZ4_decompress_fast_extDict(source, dest, originalSize,
+                                             lz4sd->externalDict, lz4sd->extDictSize);
        if (result <= 0) return result;
-        lz4sd->prefixSize = originalSize;
+        lz4sd->prefixSize = (size_t)originalSize;
        lz4sd->prefixEnd  = (BYTE*)dest + originalSize;
    }

@@ -1384,32 +2401,27 @@ Advanced decoding functions :
    the dictionary must be explicitly provided within parameters
 */

-LZ4_FORCE_INLINE int LZ4_decompress_usingDict_generic(const char* source, char* dest, int compressedSize, int maxOutputSize, int safe, const char* dictStart, int dictSize)
+int LZ4_decompress_safe_usingDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize)
 {
    if (dictSize==0)
-        return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, safe, full, 0, noDict, (BYTE*)dest, NULL, 0);
+        return LZ4_decompress_safe(source, dest, compressedSize, maxOutputSize);
    if (dictStart+dictSize == dest) {
-        if (dictSize >= (int)(64 KB - 1))
-            return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, safe, full, 0, withPrefix64k, (BYTE*)dest-64 KB, NULL, 0);
-        return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, safe, full, 0, noDict, (BYTE*)dest-dictSize, NULL, 0);
+        if (dictSize >= 64 KB - 1) {
+            return LZ4_decompress_safe_withPrefix64k(source, dest, compressedSize, maxOutputSize);
        }
-    return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, safe, full, 0, usingExtDict, (BYTE*)dest, (const BYTE*)dictStart, dictSize);
-}
-
-int LZ4_decompress_safe_usingDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize)
-{
-    return LZ4_decompress_usingDict_generic(source, dest, compressedSize, maxOutputSize, 1, dictStart, dictSize);
+        assert(dictSize >= 0);
+        return LZ4_decompress_safe_withSmallPrefix(source, dest, compressedSize, maxOutputSize, (size_t)dictSize);
+    }
+    assert(dictSize >= 0);
+    return LZ4_decompress_safe_forceExtDict(source, dest, compressedSize, maxOutputSize, dictStart, (size_t)dictSize);
 }

 int LZ4_decompress_fast_usingDict(const char* source, char* dest, int originalSize, const char* dictStart, int dictSize)
 {
-    return LZ4_decompress_usingDict_generic(source, dest, 0, originalSize, 0, dictStart, dictSize);
-}
-
-/* debug function */
-int LZ4_decompress_safe_forceExtDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize)
-{
-    return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, endOnInputSize, full, 0, usingExtDict, (BYTE*)dest, (const BYTE*)dictStart, dictSize);
+    if (dictSize==0 || dictStart+dictSize == dest)
+        return LZ4_decompress_fast(source, dest, originalSize);
+    assert(dictSize >= 0);
+    return LZ4_decompress_fast_extDict(source, dest, originalSize, dictStart, (size_t)dictSize);
 }


@@ -1417,64 +2429,67 @@ int LZ4_decompress_safe_forceExtDict(const char* source, char* dest, int compres
 *  Obsolete Functions
 ***************************************************/
 /* obsolete compression functions */
-int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize) { return LZ4_compress_default(source, dest, inputSize, maxOutputSize); }
-int LZ4_compress(const char* source, char* dest, int inputSize) { return LZ4_compress_default(source, dest, inputSize, LZ4_compressBound(inputSize)); }
-int LZ4_compress_limitedOutput_withState (void* state, const char* src, char* dst, int srcSize, int dstSize) { return LZ4_compress_fast_extState(state, src, dst, srcSize, dstSize, 1); }
-int LZ4_compress_withState (void* state, const char* src, char* dst, int srcSize) { return LZ4_compress_fast_extState(state, src, dst, srcSize, LZ4_compressBound(srcSize), 1); }
-int LZ4_compress_limitedOutput_continue (LZ4_stream_t* LZ4_stream, const char* src, char* dst, int srcSize, int maxDstSize) { return LZ4_compress_fast_continue(LZ4_stream, src, dst, srcSize, maxDstSize, 1); }
-int LZ4_compress_continue (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize) { return LZ4_compress_fast_continue(LZ4_stream, source, dest, inputSize, LZ4_compressBound(inputSize), 1); }
+int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize)
+{
+    return LZ4_compress_default(source, dest, inputSize, maxOutputSize);
+}
+int LZ4_compress(const char* src, char* dest, int srcSize)
+{
+    return LZ4_compress_default(src, dest, srcSize, LZ4_compressBound(srcSize));
+}
+int LZ4_compress_limitedOutput_withState (void* state, const char* src, char* dst, int srcSize, int dstSize)
+{
+    return LZ4_compress_fast_extState(state, src, dst, srcSize, dstSize, 1);
+}
+int LZ4_compress_withState (void* state, const char* src, char* dst, int srcSize)
+{
+    return LZ4_compress_fast_extState(state, src, dst, srcSize, LZ4_compressBound(srcSize), 1);
+}
+int LZ4_compress_limitedOutput_continue (LZ4_stream_t* LZ4_stream, const char* src, char* dst, int srcSize, int dstCapacity)
+{
+    return LZ4_compress_fast_continue(LZ4_stream, src, dst, srcSize, dstCapacity, 1);
+}
+int LZ4_compress_continue (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize)
+{
+    return LZ4_compress_fast_continue(LZ4_stream, source, dest, inputSize, LZ4_compressBound(inputSize), 1);
+}

 /*
-These function names are deprecated and should no longer be used.
+These decompression functions are deprecated and should no longer be used.
 They are only provided here for compatibility with older user programs.
 - LZ4_uncompress is totally equivalent to LZ4_decompress_fast
 - LZ4_uncompress_unknownOutputSize is totally equivalent to LZ4_decompress_safe
 */
-int LZ4_uncompress (const char* source, char* dest, int outputSize) { return LZ4_decompress_fast(source, dest, outputSize); }
-int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize) { return LZ4_decompress_safe(source, dest, isize, maxOutputSize); }
-
+int LZ4_uncompress (const char* source, char* dest, int outputSize)
+{
+    return LZ4_decompress_fast(source, dest, outputSize);
+}
+int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize)
+{
+    return LZ4_decompress_safe(source, dest, isize, maxOutputSize);
+}

 /* Obsolete Streaming functions */

-int LZ4_sizeofStreamState() { return LZ4_STREAMSIZE; }
-
-static void LZ4_init(LZ4_stream_t* lz4ds, BYTE* base)
-{
-    MEM_INIT(lz4ds, 0, sizeof(LZ4_stream_t));
-    lz4ds->internal_donotuse.bufferStart = base;
-}
+int LZ4_sizeofStreamState(void) { return LZ4_STREAMSIZE; }

 int LZ4_resetStreamState(void* state, char* inputBuffer)
 {
-    if ((((uptrval)state) & 3) != 0) return 1;   /* Error : pointer is not aligned on 4-bytes boundary */
-    LZ4_init((LZ4_stream_t*)state, (BYTE*)inputBuffer);
+    (void)inputBuffer;
+    LZ4_resetStream((LZ4_stream_t*)state);
    return 0;
 }

 void* LZ4_create (char* inputBuffer)
 {
-    LZ4_stream_t* lz4ds = (LZ4_stream_t*)ALLOCATOR(8, sizeof(LZ4_stream_t));
-    LZ4_init (lz4ds, (BYTE*)inputBuffer);
-    return lz4ds;
-}
-
-char* LZ4_slideInputBuffer (void* LZ4_Data)
-{
-    LZ4_stream_t_internal* ctx = &((LZ4_stream_t*)LZ4_Data)->internal_donotuse;
-    int dictSize = LZ4_saveDict((LZ4_stream_t*)LZ4_Data, (char*)ctx->bufferStart, 64 KB);
-    return (char*)(ctx->bufferStart + dictSize);
+    (void)inputBuffer;
+    return LZ4_createStream();
 }

-/* Obsolete streaming decompression functions */
-
-int LZ4_decompress_safe_withPrefix64k(const char* source, char* dest, int compressedSize, int maxOutputSize)
-{
-    return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, endOnInputSize, full, 0, withPrefix64k, (BYTE*)dest - 64 KB, NULL, 64 KB);
-}
-
-int LZ4_decompress_fast_withPrefix64k(const char* source, char* dest, int originalSize)
+char* LZ4_slideInputBuffer (void* state)
 {
-    return LZ4_decompress_generic(source, dest, 0, originalSize, endOnOutputSize, full, 0, withPrefix64k, (BYTE*)dest - 64 KB, NULL, 64 KB);
+    /* avoid const char * -> char * conversion warning */
+    return (char *)(uptrval)((LZ4_stream_t*)state)->internal_donotuse.dictionary;
 }

 #endif   /* LZ4_COMMONDEFS_ONLY */
--- a/documentation20/cn/03.architecture/docs.md
+++ b/documentation20/cn/03.architecture/docs.md
@@ -101,9 +101,9 @@
 </tbody>
 </table>

-<center> 表1：智能电表数据示例</center>
+<center> 表 1：智能电表数据示例</center>

-每一条记录都有设备ID，时间戳，采集的物理量（如上图中的电流、电压、相位），还有与每个设备相关的静态标签（如上述表1中的位置Location和分组groupId）。每个设备是受外界的触发，或按照设定的周期采集数据。采集的数据点是时序的，是一个数据流。
+每一条记录都有设备 ID，时间戳，采集的物理量（如上图中的电流、电压、相位），还有与每个设备相关的静态标签（如上述表1中的位置 Location 和分组 groupId）。每个设备是受外界的触发，或按照设定的周期采集数据。采集的数据点是时序的，是一个数据流。

 ### 数据特征

@@ -118,17 +118,17 @@
 7. 数据有保留期限；
 8. 数据的查询分析一定是基于时间段和空间区域；
 9. 除存储、查询操作外，还需要各种统计和实时计算操作；
-10. 数据量巨大，一天可能采集的数据就可以超过100亿条。
+10. 数据量巨大，一天可能采集的数据就可以超过 100 亿条。

 充分利用上述特征，TDengine 采取了经特殊优化的存储和计算设计来处理时序数据，它将系统处理能力显著提高，同时大幅降低了系统运维的复杂度。

 ### 关系型数据库模型

-因为采集的数据一般是结构化数据，同时为降低学习门槛，TDengine采用传统的关系型数据库模型管理数据。因此用户需要先创建库，然后创建表，之后才能插入或查询数据。TDengine采用的是结构化存储，而不是NoSQL的key-value存储。
+因为采集的数据一般是结构化数据，同时为降低学习门槛，TDengine 采用传统的关系型数据库模型管理数据。因此用户需要先创建库，然后创建表，之后才能插入或查询数据。TDengine 采用的是结构化存储，而不是 NoSQL 的 key-value 存储。

 ### 一个数据采集点一张表

-为充分利用其数据的时序性和其他数据特点，TDengine要求**对每个数据采集点单独建表**（比如有一千万个智能电表，就需创建一千万张表，上述表格中的d1001, d1002, d1003, d1004都需单独建表），用来存储这个采集点所采集的时序数据。这种设计有几大优点：
+为充分利用其数据的时序性和其他数据特点，TDengine 要求**对每个数据采集点单独建表**（比如有一千万个智能电表，就需创建一千万张表，上述表格中的 d1001, d1002, d1003, d1004 都需单独建表），用来存储这个采集点所采集的时序数据。这种设计有几大优点：

 1. 能保证一个采集点的数据在存储介质上是以块为单位连续存储的。如果读取一个时间段的数据，它能大幅减少随机读取操作，成数量级的提升读取和查询速度。
 2. 由于不同采集设备产生数据的过程完全独立，每个设备的数据源是唯一的，一张表也就只有一个写入者，这样就可采用无锁方式来写，写入速度就能大幅提升。
@@ -136,17 +136,17 @@

 如果采用传统的方式，将多个设备的数据写入一张表，由于网络延时不可控，不同设备的数据到达服务器的时序是无法保证的，写入操作是要有锁保护的，而且一个设备的数据是难以保证连续存储在一起的。**采用一个数据采集点一张表的方式，能最大程度的保证单个数据采集点的插入和查询的性能是最优的。**

-TDengine 建议用数据采集点的名字(如上表中的D1001)来做表名。每个数据采集点可能同时采集多个物理量(如上表中的curent, voltage, phase)，每个物理量对应一张表中的一列，数据类型可以是整型、浮点型、字符串等。除此之外，表的第一列必须是时间戳，即数据类型为 timestamp。对采集的数据，TDengine将自动按照时间戳建立索引，但对采集的物理量不建任何索引。数据用列式存储方式保存。
+TDengine 建议用数据采集点的名字(如上表中的 D1001)来做表名。每个数据采集点可能同时采集多个物理量(如上表中的 curent, voltage, phase)，每个物理量对应一张表中的一列，数据类型可以是整型、浮点型、字符串等。除此之外，表的第一列必须是时间戳，即数据类型为 timestamp。对采集的数据，TDengine 将自动按照时间戳建立索引，但对采集的物理量不建任何索引。数据用列式存储方式保存。

 ### 超级表：同一类型数据采集点的集合

-由于一个数据采集点一张表，导致表的数量巨增，难以管理，而且应用经常需要做采集点之间的聚合操作，聚合的操作也变得复杂起来。为解决这个问题，TDengine引入超级表(Super Table，简称为STable)的概念。
+由于一个数据采集点一张表，导致表的数量巨增，难以管理，而且应用经常需要做采集点之间的聚合操作，聚合的操作也变得复杂起来。为解决这个问题，TDengine 引入超级表(Super Table，简称为 STable)的概念。

-超级表是指某一特定类型的数据采集点的集合。同一类型的数据采集点，其表的结构是完全一样的，但每个表（数据采集点）的静态属性（标签）是不一样的。描述一个超级表（某一特定类型的数据采集点的集合），除需要定义采集量的表结构之外，还需要定义其标签的schema，标签的数据类型可以是整数、浮点数、字符串，标签可以有多个，可以事后增加、删除或修改。如果整个系统有N个不同类型的数据采集点，就需要建立N个超级表。
+超级表是指某一特定类型的数据采集点的集合。同一类型的数据采集点，其表的结构是完全一样的，但每个表（数据采集点）的静态属性（标签）是不一样的。描述一个超级表（某一特定类型的数据采集点的集合），除需要定义采集量的表结构之外，还需要定义其标签的 schema，标签的数据类型可以是整数、浮点数、字符串，标签可以有多个，可以事后增加、删除或修改。如果整个系统有 N 个不同类型的数据采集点，就需要建立 N 个超级表。

-在TDengine的设计里，**表用来代表一个具体的数据采集点，超级表用来代表一组相同类型的数据采集点集合**。当为某个具体数据采集点创建表时，用户使用超级表的定义做模板，同时指定该具体采集点（表）的标签值。与传统的关系型数据库相比，表（一个数据采集点）是带有静态标签的，而且这些标签可以事后增加、删除、修改。**一张超级表包含有多张表，这些表具有相同的时序数据schema，但带有不同的标签值**。
+在 TDengine 的设计里，**表用来代表一个具体的数据采集点，超级表用来代表一组相同类型的数据采集点集合**。当为某个具体数据采集点创建表时，用户使用超级表的定义做模板，同时指定该具体采集点（表）的标签值。与传统的关系型数据库相比，表（一个数据采集点）是带有静态标签的，而且这些标签可以事后增加、删除、修改。**一张超级表包含有多张表，这些表具有相同的时序数据 schema，但带有不同的标签值**。

-当对多个具有相同数据类型的数据采集点进行聚合操作时，TDengine会先把满足标签过滤条件的表从超级表中找出来，然后再扫描这些表的时序数据，进行聚合操作，这样需要扫描的数据集会大幅减少，从而显著提高聚合计算的性能。
+当对多个具有相同数据类型的数据采集点进行聚合操作时，TDengine 会先把满足标签过滤条件的表从超级表中找出来，然后再扫描这些表的时序数据，进行聚合操作，这样需要扫描的数据集会大幅减少，从而显著提高聚合计算的性能。

 ## <a class="anchor" id="cluster"></a>集群与基本逻辑单元

@@ -159,230 +159,230 @@ TDengine 分布式架构的逻辑结构图如下：
 ![TDengine架构示意图](../images/architecture/structure.png)
 <center> 图 1 TDengine架构示意图  </center>

-一个完整的 TDengine 系统是运行在一到多个物理节点上的，逻辑上，它包含数据节点(dnode)、TDengine应用驱动(taosc)以及应用(app)。系统中存在一到多个数据节点，这些数据节点组成一个集群(cluster)。应用通过taosc的API与TDengine集群进行互动。下面对每个逻辑单元进行简要介绍。
+一个完整的 TDengine 系统是运行在一到多个物理节点上的，逻辑上，它包含数据节点(dnode)、TDengine 应用驱动(taosc)以及应用(app)。系统中存在一到多个数据节点，这些数据节点组成一个集群(cluster)。应用通过 taosc 的 API 与 TDengine 集群进行互动。下面对每个逻辑单元进行简要介绍。

-**物理节点(pnode)：** pnode是一独立运行、拥有自己的计算、存储和网络能力的计算机，可以是安装有OS的物理机、虚拟机或Docker容器。物理节点由其配置的 FQDN(Fully Qualified Domain Name)来标识。TDengine完全依赖FQDN来进行网络通讯，如果不了解FQDN，请看博文[《一篇文章说清楚TDengine的FQDN》](https://www.taosdata.com/blog/2020/09/11/1824.html)。
+**物理节点(pnode)：** pnode 是一独立运行、拥有自己的计算、存储和网络能力的计算机，可以是安装有OS的物理机、虚拟机或 Docker 容器。物理节点由其配置的 FQDN (Fully Qualified Domain Name)来标识。TDengine 完全依赖 FQDN 来进行网络通讯，如果不了解 FQDN，请看博文[《一篇文章说清楚 TDengine 的 FQDN》](https://www.taosdata.com/blog/2020/09/11/1824.html)。

-**数据节点(dnode)：** dnode 是 TDengine 服务器侧执行代码 taosd 在物理节点上的一个运行实例，一个工作的系统必须有至少一个数据节点。dnode包含零到多个逻辑的虚拟节点(vnode)，零或者至多一个逻辑的管理节点(mnode)。dnode在系统中的唯一标识由实例的End Point (EP)决定。EP是dnode所在物理节点的FQDN (Fully Qualified Domain Name)和系统所配置的网络端口号(Port)的组合。通过配置不同的端口，一个物理节点(一台物理机、虚拟机或容器）可以运行多个实例，或有多个数据节点。
+**数据节点(dnode)：** dnode 是 TDengine 服务器侧执行代码 taosd 在物理节点上的一个运行实例，一个工作的系统必须有至少一个数据节点。dnode 包含零到多个逻辑的虚拟节点(vnode)，零或者至多一个逻辑的管理节点(mnode)。dnode 在系统中的唯一标识由实例的 End Point (EP)决定。EP 是 dnode 所在物理节点的 FQDN (Fully Qualified Domain Name)和系统所配置的网络端口号(Port)的组合。通过配置不同的端口，一个物理节点(一台物理机、虚拟机或容器）可以运行多个实例，或有多个数据节点。

-**虚拟节点(vnode)：** 为更好的支持数据分片、负载均衡，防止数据过热或倾斜，数据节点被虚拟化成多个虚拟节点(vnode，图中V2, V3, V4等)。每个 vnode 都是一个相对独立的工作单元，是时序数据存储的基本单元，具有独立的运行线程、内存空间与持久化存储的路径。一个 vnode 包含一定数量的表（数据采集点）。当创建一张新表时，系统会检查是否需要创建新的 vnode。一个数据节点上能创建的 vnode 的数量取决于该数据节点所在物理节点的硬件资源。一个 vnode 只属于一个DB，但一个DB可以有多个 vnode。一个 vnode 除存储的时序数据外，也保存有所包含的表的schema、标签值等。一个虚拟节点由所属的数据节点的EP，以及所属的VGroup ID在系统内唯一标识，由管理节点创建并管理。
+**虚拟节点(vnode)：** 为更好的支持数据分片、负载均衡，防止数据过热或倾斜，数据节点被虚拟化成多个虚拟节点(vnode，图中 V2, V3, V4等)。每个 vnode 都是一个相对独立的工作单元，是时序数据存储的基本单元，具有独立的运行线程、内存空间与持久化存储的路径。一个 vnode 包含一定数量的表（数据采集点）。当创建一张新表时，系统会检查是否需要创建新的 vnode。一个数据节点上能创建的 vnode 的数量取决于该数据节点所在物理节点的硬件资源。一个 vnode 只属于一个 DB，但一个 DB 可以有多个 vnode。一个 vnode 除存储的时序数据外，也保存有所包含的表的 schema、标签值等。一个虚拟节点由所属的数据节点的EP，以及所属的 VGroup ID 在系统内唯一标识，由管理节点创建并管理。

-**管理节点(mnode)：** 一个虚拟的逻辑单元，负责所有数据节点运行状态的监控和维护，以及节点之间的负载均衡(图中M)。同时，管理节点也负责元数据(包括用户、数据库、表、静态标签等)的存储和管理，因此也称为 Meta Node。TDengine 集群中可配置多个(开源版最多不超过3个) mnode，它们自动构建成为一个虚拟管理节点组(图中M0, M1, M2)。mnode 间采用 master/slave 的机制进行管理，而且采取强一致方式进行数据同步, 任何数据更新操作只能在 Master 上进行。mnode 集群的创建由系统自动完成，无需人工干预。每个dnode上至多有一个mnode，由所属的数据节点的EP来唯一标识。每个dnode通过内部消息交互自动获取整个集群中所有 mnode 所在的 dnode 的EP。
+**管理节点(mnode)：** 一个虚拟的逻辑单元，负责所有数据节点运行状态的监控和维护，以及节点之间的负载均衡(图中 M)。同时，管理节点也负责元数据(包括用户、数据库、表、静态标签等)的存储和管理，因此也称为 Meta Node。TDengine 集群中可配置多个(开源版最多不超过 3 个) mnode，它们自动构建成为一个虚拟管理节点组(图中 M0, M1, M2)。mnode 间采用 master/slave 的机制进行管理，而且采取强一致方式进行数据同步, 任何数据更新操作只能在 Master 上进行。mnode 集群的创建由系统自动完成，无需人工干预。每个 dnode 上至多有一个 mnode，由所属的数据节点的EP来唯一标识。每个 dnode 通过内部消息交互自动获取整个集群中所有 mnode 所在的 dnode 的EP。

-**虚拟节点组(VGroup)：** 不同数据节点上的 vnode 可以组成一个虚拟节点组(vnode group)来保证系统的高可靠。虚拟节点组内采取master/slave的方式进行管理。写操作只能在 master vnode 上进行，系统采用异步复制的方式将数据同步到 slave vnode，这样确保了一份数据在多个物理节点上有拷贝。一个 vgroup 里虚拟节点个数就是数据的副本数。如果一个DB的副本数为N，系统必须有至少N个数据节点。副本数在创建DB时通过参数 replica 可以指定，缺省为1。使用 TDengine 的多副本特性，可以不再需要昂贵的磁盘阵列等存储设备，就可以获得同样的数据高可靠性。虚拟节点组由管理节点创建、管理，并且由管理节点分配一个系统唯一的ID，VGroup ID。如果两个虚拟节点的vnode group ID相同，说明他们属于同一个组，数据互为备份。虚拟节点组里虚拟节点的个数是可以动态改变的，容许只有一个，也就是没有数据复制。VGroup ID是永远不变的，即使一个虚拟节点组被删除，它的ID也不会被收回重复利用。
+**虚拟节点组(VGroup)：** 不同数据节点上的 vnode 可以组成一个虚拟节点组(vnode group)来保证系统的高可靠。虚拟节点组内采取 master/slave 的方式进行管理。写操作只能在 master vnode 上进行，系统采用异步复制的方式将数据同步到 slave vnode，这样确保了一份数据在多个物理节点上有拷贝。一个 vgroup 里虚拟节点个数就是数据的副本数。如果一个 DB 的副本数为 N，系统必须有至少 N 数据节点。副本数在创建DB时通过参数 replica 可以指定，缺省为 1。使用 TDengine 的多副本特性，可以不再需要昂贵的磁盘阵列等存储设备，就可以获得同样的数据高可靠性。虚拟节点组由管理节点创建、管理，并且由管理节点分配一个系统唯一的 ID，VGroup ID。如果两个虚拟节点的 vnode group ID 相同，说明他们属于同一个组，数据互为备份。虚拟节点组里虚拟节点的个数是可以动态改变的，容许只有一个，也就是没有数据复制。VGroup ID 是永远不变的，即使一个虚拟节点组被删除，它的ID也不会被收回重复利用。

-**TAOSC：** taosc是TDengine给应用提供的驱动程序(driver)，负责处理应用与集群的接口交互，提供C/C++语言原生接口，内嵌于JDBC、C#、Python、Go、Node.js语言连接库里。应用都是通过taosc而不是直接连接集群中的数据节点与整个集群进行交互的。这个模块负责获取并缓存元数据；将插入、查询等请求转发到正确的数据节点；在把结果返回给应用时，还需要负责最后一级的聚合、排序、过滤等操作。对于JDBC、C/C++、C#、Python、Go、Node.js接口而言，这个模块是在应用所处的物理节点上运行。同时，为支持全分布式的RESTful接口，taosc在TDengine集群的每个dnode上都有一运行实例。
+**TAOSC：** taosc 是 TDengine 给应用提供的驱动程序(driver)，负责处理应用与集群的接口交互，提供 C/C++ 语言原生接口，内嵌于 JDBC、C#、Python、Go、Node.js 语言连接库里。应用都是通过 taosc 而不是直接连接集群中的数据节点与整个集群进行交互的。这个模块负责获取并缓存元数据；将插入、查询等请求转发到正确的数据节点；在把结果返回给应用时，还需要负责最后一级的聚合、排序、过滤等操作。对于 JDBC、C/C++、C#、Python、Go、Node.js 接口而言，这个模块是在应用所处的物理节点上运行。同时，为支持全分布式的 RESTful 接口，taosc 在 TDengine 集群的每个 dnode 上都有一运行实例。

 ### 节点之间的通讯

-**通讯方式：**TDengine系统的各个数据节点之间，以及应用驱动与各数据节点之间的通讯是通过TCP/UDP进行的。因为考虑到物联网场景，数据写入的包一般不大，因此TDengine 除采用TCP做传输之外，还采用UDP方式，因为UDP 更加高效，而且不受连接数的限制。TDengine实现了自己的超时、重传、确认等机制，以确保UDP的可靠传输。对于数据量不到15K的数据包，采取UDP的方式进行传输，超过15K的，或者是查询类的操作，自动采取TCP的方式进行传输。同时，TDengine根据配置和数据包，会自动对数据进行压缩/解压缩，数字签名/认证等处理。对于数据节点之间的数据复制，只采用TCP方式进行数据传输。
+**通讯方式：**TDengine 系统的各个数据节点之间，以及应用驱动与各数据节点之间的通讯是通过 TCP/UDP 进行的。因为考虑到物联网场景，数据写入的包一般不大，因此 TDengine 除采用 TCP 做传输之外，还采用 UDP 方式，因为 UDP 更加高效，而且不受连接数的限制。TDengine 实现了自己的超时、重传、确认等机制，以确保 UDP 的可靠传输。对于数据量不到15K的数据包，采取 UDP 的方式进行传输，超过 15K 的，或者是查询类的操作，自动采取 TCP 的方式进行传输。同时，TDengine 根据配置和数据包，会自动对数据进行压缩/解压缩，数字签名/认证等处理。对于数据节点之间的数据复制，只采用 TCP 方式进行数据传输。

-**FQDN配置：**一个数据节点有一个或多个FQDN，可以在系统配置文件taos.cfg通过参数"fqdn"进行指定，如果没有指定，系统将自动获取计算机的hostname作为其FQDN。如果节点没有配置FQDN，可以直接将该节点的配置参数fqdn设置为它的IP地址。但不建议使用IP，因为IP地址可变，一旦变化，将让集群无法正常工作。一个数据节点的EP(End Point)由FQDN + Port组成。采用FQDN，需要保证DNS服务正常工作，或者在节点以及应用所在的节点配置好hosts文件。另外，这个参数值的长度需要控制在 96 个字符以内。
+**FQDN配置：**一个数据节点有一个或多个 FQDN，可以在系统配置文件 taos.cfg 通过参数"fqdn"进行指定，如果没有指定，系统将自动获取计算机的 hostname 作为其 FQDN。如果节点没有配置 FQDN，可以直接将该节点的配置参数 fqdn 设置为它的IP地址。但不建议使用 IP，因为 IP 地址可变，一旦变化，将让集群无法正常工作。一个数据节点的 EP(End Point) 由 FQDN + Port 组成。采用 FQDN，需要保证 DNS 服务正常工作，或者在节点以及应用所在的节点配置好 hosts 文件。另外，这个参数值的长度需要控制在 96 个字符以内。

-**端口配置：**一个数据节点对外的端口由TDengine的系统配置参数serverPort决定，对集群内部通讯的端口是serverPort+5。为支持多线程高效的处理UDP数据，每个对内和对外的UDP连接，都需要占用5个连续的端口。
+**端口配置：**一个数据节点对外的端口由 TDengine 的系统配置参数 serverPort 决定，对集群内部通讯的端口是 serverPort+5。为支持多线程高效的处理 UDP 数据，每个对内和对外的 UDP 连接，都需要占用5个连续的端口。

- 集群内数据节点之间的数据复制操作占用一个TCP端口，是serverPort+10。
- 集群数据节点对外提供RESTful服务占用一个TCP端口，是serverPort+11。
- 集群内数据节点与Arbitrator节点之间通讯占用一个TCP端口，是serverPort+12。
+- 集群内数据节点之间的数据复制操作占用一个 TCP 端口，是 serverPort+10。
+- 集群数据节点对外提供 RESTful 服务占用一个 TCP 端口，是 serverPort+11。
+- 集群内数据节点与 Arbitrator 节点之间通讯占用一个 TCP 端口，是 serverPort+12。

-因此一个数据节点总的端口范围为serverPort到serverPort+12，总共13个TCP/UDP端口。使用时，需要确保防火墙将这些端口打开。每个数据节点可以配置不同的serverPort。（详细的端口情况请参见 [TDengine 2.0 端口说明](https://www.taosdata.com/cn/documentation/faq#port)）
+因此一个数据节点总的端口范围为 serverPort 到 serverPort+12，总共 13 个 TCP/UDP 端口。使用时，需要确保防火墙将这些端口打开。每个数据节点可以配置不同的 serverPort。（详细的端口情况请参见 [TDengine 2.0 端口说明](https://www.taosdata.com/cn/documentation/faq#port)）

-**集群对外连接：**TDengine集群可以容纳单个、多个甚至几千个数据节点。应用只需要向集群中任何一个数据节点发起连接即可，连接需要提供的网络参数是一数据节点的End Point(FQDN加配置的端口号）。通过命令行CLI启动应用taos时，可以通过选项-h来指定数据节点的FQDN, -P来指定其配置的端口号，如果端口不配置，将采用TDengine的系统配置参数serverPort。
+**集群对外连接：**TDengine 集群可以容纳单个、多个甚至几千个数据节点。应用只需要向集群中任何一个数据节点发起连接即可，连接需要提供的网络参数是一数据节点的 End Point(FQDN加配置的端口号）。通过命令行CLI启动应用 taos 时，可以通过选项-h来指定数据节点的 FQDN, -P 来指定其配置的端口号，如果端口不配置，将采用 TDengine 的系统配置参数 serverPort。

-**集群内部通讯：**各个数据节点之间通过TCP/UDP进行连接。一个数据节点启动时，将获取mnode所在的dnode的EP信息，然后与系统中的mnode建立起连接，交换信息。获取mnode的EP信息有三步：
+**集群内部通讯：**各个数据节点之间通过 TCP/UDP 进行连接。一个数据节点启动时，将获取 mnode 所在的 dnode 的 EP 信息，然后与系统中的 mnode 建立起连接，交换信息。获取 mnode 的 EP 信息有三步：

-1. 检查mnodeEpSet.json文件是否存在，如果不存在或不能正常打开获得mnode EP信息，进入第二步；
-2. 检查系统配置文件taos.cfg，获取节点配置参数firstEp、secondEp（这两个参数指定的节点可以是不带mnode的普通节点，这样的话，节点被连接时会尝试重定向到mnode节点），如果不存在或者taos.cfg里没有这两个配置参数，或无效，进入第三步；
-3. 将自己的EP设为mnode EP，并独立运行起来。
+1. 检查 mnodeEpSet.json 文件是否存在，如果不存在或不能正常打开获得 mnode EP 信息，进入第二步；
+2. 检查系统配置文件 taos.cfg，获取节点配置参数 firstEp、secondEp（这两个参数指定的节点可以是不带 mnode 的普通节点，这样的话，节点被连接时会尝试重定向到 mnode 节点），如果不存在或者 taos.cfg 里没有这两个配置参数，或无效，进入第三步；
+3. 将自己的EP设为 mnode EP，并独立运行起来。

-获取mnode EP列表后，数据节点发起连接，如果连接成功，则成功加入进工作的集群，如果不成功，则尝试mnode EP列表中的下一个。如果都尝试了，但连接都仍然失败，则休眠几秒后，再进行尝试。
+获取 mnode EP 列表后，数据节点发起连接，如果连接成功，则成功加入进工作的集群，如果不成功，则尝试 mnode EP 列表中的下一个。如果都尝试了，但连接都仍然失败，则休眠几秒后，再进行尝试。

-**MNODE的选择：**TDengine逻辑上有管理节点，但没有单独的执行代码，服务器侧只有一套执行代码taosd。那么哪个数据节点会是管理节点呢？这是系统自动决定的，无需任何人工干预。原则如下：一个数据节点启动时，会检查自己的End Point, 并与获取的mnode EP List进行比对，如果在其中，该数据节点认为自己应该启动mnode模块，成为mnode。如果自己的EP不在mnode EP List里，则不启动mnode模块。在系统的运行过程中，由于负载均衡、宕机等原因，mnode有可能迁移至新的dnode，但一切都是透明的，无需人工干预，配置参数的修改，是mnode自己根据资源做出的决定。
+**MNODE的选择：**TDengine 逻辑上有管理节点，但没有单独的执行代码，服务器侧只有一套执行代码 taosd。那么哪个数据节点会是管理节点呢？这是系统自动决定的，无需任何人工干预。原则如下：一个数据节点启动时，会检查自己的 End Point, 并与获取的 mnode EP List 进行比对，如果在其中，该数据节点认为自己应该启动 mnode 模块，成为 mnode。如果自己的 EP 不在 mnode EP List 里，则不启动 mnode 模块。在系统的运行过程中，由于负载均衡、宕机等原因，mnode 有可能迁移至新的 dnode，但一切都是透明的，无需人工干预，配置参数的修改，是 mnode 自己根据资源做出的决定。

-**新数据节点的加入：**系统有了一个数据节点后，就已经成为一个工作的系统。添加新的节点进集群时，有两个步骤，第一步：使用TDengine CLI连接到现有工作的数据节点，然后用命令”create dnode"将新的数据节点的End Point添加进去; 第二步：在新的数据节点的系统配置参数文件taos.cfg里，将firstEp, secondEp参数设置为现有集群中任意两个数据节点的EP即可。具体添加的详细步骤请见详细的用户手册。这样就把集群一步一步的建立起来。
+**新数据节点的加入：**系统有了一个数据节点后，就已经成为一个工作的系统。添加新的节点进集群时，有两个步骤，第一步：使用 TDengine CLI 连接到现有工作的数据节点，然后用命令"create dnode"将新的数据节点的 End Point 添加进去; 第二步：在新的数据节点的系统配置参数文件 taos.cfg 里，将 firstEp, secondEp 参数设置为现有集群中任意两个数据节点的 EP 即可。具体添加的详细步骤请见详细的用户手册。这样就把集群一步一步的建立起来。

-**重定向：**无论是dnode还是taosc，最先都是要发起与mnode的连接，但mnode是系统自动创建并维护的，因此对于用户来说，并不知道哪个dnode在运行mnode。TDengine只要求向系统中任何一个工作的dnode发起连接即可。因为任何一个正在运行的dnode，都维护有目前运行的mnode EP List。当收到一个来自新启动的dnode或taosc的连接请求，如果自己不是mnode，则将mnode EP List回复给对方，taosc或新启动的dnode收到这个list, 就重新尝试建立连接。当mnode EP List发生改变，通过节点之间的消息交互，各个数据节点就很快获取最新列表，并通知taosc。
+**重定向：**无论是 dnode 还是 taosc，最先都是要发起与 mnode 的连接，但 mnode 是系统自动创建并维护的，因此对于用户来说，并不知道哪个 dnode 在运行 mnode。TDengine 只要求向系统中任何一个工作的 dnode 发起连接即可。因为任何一个正在运行的 dnode，都维护有目前运行的 mnode EP List。当收到一个来自新启动的 dnode 或 taosc 的连接请求，如果自己不是 mnode，则将 mnode EP List 回复给对方，taosc 或新启动的 dnode 收到这个 list, 就重新尝试建立连接。当 mnode EP List 发生改变，通过节点之间的消息交互，各个数据节点就很快获取最新列表，并通知 taosc。

 ### 一个典型的消息流程

-为解释vnode、mnode、taosc和应用之间的关系以及各自扮演的角色，下面对写入数据这个典型操作的流程进行剖析。
+为解释 vnode、mnode、taosc 和应用之间的关系以及各自扮演的角色，下面对写入数据这个典型操作的流程进行剖析。

 ![TDengine典型的操作流程](../images/architecture/message.png)
-<center> 图 2 TDengine典型的操作流程 </center>
+<center> 图 2 TDengine 典型的操作流程 </center>

-1. 应用通过JDBC、ODBC或其他API接口发起插入数据的请求。
-2. taosc会检查缓存，看是否保存有该表的meta data。如果有，直接到第4步。如果没有，taosc将向mnode发出get meta-data请求。
-3. mnode将该表的meta-data返回给taosc。Meta-data包含有该表的schema, 而且还有该表所属的vgroup信息（vnode ID以及所在的dnode的End Point，如果副本数为N，就有N组End Point)。如果taosc迟迟得不到mnode回应，而且存在多个mnode, taosc将向下一个mnode发出请求。
-4. taosc向master vnode发起插入请求。
-5. vnode插入数据后，给taosc一个应答，表示插入成功。如果taosc迟迟得不到vnode的回应，taosc会认为该节点已经离线。这种情况下，如果被插入的数据库有多个副本，taosc将向vgroup里下一个vnode发出插入请求。
-6. taosc通知APP，写入成功。
+1. 应用通过 JDBC、ODBC 或其他API接口发起插入数据的请求。
+2. taosc 会检查缓存，看是否保存有该表的 meta data。如果有，直接到第 4 步。如果没有，taosc 将向 mnode 发出 get meta-data 请求。
+3. mnode 将该表的 meta-data 返回给 taosc。Meta-data 包含有该表的 schema, 而且还有该表所属的 vgroup信息（vnode ID 以及所在的 dnode 的 End Point，如果副本数为 N，就有 N 组 End Point)。如果 taosc 迟迟得不到 mnode 回应，而且存在多个 mnode, taosc 将向下一个 mnode 发出请求。
+4. taosc 向 master vnode 发起插入请求。
+5. vnode 插入数据后，给 taosc 一个应答，表示插入成功。如果 taosc 迟迟得不到 vnode 的回应，taosc 会认为该节点已经离线。这种情况下，如果被插入的数据库有多个副本，taosc 将向 vgroup 里下一个 vnode 发出插入请求。
+6. taosc 通知 APP，写入成功。

-对于第二和第三步，taosc启动时，并不知道mnode的End Point，因此会直接向配置的集群对外服务的End Point发起请求。如果接收到该请求的dnode并没有配置mnode，该dnode会在回复的消息中告知mnode EP列表，这样taosc会重新向新的mnode的EP发出获取meta-data的请求。
+对于第二和第三步，taosc 启动时，并不知道 mnode 的 End Point，因此会直接向配置的集群对外服务的 End Point 发起请求。如果接收到该请求的 dnode 并没有配置 mnode，该 dnode 会在回复的消息中告知mnode EP 列表，这样 taosc 会重新向新的 mnode 的 EP 发出获取 meta-data 的请求。

-对于第四和第五步，没有缓存的情况下，taosc无法知道虚拟节点组里谁是master，就假设第一个vnodeID就是master,向它发出请求。如果接收到请求的vnode并不是master,它会在回复中告知谁是master，这样taosc就向建议的master vnode发出请求。一旦得到插入成功的回复，taosc会缓存master节点的信息。
+对于第四和第五步，没有缓存的情况下，taosc 无法知道虚拟节点组里谁是 master，就假设第一个 vnodeID 就是 master,向它发出请求。如果接收到请求的 vnode 并不是 master,它会在回复中告知谁是 master，这样 taosc 就向建议的 master vnode 发出请求。一旦得到插入成功的回复，taosc 会缓存 master 节点的信息。

-上述是插入数据的流程，查询、计算的流程也完全一致。taosc把这些复杂的流程全部封装屏蔽了，对于应用来说无感知也无需任何特别处理。
+上述是插入数据的流程，查询、计算的流程也完全一致。taosc 把这些复杂的流程全部封装屏蔽了，对于应用来说无感知也无需任何特别处理。

-通过taosc缓存机制，只有在第一次对一张表操作时，才需要访问mnode,因此mnode不会成为系统瓶颈。但因为schema有可能变化，而且vgroup有可能发生改变（比如负载均衡发生），因此taosc会定时和mnode交互，自动更新缓存。
+通过 taosc 缓存机制，只有在第一次对一张表操作时，才需要访问 mnode，因此 mnode 不会成为系统瓶颈。但因为 schema 有可能变化，而且 vgroup 有可能发生改变（比如负载均衡发生），因此 taosc 会定时和mnode 交互，自动更新缓存。

 ## <a class="anchor" id="sharding"></a>存储模型与数据分区、分片

 ### 存储模型

-TDengine存储的数据包括采集的时序数据以及库、表相关的元数据、标签数据等，这些数据具体分为三部分：
+TDengine 存储的数据包括采集的时序数据以及库、表相关的元数据、标签数据等，这些数据具体分为三部分：

- 时序数据：存放于vnode里，由data、head和last三个文件组成，数据量大，查询量取决于应用场景。容许乱序写入，但暂时不支持删除操作，并且仅在update参数设置为1时允许更新操作。通过采用一个采集点一张表的模型，一个时间段的数据是连续存储，对单张表的写入是简单的追加操作，一次读，可以读到多条记录，这样保证对单个采集点的插入和查询操作，性能达到最优。
- 标签数据：存放于vnode里的meta文件，支持增删改查四个标准操作。数据量不大，有N张表，就有N条记录，因此可以全内存存储。如果标签过滤操作很多，查询将十分频繁，因此TDengine支持多核多线程并发查询。只要计算资源足够，即使有数千万张表，过滤结果能毫秒级返回。
- 元数据：存放于mnode里，包含系统节点、用户、DB、Table Schema等信息，支持增删改查四个标准操作。这部分数据的量不大，可以全内存保存，而且由于客户端有缓存，查询量也不大。因此目前的设计虽是集中式存储管理，但不会构成性能瓶颈。
+- 时序数据：存放于 vnode 里，由 data、head 和 last 三个文件组成，数据量大，查询量取决于应用场景。容许乱序写入，但暂时不支持删除操作，并且仅在 update 参数设置为 1 时允许更新操作。通过采用一个采集点一张表的模型，一个时间段的数据是连续存储，对单张表的写入是简单的追加操作，一次读，可以读到多条记录，这样保证对单个采集点的插入和查询操作，性能达到最优。
+- 标签数据：存放于 vnode 里的 meta 文件，支持增删改查四个标准操作。数据量不大，有 N 张表，就有 N 条记录，因此可以全内存存储。如果标签过滤操作很多，查询将十分频繁，因此 TDengine 支持多核多线程并发查询。只要计算资源足够，即使有数千万张表，过滤结果能毫秒级返回。
+- 元数据：存放于 mnode 里，包含系统节点、用户、DB、Table Schema 等信息，支持增删改查四个标准操作。这部分数据的量不大，可以全内存保存，而且由于客户端有缓存，查询量也不大。因此目前的设计虽是集中式存储管理，但不会构成性能瓶颈。

-与典型的NoSQL存储模型相比，TDengine将标签数据与时序数据完全分离存储，它具有两大优势：
+与典型的 NoSQL 存储模型相比，TDengine 将标签数据与时序数据完全分离存储，它具有两大优势：

- 能够极大地降低标签数据存储的冗余度：一般的NoSQL数据库或时序数据库，采用的K-V存储，其中的Key包含时间戳、设备ID、各种标签。每条记录都带有这些重复的内容，浪费存储空间。而且如果应用要在历史数据上增加、修改或删除标签，需要遍历数据，重写一遍，操作成本极其昂贵。
+- 能够极大地降低标签数据存储的冗余度：一般的 NoSQL 数据库或时序数据库，采用的 K-V 存储，其中的 Key 包含时间戳、设备 ID、各种标签。每条记录都带有这些重复的内容，浪费存储空间。而且如果应用要在历史数据上增加、修改或删除标签，需要遍历数据，重写一遍，操作成本极其昂贵。
 - 能够实现极为高效的多表之间的聚合查询：做多表之间聚合查询时，先把符合标签过滤条件的表查找出来，然后再查找这些表相应的数据块，这样大幅减少要扫描的数据集，从而大幅提高查询效率。而且标签数据采用全内存的结构进行管理和维护，千万级别规模的标签数据查询可以在毫秒级别返回。

 ### 数据分片

-对于海量的数据管理，为实现水平扩展，一般都需要采取分片(Sharding)分区(Partitioning)策略。TDengine是通过vnode来实现数据分片的，通过一个时间段一个数据文件来实现时序数据分区的。
+对于海量的数据管理，为实现水平扩展，一般都需要采取分片(Sharding)分区(Partitioning)策略。TDengine 是通过 vnode 来实现数据分片的，通过一个时间段一个数据文件来实现时序数据分区的。

-vnode(虚拟数据节点)负责为采集的时序数据提供写入、查询和计算功能。为便于负载均衡、数据恢复、支持异构环境，TDengine将一个数据节点根据其计算和存储资源切分为多个vnode。这些vnode的管理是TDengine自动完成的，对应用完全透明。
+vnode(虚拟数据节点)负责为采集的时序数据提供写入、查询和计算功能。为便于负载均衡、数据恢复、支持异构环境，TDengine 将一个数据节点根据其计算和存储资源切分为多个 vnode。这些 vnode 的管理是TDengine 自动完成的，对应用完全透明。

-对于单独一个数据采集点，无论其数据量多大，一个vnode（或vnode group, 如果副本数大于1）有足够的计算资源和存储资源来处理（如果每秒生成一条16字节的记录，一年产生的原始数据不到0.5G），因此TDengine将一张表（一个数据采集点）的所有数据都存放在一个vnode里，而不会让同一个采集点的数据分布到两个或多个dnode上。而且一个vnode可存储多个数据采集点(表）的数据，一个vnode可容纳的表的数目的上限为一百万。设计上，一个vnode里所有的表都属于同一个DB。一个数据节点上，除非特殊配置，一个DB拥有的vnode数目不会超过系统核的数目。
+对于单独一个数据采集点，无论其数据量多大，一个 vnode（或 vnode group, 如果副本数大于 1）有足够的计算资源和存储资源来处理（如果每秒生成一条 16 字节的记录，一年产生的原始数据不到 0.5G），因此 TDengine 将一张表（一个数据采集点）的所有数据都存放在一个 vnode 里，而不会让同一个采集点的数据分布到两个或多个 dnode 上。而且一个 vnode 可存储多个数据采集点(表）的数据，一个 vnode 可容纳的表的数目的上限为一百万。设计上，一个 vnode 里所有的表都属于同一个 DB。一个数据节点上，除非特殊配置，一个 DB 拥有的 vnode 数目不会超过系统核的数目。

-创建DB时，系统并不会马上分配资源。但当创建一张表时，系统将看是否有已经分配的vnode, 且该vnode是否有空余的表空间，如果有，立即在该有空位的vnode创建表。如果没有，系统将从集群中，根据当前的负载情况，在一个dnode上创建一新的vnode, 然后创建表。如果DB有多个副本，系统不是只创建一个vnode，而是一个vgroup(虚拟数据节点组)。系统对vnode的数目没有任何限制，仅仅受限于物理节点本身的计算和存储资源。
+创建 DB 时，系统并不会马上分配资源。但当创建一张表时，系统将看是否有已经分配的 vnode, 且该 vnode 是否有空余的表空间，如果有，立即在该有空位的 vnode 创建表。如果没有，系统将从集群中，根据当前的负载情况，在一个 dnode 上创建一新的 vnode, 然后创建表。如果DB有多个副本，系统不是只创建一个 vnode，而是一个 vgroup (虚拟数据节点组)。系统对 vnode 的数目没有任何限制，仅仅受限于物理节点本身的计算和存储资源。

-每张表的meta data（包含schema, 标签等）也存放于vnode里，而不是集中存放于mnode，实际上这是对Meta数据的分片，这样便于高效并行的进行标签过滤操作。
+每张表的 meta data（包含 schema, 标签等）也存放于 vnode 里，而不是集中存放于 mnode，实际上这是对 Meta 数据的分片，这样便于高效并行的进行标签过滤操作。

 ### 数据分区

-TDengine除vnode分片之外，还对时序数据按照时间段进行分区。每个数据文件只包含一个时间段的时序数据，时间段的长度由DB的配置参数days决定。这种按时间段分区的方法还便于高效实现数据的保留策略，只要数据文件超过规定的天数（系统配置参数keep)，将被自动删除。而且不同的时间段可以存放于不同的路径和存储介质，以便于大数据的冷热管理，实现多级存储。
+TDengine 除 vnode 分片之外，还对时序数据按照时间段进行分区。每个数据文件只包含一个时间段的时序数据，时间段的长度由 DB 的配置参数 days 决定。这种按时间段分区的方法还便于高效实现数据的保留策略，只要数据文件超过规定的天数（系统配置参数 keep)，将被自动删除。而且不同的时间段可以存放于不同的路径和存储介质，以便于大数据的冷热管理，实现多级存储。

-总的来说，**TDengine是通过vnode以及时间两个维度，对大数据进行切分**，便于并行高效的管理，实现水平扩展。
+总的来说，**TDengine 是通过 vnode 以及时间两个维度，对大数据进行切分**，便于并行高效的管理，实现水平扩展。

 ### 负载均衡

-每个dnode都定时向 mnode(虚拟管理节点)报告其状态（包括硬盘空间、内存大小、CPU、网络、虚拟节点个数等），因此mnode了解整个集群的状态。基于整体状态，当mnode发现某个dnode负载过重，它会将dnode上的一个或多个vnode挪到其他dnode。在挪动过程中，对外服务继续进行，数据插入、查询和计算操作都不受影响。
+每个 dnode 都定时向 mnode(虚拟管理节点)报告其状态（包括硬盘空间、内存大小、CPU、网络、虚拟节点个数等），因此 mnode 了解整个集群的状态。基于整体状态，当 mnode 发现某个dnode负载过重，它会将dnode 上的一个或多个 vnode 挪到其他 dnode。在挪动过程中，对外服务继续进行，数据插入、查询和计算操作都不受影响。

-如果mnode一段时间没有收到dnode的状态报告，mnode会认为这个dnode已经离线。如果离线时间超过一定时长（时长由配置参数offlineThreshold决定），该dnode将被mnode强制剔除出集群。该dnode上的vnodes如果副本数大于1，系统将自动在其他dnode上创建新的副本，以保证数据的副本数。如果该dnode上还有mnode, 而且mnode的副本数大于1，系统也将自动在其他dnode上创建新的mnode, 以保证mnode的副本数。
+如果 mnode 一段时间没有收到 dnode 的状态报告，mnode 会认为这个 dnode 已经离线。如果离线时间超过一定时长（时长由配置参数 offlineThreshold 决定），该 dnode 将被 mnode 强制剔除出集群。该dnode 上的 vnodes 如果副本数大于 1，系统将自动在其他 dnode 上创建新的副本，以保证数据的副本数。如果该 dnode 上还有 mnode, 而且 mnode 的副本数大于1，系统也将自动在其他 dnode 上创建新的 mnode, 以保证 mnode 的副本数。

 当新的数据节点被添加进集群，因为新的计算和存储被添加进来，系统也将自动启动负载均衡流程。

 负载均衡过程无需任何人工干预，应用也无需重启，将自动连接新的节点，完全透明。

-**提示：负载均衡由参数balance控制，决定开启/关闭自动负载均衡。**
+**提示：负载均衡由参数 balance 控制，决定开启/关闭自动负载均衡。**

 ## <a class="anchor" id="replication"></a>数据写入与复制流程

-如果一个数据库有N个副本，那一个虚拟节点组就有N个虚拟节点，但是只有一个是master，其他都是slave。当应用将新的记录写入系统时，只有master vnode能接受写的请求。如果slave vnode收到写的请求，系统将通知taosc需要重新定向。
+如果一个数据库有 N 个副本，那一个虚拟节点组就有 N 个虚拟节点，但是只有一个是 master，其他都是 slave。当应用将新的记录写入系统时，只有 master vnode 能接受写的请求。如果 slave vnode 收到写的请求，系统将通知 taosc 需要重新定向。

-### Master Vnode写入流程
+### Master Vnode 写入流程

-Master Vnode遵循下面的写入流程：
+Master Vnode 遵循下面的写入流程：

 ![TDengine Master写入流程](../images/architecture/write_master.png)
-<center> 图 3 TDengine Master写入流程  </center>
+<center> 图 3 TDengine Master 写入流程  </center>

-1. master vnode收到应用的数据插入请求，验证OK，进入下一步；
-2. 如果系统配置参数walLevel大于0，vnode将把该请求的原始数据包写入数据库日志文件WAL。如果walLevel设置为2，而且fsync设置为0，TDengine还将WAL数据立即落盘，以保证即使宕机，也能从数据库日志文件中恢复数据，避免数据的丢失；
-3. 如果有多个副本，vnode将把数据包转发给同一虚拟节点组内的slave vnodes, 该转发包带有数据的版本号(version)；
-4. 写入内存，并将记录加入到skip list；
-5. master vnode返回确认信息给应用，表示写入成功。
-6. 如果第2，3，4步中任何一步失败，将直接返回错误给应用。
+1. master vnode 收到应用的数据插入请求，验证OK，进入下一步；
+2. 如果系统配置参数 walLevel 大于 0，vnode 将把该请求的原始数据包写入数据库日志文件 WAL。如果 walLevel 设置为 2，而且 fsync 设置为 0，TDengine 还将 WAL 数据立即落盘，以保证即使宕机，也能从数据库日志文件中恢复数据，避免数据的丢失；
+3. 如果有多个副本，vnode 将把数据包转发给同一虚拟节点组内的 slave vnodes, 该转发包带有数据的版本号(version)；
+4. 写入内存，并将记录加入到 skip list；
+5. master vnode 返回确认信息给应用，表示写入成功。
+6. 如果第 2、3、4 步中任何一步失败，将直接返回错误给应用。

-### Slave Vnode写入流程
+### Slave Vnode 写入流程

-对于slave vnode，写入流程是：
+对于 slave vnode，写入流程是：

-![TDengine Slave写入流程](../images/architecture/write_slave.png)
-<center> 图 4 TDengine Slave写入流程  </center>
+![TDengine Slave 写入流程](../images/architecture/write_slave.png)
+<center> 图 4 TDengine Slave 写入流程  </center>

-1. slave vnode收到Master vnode转发了的数据插入请求。检查last version是否与master一致，如果一致，进入下一步。如果不一致，需要进入同步状态。
-2. 如果系统配置参数walLevel大于0，vnode将把该请求的原始数据包写入数据库日志文件WAL。如果walLevel设置为2，而且fsync设置为0，TDengine还将WAL数据立即落盘，以保证即使宕机，也能从数据库日志文件中恢复数据，避免数据的丢失。
-3. 写入内存，更新内存中的skip list。
+1. slave vnode 收到 Master vnode 转发了的数据插入请求。检查 last version 是否与 master 一致，如果一致，进入下一步。如果不一致，需要进入同步状态。
+2. 如果系统配置参数 walLevel 大于 0，vnode 将把该请求的原始数据包写入数据库日志文件 WAL。如果 walLevel 设置为 2，而且 fsync 设置为 0，TDengine 还将 WAL 数据立即落盘，以保证即使宕机，也能从数据库日志文件中恢复数据，避免数据的丢失。
+3. 写入内存，更新内存中的 skip list。

-与master vnode相比，slave vnode不存在转发环节，也不存在回复确认环节，少了两步。但写内存与WAL是完全一样的。
+与 master vnode 相比，slave vnode 不存在转发环节，也不存在回复确认环节，少了两步。但写内存与 WAL 是完全一样的。

 ### 异地容灾、IDC迁移

-从上述master和slave流程可以看出，TDengine采用的是异步复制的方式进行数据同步。这种方式能够大幅提高写入性能，网络延时对写入速度不会有大的影响。通过配置每个物理节点的IDC和机架号，可以保证对于一个虚拟节点组，虚拟节点由来自不同IDC、不同机架的物理节点组成，从而实现异地容灾。因此TDengine原生支持异地容灾，无需再使用其他工具。
+从上述 master 和 slave 流程可以看出，TDengine 采用的是异步复制的方式进行数据同步。这种方式能够大幅提高写入性能，网络延时对写入速度不会有大的影响。通过配置每个物理节点的IDC和机架号，可以保证对于一个虚拟节点组，虚拟节点由来自不同 IDC、不同机架的物理节点组成，从而实现异地容灾。因此 TDengine 原生支持异地容灾，无需再使用其他工具。

-另一方面，TDengine支持动态修改副本数，一旦副本数增加，新加入的虚拟节点将立即进入数据同步流程，同步结束后，新加入的虚拟节点即可提供服务。而在同步过程中，master以及其他已经同步的虚拟节点都可以对外提供服务。利用这一特性，TDengine可以实现无服务中断的IDC机房迁移。只需要将新IDC的物理节点加入现有集群，等数据同步完成后，再将老的IDC的物理节点从集群中剔除即可。
+另一方面，TDengine 支持动态修改副本数，一旦副本数增加，新加入的虚拟节点将立即进入数据同步流程，同步结束后，新加入的虚拟节点即可提供服务。而在同步过程中，master 以及其他已经同步的虚拟节点都可以对外提供服务。利用这一特性，TDengine 可以实现无服务中断的 IDC 机房迁移。只需要将新 IDC 的物理节点加入现有集群，等数据同步完成后，再将老的 IDC 的物理节点从集群中剔除即可。

 但是，这种异步复制的方式，存在极小的时间窗口，丢失写入的数据。具体场景如下：

-1. master vnode完成了它的5步操作，已经给APP确认写入成功，然后宕机
-2. slave vnode收到写入请求后，在第2步写入日志之前，处理失败
-3. slave vnode将成为新的master，从而丢失了一条记录
+1. master vnode 完成了它的 5 步操作，已经给 APP 确认写入成功，然后宕机
+2. slave vnode 收到写入请求后，在第 2 步写入日志之前，处理失败
+3. slave vnode 将成为新的 master，从而丢失了一条记录

-理论上，只要是异步复制，就无法保证100%不丢失。但是这个窗口极小，master与slave要同时发生故障，而且发生在刚给应用确认写入成功之后。
+理论上，只要是异步复制，就无法保证 100% 不丢失。但是这个窗口极小，master 与 slave 要同时发生故障，而且发生在刚给应用确认写入成功之后。

 ### 主从选择

-Vnode会保持一个数据版本号(version)，对内存数据进行持久化存储时，对该版本号也进行持久化存储。每个数据更新操作，无论是采集的时序数据还是元数据，这个版本号将增加1。
+Vnode 会保持一个数据版本号(version)，对内存数据进行持久化存储时，对该版本号也进行持久化存储。每个数据更新操作，无论是采集的时序数据还是元数据，这个版本号将增加 1。

-一个vnode启动时，角色(master、slave) 是不定的，数据是处于未同步状态，它需要与虚拟节点组内其他节点建立TCP连接，并互相交换status，其中包括version和自己的角色。通过status的交换，系统进入选主流程，规则如下：
+一个 vnode 启动时，角色(master、slave) 是不定的，数据是处于未同步状态，它需要与虚拟节点组内其他节点建立 TCP 连接，并互相交换 status，其中包括 version 和自己的角色。通过 status 的交换，系统进入选主流程，规则如下：

-1. 如果只有一个副本，该副本永远就是master
-2. 所有副本都在线时，版本最高的被选为master
-3. 在线的虚拟节点数过半，而且有虚拟节点是slave的话，该虚拟节点自动成为master
-4. 对于2和3，如果多个虚拟节点满足成为master的要求，那么虚拟节点组的节点列表里，最前面的选为master
+1. 如果只有一个副本，该副本永远就是 master
+2. 所有副本都在线时，版本最高的被选为 master
+3. 在线的虚拟节点数过半，而且有虚拟节点是 slave 的话，该虚拟节点自动成为 master
+4. 对于 2 和 3，如果多个虚拟节点满足成为 master 的要求，那么虚拟节点组的节点列表里，最前面的选为 master

-更多的关于数据复制的流程，请见[TDengine 2.0数据复制模块设计](https://www.taosdata.com/cn/documentation/architecture/replica/)。
+更多的关于数据复制的流程，请见[TDengine 2.0 数据复制模块设计](https://www.taosdata.com/cn/documentation/architecture/replica/)。

 ### 同步复制

-对于数据一致性要求更高的场景，异步数据复制无法满足要求，因为有极小的概率丢失数据，因此TDengine提供同步复制的机制供用户选择。在创建数据库时，除指定副本数replica之外，用户还需要指定新的参数quorum。如果quorum大于1，它表示每次master转发给副本时，需要等待quorum-1个回复确认，才能通知应用，数据在slave已经写入成功。如果在一定的时间内，得不到quorum-1个回复确认，master vnode将返回错误给应用。
+对于数据一致性要求更高的场景，异步数据复制无法满足要求，因为有极小的概率丢失数据，因此 TDengine 提供同步复制的机制供用户选择。在创建数据库时，除指定副本数 replica 之外，用户还需要指定新的参数 quorum。如果 quorum 大于1，它表示每次master转发给副本时，需要等待 quorum-1 个回复确认，才能通知应用，数据在 slave 已经写入成功。如果在一定的时间内，得不到 quorum-1 个回复确认，master vnode 将返回错误给应用。

-采用同步复制，系统的性能会有所下降，而且latency会增加。因为元数据要强一致，mnode之间的数据同步缺省就是采用的同步复制。
+采用同步复制，系统的性能会有所下降，而且 latency 会增加。因为元数据要强一致，mnode 之间的数据同步缺省就是采用的同步复制。

 ## <a class="anchor" id="persistence"></a>缓存与持久化

 ### 缓存

-TDengine采用时间驱动缓存管理策略（First-In-First-Out，FIFO），又称为写驱动的缓存管理机制。这种策略有别于读驱动的数据缓存模式（Least-Recent-Used，LRU），直接将最近写入的数据保存在系统的缓存中。当缓存达到临界值的时候，将最早的数据批量写入磁盘。一般意义上来说，对于物联网数据的使用，用户最为关心的是刚产生的数据，即当前状态。TDengine充分利用这一特性，将最近到达的（当前状态）数据保存在缓存中。
+TDengine 采用时间驱动缓存管理策略（First-In-First-Out，FIFO），又称为写驱动的缓存管理机制。这种策略有别于读驱动的数据缓存模式（Least-Recent-Used，LRU），直接将最近写入的数据保存在系统的缓存中。当缓存达到临界值的时候，将最早的数据批量写入磁盘。一般意义上来说，对于物联网数据的使用，用户最为关心的是刚产生的数据，即当前状态。TDengine 充分利用这一特性，将最近到达的（当前状态）数据保存在缓存中。

-TDengine通过查询函数向用户提供毫秒级的数据获取能力。直接将最近到达的数据保存在缓存中，可以更加快速地响应用户针对最近一条或一批数据的查询分析，整体上提供更快的数据库查询响应能力。从这个意义上来说，**可通过设置合适的配置参数将TDengine作为数据缓存来使用，而不需要再部署Redis或其他额外的缓存系统**，可有效地简化系统架构，降低运维的成本。需要注意的是，TDengine重启以后系统的缓存将被清空，之前缓存的数据均会被批量写入磁盘，缓存的数据将不会像专门的key-value缓存系统再将之前缓存的数据重新加载到缓存中。
+TDengine 通过查询函数向用户提供毫秒级的数据获取能力。直接将最近到达的数据保存在缓存中，可以更加快速地响应用户针对最近一条或一批数据的查询分析，整体上提供更快的数据库查询响应能力。从这个意义上来说，**可通过设置合适的配置参数将 TDengine 作为数据缓存来使用，而不需要再部署 Redis 或其他额外的缓存系统**，可有效地简化系统架构，降低运维的成本。需要注意的是，TDengine 重启以后系统的缓存将被清空，之前缓存的数据均会被批量写入磁盘，缓存的数据将不会像专门的 key-value 缓存系统再将之前缓存的数据重新加载到缓存中。

-每个vnode有自己独立的内存，而且由多个固定大小的内存块组成，不同vnode之间完全隔离。数据写入时，类似于日志的写法，数据被顺序追加写入内存，但每个vnode维护有自己的skip list，便于迅速查找。当三分之一以上的内存块写满时，启动落盘操作，而且后续写的操作在新的内存块进行。这样，一个vnode里有三分之一内存块是保留有最近的数据的，以达到缓存、快速查找的目的。一个vnode的内存块的个数由配置参数blocks决定，内存块的大小由配置参数cache决定。
+每个 vnode 有自己独立的内存，而且由多个固定大小的内存块组成，不同 vnode 之间完全隔离。数据写入时，类似于日志的写法，数据被顺序追加写入内存，但每个 vnode 维护有自己的 skip list，便于迅速查找。当三分之一以上的内存块写满时，启动落盘操作，而且后续写的操作在新的内存块进行。这样，一个 vnode 里有三分之一内存块是保留有最近的数据的，以达到缓存、快速查找的目的。一个 vnode 的内存块的个数由配置参数 blocks 决定，内存块的大小由配置参数 cache 决定。

 ### 持久化存储

-TDengine采用数据驱动的方式让缓存中的数据写入硬盘进行持久化存储。当vnode中缓存的数据达到一定规模时，为了不阻塞后续数据的写入，TDengine也会拉起落盘线程将缓存的数据写入持久化存储。TDengine在数据落盘时会打开新的数据库日志文件，在落盘成功后则会删除老的数据库日志文件，避免日志文件无限制地增长。
+TDengine 采用数据驱动的方式让缓存中的数据写入硬盘进行持久化存储。当 vnode 中缓存的数据达到一定规模时，为了不阻塞后续数据的写入，TDengine 也会拉起落盘线程将缓存的数据写入持久化存储。TDengine 在数据落盘时会打开新的数据库日志文件，在落盘成功后则会删除老的数据库日志文件，避免日志文件无限制地增长。

-为充分利用时序数据特点，TDengine将一个vnode保存在持久化存储的数据切分成多个文件，每个文件只保存固定天数的数据，这个天数由系统配置参数days决定。切分成多个文件后，给定查询的起止日期，无需任何索引，就可以立即定位需要打开哪些数据文件，大大加快读取速度。
+为充分利用时序数据特点，TDengine 将一个 vnode 保存在持久化存储的数据切分成多个文件，每个文件只保存固定天数的数据，这个天数由系统配置参数 days 决定。切分成多个文件后，给定查询的起止日期，无需任何索引，就可以立即定位需要打开哪些数据文件，大大加快读取速度。

-对于采集的数据，一般有保留时长，这个时长由系统配置参数keep决定。超过这个设置天数的数据文件，将被系统自动删除，释放存储空间。
+对于采集的数据，一般有保留时长，这个时长由系统配置参数 keep 决定。超过这个设置天数的数据文件，将被系统自动删除，释放存储空间。

-给定days与keep两个参数，一个典型工作状态的vnode中总的数据文件数为：`向上取整(keep/days)+1`个。总的数据文件个数不宜过大，也不宜过小。10到100以内合适。基于这个原则，可以设置合理的days。目前的版本，参数keep可以修改，但对于参数days，一旦设置后，不可修改。
+给定 days 与 keep 两个参数，一个典型工作状态的 vnode 中总的数据文件数为：`向上取整(keep/days)+1`个。总的数据文件个数不宜过大，也不宜过小。10到100以内合适。基于这个原则，可以设置合理的 days。目前的版本，参数 keep 可以修改，但对于参数 days，一旦设置后，不可修改。

-在每个数据文件里，一张表的数据是一块一块存储的。一张表可以有一到多个数据文件块。在一个文件块里，数据是列式存储的，占用的是一片连续的存储空间，这样大大提高读取速度。文件块的大小由系统参数maxRows（每块最大记录条数）决定，缺省值为4096。这个值不宜过大，也不宜过小。过大，定位具体时间段的数据的搜索时间会变长，影响读取速度；过小，数据块的索引太大，压缩效率偏低，也影响读取速度。
+在每个数据文件里，一张表的数据是一块一块存储的。一张表可以有一到多个数据文件块。在一个文件块里，数据是列式存储的，占用的是一片连续的存储空间，这样大大提高读取速度。文件块的大小由系统参数 maxRows （每块最大记录条数）决定，缺省值为 4096。这个值不宜过大，也不宜过小。过大，定位具体时间段的数据的搜索时间会变长，影响读取速度；过小，数据块的索引太大，压缩效率偏低，也影响读取速度。

-每个数据文件(.data结尾)都有一个对应的索引文件（.head结尾），该索引文件对每张表都有一数据块的摘要信息，记录了每个数据块在数据文件中的偏移量，数据的起止时间等信息，以帮助系统迅速定位需要查找的数据。每个数据文件还有一对应的last文件(.last结尾)，该文件是为防止落盘时数据块碎片化而设计的。如果一张表落盘的记录条数没有达到系统配置参数minRows（每块最小记录条数），将被先存储到last文件，等下次落盘时，新落盘的记录将与last文件的记录进行合并，再写入数据文件。
+每个数据文件(.data 结尾)都有一个对应的索引文件（.head 结尾），该索引文件对每张表都有一数据块的摘要信息，记录了每个数据块在数据文件中的偏移量，数据的起止时间等信息，以帮助系统迅速定位需要查找的数据。每个数据文件还有一对应的 last 文件(.last 结尾)，该文件是为防止落盘时数据块碎片化而设计的。如果一张表落盘的记录条数没有达到系统配置参数 minRows（每块最小记录条数），将被先存储到 last 文件，等下次落盘时，新落盘的记录将与 last 文件的记录进行合并，再写入数据文件。

-数据写入磁盘时，根据系统配置参数comp决定是否压缩数据。TDengine提供了三种压缩选项：无压缩、一阶段压缩和两阶段压缩，分别对应comp值为0、1和2的情况。一阶段压缩根据数据的类型进行了相应的压缩，压缩算法包括delta-delta编码、simple 8B方法、zig-zag编码、LZ4等算法。二阶段压缩在一阶段压缩的基础上又用通用压缩算法进行了压缩，压缩率更高。
+数据写入磁盘时，根据系统配置参数 comp 决定是否压缩数据。TDengine 提供了三种压缩选项：无压缩、一阶段压缩和两阶段压缩，分别对应 comp 值为 0、1 和 2 的情况。一阶段压缩根据数据的类型进行了相应的压缩，压缩算法包括 delta-delta 编码、simple 8B 方法、zig-zag 编码、LZ4 等算法。二阶段压缩在一阶段压缩的基础上又用通用压缩算法进行了压缩，压缩率更高。

 ### 多级存储

 说明：多级存储功能仅企业版支持，从 2.0.16.0 版本开始提供。

-在默认配置下，TDengine会将所有数据保存在/var/lib/taos目录下，而且每个vnode的数据文件保存在该目录下的不同目录。为扩大存储空间，尽量减少文件读取的瓶颈，提高数据吞吐率 TDengine可通过配置系统参数dataDir让多个挂载的硬盘被系统同时使用。
+在默认配置下，TDengine 会将所有数据保存在 /var/lib/taos 目录下，而且每个 vnode 的数据文件保存在该目录下的不同目录。为扩大存储空间，尽量减少文件读取的瓶颈，提高数据吞吐率 TDengine 可通过配置系统参数 dataDir 让多个挂载的硬盘被系统同时使用。

-除此之外，TDengine也提供了数据分级存储的功能，将不同时间段的数据存储在挂载的不同介质上的目录里，从而实现不同“热度”的数据存储在不同的存储介质上，充分利用存储，节约成本。比如，最新采集的数据需要经常访问，对硬盘的读取性能要求高，那么用户可以配置将这些数据存储在SSD盘上。超过一定期限的数据，查询需求量没有那么高，那么可以存储在相对便宜的HDD盘上。
+除此之外，TDengine 也提供了数据分级存储的功能，将不同时间段的数据存储在挂载的不同介质上的目录里，从而实现不同“热度”的数据存储在不同的存储介质上，充分利用存储，节约成本。比如，最新采集的数据需要经常访问，对硬盘的读取性能要求高，那么用户可以配置将这些数据存储在 SSD 盘上。超过一定期限的数据，查询需求量没有那么高，那么可以存储在相对便宜的 HDD 盘上。

-多级存储支持3级，每级最多可配置16个挂载点。
+多级存储支持3级，每级最多可配置 16 个挂载点。

-TDengine多级存储配置方式如下（在配置文件/etc/taos/taos.cfg中）：
+TDengine 多级存储配置方式如下（在配置文件/etc/taos/taos.cfg中）：

 ```
 dataDir [path] <level> <primary>
 ```

 -  path: 挂载点的文件夹路径
-  level: 介质存储等级，取值为0，1，2。   
-   0级存储最新的数据，1级存储次新的数据，2级存储最老的数据，省略默认为0。   
-   各级存储之间的数据流向：0级存储 -> 1级存储 -> 2级存储。   
+-  level: 介质存储等级，取值为 0，1，2。   
+   0级存储最新的数据，1级存储次新的数据，2级存储最老的数据，省略默认为 0。   
+   各级存储之间的数据流向：0 级存储 -> 1 级存储 -> 2 级存储。   
   同一存储等级可挂载多个硬盘，同一存储等级上的数据文件分布在该存储等级的所有硬盘上。   
   需要说明的是，数据在不同级别的存储介质上的移动，是由系统自动完成的，用户无需干预。
-  primary: 是否为主挂载点，0（是）或1（否），省略默认为1。
+-  primary: 是否为主挂载点，0（是）或 1（否），省略默认为 1。

 在配置中，只允许一个主挂载点的存在（level=0, primary=0），例如采用如下的配置方式：

@@ -396,56 +396,56 @@ dataDir /mnt/data6 2 1
 ```

 注意：
-1. 多级存储不允许跨级配置，合法的配置方案有：仅0级，仅0级+1级，以及0级+1级+2级。而不允许只配置level=0和level=2，而不配置level=1。
+1. 多级存储不允许跨级配置，合法的配置方案有：仅 0 级，仅 0 级+ 1 级，以及 0 级+ 1 级+ 2 级。而不允许只配置 level=0 和 level=2，而不配置 level=1。
 2. 禁止手动移除使用中的挂载盘，挂载盘目前不支持非本地的网络盘。
 3. 多级存储目前不支持删除已经挂载的硬盘的功能。

 ## <a class="anchor" id="query"></a>数据查询

-TDengine提供了多种多样针对表和超级表的查询处理功能，除了常规的聚合查询之外，还提供针对时序数据的窗口查询、统计聚合等功能。TDengine的查询处理需要客户端、vnode、mnode节点协同完成。
+TDengine 提供了多种多样针对表和超级表的查询处理功能，除了常规的聚合查询之外，还提供针对时序数据的窗口查询、统计聚合等功能。TDengine 的查询处理需要客户端、vnode、mnode 节点协同完成。

 ### 单表查询

-SQL语句的解析和校验工作在客户端完成。解析SQL语句并生成抽象语法树(Abstract Syntax Tree, AST)，然后对其进行校验和检查。以及向管理节点(mnode)请求查询中指定表的元数据信息(table metadata)。
+SQL 语句的解析和校验工作在客户端完成。解析 SQL 语句并生成抽象语法树(Abstract Syntax Tree, AST)，然后对其进行校验和检查。以及向管理节点(mnode)请求查询中指定表的元数据信息(table metadata)。

-根据元数据信息中的End Point信息，将查询请求序列化后发送到该表所在的数据节点（dnode）。dnode接收到查询请求后，识别出该查询请求指向的虚拟节点（vnode），将消息转发到vnode的查询执行队列。vnode的查询执行线程建立基础的查询执行环境，并立即返回该查询请求，同时开始执行该查询。
+根据元数据信息中的 End Point 信息，将查询请求序列化后发送到该表所在的数据节点（dnode）。dnode 接收到查询请求后，识别出该查询请求指向的虚拟节点（vnode），将消息转发到 vnode 的查询执行队列。vnode 的查询执行线程建立基础的查询执行环境，并立即返回该查询请求，同时开始执行该查询。

-客户端在获取查询结果的时候，dnode的查询执行队列中的工作线程会等待vnode执行线程执行完成，才能将查询结果返回到请求的客户端。
+客户端在获取查询结果的时候，dnode 的查询执行队列中的工作线程会等待 vnode 执行线程执行完成，才能将查询结果返回到请求的客户端。

 ### 按时间轴聚合、降采样、插值

 时序数据有别于普通数据的显著特征是每条记录均具有时间戳，因此针对具有时间戳的数据在时间轴上进行聚合是不同于普通数据库的重要功能。从这点上来看，与流计算引擎的窗口查询有相似的地方。

-在TDengine中引入关键词interval来进行时间轴上固定长度时间窗口的切分，并按照时间窗口对数据进行聚合，对窗口范围内的数据按需进行聚合。例如：
+在 TDengine 中引入关键词 interval 来进行时间轴上固定长度时间窗口的切分，并按照时间窗口对数据进行聚合，对窗口范围内的数据按需进行聚合。例如：
 ```sql
 SELECT COUNT(*) FROM d1001 INTERVAL(1h);
 ```

-针对d1001设备采集的数据，按照1小时的时间窗口返回每小时存储的记录数量。
+针对 d1001 设备采集的数据，按照1小时的时间窗口返回每小时存储的记录数量。

-在需要连续获得查询结果的应用场景下，如果给定的时间区间存在数据缺失，会导致该区间数据结果也丢失。TDengine提供策略针对时间轴聚合计算的结果进行插值，通过使用关键词fill就能够对时间轴聚合结果进行插值。例如：
+在需要连续获得查询结果的应用场景下，如果给定的时间区间存在数据缺失，会导致该区间数据结果也丢失。TDengine 提供策略针对时间轴聚合计算的结果进行插值，通过使用关键词 fill 就能够对时间轴聚合结果进行插值。例如：
 ```sql
 SELECT COUNT(*) FROM d1001 WHERE ts >= '2017-7-14 00:00:00' AND ts < '2017-7-14 23:59:59' INTERVAL(1h) FILL(PREV);
 ```

-针对d1001设备采集数据统计每小时记录数，如果某一个小时不存在数据，则返回之前一个小时的统计数据。TDengine提供前向插值(prev)、线性插值(linear)、NULL值填充(NULL)、特定值填充(value)。
+针对 d1001 设备采集数据统计每小时记录数，如果某一个小时不存在数据，则返回之前一个小时的统计数据。TDengine 提供前向插值(prev)、线性插值(linear)、NULL值填充(NULL)、特定值填充(value)。

 ### 多表聚合查询

-TDengine对每个数据采集点单独建表，但在实际应用中经常需要对不同的采集点数据进行聚合。为高效的进行聚合操作，TDengine引入超级表（STable）的概念。超级表用来代表一特定类型的数据采集点，它是包含多张表的表集合，集合里每张表的模式（schema）完全一致，但每张表都带有自己的静态标签，标签可以有多个，可以随时增加、删除和修改。应用可通过指定标签的过滤条件，对一个STable下的全部或部分表进行聚合或统计操作，这样大大简化应用的开发。其具体流程如下图所示：
+TDengine 对每个数据采集点单独建表，但在实际应用中经常需要对不同的采集点数据进行聚合。为高效的进行聚合操作，TDengine 引入超级表（STable）的概念。超级表用来代表一特定类型的数据采集点，它是包含多张表的表集合，集合里每张表的模式（schema）完全一致，但每张表都带有自己的静态标签，标签可以有多个，可以随时增加、删除和修改。应用可通过指定标签的过滤条件，对一个 STable 下的全部或部分表进行聚合或统计操作，这样大大简化应用的开发。其具体流程如下图所示：

 ![多表聚合查询原理图](../images/architecture/multi_tables.png)
 <center> 图 5 多表聚合查询原理图  </center>

 1. 应用将一个查询条件发往系统；
-2. taosc将超级表的名字发往 meta node（管理节点)；
+2. taosc 将超级表的名字发往 meta node（管理节点)；
 3. 管理节点将超级表所拥有的 vnode 列表发回 taosc；
-4. taosc将计算的请求连同标签过滤条件发往这些vnode对应的多个数据节点；
-5. 每个vnode先在内存里查找出自己节点里符合标签过滤条件的表的集合，然后扫描存储的时序数据，完成相应的聚合计算，将结果返回给taosc；
-6. taosc将多个数据节点返回的结果做最后的聚合，将其返回给应用。 
+4. taosc 将计算的请求连同标签过滤条件发往这些 vnode 对应的多个数据节点；
+5. 每个 vnode 先在内存里查找出自己节点里符合标签过滤条件的表的集合，然后扫描存储的时序数据，完成相应的聚合计算，将结果返回给 taosc；
+6. taosc 将多个数据节点返回的结果做最后的聚合，将其返回给应用。 

-由于TDengine在vnode内将标签数据与时序数据分离存储，通过在内存里过滤标签数据，先找到需要参与聚合操作的表的集合，将需要扫描的数据集大幅减少，大幅提升聚合计算速度。同时，由于数据分布在多个vnode/dnode，聚合计算操作在多个vnode里并发进行，又进一步提升了聚合的速度。 对普通表的聚合函数以及绝大部分操作都适用于超级表，语法完全一样，细节请看 TAOS SQL。
+由于 TDengine 在 vnode 内将标签数据与时序数据分离存储，通过在内存里过滤标签数据，先找到需要参与聚合操作的表的集合，将需要扫描的数据集大幅减少，大幅提升聚合计算速度。同时，由于数据分布在多个 vnode/dnode，聚合计算操作在多个 vnode 里并发进行，又进一步提升了聚合的速度。 对普通表的聚合函数以及绝大部分操作都适用于超级表，语法完全一样，细节请看 TAOS SQL。

 ### 预计算

-为有效提升查询处理的性能，针对物联网数据的不可更改的特点，在数据块头部记录该数据块中存储数据的统计信息：包括最大值、最小值、和。我们称之为预计算单元。如果查询处理涉及整个数据块的全部数据，直接使用预计算结果，完全不需要读取数据块的内容。由于预计算数据量远小于磁盘上存储的数据块数据的大小，对于磁盘IO为瓶颈的查询处理，使用预计算结果可以极大地减小读取IO压力，加速查询处理的流程。预计算机制与Postgre SQL的索引BRIN（block range index）有异曲同工之妙。
+为有效提升查询处理的性能，针对物联网数据的不可更改的特点，在数据块头部记录该数据块中存储数据的统计信息：包括最大值、最小值、和。我们称之为预计算单元。如果查询处理涉及整个数据块的全部数据，直接使用预计算结果，完全不需要读取数据块的内容。由于预计算数据量远小于磁盘上存储的数据块数据的大小，对于磁盘 I/O 为瓶颈的查询处理，使用预计算结果可以极大地减小读取 I/O 压力，加速查询处理的流程。预计算机制与 Postgre SQL 的索引 BRIN（block range index）有异曲同工之妙。
--- a/documentation20/cn/05.insert/docs.md
+++ b/documentation20/cn/05.insert/docs.md
@@ -243,7 +243,7 @@ repeater 部分添加 { host:'<TDengine server/cluster host>', port: <port for S
 {
 port: 8125
 , backends: ["./backends/repeater"]
-, repeater: [{ host: '127.0.0.1', port: 8126}]
+, repeater: [{ host: '127.0.0.1', port: 6044}]
 }
 ```


--- a/documentation20/cn/12.taos-sql/docs.md
+++ b/documentation20/cn/12.taos-sql/docs.md
@@ -171,6 +171,7 @@ TDengine 缺省的时间戳是毫秒精度，但通过在 CREATE DATABASE 时传
    4) 子表名只能由字母、数字和下划线组成，且不能以数字开头，不区分大小写

    5) 使用数据类型 binary 或 nchar，需指定其最长的字节数，如 binary(20)，表示 20 字节；
+    
    6) 为了兼容支持更多形式的表名，TDengine 引入新的转义符 "\`"，可以让表名与关键词不冲突，同时不受限于上述表名称合法性约束检查。但是同样具有长度限制要求。使用转义字符以后，不再对转义字符中的内容进行大小写统一。
    例如：\`aBc\` 和 \`abc\` 是不同的表名，但是 abc 和 aBc 是相同的表名。
    需要注意的是转义字符中的内容必须是可打印字符。
@@ -1280,7 +1281,7 @@ TDengine支持针对数据的聚合查询。提供支持的聚合和选择函数

    适用于：**表、超级表**。

-    说明：*P*值有效取值范围0≤P≤100，为 0 的时候等同于 MIN，为 100 的时候等同于MAX；*algo_type*的有效输入：**default** 和 **t-digest**。 用于指定计算近似分位数的算法。可不提供第三个参数的输入，此时将使用 default 的算法进行计算，即 apercentile(column_name, 50, "default") 与 apercentile(column_name, 50) 等价。当使用“t-digest”参数的时候，将使用t-digest方式采样计算近似分位数。但该参数指定计算算法的功能从2.2.0.x版本开始支持，2.2.0.0之前的版本不支持指定使用算法的功能。
+    说明：<br/>**P**值有效取值范围0≤P≤100，为 0 的时候等同于 MIN，为 100 的时候等同于MAX；<br/>**algo_type**的有效输入：**default** 和 **t-digest**。 用于指定计算近似分位数的算法。可不提供第三个参数的输入，此时将使用 default 的算法进行计算，即 apercentile(column_name, 50, "default") 与 apercentile(column_name, 50) 等价。当使用“t-digest”参数的时候，将使用t-digest方式采样计算近似分位数。但该参数指定计算算法的功能从2.2.0.x版本开始支持，2.2.0.0之前的版本不支持指定使用算法的功能。<br/>
    
    嵌套子查询支持：适用于内层查询和外层查询。
    
@@ -1319,9 +1320,9 @@ TDengine支持针对数据的聚合查询。提供支持的聚合和选择函数
    
 限制：LAST_ROW() 不能与 INTERVAL 一起使用。
    
-说明：在用于超级表时，时间戳完全一样且同为最大的数据行可能有多个，那么会从中随机返回一条，而并不保证多次运行所挑选的数据行必然一致。
+说明：在用于超级表时，时间戳完全一样且同为最大的数据行可能有多个，那么会从中随机返回一条，而并不保证多次运行所挑选的数据行必然一致。<br/>
+   <br/>示例：

-    示例：
   ```mysql
    taos> SELECT LAST_ROW(current) FROM meters;
    last_row(current)   |
@@ -1336,10 +1337,13 @@ TDengine支持针对数据的聚合查询。提供支持的聚合和选择函数
    Query OK, 1 row(s) in set (0.001042s)
   ```
    
+
 - **INTERP** 
+    
    ```mysql
    SELECT INTERP(field_name) FROM { tb_name | stb_name } WHERE ts='timestamp' [FILL ({ VALUE | PREV | NULL | LINEAR | NEXT})];
    ```
+
 功能说明：返回表/超级表的指定时间截面、指定字段的记录。

 返回结果数据类型：同字段类型。
@@ -1348,15 +1352,9 @@ TDengine支持针对数据的聚合查询。提供支持的聚合和选择函数

 适用于：**表、超级表**。

-说明：（从 2.0.15.0 版本开始新增此函数）
-
-    1）INTERP 必须指定时间断面，如果该时间断面不存在直接对应的数据，那么会根据 FILL 参数的设定进行插值。此外，条件语句里面可附带筛选条件，例如标签、tbname。
-
-    2）INTERP 查询要求查询的时间区间必须位于数据集合（表）的所有记录的时间范围之内。如果给定的时间戳位于时间范围之外，即使有插值指令，仍然不返回结果。
-
-    3）单个 INTERP 函数查询只能够针对一个时间点进行查询，如果需要返回等时间间隔的断面数据，可以通过 INTERP 配合 EVERY 的方式来进行查询处理（而不是使用 INTERVAL），其含义是每隔固定长度的时间进行插值。
-
+说明：（从 2.0.15.0 版本开始新增此函数） <br/>1）INTERP 必须指定时间断面，如果该时间断面不存在直接对应的数据，那么会根据 FILL 参数的设定进行插值。此外，条件语句里面可附带筛选条件，例如标签、tbname。<br/>2）INTERP 查询要求查询的时间区间必须位于数据集合（表）的所有记录的时间范围之内。如果给定的时间戳位于时间范围之外，即使有插值指令，仍然不返回结果。<br/>3）单个 INTERP 函数查询只能够针对一个时间点进行查询，如果需要返回等时间间隔的断面数据，可以通过 INTERP 配合 EVERY 的方式来进行查询处理（而不是使用 INTERVAL），其含义是每隔固定长度的时间进行插值。<br/>    
    示例：
+    
   ```mysql
    taos> SELECT INTERP(*) FROM meters WHERE ts='2017-7-14 18:40:00.004';
           interp(ts)        |   interp(current)    | interp(voltage) |    interp(phase)     |
@@ -1365,20 +1363,20 @@ TDengine支持针对数据的聚合查询。提供支持的聚合和选择函数
    Query OK, 1 row(s) in set (0.002652s)
   ```

-   如果给定的时间戳无对应的数据，在不指定插值生成策略的情况下，不会返回结果，如果指定了插值策略，会根据插值策略返回结果。
+如果给定的时间戳无对应的数据，在不指定插值生成策略的情况下，不会返回结果，如果指定了插值策略，会根据插值策略返回结果。

   ```mysql
    taos> SELECT INTERP(*) FROM meters WHERE tbname IN ('d636') AND ts='2017-7-14 18:40:00.005';
    Query OK, 0 row(s) in set (0.004022s)
    
-    taos> SELECT INTERP(*) FROM meters WHERE tbname IN ('d636') AND ts='2017-7-14 18:40:00.005' FILL(PREV);;
+    taos> SELECT INTERP(*) FROM meters WHERE tbname IN ('d636') AND ts='2017-7-14 18:40:00.005' FILL(PREV);
           interp(ts)        |   interp(current)    | interp(voltage) |    interp(phase)     |
    ==========================================================================================
     2017-07-14 18:40:00.005 |              9.88150 |             217 |              0.32500 |
    Query OK, 1 row(s) in set (0.003056s)
   ```

-    如下所示代码表示在时间区间 `['2017-7-14 18:40:00', '2017-7-14 18:40:00.014']`  中每隔 5 毫秒 进行一次断面计算。
+如下所示代码表示在时间区间 `['2017-7-14 18:40:00', '2017-7-14 18:40:00.014']` 中每隔 5 毫秒 进行一次断面计算。

   ```mysql
    taos> SELECT INTERP(current) FROM d636 WHERE ts>='2017-7-14 18:40:00' AND ts<='2017-7-14 18:40:00.014' EVERY(5a);
@@ -1604,7 +1602,7 @@ SELECT AVG(current), MAX(current), LEASTSQUARES(current, start_val, step_val), P

 **GROUP BY的限制**

-TAOS SQL 支持对标签、TBNAME 进行 GROUP BY 操作，也支持普通列进行 GROUP BY，前提是：仅限一列且该列的唯一值小于 10 万个。
+TAOS SQL 支持对标签、TBNAME 进行 GROUP BY 操作，也支持普通列进行 GROUP BY，前提是：仅限一列且该列的唯一值小于 10 万个。注意：group by 不支持float,double 类型。

 **IS NOT NULL 与不为空的表达式适用范围**


--- a/documentation20/cn/14.devops/02.collectd/docs.md
+++ b/documentation20/cn/14.devops/02.collectd/docs.md
@@ -57,7 +57,7 @@ repeater 部分添加 { host:'<TDengine server/cluster host>', port: <port for S

 ### 导入 Dashboard

-使用 Web 浏览器访问 IP:3000 登录 Grafana 界面，系统初始用户名密码为 admin/admin。
+使用 Web 浏览器访问运行 Grafana 的服务器的3000端口 host:3000 登录 Grafana 界面，系统初始用户名密码为 admin/admin。
 点击左侧齿轮图标并选择 Plugins，应该可以找到 TDengine data source 插件图标。

 #### 导入 collectd 仪表盘

--- a/packaging/tools/make_install.sh
+++ b/packaging/tools/make_install.sh
@@ -254,7 +254,10 @@ function install_lib() {
    fi

    if [ "$osType" != "Darwin" ]; then
-        ${csudo} cp ${binary_dir}/build/lib/libtaos.so.${verNumber} ${install_main_dir}/driver && ${csudo} chmod 777 ${install_main_dir}/driver/*
+        ${csudo} cp ${binary_dir}/build/lib/libtaos.so.${verNumber} \
+            ${install_main_dir}/driver \
+            && ${csudo} chmod 777 ${install_main_dir}/driver/*
+
        ${csudo} ln -sf ${install_main_dir}/driver/libtaos.* ${lib_link_dir}/libtaos.so.1
        ${csudo} ln -sf ${lib_link_dir}/libtaos.so.1 ${lib_link_dir}/libtaos.so

@@ -263,11 +266,28 @@ function install_lib() {
          ${csudo} ln -sf ${lib64_link_dir}/libtaos.so.1 ${lib64_link_dir}/libtaos.so
        fi
    else
-        ${csudo} cp -Rf ${binary_dir}/build/lib/libtaos.${verNumber}.dylib ${install_main_dir}/driver || ${csudo} cp -Rf ${binary_dir}/build/lib/libtaos.${verNumber}.dylib ${install_main_2_dir}/driver && ${csudo} chmod 777 ${install_main_dir}/driver/* || ${csudo} chmod 777 ${install_main_2_dir}/driver/*
+        ${csudo} cp -Rf ${binary_dir}/build/lib/libtaos.${verNumber}.dylib \
+            ${install_main_dir}/driver \
+            || ${csudo} cp -Rf ${binary_dir}/build/lib/libtaos.${verNumber}.dylib \
+            ${install_main_2_dir}/driver \
+            && ${csudo} chmod 777 ${install_main_dir}/driver/* \
+            || ${csudo} chmod 777 ${install_main_2_dir}/driver/*
+
+        ${csudo} ln -sf ${install_main_dir}/driver/libtaos.* \
+            ${install_main_dir}/driver/libtaos.1.dylib \
+            || ${csudo} ln -sf ${install_main_2_dir}/driver/libtaos.* \
+            ${install_main_2_dir}/driver/libtaos.1.dylib   || :
+
+        ${csudo} ln -sf ${install_main_dir}/driver/libtaos.1.dylib \
+            ${install_main_dir}/driver/libtaos.dylib \
+            || ${csudo} ln -sf ${install_main_2_dir}/driver/libtaos.1.dylib \
+            ${install_main_2_dir}/driver/libtaos.dylib   || :
+
+        ${csudo} ln -sf ${install_main_dir}/driver/libtaos.${verNumber}.dylib \
+            ${lib_link_dir}/libtaos.1.dylib \
+            || ${csudo} ln -sf ${install_main_2_dir}/driver/libtaos.${verNumber}.dylib \
+            ${lib_link_dir}/libtaos.1.dylib   || :

-        ${csudo} ln -sf ${install_main_dir}/driver/libtaos.* ${install_main_dir}/driver/libtaos.1.dylib || ${csudo} ln -sf ${install_main_2_dir}/driver/libtaos.* ${install_main_2_dir}/driver/libtaos.1.dylib   || :
-        ${csudo} ln -sf ${install_main_dir}/driver/libtaos.1.dylib ${install_main_dir}/driver/libtaos.dylib || ${csudo} ln -sf ${install_main_2_dir}/driver/libtaos.1.dylib ${install_main_2_dir}/driver/libtaos.dylib   || :
-        ${csudo} ln -sf ${install_main_dir}/driver/libtaos.${verNumber}.dylib ${lib_link_dir}/libtaos.1.dylib || ${csudo} ln -sf ${install_main_2_dir}/driver/libtaos.${verNumber}.dylib ${lib_link_dir}/libtaos.1.dylib   || :
        ${csudo} ln -sf ${lib_link_dir}/libtaos.1.dylib ${lib_link_dir}/libtaos.dylib   || :
    fi

@@ -282,11 +302,17 @@ function install_header() {

    if [ "$osType" != "Darwin" ]; then
        ${csudo} rm -f ${inc_link_dir}/taos.h ${inc_link_dir}/taoserror.h     || :
-        ${csudo} cp -f ${source_dir}/src/inc/taos.h ${source_dir}/src/inc/taoserror.h ${install_main_dir}/include && ${csudo} chmod 644 ${install_main_dir}/include/*
+        ${csudo} cp -f ${source_dir}/src/inc/taos.h ${source_dir}/src/inc/taoserror.h \
+            ${install_main_dir}/include && ${csudo} chmod 644 ${install_main_dir}/include/*
        ${csudo} ln -s ${install_main_dir}/include/taos.h ${inc_link_dir}/taos.h
        ${csudo} ln -s ${install_main_dir}/include/taoserror.h ${inc_link_dir}/taoserror.h
    else
-        ${csudo} cp -f ${source_dir}/src/inc/taos.h ${source_dir}/src/inc/taoserror.h ${install_main_dir}/include || ${csudo} cp -f ${source_dir}/src/inc/taos.h ${source_dir}/src/inc/taoserror.h ${install_main_2_dir}/include && ${csudo} chmod 644 ${install_main_dir}/include/* || ${csudo} chmod 644 ${install_main_2_dir}/include/*
+        ${csudo} cp -f ${source_dir}/src/inc/taos.h ${source_dir}/src/inc/taoserror.h \
+            ${install_main_dir}/include \
+            || ${csudo} cp -f ${source_dir}/src/inc/taos.h ${source_dir}/src/inc/taoserror.h \
+            ${install_main_2_dir}/include \
+            && ${csudo} chmod 644 ${install_main_dir}/include/* \
+            || ${csudo} chmod 644 ${install_main_2_dir}/include/*
    fi
 }

@@ -301,7 +327,12 @@ function install_config() {
        ${csudo} cp -f ${script_dir}/../cfg/taos.cfg ${install_main_dir}/cfg/taos.cfg.org
        ${csudo} ln -s ${cfg_install_dir}/taos.cfg ${install_main_dir}/cfg/taos.cfg
    else
-        ${csudo} cp -f ${script_dir}/../cfg/taos.cfg ${install_main_dir}/cfg/taos.cfg.org || ${csudo} cp -f ${script_dir}/../cfg/taos.cfg ${install_main_2_dir}/cfg/taos.cfg.org
+        if [ "$osType" != "Darwin" ]; then
+            ${csudo} cp -f ${script_dir}/../cfg/taos.cfg ${install_main_dir}/cfg/taos.cfg.org
+        else
+            ${csudo} cp -f ${script_dir}/../cfg/taos.cfg ${install_main_dir}/cfg/taos.cfg.org\
+                || ${csudo} cp -f ${script_dir}/../cfg/taos.cfg ${install_main_2_dir}/cfg/taos.cfg.org
+        fi
    fi
 }

@@ -312,14 +343,21 @@ function install_blm3_config() {
            ${csudo} cp ${binary_dir}/test/cfg/blm.toml ${cfg_install_dir}
        [ -f ${cfg_install_dir}/blm.toml ] &&
            ${csudo} chmod 644 ${cfg_install_dir}/blm.toml
-        [ -f ${binary_dir}/test/cfg//blm.toml ] &&
+        [ -f ${binary_dir}/test/cfg/blm.toml ] &&
            ${csudo} cp -f ${binary_dir}/test/cfg/blm.toml ${install_main_dir}/cfg/blm.toml.org
        [ -f ${cfg_install_dir}/blm.toml ] &&
            ${csudo} ln -s ${cfg_install_dir}/blm.toml ${install_main_dir}/cfg/blm.toml
    else
-        [ -f ${binary_dir}/test/cfg//blm.toml ] &&
+        if [ -f "${binary_dir}/test/cfg/blm.toml" ]; then
+            if [ "$osType" != "Darwin" ]; then
+                ${csudo} cp -f ${binary_dir}/test/cfg/blm.toml \
+                    ${install_main_dir}/cfg/blm.toml.org
+            else
                ${csudo} cp -f ${binary_dir}/test/cfg/blm.toml ${install_main_dir}/cfg/blm.toml.org \
-                || ${csudo} cp -f ${binary_dir}/test/cfg/blm.toml ${install_main_2_dir}/cfg/blm.toml.org
+                    || ${csudo} cp -f ${binary_dir}/test/cfg/blm.toml \
+                    ${install_main_2_dir}/cfg/blm.toml.org
+            fi
+        fi
    fi
 }


--- a/src/client/inc/tscParseLine.h
+++ b/src/client/inc/tscParseLine.h
@@ -67,6 +67,8 @@ typedef struct {
  SMLProtocolType protocol;
  SMLTimeStampType tsType;
  SHashObj* smlDataToSchema;
+
+  int64_t affectedRows;
 } SSmlLinesInfo;

 int tscSmlInsert(TAOS* taos, TAOS_SML_DATA_POINT* points, int numPoint, SSmlLinesInfo* info);

--- a/src/client/src/tscParseLineProtocol.c
+++ b/src/client/src/tscParseLineProtocol.c
@@ -761,7 +761,7 @@ static int32_t doInsertChildTableWithStmt(TAOS* taos, char* sql, char* cTableNam
  code = taos_stmt_prepare(stmt, sql, (unsigned long)strlen(sql));

  if (code != 0) {
-    tscError("SML:0x%"PRIx64" taos_stmt_prepare return %d:%s", info->id, code, tstrerror(code));
+    tscError("SML:0x%"PRIx64" taos_stmt_prepare return %d:%s", info->id, code, taos_stmt_errstr(stmt));
    taos_stmt_close(stmt);
    return code;
  }
@@ -771,7 +771,11 @@ static int32_t doInsertChildTableWithStmt(TAOS* taos, char* sql, char* cTableNam
  do {
    code = taos_stmt_set_tbname(stmt, cTableName);
    if (code != 0) {
-      tscError("SML:0x%"PRIx64" taos_stmt_set_tbname return %d:%s", info->id, code, tstrerror(code));
+      tscError("SML:0x%"PRIx64" taos_stmt_set_tbname return %d:%s", info->id, code, taos_stmt_errstr(stmt));
+
+      int affectedRows = taos_stmt_affected_rows(stmt);
+      info->affectedRows += affectedRows;
+
      taos_stmt_close(stmt);
      return code;
    }
@@ -781,13 +785,21 @@ static int32_t doInsertChildTableWithStmt(TAOS* taos, char* sql, char* cTableNam
      TAOS_BIND* colsBinds = taosArrayGetP(batchBind, i);
      code = taos_stmt_bind_param(stmt, colsBinds);
      if (code != 0) {
-        tscError("SML:0x%"PRIx64" taos_stmt_bind_param return %d:%s", info->id, code, tstrerror(code));
+        tscError("SML:0x%"PRIx64" taos_stmt_bind_param return %d:%s", info->id, code, taos_stmt_errstr(stmt));
+
+        int affectedRows = taos_stmt_affected_rows(stmt);
+        info->affectedRows += affectedRows;
+
        taos_stmt_close(stmt);
        return code;
      }
      code = taos_stmt_add_batch(stmt);
      if (code != 0) {
-        tscError("SML:0x%"PRIx64" taos_stmt_add_batch return %d:%s", info->id, code, tstrerror(code));
+        tscError("SML:0x%"PRIx64" taos_stmt_add_batch return %d:%s", info->id, code, taos_stmt_errstr(stmt));
+
+        int affectedRows = taos_stmt_affected_rows(stmt);
+        info->affectedRows += affectedRows;
+
        taos_stmt_close(stmt);
        return code;
      }
@@ -795,8 +807,9 @@ static int32_t doInsertChildTableWithStmt(TAOS* taos, char* sql, char* cTableNam

    code = taos_stmt_execute(stmt);
    if (code != 0) {
-      tscError("SML:0x%"PRIx64" taos_stmt_execute return %d:%s, try:%d", info->id, code, tstrerror(code), try);
+      tscError("SML:0x%"PRIx64" taos_stmt_execute return %d:%s, try:%d", info->id, code, taos_stmt_errstr(stmt), try);
    }
+    tscDebug("SML:0x%"PRIx64" taos_stmt_execute inserted %d rows", info->id, taos_stmt_affected_rows(stmt));
    
    tryAgain = false;
    if ((code == TSDB_CODE_TDB_INVALID_TABLE_ID
@@ -825,6 +838,8 @@ static int32_t doInsertChildTableWithStmt(TAOS* taos, char* sql, char* cTableNam
    }
  } while (tryAgain);

+  int affectedRows = taos_stmt_affected_rows(stmt);
+  info->affectedRows += affectedRows;

  taos_stmt_close(stmt);
  return code;
@@ -1069,6 +1084,8 @@ int tscSmlInsert(TAOS* taos, TAOS_SML_DATA_POINT* points, int numPoint, SSmlLine

  int32_t code = TSDB_CODE_SUCCESS;

+  info->affectedRows = 0;
+
  tscDebug("SML:0x%"PRIx64" build data point schemas", info->id);
  SArray* stableSchemas = taosArrayInit(32, sizeof(SSmlSTableSchema)); // SArray<STableColumnsSchema>
  code = buildDataPointSchemas(points, numPoint, stableSchemas, info);
@@ -1871,7 +1888,7 @@ static int32_t parseSmlKey(TAOS_SML_KV *pKV, const char **index, SHashObj *pHash

  //key field cannot start with digit
  if (isdigit(*cur)) {
-    tscError("SML:0x%"PRIx64" Tag key cannnot start with digit", info->id);
+    tscError("SML:0x%"PRIx64" Tag key cannot start with digit", info->id);
    return TSDB_CODE_TSC_LINE_SYNTAX_ERROR;
  }
  while (*cur != '\0') {
@@ -1885,6 +1902,8 @@ static int32_t parseSmlKey(TAOS_SML_KV *pKV, const char **index, SHashObj *pHash
    }
    //Escape special character
    if (*cur == '\\') {
+      //TODO: escape will work after column & tag
+      //support spcial characters
      escapeSpecialCharacter(2, &cur);
    }
    key[len] = *cur;
@@ -1911,13 +1930,42 @@ static int32_t parseSmlKey(TAOS_SML_KV *pKV, const char **index, SHashObj *pHash
 static int32_t parseSmlValue(TAOS_SML_KV *pKV, const char **index,
                          bool *is_last_kv, SSmlLinesInfo* info, bool isTag) {
  const char *start, *cur;
+  int32_t ret = TSDB_CODE_SUCCESS;
  char *value = NULL;
  uint16_t len = 0;
+  bool searchQuote = false;
  start = cur = *index;

+  //if field value is string
+  if (!isTag) {
+    if (*cur == '"') {
+      searchQuote = true;
+      cur += 1;
+      len += 1;
+    } else if (*cur == 'L' && *(cur + 1) == '"') {
+      searchQuote = true;
+      cur += 2;
+      len += 2;
+    }
+  }
+
  while (1) {
    // unescaped ',' or ' ' or '\0' identifies a value
-    if ((*cur == ',' || *cur == ' ' || *cur == '\0') && *(cur - 1) != '\\') {
+    if (((*cur == ',' || *cur == ' ' ) && *(cur - 1) != '\\') || *cur == '\0') {
+      if (searchQuote == true) {
+        //first quote ignored while searching
+        if (*(cur - 1) == '"' && len != 1 && len != 2) {
+          *is_last_kv = (*cur == ' ' || *cur == '\0') ? true : false;
+          break;
+        } else if (*cur == '\0') {
+          ret = TSDB_CODE_TSC_LINE_SYNTAX_ERROR;
+          goto error;
+        } else {
+          cur++;
+          len++;
+          continue;
+        }
+      }
      //unescaped ' ' or '\0' indicates end of value
      *is_last_kv = (*cur == ' ' || *cur == '\0') ? true : false;
      if (*cur == ' ' && *(cur + 1) == ' ') {
@@ -1929,7 +1977,7 @@ static int32_t parseSmlValue(TAOS_SML_KV *pKV, const char **index,
    }
    //Escape special character
    if (*cur == '\\') {
-      escapeSpecialCharacter(2, &cur);
+      escapeSpecialCharacter(isTag ? 2 : 3, &cur);
    }
    cur++;
    len++;
@@ -1946,16 +1994,20 @@ static int32_t parseSmlValue(TAOS_SML_KV *pKV, const char **index,
  if (!convertSmlValueType(pKV, value, len, info, isTag)) {
    tscError("SML:0x%"PRIx64" Failed to convert sml value string(%s) to any type",
            info->id, value);
-    //free previous alocated key field
-    free(pKV->key);
-    pKV->key = NULL;
    free(value);
-    return TSDB_CODE_TSC_INVALID_VALUE;
+    ret = TSDB_CODE_TSC_INVALID_VALUE;
+    goto error;
  }
  free(value);

  *index = (*cur == '\0') ? cur : cur + 1;
-  return TSDB_CODE_SUCCESS;
+  return ret;
+
+error:
+  //free previous alocated key field
+  free(pKV->key);
+  pKV->key = NULL;
+  return ret;
 }

 static int32_t parseSmlMeasurement(TAOS_SML_DATA_POINT *pSml, const char **index,

--- a/src/client/src/tscParseOpenTSDB.c
+++ b/src/client/src/tscParseOpenTSDB.c
@@ -138,14 +138,35 @@ static int32_t parseTelnetMetricValue(TAOS_SML_KV **pKVs, int *num_kvs, const ch
  const char *start, *cur;
  int32_t ret = TSDB_CODE_SUCCESS;
  int len = 0;
+  bool searchQuote = false;
  char key[] = OTD_METRIC_VALUE_COLUMN_NAME;
  char *value = NULL;

  start = cur = *index;

+  //if metric value is string
+  if (*cur == '"') {
+    searchQuote = true;
+    cur += 1;
+    len += 1;
+  } else if (*cur == 'L' && *(cur + 1) == '"') {
+    searchQuote = true;
+    cur += 2;
+    len += 2;
+  }
+
  while(*cur != '\0') {
    if (*cur == ' ') {
-      if (*cur == ' ') {
+      if (searchQuote == true) {
+        if (*(cur - 1) == '"' && len != 1 && len != 2) {
+          searchQuote = false;
+        } else {
+          cur++;
+          len++;
+          continue;
+        }
+      }
+
      if (*(cur + 1) != ' ') {
        break;
      } else {
@@ -153,7 +174,6 @@ static int32_t parseTelnetMetricValue(TAOS_SML_KV **pKVs, int *num_kvs, const ch
        continue;
      }
    }
-    }
    cur++;
    len++;
  }

--- a/src/client/src/tscPrepare.c
+++ b/src/client/src/tscPrepare.c
@@ -78,6 +78,8 @@ typedef struct STscStmt {
  SSqlObj* pSql;
  SMultiTbStmt mtb;
  SNormalStmt normal;
+
+  int numOfRows;
 } STscStmt;

 #define STMT_RET(c) do {          \
@@ -1212,6 +1214,8 @@ static int insertStmtExecute(STscStmt* stmt) {
  // wait for the callback function to post the semaphore
  tsem_wait(&pSql->rspSem);

+  stmt->numOfRows += pSql->res.numOfRows;
+
  // data block reset
  pCmd->batchSize = 0;
  for(int32_t i = 0; i < pCmd->insertParam.numOfTables; ++i) {
@@ -1285,6 +1289,8 @@ static int insertBatchStmtExecute(STscStmt* pStmt) {

  code = pStmt->pSql->res.code;

+  pStmt->numOfRows += pStmt->pSql->res.numOfRows;
+
  insertBatchClean(pStmt);

  return code;
@@ -1521,6 +1527,7 @@ TAOS_STMT* taos_stmt_init(TAOS* taos) {
  pSql->maxRetry    = TSDB_MAX_REPLICA;
  pStmt->pSql       = pSql;
  pStmt->last       = STMT_INIT;
+  pStmt->numOfRows  = 0;
  registerSqlObj(pSql);

  return pStmt;
@@ -1564,9 +1571,7 @@ int taos_stmt_prepare(TAOS_STMT* stmt, const char* sql, unsigned long length) {
  }

  pRes->qId = 0;
-  pRes->numOfRows = 1;
-
-  registerSqlObj(pSql);
+  pRes->numOfRows = 0;

  strtolower(pSql->sqlstr, sql);
  tscDebugL("0x%"PRIx64" SQL: %s", pSql->self, pSql->sqlstr);
@@ -1981,6 +1986,7 @@ int taos_stmt_execute(TAOS_STMT* stmt) {
    } else {
      taosReleaseRef(tscObjRef, pStmt->pSql->self);
      pStmt->pSql = taos_query((TAOS*)pStmt->taos, sql);
+      pStmt->numOfRows += taos_affected_rows(pStmt->pSql);
      ret = taos_errno(pStmt->pSql);
      free(sql);
    }
@@ -1989,6 +1995,17 @@ int taos_stmt_execute(TAOS_STMT* stmt) {
  STMT_RET(ret);
 }

+int taos_stmt_affected_rows(TAOS_STMT* stmt) {
+  STscStmt* pStmt = (STscStmt*)stmt;
+
+  if (pStmt == NULL) {
+    tscError("statement is invalid");
+    return 0;
+  }
+
+  return pStmt->numOfRows;
+}
+
 TAOS_RES *taos_stmt_use_result(TAOS_STMT* stmt) {
  if (stmt == NULL) {
    tscError("statement is invalid.");

--- a/src/client/src/tscUtil.c
+++ b/src/client/src/tscUtil.c
@@ -5125,7 +5125,6 @@ static int32_t doAddTableName(char* nextStr, char** str, SArray* pNameArray, SSq

  if (nextStr == NULL) {
    tstrncpy(tablename, *str, TSDB_TABLE_FNAME_LEN);
-    len = (int32_t) strlen(tablename);
  } else {
    len = (int32_t)(nextStr - (*str));
    if (len >= TSDB_TABLE_NAME_LEN) {

--- a/src/connector/jdbc/src/main/java/com/taosdata/jdbc/AbstractDatabaseMetaData.java
+++ b/src/connector/jdbc/src/main/java/com/taosdata/jdbc/AbstractDatabaseMetaData.java
@@ -562,7 +562,7 @@ public abstract class AbstractDatabaseMetaData extends WrapperImpl implements Da
        List<TSDBResultSetRowData> rowDataList = new ArrayList<>();
        try (Statement stmt = connection.createStatement()) {
            stmt.execute("use " + catalog);
-            ResultSet tables = stmt.executeQuery("show tables");
+            try (ResultSet tables = stmt.executeQuery("show tables")) {
                while (tables.next()) {
                    TSDBResultSetRowData rowData = new TSDBResultSetRowData(10);
                    rowData.setStringValue(1, catalog);                                     //TABLE_CAT
@@ -572,7 +572,8 @@ public abstract class AbstractDatabaseMetaData extends WrapperImpl implements Da
                    rowData.setStringValue(5, "");                                   //REMARKS
                    rowDataList.add(rowData);
                }
-            ResultSet stables = stmt.executeQuery("show stables");
+            }
+            try (ResultSet stables = stmt.executeQuery("show stables")) {
                while (stables.next()) {
                    TSDBResultSetRowData rowData = new TSDBResultSetRowData(10);
                    rowData.setStringValue(1, catalog);                                  //TABLE_CAT
@@ -582,6 +583,7 @@ public abstract class AbstractDatabaseMetaData extends WrapperImpl implements Da
                    rowData.setStringValue(5, "STABLE");                          //REMARKS
                    rowDataList.add(rowData);
                }
+            }
            resultSet.setRowDataList(rowDataList);
        }
        return resultSet;
@@ -638,8 +640,9 @@ public abstract class AbstractDatabaseMetaData extends WrapperImpl implements Da
        resultSet.setColumnMetaDataList(buildGetColumnsColumnMetaDataList());
        // set up rowDataList
        List<TSDBResultSetRowData> rowDataList = new ArrayList<>();
-        try (Statement stmt = conn.createStatement()) {
-            ResultSet rs = stmt.executeQuery("describe " + catalog + "." + tableNamePattern);
+        try (Statement stmt = conn.createStatement();
+             ResultSet rs = stmt.executeQuery("describe " + catalog + "." + tableNamePattern)) {
+
            int rowIndex = 0;
            while (rs.next()) {
                TSDBResultSetRowData rowData = new TSDBResultSetRowData(24);
@@ -1147,9 +1150,9 @@ public abstract class AbstractDatabaseMetaData extends WrapperImpl implements Da
        columnMetaDataList.add(buildTableCatalogMeta(1));       // 1. TABLE_CAT
        resultSet.setColumnMetaDataList(columnMetaDataList);

-        try (Statement stmt = conn.createStatement()) {
+        try (Statement stmt = conn.createStatement();
+             ResultSet rs = stmt.executeQuery("show databases")) {
            List<TSDBResultSetRowData> rowDataList = new ArrayList<>();
-            ResultSet rs = stmt.executeQuery("show databases");
            while (rs.next()) {
                TSDBResultSetRowData rowData = new TSDBResultSetRowData(1);
                rowData.setStringValue(1, rs.getString("name"));
@@ -1168,12 +1171,13 @@ public abstract class AbstractDatabaseMetaData extends WrapperImpl implements Da
            return new EmptyResultSet();

        DatabaseMetaDataResultSet resultSet = new DatabaseMetaDataResultSet();
-        try (Statement stmt = conn.createStatement()) {
+        try (Statement stmt = conn.createStatement();
+             ResultSet rs = stmt.executeQuery("describe " + catalog + "." + table)) {
            // set up ColumnMetaDataList
            resultSet.setColumnMetaDataList(buildGetPrimaryKeysMetadataList());
            // set rowData
            List<TSDBResultSetRowData> rowDataList = new ArrayList<>();
-            ResultSet rs = stmt.executeQuery("describe " + catalog + "." + table);
+
            rs.next();
            TSDBResultSetRowData rowData = new TSDBResultSetRowData(6);
            rowData.setStringValue(1, catalog);
@@ -1217,15 +1221,14 @@ public abstract class AbstractDatabaseMetaData extends WrapperImpl implements Da
    }

    private boolean isAvailableCatalog(Connection connection, String catalog) {
-        try (Statement stmt = connection.createStatement()) {
-            ResultSet databases = stmt.executeQuery("show databases");
+        try (Statement stmt = connection.createStatement();
+             ResultSet databases = stmt.executeQuery("show databases")) {
            while (databases.next()) {
                String dbname = databases.getString("name");
                this.precision = databases.getString("precision");
                if (dbname.equalsIgnoreCase(catalog))
                    return true;
            }
-            databases.close();
        } catch (SQLException e) {
            e.printStackTrace();
        }
@@ -1246,7 +1249,7 @@ public abstract class AbstractDatabaseMetaData extends WrapperImpl implements Da
            resultSet.setColumnMetaDataList(buildGetSuperTablesColumnMetaDataList());
            // set result set row data
            stmt.execute("use " + catalog);
-            ResultSet rs = stmt.executeQuery("show tables like '" + tableNamePattern + "'");
+            try (ResultSet rs = stmt.executeQuery("show tables like '" + tableNamePattern + "'")) {
                List<TSDBResultSetRowData> rowDataList = new ArrayList<>();
                while (rs.next()) {
                    TSDBResultSetRowData rowData = new TSDBResultSetRowData(4);
@@ -1258,6 +1261,7 @@ public abstract class AbstractDatabaseMetaData extends WrapperImpl implements Da
                }
                resultSet.setRowDataList(rowDataList);
            }
+        }
        return resultSet;
    }


--- a/src/connector/jdbc/src/main/java/com/taosdata/jdbc/AbstractStatement.java
+++ b/src/connector/jdbc/src/main/java/com/taosdata/jdbc/AbstractStatement.java
@@ -9,6 +9,7 @@ public abstract class AbstractStatement extends WrapperImpl implements Statement

    protected List<String> batchedArgs;
    private int fetchSize;
+    protected int affectedRows = -1;

    @Override
    public abstract ResultSet executeQuery(String sql) throws SQLException;
@@ -247,6 +248,7 @@ public abstract class AbstractStatement extends WrapperImpl implements Statement
    public boolean getMoreResults(int current) throws SQLException {
        if (isClosed())
            throw TSDBError.createSQLException(TSDBErrorNumbers.ERROR_STATEMENT_CLOSED);
+        this.affectedRows = -1;
        switch (current) {
            case Statement.CLOSE_CURRENT_RESULT:
                return false;

--- a/src/connector/jdbc/src/main/java/com/taosdata/jdbc/TSDBResultSetRowData.java
+++ b/src/connector/jdbc/src/main/java/com/taosdata/jdbc/TSDBResultSetRowData.java
@@ -189,7 +189,7 @@ public class TSDBResultSetRowData {
        long value = (long) obj;
        if (value < 0)
            throw TSDBError.createSQLException(TSDBErrorNumbers.ERROR_NUMERIC_VALUE_OUT_OF_RANGE);
-        return Long.valueOf(value).intValue();
+        return (int) value;
    }



--- a/src/connector/jdbc/src/main/java/com/taosdata/jdbc/TSDBStatement.java
+++ b/src/connector/jdbc/src/main/java/com/taosdata/jdbc/TSDBStatement.java
@@ -23,7 +23,6 @@ public class TSDBStatement extends AbstractStatement {
     * Status of current statement
     */
    private boolean isClosed;
-    private int affectedRows = -1;
    private TSDBConnection connection;
    private TSDBResultSet resultSet;

@@ -85,7 +84,8 @@ public class TSDBStatement extends AbstractStatement {
        long pSql = this.connection.getConnector().executeQuery(sql);
        // if pSql is create/insert/update/delete/alter SQL
        if (this.connection.getConnector().isUpdateQuery(pSql)) {
-            this.affectedRows = this.connection.getConnector().getAffectedRows(pSql);
+            int rows = this.connection.getConnector().getAffectedRows(pSql);
+            this.affectedRows = rows == 0 ? -1 : this.connection.getConnector().getAffectedRows(pSql);
            this.connection.getConnector().freeResultSet(pSql);
            return false;
        }

--- a/src/connector/jdbc/src/main/java/com/taosdata/jdbc/rs/RestfulDriver.java
+++ b/src/connector/jdbc/src/main/java/com/taosdata/jdbc/rs/RestfulDriver.java
@@ -64,9 +64,9 @@ public class RestfulDriver extends AbstractDriver {

        RestfulConnection conn = new RestfulConnection(host, port, props, database, url, token);
        if (database != null && !database.trim().replaceAll("\\s", "").isEmpty()) {
-            Statement stmt = conn.createStatement();
+            try (Statement stmt = conn.createStatement()) {
                stmt.execute("use " + database);
-            stmt.close();
+            }
        }
        return conn;
    }

--- a/src/connector/jdbc/src/main/java/com/taosdata/jdbc/rs/RestfulStatement.java
+++ b/src/connector/jdbc/src/main/java/com/taosdata/jdbc/rs/RestfulStatement.java
@@ -22,7 +22,6 @@ public class RestfulStatement extends AbstractStatement {
    private final RestfulConnection conn;

    private volatile RestfulResultSet resultSet;
-    private volatile int affectedRows;

    public RestfulStatement(RestfulConnection conn, String database) {
        this.conn = conn;
@@ -118,7 +117,7 @@ public class RestfulStatement extends AbstractStatement {
            throw TSDBError.createSQLException(resultJson.getInteger("code"), resultJson.getString("desc"));
        }
        this.resultSet = new RestfulResultSet(database, this, resultJson);
-        this.affectedRows = 0;
+        this.affectedRows = -1;
        return resultSet;
    }

@@ -140,9 +139,10 @@ public class RestfulStatement extends AbstractStatement {
        if (head.size() != 1 || !"affected_rows".equals(head.getString(0)))
            throw TSDBError.createSQLException(TSDBErrorNumbers.ERROR_INVALID_VARIABLE);
        JSONArray data = jsonObject.getJSONArray("data");
-        if (data != null)
-            return data.getJSONArray(0).getInteger(0);
-
+        if (data != null) {
+            int rows = data.getJSONArray(0).getInteger(0);
+            return rows == 0 ? -1 : data.getJSONArray(0).getInteger(0);
+        }
        throw TSDBError.createSQLException(TSDBErrorNumbers.ERROR_INVALID_VARIABLE);
    }


--- a/src/inc/taos.h
+++ b/src/inc/taos.h
@@ -141,6 +141,7 @@ DLL_EXPORT int        taos_stmt_bind_param_batch(TAOS_STMT* stmt, TAOS_MULTI_BIN
 DLL_EXPORT int        taos_stmt_bind_single_param_batch(TAOS_STMT* stmt, TAOS_MULTI_BIND* bind, int colIdx);
 DLL_EXPORT int        taos_stmt_add_batch(TAOS_STMT *stmt);
 DLL_EXPORT int        taos_stmt_execute(TAOS_STMT *stmt);
+DLL_EXPORT int        taos_stmt_affected_rows(TAOS_STMT *stmt);
 DLL_EXPORT TAOS_RES * taos_stmt_use_result(TAOS_STMT *stmt);
 DLL_EXPORT int        taos_stmt_close(TAOS_STMT *stmt);
 DLL_EXPORT char *     taos_stmt_errstr(TAOS_STMT *stmt);

--- a/src/kit/taosdemo/taosdemo.c
+++ b/src/kit/taosdemo/taosdemo.c
@@ -56,6 +56,7 @@

 #define REQ_EXTRA_BUF_LEN   1024
 #define RESP_BUF_LEN        4096
+#define SQL_BUFF_LEN        1024

 extern char configDir[];

@@ -66,6 +67,7 @@ extern char configDir[];
 #define HEAD_BUFF_LEN       TSDB_MAX_COLUMNS*24  // 16*MAX_COLUMNS + (192+32)*2 + insert into ..

 #define BUFFER_SIZE         TSDB_MAX_ALLOWED_SQL_LEN
+#define FETCH_BUFFER_SIZE   100 * TSDB_MAX_ALLOWED_SQL_LEN
 #define COND_BUF_LEN        (BUFFER_SIZE - 30)
 #define COL_BUFFER_LEN      ((TSDB_COL_NAME_LEN + 15) * TSDB_MAX_COLUMNS)

@@ -87,6 +89,7 @@ extern char configDir[];
 #define FLOAT_BUFF_LEN          22
 #define DOUBLE_BUFF_LEN         42
 #define TIMESTAMP_BUFF_LEN      21
+#define PRINT_STAT_INTERVAL     30*1000

 #define MAX_SAMPLES             10000
 #define MAX_NUM_COLUMNS        (TSDB_MAX_COLUMNS - 1)      // exclude first column timestamp
@@ -97,8 +100,10 @@ extern char configDir[];
 #define MAX_QUERY_SQL_COUNT     100

 #define MAX_DATABASE_COUNT      256
-#define INPUT_BUF_LEN           256
+#define MAX_JSON_BUFF           6400000

+#define INPUT_BUF_LEN           256
+#define EXTRA_SQL_LEN           256
 #define TBNAME_PREFIX_LEN       (TSDB_TABLE_NAME_LEN - 20) // 20 characters reserved for seq
 #define SMALL_BUFF_LEN          8
 #define DATATYPE_BUFF_LEN       (SMALL_BUFF_LEN*3)
@@ -109,6 +114,45 @@ extern char configDir[];
 #define DEFAULT_INTERLACE_ROWS  0
 #define DEFAULT_DATATYPE_NUM    1
 #define DEFAULT_CHILDTABLES     10000
+#define DEFAULT_TEST_MODE       0
+#define DEFAULT_METAFILE        NULL
+#define DEFAULT_SQLFILE         NULL
+#define DEFAULT_HOST            "localhost"
+#define DEFAULT_PORT            6030
+#define DEFAULT_IFACE           INTERFACE_BUT
+#define DEFAULT_DATABASE        "test"
+#define DEFAULT_REPLICA         1
+#define DEFAULT_TB_PREFIX       "d"
+#define DEFAULT_ESCAPE_CHAR     false
+#define DEFAULT_USE_METRIC      true
+#define DEFAULT_DROP_DB         true
+#define DEFAULT_AGGR_FUNC       false
+#define DEFAULT_DEBUG           false
+#define DEFAULT_VERBOSE         false
+#define DEFAULT_PERF_STAT       false
+#define DEFAULT_ANS_YES         false
+#define DEFAULT_OUTPUT          "./output.txt"
+#define DEFAULT_SYNC_MODE       0
+#define DEFAULT_DATA_TYPE       {TSDB_DATA_TYPE_FLOAT,TSDB_DATA_TYPE_INT,TSDB_DATA_TYPE_FLOAT}
+#define DEFAULT_DATATYPE        {"FLOAT","INT","FLOAT"}
+#define DEFAULT_BINWIDTH        64
+#define DEFAULT_COL_COUNT       4
+#define DEFAULT_LEN_ONE_ROW     76
+#define DEFAULT_INSERT_INTERVAL 0
+#define DEFAULT_QUERY_TIME      1
+#define DEFAULT_PREPARED_RAND   10000
+#define DEFAULT_REQ_PER_REQ     30000
+#define DEFAULT_INSERT_ROWS     10000
+#define DEFAULT_ABORT           0
+#define DEFAULT_RATIO           0
+#define DEFAULT_DISORDER_RANGE  1000
+#define DEFAULT_METHOD_DEL      1
+#define DEFAULT_TOTAL_INSERT    0
+#define DEFAULT_TOTAL_AFFECT    0
+#define DEFAULT_DEMO_MODE       true
+#define DEFAULT_CREATE_BATCH    10
+#define DEFAULT_SUB_INTERVAL    10000
+#define DEFAULT_QUERY_INTERVAL    10000

 #define STMT_BIND_PARAM_BATCH   1

@@ -147,6 +191,7 @@ enum enum_TAOS_INTERFACE {
    TAOSC_IFACE,
    REST_IFACE,
    STMT_IFACE,
+    SML_IFACE,
    INTERFACE_BUT
 };

@@ -245,6 +290,7 @@ typedef struct SArguments_S {
    uint64_t insert_interval;
    uint64_t timestamp_step;
    int64_t  query_times;
+    int64_t  prepared_rand;
    uint32_t interlaceRows;
    uint32_t reqPerReq;                  // num_of_records_per_req
    uint64_t max_sql_len;
@@ -366,7 +412,7 @@ typedef struct SDataBase_S {
    bool         drop;  // 0: use exists, 1: if exists, drop then new create
    SDbCfg       dbCfg;
    uint64_t     superTblCount;
-    SSuperTable  superTbls[MAX_SUPER_TABLE_COUNT];
+    SSuperTable*  superTbls;
 } SDataBase;

 typedef struct SDbs_S {
@@ -385,12 +431,11 @@ typedef struct SDbs_S {
    uint32_t    threadCount;
    uint32_t    threadCountForCreateTbl;
    uint32_t    dbCount;
-    SDataBase   db[MAX_DB_COUNT];
-
    // statistics
    uint64_t    totalInsertRows;
    uint64_t    totalAffectedRows;

+    SDataBase*  db;
 } SDbs;

 typedef struct SpecifiedQueryInfo_S {
@@ -466,7 +511,7 @@ typedef struct SThreadInfo_S {
    int       threadID;
    char      db_name[TSDB_DB_NAME_LEN];
    uint32_t  time_precision;
-    char      filePath[4096];
+    char      filePath[TSDB_FILENAME_LEN];
    FILE      *fp;
    char      tb_prefix[TSDB_TABLE_NAME_LEN];
    uint64_t  start_table_from;
@@ -504,6 +549,8 @@ typedef struct SThreadInfo_S {
    uint64_t  querySeq;   // sequence number of sql command
    TAOS_SUB*  tsub;

+    char**    lines;
+    int       sockfd;
 } threadInfo;

 #ifdef WINDOWS
@@ -583,8 +630,7 @@ static void prompt();
 static int createDatabasesAndStables();
 static void createChildTables();
 static int queryDbExec(TAOS *taos, char *command, QUERY_TYPE type, bool quiet);
-static int postProceSql(char *host, struct sockaddr_in *pServAddr,
-        uint16_t port, char* sqlstr, threadInfo *pThreadInfo);
+static int postProceSql(char *host, uint16_t port, char* sqlstr, threadInfo *pThreadInfo);
 static int64_t getTSRandTail(int64_t timeStampStep, int32_t seq,
        int disorderRatio, int disorderRange);
 static bool getInfoFromJsonFile(char* file);
@@ -593,12 +639,12 @@ static int regexMatch(const char *s, const char *reg, int cflags);

 /* ************ Global variables ************  */

-int32_t  g_randint[MAX_PREPARED_RAND];
-uint32_t  g_randuint[MAX_PREPARED_RAND];
-int64_t  g_randbigint[MAX_PREPARED_RAND];
-uint64_t  g_randubigint[MAX_PREPARED_RAND];
-float    g_randfloat[MAX_PREPARED_RAND];
-double   g_randdouble[MAX_PREPARED_RAND];
+int32_t*  g_randint;
+uint32_t*  g_randuint;
+int64_t*  g_randbigint;
+uint64_t*  g_randubigint;
+float*    g_randfloat;
+double*   g_randdouble;

 char    *g_randbool_buff = NULL;
 char    *g_randint_buff = NULL;
@@ -622,62 +668,49 @@ char    *g_aggreFunc[] = {"*", "count(*)", "avg(C0)", "sum(C0)",
    "max(C0)", "min(C0)", "first(C0)", "last(C0)"};

 SArguments g_args = {
-    NULL,           // metaFile
-    0,              // test_mode
-    "localhost",    // host
-    6030,           // port
-    INTERFACE_BUT,  // iface
-    "root",         // user
-#ifdef _TD_POWER_
-    "powerdb",      // password
-#elif (_TD_TQ_ == true)
-    "tqueue",       // password
-#elif (_TD_PRO_ == true)
-    "prodb",       // password
-#else
-    "taosdata",     // password
-#endif
-    "test",         // database
-    1,              // replica
-    "d",             // tb_prefix
-    false,           // escapeChar
-    NULL,            // sqlFile
-    true,            // use_metric
-    true,            // drop_database
-    false,           // aggr_func
-    false,           // debug_print
-    false,           // verbose_print
-    false,           // performance statistic print
-    false,           // answer_yes;
-    "./output.txt",  // output_file
-    0,               // mode : sync or async
-    {TSDB_DATA_TYPE_FLOAT,
-    TSDB_DATA_TYPE_INT,
-    TSDB_DATA_TYPE_FLOAT},
-    {
-        "FLOAT",         // dataType
-        "INT",           // dataType
-        "FLOAT",         // dataType. demo mode has 3 columns
-    },
-    64,              // binwidth
-    4,               // columnCount, timestamp + float + int + float
-    20 + FLOAT_BUFF_LEN + INT_BUFF_LEN + FLOAT_BUFF_LEN, // lenOfOneRow
-    DEFAULT_NTHREADS,// nthreads
-    0,               // insert_interval
+    DEFAULT_METAFILE,                      // metaFile
+    DEFAULT_TEST_MODE,                     // test_mode
+    DEFAULT_HOST,                          // host
+    DEFAULT_PORT,                          // port
+    DEFAULT_IFACE,                         // iface
+    TSDB_DEFAULT_USER,                     // user
+    TSDB_DEFAULT_PASS,                     // password
+    DEFAULT_DATABASE,                      // database
+    DEFAULT_REPLICA,                       // replica
+    DEFAULT_TB_PREFIX,                     // tb_prefix
+    DEFAULT_ESCAPE_CHAR,                   // escapeChar
+    DEFAULT_SQLFILE,                       // sqlFile
+    DEFAULT_USE_METRIC,                    // use_metric
+    DEFAULT_DROP_DB,                       // drop_database
+    DEFAULT_AGGR_FUNC,                     // aggr_func
+    DEFAULT_DEBUG,                         // debug_print
+    DEFAULT_VERBOSE,                       // verbose_print
+    DEFAULT_PERF_STAT,                     // performance statistic print
+    DEFAULT_ANS_YES,                       // answer_yes;
+    DEFAULT_OUTPUT,                        // output_file
+    DEFAULT_SYNC_MODE,                     // mode : sync or async
+    DEFAULT_DATA_TYPE,                     // data_type
+    DEFAULT_DATATYPE,                      // dataType
+    DEFAULT_BINWIDTH,                      // binwidth
+    DEFAULT_COL_COUNT,                     // columnCount, timestamp + float + int + float
+    DEFAULT_LEN_ONE_ROW,                   // lenOfOneRow
+    DEFAULT_NTHREADS,                      // nthreads
+    DEFAULT_INSERT_INTERVAL,               // insert_interval
    DEFAULT_TIMESTAMP_STEP,                // timestamp_step
-    1,               // query_times
+    DEFAULT_QUERY_TIME,                    // query_times
+    DEFAULT_PREPARED_RAND,                 // prepared_rand
    DEFAULT_INTERLACE_ROWS,                // interlaceRows;
-    30000,           // reqPerReq
-    (1024*1024),     // max_sql_len
+    DEFAULT_REQ_PER_REQ,                   // reqPerReq
+    TSDB_MAX_ALLOWED_SQL_LEN,              // max_sql_len
    DEFAULT_CHILDTABLES,                   // ntables
-    10000,           // insertRows
-    0,               // abort
-    0,               // disorderRatio
-    1000,            // disorderRange
-    1,               // method_of_delete
-    0,               // totalInsertRows;
-    0,               // totalAffectedRows;
-    true,            // demo_mode;
+    DEFAULT_INSERT_ROWS,                   // insertRows
+    DEFAULT_ABORT,                         // abort
+    DEFAULT_RATIO,                         // disorderRatio
+    DEFAULT_DISORDER_RANGE,                // disorderRange
+    DEFAULT_METHOD_DEL,                    // method_of_delete
+    DEFAULT_TOTAL_INSERT,                  // totalInsertRows;
+    DEFAULT_TOTAL_AFFECT,                  // totalAffectedRows;
+    DEFAULT_DEMO_MODE,                     // demo_mode;
 };

 static SDbs            g_Dbs;
@@ -732,7 +765,7 @@ static FILE *          g_fpOfInsertResult = NULL;

 ///////////////////////////////////////////////////

-static void ERROR_EXIT(const char *msg) { errorPrint("%s", msg); exit(-1); }
+static void ERROR_EXIT(const char *msg) { errorPrint("%s", msg); exit(EXIT_FAILURE); }

 #ifndef TAOSDEMO_COMMIT_SHA1
 #define TAOSDEMO_COMMIT_SHA1 "unknown"
@@ -800,6 +833,8 @@ static void printHelp() {
            "Set the replica parameters of the database, By default use 1, min: 1, max: 3.");
    printf("%s%s%s%s\n", indent, "-m, --table-prefix=TABLEPREFIX", "\t",
            "Table prefix name. By default use 'd'.");
+    printf("%s%s%s%s\n", indent, "-E, --escape-character", "\t",
+            "Use escape character for Both Stable and normmal table name");
    printf("%s%s%s%s\n", indent, "-s, --sql-file=FILE", "\t\t",
            "The select sql file.");
    printf("%s%s%s%s\n", indent, "-N, --normal-table", "\t\t", "Use normal table flag.");
@@ -1052,6 +1087,8 @@ static void parse_args(int argc, char *argv[], SArguments *arguments) {
                    arguments->iface = REST_IFACE;
                } else if (0 == strcasecmp(argv[i+1], "stmt")) {
                    arguments->iface = STMT_IFACE;
+                } else if (0 == strcasecmp(argv[i+1], "sml")) {
+                    arguments->iface = SML_IFACE;
                } else {
                    errorWrongValue(argv[0], "-I", argv[i+1]);
                    exit(EXIT_FAILURE);
@@ -1064,6 +1101,8 @@ static void parse_args(int argc, char *argv[], SArguments *arguments) {
                    arguments->iface = REST_IFACE;
                } else if (0 == strcasecmp((char *)(argv[i] + strlen("--interface=")), "stmt")) {
                    arguments->iface = STMT_IFACE;
+                } else if (0 == strcasecmp((char *)(argv[i] + strlen("--interface=")), "sml")) {
+                    arguments->iface = SML_IFACE;
                } else {
                    errorPrintReqArg3(argv[0], "--interface");
                    exit(EXIT_FAILURE);
@@ -1075,6 +1114,8 @@ static void parse_args(int argc, char *argv[], SArguments *arguments) {
                    arguments->iface = REST_IFACE;
                } else if (0 == strcasecmp((char *)(argv[i] + strlen("-I")), "stmt")) {
                    arguments->iface = STMT_IFACE;
+                } else if (0 == strcasecmp((char *)(argv[i] + strlen("-I")), "sml")) {
+                    arguments->iface = SML_IFACE;
                } else {
                    errorWrongValue(argv[0], "-I",
                            (char *)(argv[i] + strlen("-I")));
@@ -1091,6 +1132,8 @@ static void parse_args(int argc, char *argv[], SArguments *arguments) {
                    arguments->iface = REST_IFACE;
                } else if (0 == strcasecmp(argv[i+1], "stmt")) {
                    arguments->iface = STMT_IFACE;
+                } else if (0 == strcasecmp(argv[i+1], "sml")) {
+                    arguments->iface = SML_IFACE;
                } else {
                    errorWrongValue(argv[0], "--interface", argv[i+1]);
                    exit(EXIT_FAILURE);
@@ -1893,12 +1936,6 @@ static void parse_args(int argc, char *argv[], SArguments *arguments) {
                        arguments->disorderRatio, 50);
                arguments->disorderRatio = 50;
            }
-
-            if (arguments->disorderRatio < 0) {
-                errorPrint("Invalid disorder ratio %d, will be set to %d\n",
-                        arguments->disorderRatio, 0);
-                arguments->disorderRatio = 0;
-            }
        } else if ((0 == strncmp(argv[i], "-a", strlen("-a")))
                || (0 == strncmp(argv[i], "--replica",
                        strlen("--replica")))) {
@@ -2003,7 +2040,7 @@ static void parse_args(int argc, char *argv[], SArguments *arguments) {
    }
    g_args.columnCount = columnCount;

-    g_args.lenOfOneRow = 20; // timestamp
+    g_args.lenOfOneRow = TIMESTAMP_BUFF_LEN; // timestamp
    for (int c = 0; c < g_args.columnCount; c++) {
        switch(g_args.data_type[c]) {
            case TSDB_DATA_TYPE_BINARY:
@@ -2110,7 +2147,7 @@ static void tmfclose(FILE *fp) {
    }
 }

-static void tmfree(char *buf) {
+static void tmfree(void *buf) {
    if (NULL != buf) {
        free(buf);
        buf = NULL;
@@ -2165,7 +2202,7 @@ static void fetchResult(TAOS_RES *res, threadInfo* pThreadInfo) {
    int         num_fields = taos_field_count(res);
    TAOS_FIELD *fields     = taos_fetch_fields(res);

-    char* databuf = (char*) calloc(1, 100*1024*1024);
+    char* databuf = (char*) calloc(1, FETCH_BUFFER_SIZE);
    if (databuf == NULL) {
        errorPrint2("%s() LN%d, failed to malloc, warning: save result to file slowly!\n",
                __func__, __LINE__);
@@ -2176,11 +2213,11 @@ static void fetchResult(TAOS_RES *res, threadInfo* pThreadInfo) {

    // fetch the records row by row
    while((row = taos_fetch_row(res))) {
-        if (totalLen >= (100*1024*1024 - HEAD_BUFF_LEN*2)) {
+        if (totalLen >= (FETCH_BUFFER_SIZE - HEAD_BUFF_LEN*2)) {
            if (strlen(pThreadInfo->filePath) > 0)
                appendResultBufToFile(databuf, pThreadInfo);
            totalLen = 0;
-            memset(databuf, 0, 100*1024*1024);
+            memset(databuf, 0, FETCH_BUFFER_SIZE);
        }
        num_rows++;
        char  temp[HEAD_BUFF_LEN] = {0};
@@ -2218,7 +2255,7 @@ static void selectAndGetResult(

    } else if (0 == strncasecmp(g_queryInfo.queryMode, "rest", strlen("rest"))) {
        int retCode = postProceSql(
-                g_queryInfo.host, &(g_queryInfo.serv_addr), g_queryInfo.port,
+                g_queryInfo.host, g_queryInfo.port,
                command,
                pThreadInfo);
        if (0 != retCode) {
@@ -2234,157 +2271,157 @@ static void selectAndGetResult(
 static char *rand_bool_str() {
    static int cursor;
    cursor++;
-    if (cursor > (MAX_PREPARED_RAND - 1)) cursor = 0;
-    return g_randbool_buff + ((cursor % MAX_PREPARED_RAND) * BOOL_BUFF_LEN);
+    if (cursor > (g_args.prepared_rand - 1)) cursor = 0;
+    return g_randbool_buff + ((cursor % g_args.prepared_rand) * BOOL_BUFF_LEN);
 }

 static int32_t rand_bool() {
    static int cursor;
    cursor++;
-    if (cursor > (MAX_PREPARED_RAND - 1)) cursor = 0;
-    return g_randint[cursor % MAX_PREPARED_RAND] % 2;
+    if (cursor > (g_args.prepared_rand - 1)) cursor = 0;
+    return g_randint[cursor % g_args.prepared_rand] % TSDB_DATA_BOOL_NULL;
 }

 static char *rand_tinyint_str()
 {
    static int cursor;
    cursor++;
-    if (cursor > (MAX_PREPARED_RAND - 1)) cursor = 0;
+    if (cursor > (g_args.prepared_rand - 1)) cursor = 0;
    return g_randtinyint_buff +
-        ((cursor % MAX_PREPARED_RAND) * TINYINT_BUFF_LEN);
+        ((cursor % g_args.prepared_rand) * TINYINT_BUFF_LEN);
 }

 static int32_t rand_tinyint()
 {
    static int cursor;
    cursor++;
-    if (cursor > (MAX_PREPARED_RAND - 1)) cursor = 0;
-    return g_randint[cursor % MAX_PREPARED_RAND] % 128;
+    if (cursor > (g_args.prepared_rand - 1)) cursor = 0;
+    return g_randint[cursor % g_args.prepared_rand] % TSDB_DATA_TINYINT_NULL;
 }

 static char *rand_utinyint_str()
 {
    static int cursor;
    cursor++;
-    if (cursor > (MAX_PREPARED_RAND - 1)) cursor = 0;
+    if (cursor > (g_args.prepared_rand - 1)) cursor = 0;
    return g_randutinyint_buff +
-        ((cursor % MAX_PREPARED_RAND) * TINYINT_BUFF_LEN);
+        ((cursor % g_args.prepared_rand) * TINYINT_BUFF_LEN);
 }

 static int32_t rand_utinyint()
 {
    static int cursor;
    cursor++;
-    if (cursor > (MAX_PREPARED_RAND - 1)) cursor = 0;
-    return g_randuint[cursor % MAX_PREPARED_RAND] % 255;
+    if (cursor > (g_args.prepared_rand - 1)) cursor = 0;
+    return g_randuint[cursor % g_args.prepared_rand] % TSDB_DATA_UTINYINT_NULL;
 }

 static char *rand_smallint_str()
 {
    static int cursor;
    cursor++;
-    if (cursor > (MAX_PREPARED_RAND - 1)) cursor = 0;
+    if (cursor > (g_args.prepared_rand - 1)) cursor = 0;
    return g_randsmallint_buff +
-        ((cursor % MAX_PREPARED_RAND) * SMALLINT_BUFF_LEN);
+        ((cursor % g_args.prepared_rand) * SMALLINT_BUFF_LEN);
 }

 static int32_t rand_smallint()
 {
    static int cursor;
    cursor++;
-    if (cursor > (MAX_PREPARED_RAND - 1)) cursor = 0;
-    return g_randint[cursor % MAX_PREPARED_RAND] % 32768;
+    if (cursor > (g_args.prepared_rand - 1)) cursor = 0;
+    return g_randint[cursor % g_args.prepared_rand] % TSDB_DATA_SMALLINT_NULL;
 }

 static char *rand_usmallint_str()
 {
    static int cursor;
    cursor++;
-    if (cursor > (MAX_PREPARED_RAND - 1)) cursor = 0;
+    if (cursor > (g_args.prepared_rand - 1)) cursor = 0;
    return g_randusmallint_buff +
-        ((cursor % MAX_PREPARED_RAND) * SMALLINT_BUFF_LEN);
+        ((cursor % g_args.prepared_rand) * SMALLINT_BUFF_LEN);
 }

 static int32_t rand_usmallint()
 {
    static int cursor;
    cursor++;
-    if (cursor > (MAX_PREPARED_RAND - 1)) cursor = 0;
-    return g_randuint[cursor % MAX_PREPARED_RAND] % 65535;
+    if (cursor > (g_args.prepared_rand - 1)) cursor = 0;
+    return g_randuint[cursor % g_args.prepared_rand] % TSDB_DATA_USMALLINT_NULL;
 }

 static char *rand_int_str()
 {
    static int cursor;
    cursor++;
-    if (cursor > (MAX_PREPARED_RAND - 1)) cursor = 0;
-    return g_randint_buff + ((cursor % MAX_PREPARED_RAND) * INT_BUFF_LEN);
+    if (cursor > (g_args.prepared_rand - 1)) cursor = 0;
+    return g_randint_buff + ((cursor % g_args.prepared_rand) * INT_BUFF_LEN);
 }

 static int32_t rand_int()
 {
    static int cursor;
    cursor++;
-    if (cursor > (MAX_PREPARED_RAND - 1)) cursor = 0;
-    return g_randint[cursor % MAX_PREPARED_RAND];
+    if (cursor > (g_args.prepared_rand - 1)) cursor = 0;
+    return g_randint[cursor % g_args.prepared_rand];
 }

 static char *rand_uint_str()
 {
    static int cursor;
    cursor++;
-    if (cursor > (MAX_PREPARED_RAND - 1)) cursor = 0;
-    return g_randuint_buff + ((cursor % MAX_PREPARED_RAND) * INT_BUFF_LEN);
+    if (cursor > (g_args.prepared_rand - 1)) cursor = 0;
+    return g_randuint_buff + ((cursor % g_args.prepared_rand) * INT_BUFF_LEN);
 }

 static int32_t rand_uint()
 {
    static int cursor;
    cursor++;
-    if (cursor > (MAX_PREPARED_RAND - 1)) cursor = 0;
-    return g_randuint[cursor % MAX_PREPARED_RAND];
+    if (cursor > (g_args.prepared_rand - 1)) cursor = 0;
+    return g_randuint[cursor % g_args.prepared_rand];
 }

 static char *rand_bigint_str()
 {
    static int cursor;
    cursor++;
-    if (cursor > (MAX_PREPARED_RAND - 1)) cursor = 0;
+    if (cursor > (g_args.prepared_rand - 1)) cursor = 0;
    return g_randbigint_buff +
-        ((cursor % MAX_PREPARED_RAND) * BIGINT_BUFF_LEN);
+        ((cursor % g_args.prepared_rand) * BIGINT_BUFF_LEN);
 }

 static int64_t rand_bigint()
 {
    static int cursor;
    cursor++;
-    if (cursor > (MAX_PREPARED_RAND - 1)) cursor = 0;
-    return g_randbigint[cursor % MAX_PREPARED_RAND];
+    if (cursor > (g_args.prepared_rand - 1)) cursor = 0;
+    return g_randbigint[cursor % g_args.prepared_rand];
 }

 static char *rand_ubigint_str()
 {
    static int cursor;
    cursor++;
-    if (cursor > (MAX_PREPARED_RAND - 1)) cursor = 0;
+    if (cursor > (g_args.prepared_rand - 1)) cursor = 0;
    return g_randubigint_buff +
-        ((cursor % MAX_PREPARED_RAND) * BIGINT_BUFF_LEN);
+        ((cursor % g_args.prepared_rand) * BIGINT_BUFF_LEN);
 }

 static int64_t rand_ubigint()
 {
    static int cursor;
    cursor++;
-    if (cursor > (MAX_PREPARED_RAND - 1)) cursor = 0;
-    return g_randubigint[cursor % MAX_PREPARED_RAND];
+    if (cursor > (g_args.prepared_rand - 1)) cursor = 0;
+    return g_randubigint[cursor % g_args.prepared_rand];
 }

 static char *rand_float_str()
 {
    static int cursor;
    cursor++;
-    if (cursor > (MAX_PREPARED_RAND - 1)) cursor = 0;
-    return g_randfloat_buff + ((cursor % MAX_PREPARED_RAND) * FLOAT_BUFF_LEN);
+    if (cursor > (g_args.prepared_rand - 1)) cursor = 0;
+    return g_randfloat_buff + ((cursor % g_args.prepared_rand) * FLOAT_BUFF_LEN);
 }


@@ -2392,58 +2429,58 @@ static float rand_float()
 {
    static int cursor;
    cursor++;
-    if (cursor > (MAX_PREPARED_RAND - 1)) cursor = 0;
-    return g_randfloat[cursor % MAX_PREPARED_RAND];
+    if (cursor > (g_args.prepared_rand - 1)) cursor = 0;
+    return g_randfloat[cursor % g_args.prepared_rand];
 }

 static char *demo_current_float_str()
 {
    static int cursor;
    cursor++;
-    if (cursor > (MAX_PREPARED_RAND - 1)) cursor = 0;
+    if (cursor > (g_args.prepared_rand - 1)) cursor = 0;
    return g_rand_current_buff +
-        ((cursor % MAX_PREPARED_RAND) * FLOAT_BUFF_LEN);
+        ((cursor % g_args.prepared_rand) * FLOAT_BUFF_LEN);
 }

 static float UNUSED_FUNC demo_current_float()
 {
    static int cursor;
    cursor++;
-    if (cursor > (MAX_PREPARED_RAND - 1)) cursor = 0;
-    return (float)(9.8 + 0.04 * (g_randint[cursor % MAX_PREPARED_RAND] % 10)
-            + g_randfloat[cursor % MAX_PREPARED_RAND]/1000000000);
+    if (cursor > (g_args.prepared_rand - 1)) cursor = 0;
+    return (float)(9.8 + 0.04 * (g_randint[cursor % g_args.prepared_rand] % 10)
+            + g_randfloat[cursor % g_args.prepared_rand]/1000000000);
 }

 static char *demo_voltage_int_str()
 {
    static int cursor;
    cursor++;
-    if (cursor > (MAX_PREPARED_RAND - 1)) cursor = 0;
+    if (cursor > (g_args.prepared_rand - 1)) cursor = 0;
    return g_rand_voltage_buff +
-        ((cursor % MAX_PREPARED_RAND) * INT_BUFF_LEN);
+        ((cursor % g_args.prepared_rand) * INT_BUFF_LEN);
 }

 static int32_t UNUSED_FUNC demo_voltage_int()
 {
    static int cursor;
    cursor++;
-    if (cursor > (MAX_PREPARED_RAND - 1)) cursor = 0;
-    return 215 + g_randint[cursor % MAX_PREPARED_RAND] % 10;
+    if (cursor > (g_args.prepared_rand - 1)) cursor = 0;
+    return 215 + g_randint[cursor % g_args.prepared_rand] % 10;
 }

 static char *demo_phase_float_str() {
    static int cursor;
    cursor++;
-    if (cursor > (MAX_PREPARED_RAND - 1)) cursor = 0;
-    return g_rand_phase_buff + ((cursor % MAX_PREPARED_RAND) * FLOAT_BUFF_LEN);
+    if (cursor > (g_args.prepared_rand - 1)) cursor = 0;
+    return g_rand_phase_buff + ((cursor % g_args.prepared_rand) * FLOAT_BUFF_LEN);
 }

 static float UNUSED_FUNC demo_phase_float() {
    static int cursor;
    cursor++;
-    if (cursor > (MAX_PREPARED_RAND - 1)) cursor = 0;
-    return (float)((115 + g_randint[cursor % MAX_PREPARED_RAND] % 10
-                + g_randfloat[cursor % MAX_PREPARED_RAND]/1000000000)/360);
+    if (cursor > (g_args.prepared_rand - 1)) cursor = 0;
+    return (float)((115 + g_randint[cursor % g_args.prepared_rand] % 10
+                + g_randfloat[cursor % g_args.prepared_rand]/1000000000)/360);
 }

 #if 0
@@ -2471,7 +2508,7 @@ static void rand_string(char *str, int size) {
        //--size;
        int n;
        for (n = 0; n < size; n++) {
-            int key = abs(rand_tinyint()) % (int)(sizeof(charset) - 1);
+            int key = abs(taosRandom()) % (int)(sizeof(charset) - 1);
            str[n] = charset[key];
        }
        str[n] = 0;
@@ -2482,7 +2519,7 @@ static char *rand_double_str()
 {
    static int cursor;
    cursor++;
-    if (cursor > (MAX_PREPARED_RAND - 1)) cursor = 0;
+    if (cursor > (g_args.prepared_rand - 1)) cursor = 0;
    return g_randdouble_buff + (cursor * DOUBLE_BUFF_LEN);
 }

@@ -2490,42 +2527,54 @@ static double rand_double()
 {
    static int cursor;
    cursor++;
-    cursor = cursor % MAX_PREPARED_RAND;
+    cursor = cursor % g_args.prepared_rand;
    return g_randdouble[cursor];
 }

 static void init_rand_data() {

-    g_randint_buff = calloc(1, INT_BUFF_LEN * MAX_PREPARED_RAND);
+    g_randint_buff = calloc(1, INT_BUFF_LEN * g_args.prepared_rand);
    assert(g_randint_buff);
-    g_rand_voltage_buff = calloc(1, INT_BUFF_LEN * MAX_PREPARED_RAND);
+    g_rand_voltage_buff = calloc(1, INT_BUFF_LEN * g_args.prepared_rand);
    assert(g_rand_voltage_buff);
-    g_randbigint_buff = calloc(1, BIGINT_BUFF_LEN * MAX_PREPARED_RAND);
+    g_randbigint_buff = calloc(1, BIGINT_BUFF_LEN * g_args.prepared_rand);
    assert(g_randbigint_buff);
-    g_randsmallint_buff = calloc(1, SMALLINT_BUFF_LEN * MAX_PREPARED_RAND);
+    g_randsmallint_buff = calloc(1, SMALLINT_BUFF_LEN * g_args.prepared_rand);
    assert(g_randsmallint_buff);
-    g_randtinyint_buff = calloc(1, TINYINT_BUFF_LEN * MAX_PREPARED_RAND);
+    g_randtinyint_buff = calloc(1, TINYINT_BUFF_LEN * g_args.prepared_rand);
    assert(g_randtinyint_buff);
-    g_randbool_buff = calloc(1, BOOL_BUFF_LEN * MAX_PREPARED_RAND);
+    g_randbool_buff = calloc(1, BOOL_BUFF_LEN * g_args.prepared_rand);
    assert(g_randbool_buff);
-    g_randfloat_buff = calloc(1, FLOAT_BUFF_LEN * MAX_PREPARED_RAND);
+    g_randfloat_buff = calloc(1, FLOAT_BUFF_LEN * g_args.prepared_rand);
    assert(g_randfloat_buff);
-    g_rand_current_buff = calloc(1, FLOAT_BUFF_LEN * MAX_PREPARED_RAND);
+    g_rand_current_buff = calloc(1, FLOAT_BUFF_LEN * g_args.prepared_rand);
    assert(g_rand_current_buff);
-    g_rand_phase_buff = calloc(1, FLOAT_BUFF_LEN * MAX_PREPARED_RAND);
+    g_rand_phase_buff = calloc(1, FLOAT_BUFF_LEN * g_args.prepared_rand);
    assert(g_rand_phase_buff);
-    g_randdouble_buff = calloc(1, DOUBLE_BUFF_LEN * MAX_PREPARED_RAND);
+    g_randdouble_buff = calloc(1, DOUBLE_BUFF_LEN * g_args.prepared_rand);
    assert(g_randdouble_buff);
-    g_randuint_buff = calloc(1, INT_BUFF_LEN * MAX_PREPARED_RAND);
+    g_randuint_buff = calloc(1, INT_BUFF_LEN * g_args.prepared_rand);
    assert(g_randuint_buff);
-    g_randutinyint_buff = calloc(1, TINYINT_BUFF_LEN * MAX_PREPARED_RAND);
+    g_randutinyint_buff = calloc(1, TINYINT_BUFF_LEN * g_args.prepared_rand);
    assert(g_randutinyint_buff);
-    g_randusmallint_buff = calloc(1, SMALLINT_BUFF_LEN * MAX_PREPARED_RAND);
+    g_randusmallint_buff = calloc(1, SMALLINT_BUFF_LEN * g_args.prepared_rand);
    assert(g_randusmallint_buff);
-    g_randubigint_buff = calloc(1, BIGINT_BUFF_LEN * MAX_PREPARED_RAND);
+    g_randubigint_buff = calloc(1, BIGINT_BUFF_LEN * g_args.prepared_rand);
    assert(g_randubigint_buff);
-
-    for (int i = 0; i < MAX_PREPARED_RAND; i++) {
+    g_randint = calloc(1, sizeof(int32_t) * g_args.prepared_rand);
+    assert(g_randint);
+    g_randuint = calloc(1, sizeof(uint32_t) * g_args.prepared_rand);
+    assert(g_randuint);
+    g_randbigint = calloc(1, sizeof(int64_t) * g_args.prepared_rand);
+    assert(g_randbigint);
+    g_randubigint = calloc(1, sizeof(uint64_t) * g_args.prepared_rand);
+    assert(g_randubigint);
+    g_randfloat = calloc(1, sizeof(float) * g_args.prepared_rand);
+    assert(g_randfloat);
+    g_randdouble = calloc(1, sizeof(double) * g_args.prepared_rand);
+    assert(g_randdouble);
+
+    for (int i = 0; i < g_args.prepared_rand; i++) {
        g_randint[i] = (int)(taosRandom() % RAND_MAX - (RAND_MAX >> 1));
        g_randuint[i] = (int)(taosRandom());
        sprintf(g_randint_buff + i * INT_BUFF_LEN, "%d",
@@ -2602,7 +2651,8 @@ static int printfInsertMeta() {
        // first time if no iface specified
        printf("interface:                  \033[33m%s\033[0m\n",
                (g_args.iface==TAOSC_IFACE)?"taosc":
-                (g_args.iface==REST_IFACE)?"rest":"stmt");
+                (g_args.iface==REST_IFACE)?"rest":
+                (g_args.iface==STMT_IFACE)?"stmt":"sml");
    }

    printf("host:                       \033[33m%s:%u\033[0m\n",
@@ -2728,7 +2778,8 @@ static int printfInsertMeta() {
                        g_Dbs.db[i].superTbls[j].dataSource);
                printf("      iface:             \033[33m%s\033[0m\n",
                        (g_Dbs.db[i].superTbls[j].iface==TAOSC_IFACE)?"taosc":
-                        (g_Dbs.db[i].superTbls[j].iface==REST_IFACE)?"rest":"stmt");
+                        (g_Dbs.db[i].superTbls[j].iface==REST_IFACE)?"rest":
+                        (g_Dbs.db[i].superTbls[j].iface==STMT_IFACE)?"stmt":"sml");
                if (g_Dbs.db[i].superTbls[j].childTblLimit > 0) {
                    printf("      childTblLimit:     \033[33m%"PRId64"\033[0m\n",
                            g_Dbs.db[i].superTbls[j].childTblLimit);
@@ -2927,7 +2978,8 @@ static void printfInsertMetaToFile(FILE* fp) {
                    g_Dbs.db[i].superTbls[j].dataSource);
            fprintf(fp, "      iface:             %s\n",
                    (g_Dbs.db[i].superTbls[j].iface==TAOSC_IFACE)?"taosc":
-                    (g_Dbs.db[i].superTbls[j].iface==REST_IFACE)?"rest":"stmt");
+                    (g_Dbs.db[i].superTbls[j].iface==REST_IFACE)?"rest":
+                    (g_Dbs.db[i].superTbls[j].iface==STMT_IFACE)?"stmt":"sml");
            fprintf(fp, "      insertRows:        %"PRId64"\n",
                    g_Dbs.db[i].superTbls[j].insertRows);
            fprintf(fp, "      interlace rows:    %u\n",
@@ -3344,7 +3396,7 @@ static void printfDbInfoForQueryToFile(

 static void printfQuerySystemInfo(TAOS * taos) {
    char filename[MAX_FILE_NAME_LEN] = {0};
-    char buffer[1024] = {0};
+    char buffer[SQL_BUFF_LEN] = {0};
    TAOS_RES* res;

    time_t t;
@@ -3383,12 +3435,12 @@ static void printfQuerySystemInfo(TAOS * taos) {
        printfDbInfoForQueryToFile(filename, dbInfos[i], i);

        // show db.vgroups
-        snprintf(buffer, 1024, "show %s.vgroups;", dbInfos[i]->name);
+        snprintf(buffer, SQL_BUFF_LEN, "show %s.vgroups;", dbInfos[i]->name);
        res = taos_query(taos, buffer);
        xDumpResultToFile(filename, res);

        // show db.stables
-        snprintf(buffer, 1024, "show %s.stables;", dbInfos[i]->name);
+        snprintf(buffer, SQL_BUFF_LEN, "show %s.stables;", dbInfos[i]->name);
        res = taos_query(taos, buffer);
        xDumpResultToFile(filename, res);
        free(dbInfos[i]);
@@ -3397,7 +3449,7 @@ static void printfQuerySystemInfo(TAOS * taos) {
    free(dbInfos);
 }

-static int postProceSql(char *host, struct sockaddr_in *pServAddr, uint16_t port,
+static int postProceSql(char *host, uint16_t port,
        char* sqlstr, threadInfo *pThreadInfo)
 {
    char *req_fmt = "POST %s HTTP/1.1\r\nHost: %s:%d\r\nAccept: */*\r\nAuthorization: Basic %s\r\nContent-Length: %d\r\nContent-Type: application/x-www-form-urlencoded\r\n\r\n%s";
@@ -3435,29 +3487,6 @@ static int postProceSql(char *host, struct sockaddr_in *pServAddr, uint16_t port
    size_t encoded_len = 4 * ((userpass_buf_len +2) / 3);

    char base64_buf[INPUT_BUF_LEN];
-#ifdef WINDOWS
-    WSADATA wsaData;
-    WSAStartup(MAKEWORD(2, 2), &wsaData);
-    SOCKET sockfd;
-#else
-    int sockfd;
-#endif
-    sockfd = socket(AF_INET, SOCK_STREAM, 0);
-    if (sockfd < 0) {
-#ifdef WINDOWS
-        errorPrint( "Could not create socket : %d" , WSAGetLastError());
-#endif
-        debugPrint("%s() LN%d, sockfd=%d\n", __func__, __LINE__, sockfd);
-        free(request_buf);
-        ERROR_EXIT("opening socket");
-    }
-
-    int retConn = connect(sockfd, (struct sockaddr *)pServAddr, sizeof(struct sockaddr));
-    debugPrint("%s() LN%d connect() return %d\n", __func__, __LINE__, retConn);
-    if (retConn < 0) {
-        free(request_buf);
-        ERROR_EXIT("connecting");
-    }

    memset(base64_buf, 0, INPUT_BUF_LEN);

@@ -3497,9 +3526,9 @@ static int postProceSql(char *host, struct sockaddr_in *pServAddr, uint16_t port
    sent = 0;
    do {
 #ifdef WINDOWS
-        bytes = send(sockfd, request_buf + sent, req_str_len - sent, 0);
+        bytes = send(pThreadInfo->sockfd, request_buf + sent, req_str_len - sent, 0);
 #else
-        bytes = write(sockfd, request_buf + sent, req_str_len - sent);
+        bytes = write(pThreadInfo->sockfd, request_buf + sent, req_str_len - sent);
 #endif
        if (bytes < 0)
            ERROR_EXIT("writing message to socket");
@@ -3511,12 +3540,18 @@ static int postProceSql(char *host, struct sockaddr_in *pServAddr, uint16_t port
    memset(response_buf, 0, RESP_BUF_LEN);
    resp_len = sizeof(response_buf) - 1;
    received = 0;
+
+    char resEncodingChunk[] = "Encoding: chunked";
+    char resHttp[] = "HTTP/1.1 ";
+    char resHttpOk[] = "HTTP/1.1 200 OK";
+
    do {
 #ifdef WINDOWS
-        bytes = recv(sockfd, response_buf + received, resp_len - received, 0);
+        bytes = recv(pThreadInfo->sockfds, response_buf + received, resp_len - received, 0);
 #else
-        bytes = read(sockfd, response_buf + received, resp_len - received);
+        bytes = read(pThreadInfo->sockfd, response_buf + received, resp_len - received);
 #endif
+        verbosePrint("%s() LN%d: bytes:%d\n", __func__, __LINE__, bytes);
        if (bytes < 0) {
            free(request_buf);
            ERROR_EXIT("reading response from socket");
@@ -3524,6 +3559,19 @@ static int postProceSql(char *host, struct sockaddr_in *pServAddr, uint16_t port
        if (bytes == 0)
            break;
        received += bytes;
+
+        verbosePrint("%s() LN%d: received:%d resp_len:%d, response_buf:\n%s\n",
+                __func__, __LINE__, received, resp_len, response_buf);
+
+        if (((NULL != strstr(response_buf, resEncodingChunk))
+                    && (NULL != strstr(response_buf, resHttp)))
+                || ((NULL != strstr(response_buf, resHttpOk))
+                    && (NULL != strstr(response_buf, "\"status\":")))) {
+            debugPrint(
+                    "%s() LN%d: received:%d resp_len:%d, response_buf:\n%s\n",
+                    __func__, __LINE__, received, resp_len, response_buf);
+            break;
+        } 
    } while(received < resp_len);

    if (received == resp_len) {
@@ -3532,20 +3580,18 @@ static int postProceSql(char *host, struct sockaddr_in *pServAddr, uint16_t port
    }

    response_buf[RESP_BUF_LEN - 1] = '\0';
-    printf("Response:\n%s\n", response_buf);

    if (strlen(pThreadInfo->filePath) > 0) {
        appendResultBufToFile(response_buf, pThreadInfo);
    }

    free(request_buf);
-#ifdef WINDOWS
-    closesocket(sockfd);
-    WSACleanup();
-#else
-    close(sockfd);
-#endif

+    if (NULL == strstr(response_buf, resHttpOk)) {
+        errorPrint("%s() LN%d, Response:\n%s\n",
+                __func__, __LINE__, response_buf);
+        return -1;
+    }
    return 0;
 }

@@ -3737,7 +3783,7 @@ static int calcRowLen(SSuperTable*  superTbls) {
        }
    }

-    superTbls->lenOfOneRow = lenOfOneRow + 20; // timestamp
+    superTbls->lenOfOneRow = lenOfOneRow + TIMESTAMP_BUFF_LEN; // timestamp

    int tagIndex;
    int lenOfTagOfOneRow = 0;
@@ -3789,9 +3835,9 @@ static int calcRowLen(SSuperTable*  superTbls) {

 static int getChildNameOfSuperTableWithLimitAndOffset(TAOS * taos,
        char* dbName, char* stbName, char** childTblNameOfSuperTbl,
-        int64_t* childTblCountOfSuperTbl, int64_t limit, uint64_t offset) {
+        int64_t* childTblCountOfSuperTbl, int64_t limit, uint64_t offset, bool escapChar) {

-    char command[1024] = "\0";
+    char command[SQL_BUFF_LEN] = "\0";
    char limitBuf[100] = "\0";

    TAOS_RES * res;
@@ -3803,20 +3849,20 @@ static int getChildNameOfSuperTableWithLimitAndOffset(TAOS * taos,
        limit, offset);

    //get all child table name use cmd: select tbname from superTblName;
-    snprintf(command, 1024, "select tbname from %s.%s %s",
-            dbName, stbName, limitBuf);
+    snprintf(command, SQL_BUFF_LEN, escapChar ? "select tbname from %s.`%s` %s" :
+            "select tbname from %s.%s %s", dbName, stbName, limitBuf);

    res = taos_query(taos, command);
    int32_t code = taos_errno(res);
    if (code != 0) {
        taos_free_result(res);
        taos_close(taos);
-        errorPrint2("%s() LN%d, failed to run command %s\n",
-                __func__, __LINE__, command);
+        errorPrint2("%s() LN%d, failed to run command %s, reason: %s\n",
+                __func__, __LINE__, command, taos_errstr(res));
        exit(EXIT_FAILURE);
    }

-    int64_t childTblCount = (limit < 0)?10000:limit;
+    int64_t childTblCount = (limit < 0)?DEFAULT_CHILDTABLES:limit;
    int64_t count = 0;
    if (childTblName == NULL) {
        childTblName = (char*)calloc(1, childTblCount * TSDB_TABLE_NAME_LEN);
@@ -3875,23 +3921,23 @@ static int getAllChildNameOfSuperTable(TAOS * taos, char* dbName,

    return getChildNameOfSuperTableWithLimitAndOffset(taos, dbName, stbName,
            childTblNameOfSuperTbl, childTblCountOfSuperTbl,
-            -1, 0);
+            -1, 0, false);
 }

 static int getSuperTableFromServer(TAOS * taos, char* dbName,
        SSuperTable*  superTbls) {

-    char command[1024] = "\0";
+    char command[SQL_BUFF_LEN] = "\0";
    TAOS_RES * res;
    TAOS_ROW row = NULL;
    int count = 0;

    //get schema use cmd: describe superTblName;
-    snprintf(command, 1024, "describe %s.%s", dbName, superTbls->stbName);
+    snprintf(command, SQL_BUFF_LEN, "describe %s.%s", dbName, superTbls->stbName);
    res = taos_query(taos, command);
    int32_t code = taos_errno(res);
    if (code != 0) {
-        printf("failed to run command %s\n", command);
+        printf("failed to run command %s, reason: %s\n", command, taos_errstr(res));
        taos_free_result(res);
        return -1;
    }
@@ -4217,10 +4263,10 @@ static int createSuperTable(
        }
    }

-    superTbl->lenOfOneRow = lenOfOneRow + 20; // timestamp
+    superTbl->lenOfOneRow = lenOfOneRow + TIMESTAMP_BUFF_LEN; // timestamp

    // save for creating child table
-    superTbl->colsOfCreateChildTable = (char*)calloc(len+20, 1);
+    superTbl->colsOfCreateChildTable = (char*)calloc(len+TIMESTAMP_BUFF_LEN, 1);
    if (NULL == superTbl->colsOfCreateChildTable) {
        taos_close(taos);
        free(command);
@@ -4229,7 +4275,7 @@ static int createSuperTable(
        exit(EXIT_FAILURE);
    }

-    snprintf(superTbl->colsOfCreateChildTable, len+20, "(ts timestamp%s)", cols);
+    snprintf(superTbl->colsOfCreateChildTable, len+TIMESTAMP_BUFF_LEN, "(ts timestamp%s)", cols);
    verbosePrint("%s() LN%d: %s\n",
            __func__, __LINE__, superTbl->colsOfCreateChildTable);

@@ -4332,7 +4378,10 @@ static int createSuperTable(

    superTbl->lenOfTagOfOneRow = lenOfTagOfOneRow;

+    
    snprintf(command, BUFFER_SIZE,
+        superTbl->escapeChar ?
+        "CREATE TABLE IF NOT EXISTS %s.`%s` (ts TIMESTAMP%s) TAGS %s":
        "CREATE TABLE IF NOT EXISTS %s.%s (ts TIMESTAMP%s) TAGS %s",
        dbName, superTbl->stbName, cols, tags);
    if (0 != queryDbExec(taos, command, NO_INSERT_TYPE, false)) {
@@ -4461,6 +4510,10 @@ int createDatabasesAndStables(char *command) {
        int validStbCount = 0;

        for (uint64_t j = 0; j < g_Dbs.db[i].superTblCount; j++) {
+            if (g_Dbs.db[i].superTbls[j].iface == SML_IFACE) {
+                goto skip;
+            }
+            
            sprintf(command, "describe %s.%s;", g_Dbs.db[i].dbName,
                    g_Dbs.db[i].superTbls[j].stbName);
            ret = queryDbExec(taos, command, NO_INSERT_TYPE, true);
@@ -4482,6 +4535,7 @@ int createDatabasesAndStables(char *command) {
                    continue;
                }
            }
+            skip:
            validStbCount ++;
        }
        g_Dbs.db[i].superTblCount = validStbCount;
@@ -4568,7 +4622,7 @@ static void* createTable(void *sarg)
                batchNum++;
                if ((batchNum < stbInfo->batchCreateTableNum)
                        && ((buff_len - len)
-                            >= (stbInfo->lenOfTagOfOneRow + 256))) {
+                            >= (stbInfo->lenOfTagOfOneRow + EXTRA_SQL_LEN))) {
                    continue;
                }
            }
@@ -4583,9 +4637,8 @@ static void* createTable(void *sarg)
            return NULL;
        }
        pThreadInfo->tables_created += batchNum;
-
        uint64_t currentPrintTime = taosGetTimestampMs();
-        if (currentPrintTime - lastPrintTime > 30*1000) {
+        if (currentPrintTime - lastPrintTime > PRINT_STAT_INTERVAL) {
            printf("thread[%d] already create %"PRIu64" - %"PRIu64" tables\n",
                    pThreadInfo->threadID, pThreadInfo->start_table_from, i);
            lastPrintTime = currentPrintTime;
@@ -4597,8 +4650,8 @@ static void* createTable(void *sarg)
                    NO_INSERT_TYPE, false)) {
            errorPrint2("queryDbExec() failed. buffer:\n%s\n", pThreadInfo->buffer);
        }
+        pThreadInfo->tables_created += batchNum;
    }
-
    free(pThreadInfo->buffer);
    return NULL;
 }
@@ -4757,7 +4810,7 @@ static int readTagFromCsvFileToMem(SSuperTable  * stbInfo) {
        stbInfo->tagDataBuf = NULL;
    }

-    int tagCount = 10000;
+    int tagCount = MAX_SAMPLES;
    int count = 0;
    char* tagDataBuf = calloc(1, stbInfo->lenOfTagOfOneRow * tagCount);
    if (tagDataBuf == NULL) {
@@ -5163,35 +5216,35 @@ static bool getMetaFromInsertJsonFile(cJSON* root) {
    if (port && port->type == cJSON_Number) {
        g_Dbs.port = port->valueint;
    } else if (!port) {
-        g_Dbs.port = 6030;
+        g_Dbs.port = DEFAULT_PORT;
    }

    cJSON* user = cJSON_GetObjectItem(root, "user");
    if (user && user->type == cJSON_String && user->valuestring != NULL) {
        tstrncpy(g_Dbs.user, user->valuestring, MAX_USERNAME_SIZE);
    } else if (!user) {
-        tstrncpy(g_Dbs.user, "root", MAX_USERNAME_SIZE);
+        tstrncpy(g_Dbs.user, TSDB_DEFAULT_USER, MAX_USERNAME_SIZE);
    }

    cJSON* password = cJSON_GetObjectItem(root, "password");
    if (password && password->type == cJSON_String && password->valuestring != NULL) {
        tstrncpy(g_Dbs.password, password->valuestring, SHELL_MAX_PASSWORD_LEN);
    } else if (!password) {
-        tstrncpy(g_Dbs.password, "taosdata", SHELL_MAX_PASSWORD_LEN);
+        tstrncpy(g_Dbs.password, TSDB_DEFAULT_PASS, SHELL_MAX_PASSWORD_LEN);
    }

    cJSON* resultfile = cJSON_GetObjectItem(root, "result_file");
    if (resultfile && resultfile->type == cJSON_String && resultfile->valuestring != NULL) {
        tstrncpy(g_Dbs.resultFile, resultfile->valuestring, MAX_FILE_NAME_LEN);
    } else if (!resultfile) {
-        tstrncpy(g_Dbs.resultFile, "./insert_res.txt", MAX_FILE_NAME_LEN);
+        tstrncpy(g_Dbs.resultFile, DEFAULT_OUTPUT, MAX_FILE_NAME_LEN);
    }

    cJSON* threads = cJSON_GetObjectItem(root, "thread_count");
    if (threads && threads->type == cJSON_Number) {
        g_Dbs.threadCount = threads->valueint;
    } else if (!threads) {
-        g_Dbs.threadCount = 1;
+        g_Dbs.threadCount = DEFAULT_NTHREADS;
    } else {
        errorPrint("%s", "failed to read json, threads not found\n");
        goto PARSE_OVER;
@@ -5201,7 +5254,7 @@ static bool getMetaFromInsertJsonFile(cJSON* root) {
    if (threads2 && threads2->type == cJSON_Number) {
        g_Dbs.threadCountForCreateTbl = threads2->valueint;
    } else if (!threads2) {
-        g_Dbs.threadCountForCreateTbl = 1;
+        g_Dbs.threadCountForCreateTbl = DEFAULT_NTHREADS;
    } else {
        errorPrint("%s", "failed to read json, threads2 not found\n");
        goto PARSE_OVER;
@@ -5215,7 +5268,7 @@ static bool getMetaFromInsertJsonFile(cJSON* root) {
        }
        g_args.insert_interval = gInsertInterval->valueint;
    } else if (!gInsertInterval) {
-        g_args.insert_interval = 0;
+        g_args.insert_interval = DEFAULT_INSERT_INTERVAL;
    } else {
        errorPrint("%s", "failed to read json, insert_interval input mistake\n");
        goto PARSE_OVER;
@@ -5230,7 +5283,7 @@ static bool getMetaFromInsertJsonFile(cJSON* root) {
        }
        g_args.interlaceRows = interlaceRows->valueint;
    } else if (!interlaceRows) {
-        g_args.interlaceRows = 0; // 0 means progressive mode, > 0 mean interlace mode. max value is less or equ num_of_records_per_req
+        g_args.interlaceRows = DEFAULT_INTERLACE_ROWS; // 0 means progressive mode, > 0 mean interlace mode. max value is less or equ num_of_records_per_req
    } else {
        errorPrint("%s", "failed to read json, interlaceRows input mistake\n");
        goto PARSE_OVER;
@@ -5245,7 +5298,7 @@ static bool getMetaFromInsertJsonFile(cJSON* root) {
        }
        g_args.max_sql_len = maxSqlLen->valueint;
    } else if (!maxSqlLen) {
-        g_args.max_sql_len = (1024*1024);
+        g_args.max_sql_len = TSDB_MAX_ALLOWED_SQL_LEN;
    } else {
        errorPrint("%s() LN%d, failed to read json, max_sql_len input mistake\n",
                __func__, __LINE__);
@@ -5275,6 +5328,22 @@ static bool getMetaFromInsertJsonFile(cJSON* root) {
        goto PARSE_OVER;
    }

+    cJSON* prepareRand = cJSON_GetObjectItem(root, "prepared_rand");
+    if (prepareRand && prepareRand->type == cJSON_Number) {
+        if (prepareRand->valueint <= 0) {
+            errorPrint("%s() LN%d, failed to read json, prepared_rand input mistake\n",
+                    __func__, __LINE__);
+            goto PARSE_OVER;
+        }
+        g_args.prepared_rand = prepareRand->valueint;
+    } else if (!prepareRand) {
+        g_args.prepared_rand = DEFAULT_PREPARED_RAND;
+    } else {
+        errorPrint("%s() LN%d, failed to read json, prepared_rand not found\n",
+                __func__, __LINE__);
+        goto PARSE_OVER;
+    }
+
    cJSON *answerPrompt = cJSON_GetObjectItem(root, "confirm_parameter_prompt"); // yes, no,
    if (answerPrompt
            && answerPrompt->type == cJSON_String
@@ -5284,7 +5353,7 @@ static bool getMetaFromInsertJsonFile(cJSON* root) {
        } else if (0 == strncasecmp(answerPrompt->valuestring, "no", 2)) {
            g_args.answer_yes = true;
        } else {
-            g_args.answer_yes = false;
+            g_args.answer_yes = DEFAULT_ANS_YES;
        }
    } else if (!answerPrompt) {
        g_args.answer_yes = true;   // default is no, mean answer_yes.
@@ -5316,7 +5385,8 @@ static bool getMetaFromInsertJsonFile(cJSON* root) {
                MAX_DB_COUNT);
        goto PARSE_OVER;
    }
-
+    g_Dbs.db = calloc(1, sizeof(SDataBase)*dbSize);
+    assert(g_Dbs.db);
    g_Dbs.dbCount = dbSize;
    for (int i = 0; i < dbSize; ++i) {
        cJSON* dbinfos = cJSON_GetArrayItem(dbs, i);
@@ -5516,7 +5586,8 @@ static bool getMetaFromInsertJsonFile(cJSON* root) {
                    MAX_SUPER_TABLE_COUNT);
            goto PARSE_OVER;
        }
-
+        g_Dbs.db[i].superTbls = calloc(1, stbSize * sizeof(SSuperTable));
+        assert(g_Dbs.db[i].superTbls);
        g_Dbs.db[i].superTblCount = stbSize;
        for (int j = 0; j < stbSize; ++j) {
            cJSON* stbInfo = cJSON_GetArrayItem(stables, j);
@@ -5581,7 +5652,7 @@ static bool getMetaFromInsertJsonFile(cJSON* root) {
            if (batchCreateTbl && batchCreateTbl->type == cJSON_Number) {
                g_Dbs.db[i].superTbls[j].batchCreateTableNum = batchCreateTbl->valueint;
            } else if (!batchCreateTbl) {
-                g_Dbs.db[i].superTbls[j].batchCreateTableNum = 10;
+                g_Dbs.db[i].superTbls[j].batchCreateTableNum = DEFAULT_CREATE_BATCH;
            } else {
                errorPrint("%s", "failed to read json, batch_create_tbl_num not found\n");
                goto PARSE_OVER;
@@ -5644,6 +5715,9 @@ static bool getMetaFromInsertJsonFile(cJSON* root) {
                    g_Dbs.db[i].superTbls[j].iface= REST_IFACE;
                } else if (0 == strcasecmp(stbIface->valuestring, "stmt")) {
                    g_Dbs.db[i].superTbls[j].iface= STMT_IFACE;
+                } else if (0 == strcasecmp(stbIface->valuestring, "sml")) {
+                    g_Dbs.db[i].superTbls[j].iface= SML_IFACE;
+                    g_args.iface = SML_IFACE;
                } else {
                    errorPrint("failed to read json, insert_mode %s not recognized\n",
                            stbIface->valuestring);
@@ -5860,7 +5934,7 @@ static bool getMetaFromInsertJsonFile(cJSON* root) {
            if (disorderRange && disorderRange->type == cJSON_Number) {
                g_Dbs.db[i].superTbls[j].disorderRange = disorderRange->valueint;
            } else if (!disorderRange) {
-                g_Dbs.db[i].superTbls[j].disorderRange = 1000;
+                g_Dbs.db[i].superTbls[j].disorderRange = DEFAULT_DISORDER_RANGE;
            } else {
                errorPrint("%s", "failed to read json, disorderRange not found\n");
                goto PARSE_OVER;
@@ -5908,7 +5982,7 @@ static bool getMetaFromQueryJsonFile(cJSON* root) {
    if (host && host->type == cJSON_String && host->valuestring != NULL) {
        tstrncpy(g_queryInfo.host, host->valuestring, MAX_HOSTNAME_SIZE);
    } else if (!host) {
-        tstrncpy(g_queryInfo.host, "127.0.0.1", MAX_HOSTNAME_SIZE);
+        tstrncpy(g_queryInfo.host, DEFAULT_HOST, MAX_HOSTNAME_SIZE);
    } else {
        errorPrint("%s", "failed to read json, host not found\n");
        goto PARSE_OVER;
@@ -5918,21 +5992,21 @@ static bool getMetaFromQueryJsonFile(cJSON* root) {
    if (port && port->type == cJSON_Number) {
        g_queryInfo.port = port->valueint;
    } else if (!port) {
-        g_queryInfo.port = 6030;
+        g_queryInfo.port = DEFAULT_PORT;
    }

    cJSON* user = cJSON_GetObjectItem(root, "user");
    if (user && user->type == cJSON_String && user->valuestring != NULL) {
        tstrncpy(g_queryInfo.user, user->valuestring, MAX_USERNAME_SIZE);
    } else if (!user) {
-        tstrncpy(g_queryInfo.user, "root", MAX_USERNAME_SIZE); ;
+        tstrncpy(g_queryInfo.user, TSDB_DEFAULT_USER, MAX_USERNAME_SIZE); ;
    }

    cJSON* password = cJSON_GetObjectItem(root, "password");
    if (password && password->type == cJSON_String && password->valuestring != NULL) {
        tstrncpy(g_queryInfo.password, password->valuestring, SHELL_MAX_PASSWORD_LEN);
    } else if (!password) {
-        tstrncpy(g_queryInfo.password, "taosdata", SHELL_MAX_PASSWORD_LEN);;
+        tstrncpy(g_queryInfo.password, TSDB_DEFAULT_PASS, SHELL_MAX_PASSWORD_LEN);;
    }

    cJSON *answerPrompt = cJSON_GetObjectItem(root, "confirm_parameter_prompt"); // yes, no,
@@ -5960,7 +6034,7 @@ static bool getMetaFromQueryJsonFile(cJSON* root) {
        }
        g_args.query_times = gQueryTimes->valueint;
    } else if (!gQueryTimes) {
-        g_args.query_times = 1;
+        g_args.query_times = DEFAULT_QUERY_TIME;
    } else {
        errorPrint("%s", "failed to read json, query_times input mistake\n");
        goto PARSE_OVER;
@@ -6058,7 +6132,7 @@ static bool getMetaFromQueryJsonFile(cJSON* root) {
        } else if (!interval) {
            //printf("failed to read json, subscribe interval no found\n");
            //goto PARSE_OVER;
-            g_queryInfo.specifiedQueryInfo.subscribeInterval = 10000;
+            g_queryInfo.specifiedQueryInfo.subscribeInterval = DEFAULT_SUB_INTERVAL;
        }

        cJSON* restart = cJSON_GetObjectItem(specifiedQuery, "restart");
@@ -6205,7 +6279,7 @@ static bool getMetaFromQueryJsonFile(cJSON* root) {
            }
            g_queryInfo.superQueryInfo.threadCnt = threads->valueint;
        } else if (!threads) {
-            g_queryInfo.superQueryInfo.threadCnt = 1;
+            g_queryInfo.superQueryInfo.threadCnt = DEFAULT_NTHREADS;
        }

        //cJSON* subTblCnt = cJSON_GetObjectItem(superQuery, "childtable_count");
@@ -6250,7 +6324,7 @@ static bool getMetaFromQueryJsonFile(cJSON* root) {
        } else if (!superInterval) {
            //printf("failed to read json, subscribe interval no found\n");
            //goto PARSE_OVER;
-            g_queryInfo.superQueryInfo.subscribeInterval = 10000;
+            g_queryInfo.superQueryInfo.subscribeInterval = DEFAULT_QUERY_INTERVAL;
        }

        cJSON* subrestart = cJSON_GetObjectItem(superQuery, "restart");
@@ -6370,7 +6444,7 @@ static bool getInfoFromJsonFile(char* file) {
    }

    bool  ret = false;
-    int   maxLen = 6400000;
+    int   maxLen = MAX_JSON_BUFF;
    char *content = calloc(1, maxLen + 1);
    int   len = fread(content, 1, maxLen, fp);
    if (len <= 0) {
@@ -6407,9 +6481,12 @@ static bool getInfoFromJsonFile(char* file) {
    }

    if (INSERT_TEST == g_args.test_mode) {
+        memset(&g_Dbs, 0, sizeof(SDbs));
+        g_Dbs.use_metric = g_args.use_metric;
        ret = getMetaFromInsertJsonFile(root);
    } else if ((QUERY_TEST == g_args.test_mode)
            || (SUBSCRIBE_TEST == g_args.test_mode)) {
+        memset(&g_queryInfo, 0, sizeof(SQueryMetaInfo));
        ret = getMetaFromQueryJsonFile(root);
    } else {
        errorPrint("%s",
@@ -6474,8 +6551,9 @@ static void postFreeResource() {
                g_Dbs.db[i].superTbls[j].childTblName = NULL;
            }
        }
+        tmfree(g_Dbs.db[i].superTbls);
    }
-
+    tmfree(g_Dbs.db);
    tmfree(g_randbool_buff);
    tmfree(g_randint_buff);
    tmfree(g_rand_voltage_buff);
@@ -6498,6 +6576,7 @@ static void postFreeResource() {
        }
    }
    tmfree(g_sampleBindBatchArray);
+
 #endif
 }

@@ -6979,7 +7058,7 @@ static int32_t execInsert(threadInfo *pThreadInfo, uint32_t k)
 {
    int32_t affectedRows;
    SSuperTable* stbInfo = pThreadInfo->stbInfo;
-
+    int32_t code;
    uint16_t iface;
    if (stbInfo)
        iface = stbInfo->iface;
@@ -7009,7 +7088,7 @@ static int32_t execInsert(threadInfo *pThreadInfo, uint32_t k)
            verbosePrint("[%d] %s() LN%d %s\n", pThreadInfo->threadID,
                    __func__, __LINE__, pThreadInfo->buffer);

-            if (0 != postProceSql(g_Dbs.host, &g_Dbs.serv_addr, g_Dbs.port,
+            if (0 != postProceSql(g_Dbs.host, g_Dbs.port,
                        pThreadInfo->buffer, pThreadInfo)) {
                affectedRows = -1;
                printf("========restful return fail, threadID[%d]\n",
@@ -7031,7 +7110,19 @@ static int32_t execInsert(threadInfo *pThreadInfo, uint32_t k)
            }
            affectedRows = k;
            break;
-
+        case SML_IFACE:
+            code = taos_schemaless_insert(pThreadInfo->taos, pThreadInfo->lines, k, 0, pThreadInfo->time_precision == TSDB_TIME_PRECISION_MILLI
+                    ? "ms"
+                    : (pThreadInfo->time_precision == TSDB_TIME_PRECISION_MICRO
+                           ? "us"
+                           : "ns"));
+            if (code) {
+                errorPrint2("%s() LN%d, failed to execute schemaless insert. reason: %s\n",
+                __func__, __LINE__, tstrerror(code));
+                exit(EXIT_FAILURE);
+            }
+            affectedRows = k;
+            break;
        default:
            errorPrint2("%s() LN%d: unknown insert mode: %d\n",
                    __func__, __LINE__, stbInfo->iface);
@@ -9517,11 +9608,222 @@ free_of_interlace_stmt:

 #endif

-// sync write interlace data
-static void* syncWriteInterlace(threadInfo *pThreadInfo, uint32_t interlaceRows) {
-    debugPrint("[%d] %s() LN%d: ### interlace write\n",
-            pThreadInfo->threadID, __func__, __LINE__);
+static void generateSmlHead(char* smlHead, SSuperTable* stbInfo, threadInfo* pThreadInfo, int tbSeq) {
+    int64_t dataLen = 0;
+    dataLen += snprintf(smlHead + dataLen, HEAD_BUFF_LEN - dataLen,
+                            "%s,id=%s%" PRIu64 "", stbInfo->stbName,
+                            stbInfo->childTblPrefix,
+                            tbSeq + pThreadInfo->start_table_from);
+        for (int j = 0; j < stbInfo->tagCount; j++) {
+            tstrncpy(smlHead + dataLen, ",", 2);
+            dataLen += 1;
+            switch (stbInfo->tags[j].data_type) {
+                case TSDB_DATA_TYPE_TIMESTAMP:
+                    errorPrint2(
+                        "%s() LN%d, Does not support data type %s as tag\n",
+                        __func__, __LINE__, stbInfo->tags[j].dataType);
+                    exit(EXIT_FAILURE);
+                case TSDB_DATA_TYPE_BOOL:
+                    dataLen +=
+                        snprintf(smlHead + dataLen, HEAD_BUFF_LEN - dataLen,
+                                 "T%d=%s", j, rand_bool_str());
+                    break;
+                case TSDB_DATA_TYPE_TINYINT:
+                    dataLen +=
+                        snprintf(smlHead + dataLen, HEAD_BUFF_LEN - dataLen,
+                                 "T%d=%si8", j, rand_tinyint_str());
+                    break;
+                case TSDB_DATA_TYPE_UTINYINT:
+                    dataLen +=
+                        snprintf(smlHead + dataLen, HEAD_BUFF_LEN - dataLen,
+                                 "T%d=%su8", j, rand_utinyint_str());
+                    break;
+                case TSDB_DATA_TYPE_SMALLINT:
+                    dataLen +=
+                        snprintf(smlHead + dataLen, HEAD_BUFF_LEN - dataLen,
+                                 "T%d=%si16", j, rand_smallint_str());
+                    break;
+                case TSDB_DATA_TYPE_USMALLINT:
+                    dataLen +=
+                        snprintf(smlHead + dataLen, HEAD_BUFF_LEN - dataLen,
+                                 "T%d=%su16", j, rand_usmallint_str());
+                    break;
+                case TSDB_DATA_TYPE_INT:
+                    dataLen +=
+                        snprintf(smlHead + dataLen, HEAD_BUFF_LEN - dataLen,
+                                 "T%d=%si32", j, rand_int_str());
+                    break;
+                case TSDB_DATA_TYPE_UINT:
+                    dataLen +=
+                        snprintf(smlHead + dataLen, HEAD_BUFF_LEN - dataLen,
+                                 "T%d=%su32", j, rand_uint_str());
+                    break;
+                case TSDB_DATA_TYPE_BIGINT:
+                    dataLen +=
+                        snprintf(smlHead + dataLen, HEAD_BUFF_LEN - dataLen,
+                                 "T%d=%si64", j, rand_bigint_str());
+                    break;
+                case TSDB_DATA_TYPE_UBIGINT:
+                    dataLen +=
+                        snprintf(smlHead + dataLen, HEAD_BUFF_LEN - dataLen,
+                                 "T%d=%su64", j, rand_ubigint_str());
+                    break;
+                case TSDB_DATA_TYPE_FLOAT:
+                    dataLen +=
+                        snprintf(smlHead + dataLen, HEAD_BUFF_LEN - dataLen,
+                                 "T%d=%sf32", j, rand_float_str());
+                    break;
+                case TSDB_DATA_TYPE_DOUBLE:
+                    dataLen +=
+                        snprintf(smlHead + dataLen, HEAD_BUFF_LEN - dataLen,
+                                 "T%d=%sf64", j, rand_double_str());
+                    break;
+                case TSDB_DATA_TYPE_BINARY:
+                case TSDB_DATA_TYPE_NCHAR:
+                    if (stbInfo->tags[j].dataLen > TSDB_MAX_BINARY_LEN) {
+                        errorPrint2(
+                            "binary or nchar length overflow, maxsize:%u\n",
+                            (uint32_t)TSDB_MAX_BINARY_LEN);
+                        exit(EXIT_FAILURE);
+                    }
+                    char *buf = (char *)calloc(stbInfo->tags[j].dataLen + 1, 1);
+                    if (NULL == buf) {
+                        errorPrint2("calloc failed! size:%d\n",
+                                    stbInfo->tags[j].dataLen);
+                        exit(EXIT_FAILURE);
+                    }
+                    rand_string(buf, stbInfo->tags[j].dataLen);
+                    if (stbInfo->tags[j].data_type == TSDB_DATA_TYPE_BINARY) {
+                        dataLen += snprintf(smlHead + dataLen,
+                                            HEAD_BUFF_LEN - dataLen,
+                                            "T%d=\"%s\"", j, buf);
+                    } else {
+                        dataLen += snprintf(smlHead + dataLen,
+                                            HEAD_BUFF_LEN - dataLen,
+                                            "T%d=L\"%s\"", j, buf);
+                    }
+                    tmfree(buf);
+                    break;
+
+                default:
+                    errorPrint2("%s() LN%d, Unknown data type %s\n", __func__,
+                                __LINE__, stbInfo->tags[j].dataType);
+                    exit(EXIT_FAILURE);
+            }
+        }
+}
+
+static void generateSmlTail(char* line, char* smlHead, SSuperTable* stbInfo, 
+                                threadInfo* pThreadInfo, int64_t timestamp) {
+    int dataLen = 0;
+    dataLen = snprintf(line, BUFFER_SIZE, "%s ", smlHead);
+    for (uint32_t c = 0; c < stbInfo->columnCount; c++) {
+        if (c != 0) {
+            tstrncpy(line + dataLen, ",", 2);
+            dataLen += 1;
+        }
+        switch (stbInfo->columns[c].data_type) {
+            case TSDB_DATA_TYPE_TIMESTAMP:
+                errorPrint2(
+                    "%s() LN%d, Does not support data type %s as tag\n",
+                    __func__, __LINE__, stbInfo->columns[c].dataType);
+                exit(EXIT_FAILURE);
+            case TSDB_DATA_TYPE_BOOL:
+                dataLen += snprintf(line + dataLen,
+                                    BUFFER_SIZE - dataLen, "c%d=%s",
+                                    c, rand_bool_str());
+                break;
+            case TSDB_DATA_TYPE_TINYINT:
+                dataLen += snprintf(line + dataLen,
+                                    BUFFER_SIZE - dataLen, "c%d=%si8",
+                                    c, rand_tinyint_str());
+                break;
+            case TSDB_DATA_TYPE_UTINYINT:
+                dataLen += snprintf(line + dataLen,
+                                    BUFFER_SIZE - dataLen, "c%d=%su8",
+                                    c, rand_utinyint_str());
+                break;
+            case TSDB_DATA_TYPE_SMALLINT:
+                dataLen += snprintf(
+                    line + dataLen, BUFFER_SIZE - dataLen,
+                    "c%d=%si16", c, rand_smallint_str());
+                break;
+            case TSDB_DATA_TYPE_USMALLINT:
+                dataLen += snprintf(
+                    line + dataLen, BUFFER_SIZE - dataLen,
+                    "c%d=%su16", c, rand_usmallint_str());
+                break;
+            case TSDB_DATA_TYPE_INT:
+                dataLen += snprintf(line + dataLen,
+                                    BUFFER_SIZE - dataLen,
+                                    "c%d=%si32", c, rand_int_str());
+                break;
+            case TSDB_DATA_TYPE_UINT:
+                dataLen += snprintf(line + dataLen,
+                                    BUFFER_SIZE - dataLen,
+                                    "c%d=%su32", c, rand_uint_str());
+                break;
+            case TSDB_DATA_TYPE_BIGINT:
+                dataLen += snprintf(line + dataLen,
+                                    BUFFER_SIZE - dataLen,
+                                    "c%d=%si64", c, rand_bigint_str());
+                break;
+            case TSDB_DATA_TYPE_UBIGINT:
+                dataLen += snprintf(line + dataLen,
+                                    BUFFER_SIZE - dataLen,
+                                    "c%d=%su64", c, rand_ubigint_str());
+                break;
+            case TSDB_DATA_TYPE_FLOAT:
+                dataLen += snprintf(line + dataLen,
+                                    BUFFER_SIZE - dataLen,
+                                    "c%d=%sf32", c, rand_float_str());
+                break;
+            case TSDB_DATA_TYPE_DOUBLE:
+                dataLen += snprintf(line + dataLen,
+                                    BUFFER_SIZE - dataLen,
+                                    "c%d=%sf64", c, rand_double_str());
+                break;
+            case TSDB_DATA_TYPE_BINARY:
+            case TSDB_DATA_TYPE_NCHAR:
+                if (stbInfo->columns[c].dataLen > TSDB_MAX_BINARY_LEN) {
+                    errorPrint2(
+                        "binary or nchar length overflow, maxsize:%u\n",
+                        (uint32_t)TSDB_MAX_BINARY_LEN);
+                    exit(EXIT_FAILURE);
+                }
+                char *buf =
+                    (char *)calloc(stbInfo->columns[c].dataLen + 1, 1);
+                if (NULL == buf) {
+                    errorPrint2("calloc failed! size:%d\n",
+                                stbInfo->columns[c].dataLen);
+                    exit(EXIT_FAILURE);
+                }
+                rand_string(buf, stbInfo->columns[c].dataLen);
+                if (stbInfo->columns[c].data_type ==
+                    TSDB_DATA_TYPE_BINARY) {
+                    dataLen += snprintf(line + dataLen,
+                                        BUFFER_SIZE - dataLen,
+                                        "c%d=\"%s\"", c, buf);
+                } else {
+                    dataLen += snprintf(line + dataLen,
+                                        BUFFER_SIZE - dataLen,
+                                        "c%d=L\"%s\"", c, buf);
+                }
+                tmfree(buf);
+                break;
+            default:
+                errorPrint2("%s() LN%d, Unknown data type %s\n",
+                            __func__, __LINE__,
+                            stbInfo->columns[c].dataType);
+                exit(EXIT_FAILURE);
+        }
+    }
+    dataLen += snprintf(line + dataLen, BUFFER_SIZE - dataLen," %" PRId64 "", timestamp);
+}

+static void* syncWriteInterlaceSml(threadInfo *pThreadInfo, uint32_t interlaceRows) {
+    debugPrint("[%d] %s() LN%d: ### interlace schemaless write\n",
+            pThreadInfo->threadID, __func__, __LINE__);
    int64_t insertRows;
    uint64_t maxSqlLen;
    int64_t timeStampStep;
@@ -9545,7 +9847,7 @@ static void* syncWriteInterlace(threadInfo *pThreadInfo, uint32_t interlaceRows)
            pThreadInfo->threadID, __func__, __LINE__,
            pThreadInfo->start_table_from,
            pThreadInfo->ntables, insertRows);
-#if 1
+
    if (interlaceRows > g_args.reqPerReq)
        interlaceRows = g_args.reqPerReq;

@@ -9558,27 +9860,16 @@ static void* syncWriteInterlace(threadInfo *pThreadInfo, uint32_t interlaceRows)
    } else {
        batchPerTblTimes = 1;
    }
-#else
-    uint32_t batchPerTbl;
-    if (interlaceRows > g_args.reqPerReq)
-        batchPerTbl = g_args.reqPerReq;
-    else
-        batchPerTbl = interlaceRows;
-
-    uint32_t batchPerTblTimes;
    
-    if ((interlaceRows > 0) && (pThreadInfo->ntables > 1)) {
-        batchPerTblTimes =
-            interlaceRows / batchPerTbl;
-    } else {
-        batchPerTblTimes = 1;
+    char *smlHead[pThreadInfo->ntables];
+    for (int t = 0; t < pThreadInfo->ntables; t++) {
+        smlHead[t] = (char *)calloc(HEAD_BUFF_LEN, 1);
+        if (NULL == smlHead[t]) {
+            errorPrint2("calloc failed! size:%d\n", HEAD_BUFF_LEN);
+            exit(EXIT_FAILURE);
        }
-#endif
-    pThreadInfo->buffer = calloc(maxSqlLen, 1);
-    if (NULL == pThreadInfo->buffer) {
-        errorPrint2( "%s() LN%d, Failed to alloc %"PRIu64" Bytes, reason:%s\n",
-                __func__, __LINE__, maxSqlLen, strerror(errno));
-        return NULL;
+        generateSmlHead(smlHead[t], stbInfo, pThreadInfo, t);
+        
    }

    pThreadInfo->totalInsertRows = 0;
@@ -9601,6 +9892,14 @@ static void* syncWriteInterlace(threadInfo *pThreadInfo, uint32_t interlaceRows)
    int percentComplete = 0;
    int64_t totalRows = insertRows * pThreadInfo->ntables;

+    pThreadInfo->lines = calloc(g_args.reqPerReq, sizeof(char *));
+    if (NULL == pThreadInfo->lines) {
+        errorPrint2("Failed to alloc %"PRIu64" bytes, reason:%s\n",
+                g_args.reqPerReq * sizeof(char *),
+                strerror(errno));
+        return NULL;
+    }
+    
    while(pThreadInfo->totalInsertRows < pThreadInfo->ntables * insertRows) {
        if ((flagSleep) && (insert_interval)) {
            st = taosGetTimestampMs();
@@ -9608,51 +9907,278 @@ static void* syncWriteInterlace(threadInfo *pThreadInfo, uint32_t interlaceRows)
        }

        // generate data
-        memset(pThreadInfo->buffer, 0, maxSqlLen);
-        uint64_t remainderBufLen = maxSqlLen;
-
-        char *pstr = pThreadInfo->buffer;
-
-        int len = snprintf(pstr,
-                strlen(STR_INSERT_INTO) + 1, "%s", STR_INSERT_INTO);
-        pstr += len;
-        remainderBufLen -= len;

        uint32_t recOfBatch = 0;

-        int32_t generated;
-        for (uint64_t i = 0; i < batchPerTblTimes; i ++) {
-            char tableName[TSDB_TABLE_NAME_LEN];
-
-            getTableName(tableName, pThreadInfo, tableSeq);
-            if (0 == strlen(tableName)) {
-                errorPrint2("[%d] %s() LN%d, getTableName return null\n",
-                        pThreadInfo->threadID, __func__, __LINE__);
-                free(pThreadInfo->buffer);
-                return NULL;
+        for (uint64_t i = 0; i < batchPerTblTimes; i++) {
+            int64_t timestamp = startTime;
+            for (int j = recOfBatch; j < recOfBatch + batchPerTbl; j++) {
+                pThreadInfo->lines[j] = calloc(BUFFER_SIZE, 1);
+                if (NULL == pThreadInfo->lines[j]) {
+                    errorPrint2("Failed to alloc %d bytes, reason:%s\n",
+                        BUFFER_SIZE, strerror(errno));
+                }
+                generateSmlTail(pThreadInfo->lines[j], smlHead[i], stbInfo, pThreadInfo, timestamp);
+                timestamp += timeStampStep;
            }
+            tableSeq ++;
+            recOfBatch += batchPerTbl;

-            uint64_t oldRemainderLen = remainderBufLen;
+            pThreadInfo->totalInsertRows += batchPerTbl;

-            if (stbInfo) {
-                generated = generateStbInterlaceData(
-                        pThreadInfo,
-                        tableName, batchPerTbl, i,
-                        batchPerTblTimes,
-                        tableSeq,
-                        pstr,
-                        insertRows,
-                        startTime,
-                        &remainderBufLen);
-            } else {
-                generated = generateInterlaceDataWithoutStb(
-                        tableName, batchPerTbl,
-                        tableSeq,
-                        pThreadInfo->db_name, pstr,
-                        insertRows,
-                        startTime,
-                        &remainderBufLen);
-            }
+            verbosePrint("[%d] %s() LN%d batchPerTbl=%d recOfBatch=%d\n",
+                    pThreadInfo->threadID, __func__, __LINE__,
+                    batchPerTbl, recOfBatch);
+
+            if (tableSeq == pThreadInfo->start_table_from + pThreadInfo->ntables) {
+                // turn to first table
+                tableSeq = pThreadInfo->start_table_from;
+                generatedRecPerTbl += batchPerTbl;
+
+                startTime = pThreadInfo->start_time
+                    + generatedRecPerTbl * timeStampStep;
+
+                flagSleep = true;
+                if (generatedRecPerTbl >= insertRows)
+                    break;
+
+                int64_t remainRows = insertRows - generatedRecPerTbl;
+                if ((remainRows > 0) && (batchPerTbl > remainRows))
+                    batchPerTbl = remainRows;
+
+                if (pThreadInfo->ntables * batchPerTbl < g_args.reqPerReq)
+                    break;
+            }
+
+            verbosePrint("[%d] %s() LN%d generatedRecPerTbl=%"PRId64" insertRows=%"PRId64"\n",
+                    pThreadInfo->threadID, __func__, __LINE__,
+                    generatedRecPerTbl, insertRows);
+
+            if ((g_args.reqPerReq - recOfBatch) < batchPerTbl)
+                break;
+        }
+
+        verbosePrint("[%d] %s() LN%d recOfBatch=%d totalInsertRows=%"PRIu64"\n",
+                pThreadInfo->threadID, __func__, __LINE__, recOfBatch,
+                pThreadInfo->totalInsertRows);
+        verbosePrint("[%d] %s() LN%d, buffer=%s\n",
+                pThreadInfo->threadID, __func__, __LINE__, pThreadInfo->buffer);
+
+        startTs = taosGetTimestampUs();
+
+        if (recOfBatch == 0) {
+            errorPrint2("[%d] %s() LN%d Failed to insert records of batch %d\n",
+                    pThreadInfo->threadID, __func__, __LINE__,
+                    batchPerTbl);
+            if (batchPerTbl > 0) {
+                errorPrint("\tIf the batch is %d, the length of the SQL to insert a row must be less then %"PRId64"\n",
+                        batchPerTbl, maxSqlLen / batchPerTbl);
+            }
+            errorPrint("\tPlease check if the buffer length(%"PRId64") or batch(%d) is set with proper value!\n",
+                    maxSqlLen, batchPerTbl);
+            goto free_of_interlace;
+        }
+        int64_t affectedRows = execInsert(pThreadInfo, recOfBatch);
+
+        endTs = taosGetTimestampUs();
+        uint64_t delay = endTs - startTs;
+        performancePrint("%s() LN%d, insert execution time is %10.2f ms\n",
+                __func__, __LINE__, delay / 1000.0);
+        verbosePrint("[%d] %s() LN%d affectedRows=%"PRId64"\n",
+                pThreadInfo->threadID,
+                __func__, __LINE__, affectedRows);
+
+        if (delay > pThreadInfo->maxDelay) pThreadInfo->maxDelay = delay;
+        if (delay < pThreadInfo->minDelay) pThreadInfo->minDelay = delay;
+        pThreadInfo->cntDelay++;
+        pThreadInfo->totalDelay += delay;
+
+        if (recOfBatch != affectedRows) {
+            errorPrint2("[%d] %s() LN%d execInsert insert %d, affected rows: %"PRId64"\n%s\n",
+                    pThreadInfo->threadID, __func__, __LINE__,
+                    recOfBatch, affectedRows, pThreadInfo->buffer);
+            goto free_of_interlace;
+        }
+
+        pThreadInfo->totalAffectedRows += affectedRows;
+
+        int currentPercent = pThreadInfo->totalAffectedRows * 100 / totalRows;
+        if (currentPercent > percentComplete ) {
+            printf("[%d]:%d%%\n", pThreadInfo->threadID, currentPercent);
+            percentComplete = currentPercent;
+        }
+        int64_t  currentPrintTime = taosGetTimestampMs();
+        if (currentPrintTime - lastPrintTime > 30*1000) {
+            printf("thread[%d] has currently inserted rows: %"PRIu64 ", affected rows: %"PRIu64 "\n",
+                    pThreadInfo->threadID,
+                    pThreadInfo->totalInsertRows,
+                    pThreadInfo->totalAffectedRows);
+            lastPrintTime = currentPrintTime;
+        }
+
+        if ((insert_interval) && flagSleep) {
+            et = taosGetTimestampMs();
+
+            if (insert_interval > (et - st) ) {
+                uint64_t sleepTime = insert_interval - (et -st);
+                performancePrint("%s() LN%d sleep: %"PRId64" ms for insert interval\n",
+                        __func__, __LINE__, sleepTime);
+                taosMsleep(sleepTime); // ms
+                sleepTimeTotal += insert_interval;
+            }
+        }
+        for (int index = 0; index < g_args.reqPerReq; index++) {
+            free(pThreadInfo->lines[index]);
+        }
+    }
+    if (percentComplete < 100)
+        printf("[%d]:%d%%\n", pThreadInfo->threadID, percentComplete);
+
+free_of_interlace:
+    tmfree(pThreadInfo->lines);
+    for (int index = 0; index < pThreadInfo->ntables; index++) {
+        free(smlHead[index]);
+    }
+    printStatPerThread(pThreadInfo);
+    return NULL;
+}
+
+// sync write interlace data
+static void* syncWriteInterlace(threadInfo *pThreadInfo, uint32_t interlaceRows) {
+    debugPrint("[%d] %s() LN%d: ### interlace write\n",
+            pThreadInfo->threadID, __func__, __LINE__);
+
+    int64_t insertRows;
+    uint64_t maxSqlLen;
+    int64_t timeStampStep;
+    uint64_t insert_interval;
+
+    SSuperTable* stbInfo = pThreadInfo->stbInfo;
+
+    if (stbInfo) {
+        insertRows = stbInfo->insertRows;
+        maxSqlLen = stbInfo->maxSqlLen;
+        timeStampStep = stbInfo->timeStampStep;
+        insert_interval = stbInfo->insertInterval;
+    } else {
+        insertRows = g_args.insertRows;
+        maxSqlLen = g_args.max_sql_len;
+        timeStampStep = g_args.timestamp_step;
+        insert_interval = g_args.insert_interval;
+    }
+
+    debugPrint("[%d] %s() LN%d: start_table_from=%"PRIu64" ntables=%"PRId64" insertRows=%"PRIu64"\n",
+            pThreadInfo->threadID, __func__, __LINE__,
+            pThreadInfo->start_table_from,
+            pThreadInfo->ntables, insertRows);
+#if 1
+    if (interlaceRows > g_args.reqPerReq)
+        interlaceRows = g_args.reqPerReq;
+
+    uint32_t batchPerTbl = interlaceRows;
+    uint32_t batchPerTblTimes;
+
+    if ((interlaceRows > 0) && (pThreadInfo->ntables > 1)) {
+        batchPerTblTimes =
+            g_args.reqPerReq / interlaceRows;
+    } else {
+        batchPerTblTimes = 1;
+    }
+#else
+    uint32_t batchPerTbl;
+    if (interlaceRows > g_args.reqPerReq)
+        batchPerTbl = g_args.reqPerReq;
+    else
+        batchPerTbl = interlaceRows;
+
+    uint32_t batchPerTblTimes;
+
+    if ((interlaceRows > 0) && (pThreadInfo->ntables > 1)) {
+        batchPerTblTimes =
+            interlaceRows / batchPerTbl;
+    } else {
+        batchPerTblTimes = 1;
+    }
+#endif
+    pThreadInfo->buffer = calloc(maxSqlLen, 1);
+    if (NULL == pThreadInfo->buffer) {
+        errorPrint2( "%s() LN%d, Failed to alloc %"PRIu64" Bytes, reason:%s\n",
+                __func__, __LINE__, maxSqlLen, strerror(errno));
+        return NULL;
+    }
+
+    pThreadInfo->totalInsertRows = 0;
+    pThreadInfo->totalAffectedRows = 0;
+
+    uint64_t st = 0;
+    uint64_t et = UINT64_MAX;
+
+    uint64_t lastPrintTime = taosGetTimestampMs();
+    uint64_t startTs = taosGetTimestampMs();
+    uint64_t endTs;
+
+    uint64_t tableSeq = pThreadInfo->start_table_from;
+    int64_t startTime = pThreadInfo->start_time;
+
+    uint64_t generatedRecPerTbl = 0;
+    bool flagSleep = true;
+    uint64_t sleepTimeTotal = 0;
+
+    int percentComplete = 0;
+    int64_t totalRows = insertRows * pThreadInfo->ntables;
+
+    while(pThreadInfo->totalInsertRows < pThreadInfo->ntables * insertRows) {
+        if ((flagSleep) && (insert_interval)) {
+            st = taosGetTimestampMs();
+            flagSleep = false;
+        }
+
+        // generate data
+        memset(pThreadInfo->buffer, 0, maxSqlLen);
+        uint64_t remainderBufLen = maxSqlLen;
+
+        char *pstr = pThreadInfo->buffer;
+
+        int len = snprintf(pstr,
+                strlen(STR_INSERT_INTO) + 1, "%s", STR_INSERT_INTO);
+        pstr += len;
+        remainderBufLen -= len;
+
+        uint32_t recOfBatch = 0;
+
+        int32_t generated;
+        for (uint64_t i = 0; i < batchPerTblTimes; i ++) {
+            char tableName[TSDB_TABLE_NAME_LEN];
+
+            getTableName(tableName, pThreadInfo, tableSeq);
+            if (0 == strlen(tableName)) {
+                errorPrint2("[%d] %s() LN%d, getTableName return null\n",
+                        pThreadInfo->threadID, __func__, __LINE__);
+                free(pThreadInfo->buffer);
+                return NULL;
+            }
+
+            uint64_t oldRemainderLen = remainderBufLen;
+
+            if (stbInfo) {
+                generated = generateStbInterlaceData(
+                        pThreadInfo,
+                        tableName, batchPerTbl, i,
+                        batchPerTblTimes,
+                        tableSeq,
+                        pstr,
+                        insertRows,
+                        startTime,
+                        &remainderBufLen);
+            } else {
+                generated = generateInterlaceDataWithoutStb(
+                        tableName, batchPerTbl,
+                        tableSeq,
+                        pThreadInfo->db_name, pstr,
+                        insertRows,
+                        startTime,
+                        &remainderBufLen);
+            }

            debugPrint("[%d] %s() LN%d, generated records is %d\n",
                    pThreadInfo->threadID, __func__, __LINE__, generated);
@@ -9919,6 +10445,120 @@ free_of_stmt_progressive:
    printStatPerThread(pThreadInfo);
    return NULL;
 }
+
+static void* syncWriteProgressiveSml(threadInfo *pThreadInfo) {
+    debugPrint("%s() LN%d: ### sml progressive write\n", __func__, __LINE__);
+
+    SSuperTable* stbInfo = pThreadInfo->stbInfo;
+    int64_t timeStampStep =
+        stbInfo?stbInfo->timeStampStep:g_args.timestamp_step;
+    int64_t insertRows =
+        (stbInfo)?stbInfo->insertRows:g_args.insertRows;
+    verbosePrint("%s() LN%d insertRows=%"PRId64"\n",
+            __func__, __LINE__, insertRows);
+
+    uint64_t lastPrintTime = taosGetTimestampMs();
+
+    pThreadInfo->totalInsertRows = 0;
+    pThreadInfo->totalAffectedRows = 0;
+
+    pThreadInfo->samplePos = 0;
+
+    char *smlHead[pThreadInfo->ntables];
+    for (int t = 0; t < pThreadInfo->ntables; t++) {
+        smlHead[t] = (char *)calloc(HEAD_BUFF_LEN, 1);
+        if (NULL == smlHead[t]) {
+            errorPrint2("calloc failed! size:%d\n", HEAD_BUFF_LEN);
+            exit(EXIT_FAILURE);
+        }
+        generateSmlHead(smlHead[t], stbInfo, pThreadInfo, t);
+        
+    }
+    int currentPercent = 0;
+    int percentComplete = 0;
+
+    if (insertRows < g_args.reqPerReq) {
+        g_args.reqPerReq = insertRows;
+    }
+    pThreadInfo->lines = calloc(g_args.reqPerReq, sizeof(char *));
+    if (NULL == pThreadInfo->lines) {
+        errorPrint2("Failed to alloc %"PRIu64" bytes, reason:%s\n",
+                g_args.reqPerReq * sizeof(char *),
+                strerror(errno));
+        return NULL;
+    }
+    
+    for (uint64_t i = 0; i < pThreadInfo->ntables; i++) {
+        int64_t timestamp = pThreadInfo->start_time;
+        
+        for (uint64_t j = 0; j < insertRows;) {
+            for (int k = 0; k < g_args.reqPerReq; k++) {
+                pThreadInfo->lines[k] = calloc(BUFFER_SIZE, 1);
+                if (NULL == pThreadInfo->lines[k]) {
+                    errorPrint2("Failed to alloc %d bytes, reason:%s\n",
+                        BUFFER_SIZE, strerror(errno));
+                }
+                generateSmlTail(pThreadInfo->lines[k], smlHead[i], stbInfo, pThreadInfo, timestamp);
+                timestamp += timeStampStep;
+                j++;
+                if (j == insertRows) {
+                    break;
+                }
+            }
+            uint64_t startTs = taosGetTimestampUs();
+            int32_t affectedRows = execInsert(pThreadInfo, g_args.reqPerReq);
+            uint64_t endTs = taosGetTimestampUs();
+            uint64_t delay = endTs - startTs;
+
+            performancePrint("%s() LN%d, insert execution time is %10.f ms\n",
+                    __func__, __LINE__, delay/1000.0);
+            verbosePrint("[%d] %s() LN%d affectedRows=%d\n",
+                    pThreadInfo->threadID,
+                    __func__, __LINE__, affectedRows);
+
+            if (delay > pThreadInfo->maxDelay){
+                pThreadInfo->maxDelay = delay;
+            }
+            if (delay < pThreadInfo->minDelay){
+                pThreadInfo->minDelay = delay;
+            }
+            pThreadInfo->cntDelay++;
+            pThreadInfo->totalDelay += delay;
+
+            pThreadInfo->totalAffectedRows += affectedRows;
+            pThreadInfo->totalInsertRows += g_args.reqPerReq;
+            currentPercent =
+                    pThreadInfo->totalAffectedRows * g_Dbs.threadCount / insertRows;
+            if (currentPercent > percentComplete) {
+                    printf("[%d]:%d%%\n", pThreadInfo->threadID,
+                           currentPercent);
+                    percentComplete = currentPercent;
+            }
+
+            int64_t  currentPrintTime = taosGetTimestampMs();
+            if (currentPrintTime - lastPrintTime > 30*1000) {
+                printf("thread[%d] has currently inserted rows: %"PRId64 ", affected rows: %"PRId64 "\n",
+                        pThreadInfo->threadID,
+                        pThreadInfo->totalInsertRows,
+                        pThreadInfo->totalAffectedRows);
+                lastPrintTime = currentPrintTime;
+            }
+
+            for (int index = 0; index < g_args.reqPerReq; index++) {
+                    free(pThreadInfo->lines[index]);
+            }
+            if (j == insertRows) {
+                break;
+            }
+        }
+    }
+    tmfree(pThreadInfo->lines);
+    for (int index = 0; index < pThreadInfo->ntables; index++) {
+        free(smlHead[index]);
+    }
+    return NULL;
+}
+
 // sync insertion progressive data
 static void* syncWriteProgressive(threadInfo *pThreadInfo) {
    debugPrint("%s() LN%d: ### progressive write\n", __func__, __LINE__);
@@ -10124,6 +10764,8 @@ static void* syncWrite(void *sarg) {
 #else
                return syncWriteInterlaceStmt(pThreadInfo, interlaceRows);
 #endif
+            } else if (SML_IFACE == stbInfo->iface) {
+                return syncWriteInterlaceSml(pThreadInfo, interlaceRows);
            } else {
                return syncWriteInterlace(pThreadInfo, interlaceRows);
            }
@@ -10133,6 +10775,9 @@ static void* syncWrite(void *sarg) {
      if (((stbInfo) && (STMT_IFACE == stbInfo->iface))
              || (STMT_IFACE == g_args.iface)) {
          return syncWriteProgressiveStmt(pThreadInfo);
+      } else if (((stbInfo) && (SML_IFACE == stbInfo->iface))
+              || (SML_IFACE == g_args.iface)) {
+          return syncWriteProgressiveSml(pThreadInfo);
      } else {
          return syncWriteProgressive(pThreadInfo);
      }
@@ -10290,7 +10935,7 @@ static void startMultiThreadInsertData(int threads, char* db_name,

    // read sample data from file first
    int ret;
-    if (stbInfo) {
+    if (stbInfo && stbInfo->iface != SML_IFACE) {
        ret = prepareSampleForStb(stbInfo);
    } else {
        ret = prepareSampleForNtb();
@@ -10315,6 +10960,7 @@ static void startMultiThreadInsertData(int threads, char* db_name,
    uint64_t tableFrom;
    
    if (stbInfo) {
+        if (stbInfo->iface != SML_IFACE) {
            int64_t limit;
            uint64_t offset;

@@ -10377,8 +11023,11 @@ static void startMultiThreadInsertData(int threads, char* db_name,
                    db_name, stbInfo->stbName,
                    &stbInfo->childTblName, &childTblCount,
                    limit,
-                offset);
+                    offset, stbInfo->escapeChar);
            ntables = childTblCount;
+        } else {
+            ntables = stbInfo->childTblCount;
+        }
    } else {
        ntables = g_args.ntables;
        tableFrom = 0;
@@ -10551,6 +11200,33 @@ static void startMultiThreadInsertData(int threads, char* db_name,
              pThreadInfo->start_time = pThreadInfo->start_time + rand_int() % 10000 - rand_tinyint();
              }
              */
+        
+        if (g_args.iface == REST_IFACE || ((stbInfo) && (stbInfo->iface == REST_IFACE))) {
+#ifdef WINDOWS
+            WSADATA wsaData;
+            WSAStartup(MAKEWORD(2, 2), &wsaData);
+            SOCKET sockfd;
+#else
+            int sockfd;
+#endif
+            sockfd = socket(AF_INET, SOCK_STREAM, 0);
+            if (sockfd < 0) {
+#ifdef WINDOWS
+                errorPrint( "Could not create socket : %d" , WSAGetLastError());
+#endif
+                debugPrint("%s() LN%d, sockfd=%d\n", __func__, __LINE__, sockfd);
+                ERROR_EXIT("opening socket");
+            }
+
+            int retConn = connect(sockfd, (struct sockaddr *)&(g_Dbs.serv_addr), sizeof(struct sockaddr));
+            debugPrint("%s() LN%d connect() return %d\n", __func__, __LINE__, retConn);
+            if (retConn < 0) {
+                ERROR_EXIT("connecting");
+            }
+            pThreadInfo->sockfd = sockfd;
+        }
+        
+
        tsem_init(&(pThreadInfo->lock_sem), 0, 0);
        if (ASYNC_MODE == g_Dbs.asyncMode) {
            pthread_create(pids + i, NULL, asyncWrite, pThreadInfo);
@@ -10588,6 +11264,14 @@ static void startMultiThreadInsertData(int threads, char* db_name,
        tmfree((char *)pThreadInfo->bind_ts_array);
        tmfree(pThreadInfo->bindParams);
        tmfree(pThreadInfo->is_null);
+        if (g_args.iface == REST_IFACE || ((stbInfo) && (stbInfo->iface == REST_IFACE))) {
+#ifdef WINDOWS
+            closesocket(pThreadInfo->sockfd);
+            WSACleanup();
+#else
+            close(pThreadInfo->sockfd);
+#endif
+        }
 #else
        if (pThreadInfo->sampleBindArray) {
            for (int k = 0; k < MAX_SAMPLES; k++) {
@@ -10927,6 +11611,7 @@ static int insertTestProcess() {
    double start;
    double end;

+    if (g_args.iface != SML_IFACE) {
        if (g_totalChildTables > 0) {
            fprintf(stderr,
                    "creating %"PRId64" table(s) with %d thread(s)\n\n",
@@ -10953,7 +11638,7 @@ static int insertTestProcess() {
                    g_Dbs.threadCountForCreateTbl, g_actualChildTables);
            }
        }
-
+    }
    // create sub threads for inserting data
    //start = taosGetTimestampMs();
    for (int i = 0; i < g_Dbs.dbCount; i++) {
@@ -11250,6 +11935,31 @@ static int queryTestProcess() {
                    }
                }

+                if (0 == strncasecmp(g_queryInfo.queryMode, "rest", 4)) {
+#ifdef WINDOWS
+                    WSADATA wsaData;
+                    WSAStartup(MAKEWORD(2, 2), &wsaData);
+                    SOCKET sockfd;
+#else
+                    int sockfd;
+#endif
+                    sockfd = socket(AF_INET, SOCK_STREAM, 0);
+                    if (sockfd < 0) {
+#ifdef WINDOWS
+                        errorPrint( "Could not create socket : %d" , WSAGetLastError());
+#endif
+                        debugPrint("%s() LN%d, sockfd=%d\n", __func__, __LINE__, sockfd);
+                        ERROR_EXIT("opening socket");
+                    }
+
+                    int retConn = connect(sockfd, (struct sockaddr *)&(g_queryInfo.serv_addr),
+                         sizeof(struct sockaddr));
+                    debugPrint("%s() LN%d connect() return %d\n", __func__, __LINE__, retConn);
+                    if (retConn < 0) {
+                        ERROR_EXIT("connecting");
+                    }
+                    pThreadInfo->sockfd = sockfd;
+                }
                pThreadInfo->taos = NULL;// workaround to use separate taos connection;

                pthread_create(pids + seq, NULL, specifiedTableQuery,
@@ -11301,6 +12011,31 @@ static int queryTestProcess() {
            pThreadInfo->end_table_to = i < b ? tableFrom + a : tableFrom + a - 1;
            tableFrom = pThreadInfo->end_table_to + 1;
            pThreadInfo->taos = NULL; // workaround to use separate taos connection;
+            if (0 == strncasecmp(g_queryInfo.queryMode, "rest", 4)) {
+#ifdef WINDOWS
+                WSADATA wsaData;
+                WSAStartup(MAKEWORD(2, 2), &wsaData);
+                SOCKET sockfd;
+#else
+                int sockfd;
+#endif
+                sockfd = socket(AF_INET, SOCK_STREAM, 0);
+                if (sockfd < 0) {
+#ifdef WINDOWS
+                    errorPrint( "Could not create socket : %d" , WSAGetLastError());
+#endif
+                    debugPrint("%s() LN%d, sockfd=%d\n", __func__, __LINE__, sockfd);
+                    ERROR_EXIT("opening socket");
+                }
+
+                int retConn = connect(sockfd, (struct sockaddr *)&(g_queryInfo.serv_addr),
+                        sizeof(struct sockaddr));
+                debugPrint("%s() LN%d connect() return %d\n", __func__, __LINE__, retConn);
+                if (retConn < 0) {
+                    ERROR_EXIT("connecting");
+                }
+                pThreadInfo->sockfd = sockfd;
+            }
            pthread_create(pidsOfSub + i, NULL, superTableQuery, pThreadInfo);
        }

@@ -11313,6 +12048,15 @@ static int queryTestProcess() {
        for (int i = 0; i < nConcurrent; i++) {
            for (int j = 0; j < nSqlCount; j++) {
                pthread_join(pids[i * nSqlCount + j], NULL);
+                if (0 == strncasecmp(g_queryInfo.queryMode, "rest", 4)) {
+                    threadInfo *pThreadInfo = infos + i * nSqlCount + j;
+#ifdef WINDOWS
+                    closesocket(pThreadInfo->sockfd);
+                    WSACleanup();
+#else
+                    close(pThreadInfo->sockfd);
+#endif
+                }
            }
        }
    }
@@ -11322,6 +12066,15 @@ static int queryTestProcess() {

    for (int i = 0; i < g_queryInfo.superQueryInfo.threadCnt; i++) {
        pthread_join(pidsOfSub[i], NULL);
+        if (0 == strncasecmp(g_queryInfo.queryMode, "rest", 4)) {
+            threadInfo *pThreadInfo = infosOfSub + i;
+#ifdef WINDOWS
+            closesocket(pThreadInfo->sockfd);
+            WSACleanup();
+#else
+            close(pThreadInfo->sockfd);
+#endif
+        }
    }

    tmfree((char*)pidsOfSub);
@@ -11824,29 +12577,6 @@ static int subscribeTestProcess() {
    return 0;
 }

-static void initOfInsertMeta() {
-    memset(&g_Dbs, 0, sizeof(SDbs));
-
-    // set default values
-    tstrncpy(g_Dbs.host, "127.0.0.1", MAX_HOSTNAME_SIZE);
-    g_Dbs.port = 6030;
-    tstrncpy(g_Dbs.user, TSDB_DEFAULT_USER, MAX_USERNAME_SIZE);
-    tstrncpy(g_Dbs.password, TSDB_DEFAULT_PASS, SHELL_MAX_PASSWORD_LEN);
-    g_Dbs.threadCount = 2;
-
-    g_Dbs.use_metric = g_args.use_metric;
-}
-
-static void initOfQueryMeta() {
-    memset(&g_queryInfo, 0, sizeof(SQueryMetaInfo));
-
-    // set default values
-    tstrncpy(g_queryInfo.host, "127.0.0.1", MAX_HOSTNAME_SIZE);
-    g_queryInfo.port = 6030;
-    tstrncpy(g_queryInfo.user, TSDB_DEFAULT_USER, MAX_USERNAME_SIZE);
-    tstrncpy(g_queryInfo.password, TSDB_DEFAULT_PASS, SHELL_MAX_PASSWORD_LEN);
-}
-
 static void setParaFromArg() {
    char type[20];
    char length[20];
@@ -11879,7 +12609,7 @@ static void setParaFromArg() {
    tstrncpy(g_Dbs.resultFile, g_args.output_file, MAX_FILE_NAME_LEN);

    g_Dbs.use_metric = g_args.use_metric;
-
+    g_args.prepared_rand = min(g_args.insertRows, MAX_PREPARED_RAND);
    g_Dbs.aggr_func = g_args.aggr_func;

    char dataString[TSDB_MAX_BYTES_PER_ROW];
@@ -11961,10 +12691,12 @@ static void setParaFromArg() {

        tstrncpy(g_Dbs.db[0].superTbls[0].tags[0].dataType,
                "INT", min(DATATYPE_BUFF_LEN, strlen("INT") + 1));
+        g_Dbs.db[0].superTbls[0].tags[0].data_type = TSDB_DATA_TYPE_INT;
        g_Dbs.db[0].superTbls[0].tags[0].dataLen = 0;

        tstrncpy(g_Dbs.db[0].superTbls[0].tags[1].dataType,
                "BINARY", min(DATATYPE_BUFF_LEN, strlen("BINARY") + 1));
+        g_Dbs.db[0].superTbls[0].tags[1].data_type = TSDB_DATA_TYPE_BINARY;
        g_Dbs.db[0].superTbls[0].tags[1].dataLen = g_args.binwidth;
        g_Dbs.db[0].superTbls[0].tagCount = 2;
    } else {
@@ -11997,7 +12729,6 @@ static int regexMatch(const char *s, const char *reg, int cflags) {
        printf("Regex match failed: %s\n", msgbuf);
        exit(EXIT_FAILURE);
    }
-
    return 0;
 }

@@ -12157,8 +12888,6 @@ int main(int argc, char *argv[]) {

    if (g_args.metaFile) {
        g_totalChildTables = 0;
-        initOfInsertMeta();
-        initOfQueryMeta();

        if (false == getInfoFromJsonFile(g_args.metaFile)) {
            printf("Failed to read %s\n", g_args.metaFile);
@@ -12168,6 +12897,10 @@ int main(int argc, char *argv[]) {
        testMetaFile();
    } else {
        memset(&g_Dbs, 0, sizeof(SDbs));
+        g_Dbs.db = calloc(1, sizeof(SDataBase));
+        assert(g_Dbs.db);
+        g_Dbs.db[0].superTbls = calloc(1, sizeof(SSuperTable));
+        assert(g_Dbs.db[0].superTbls);
        setParaFromArg();

        if (NULL != g_args.sqlFile) {

--- a/src/kit/taosdump/taosdump.c
+++ b/src/kit/taosdump/taosdump.c
@@ -638,11 +638,6 @@ static int queryDbImpl(TAOS *taos, char *command) {
    TAOS_RES *res = NULL;
    int32_t   code = -1;

-    if (NULL != res) {
-        taos_free_result(res);
-        res = NULL;
-    }
-
    res = taos_query(taos, command);
    code = taos_errno(res);

@@ -1193,6 +1188,7 @@ static int64_t dumpNormalTable(
            jsonAvroSchema);
    }

+    tfree(jsonAvroSchema);
    freeTbDes(tableDes);
    return ret;
 }
@@ -3009,8 +3005,14 @@ int main(int argc, char *argv[]) {
    printf("debug_print: %d\n", g_args.debug_print);

    for (int32_t i = 0; i < g_args.arg_list_len; i++) {
+        if (g_args.databases || g_args.all_databases) {
+            errorPrint("%s is an invalid input if database(s) be already specified.\n",
+                    g_args.arg_list[i]);
+            exit(EXIT_FAILURE);
+        } else {
            printf("arg_list[%d]: %s\n", i, g_args.arg_list[i]);
        }
+    }

    printf("==============================\n");
    if (checkParam(&g_args) < 0) {

--- a/blm3 @ c67fcc11
+++ b/blm3 @ c67fcc11
-Subproject commit 4bfae86dcabea0d5a40ff81a72be7c822737269b
+Subproject commit c67fcc11bc5e82e3d7aea8db855a8cbf8b109239
--- a/src/plugins/monitor/src/monMain.c
+++ b/src/plugins/monitor/src/monMain.c
@@ -204,6 +204,7 @@ static void monBuildMonitorSql(char *sql, int32_t cmd) {
             ", disk_used float, disk_total int"
             ", band_speed float"
             ", io_read float, io_write float"
+             ", io_read_rate float, io_write_rate float"
             ", req_http int, req_select int, req_insert int"
             ") tags (dnodeid int, fqdn binary(%d))",
             tsMonitorDbName, TSDB_FQDN_LEN);
@@ -325,7 +326,10 @@ static int32_t monBuildIoSql(char *sql) {
    monDebug("failed to get io info");
  }

-  return sprintf(sql, ", %f, %f", readKB, writeKB);
+  float readRate = readKB/tsMonitorInterval;
+  float writeRate = writeKB/tsMonitorInterval;
+
+  return sprintf(sql, ", %f, %f, %f, %f", readKB, writeKB, readRate, writeRate);
 }

 static void monSaveSystemInfo() {

--- a/src/query/src/qExecutor.c
+++ b/src/query/src/qExecutor.c
@@ -365,7 +365,8 @@ int32_t getNumOfResult(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx* pCtx, int3
     * ts, tag, tagprj function can not decide the output number of current query
     * the number of output result is decided by main output
     */
-    if (hasMainFunction && (id == TSDB_FUNC_TS || id == TSDB_FUNC_TAG || id == TSDB_FUNC_TAGPRJ)) {
+    if (hasMainFunction && (id == TSDB_FUNC_TS || id == TSDB_FUNC_TAG || id == TSDB_FUNC_TAGPRJ ||
+                            id == TSDB_FUNC_TS_DUMMY || id == TSDB_FUNC_TAG_DUMMY)) {
      continue;
    }


--- a/src/tsdb/src/tsdbCompact.c
+++ b/src/tsdb/src/tsdbCompact.c
@@ -441,6 +441,7 @@ static int tsdbCompactMeta(STsdbRepo *pRepo) {
      if ((tdInitDataCols(pComph->pDataCols, pSchema) < 0) || (tdInitDataCols(pReadh->pDCols[0], pSchema) < 0) ||
          (tdInitDataCols(pReadh->pDCols[1], pSchema) < 0)) {
        terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
+        tdFreeSchema(pSchema);
        return -1;
      }
      tdFreeSchema(pSchema);

--- a/src/tsdb/src/tsdbRead.c
+++ b/src/tsdb/src/tsdbRead.c
@@ -1620,7 +1620,7 @@ static void mergeTwoRowFromMem(STsdbQueryHandle* pQueryHandle, int32_t capacity,
      SColIdx *pColIdx = kvRowColIdxAt(rowBody, chosen_itr);
      colId = pColIdx->colId;
      offset = pColIdx->offset;
-      value = tdGetKvRowDataOfCol(rowBody, pColIdx->offset);
+      value = tdGetKvRowDataOfCol(rowBody, offset);
    }



--- a/tests/examples/c/makefile
+++ b/tests/examples/c/makefile
@@ -29,7 +29,3 @@ clean:
 	rm $(ROOT)stream
 	rm $(ROOT)subscribe
 	rm $(ROOT)apitest
-
-
-	
-
--- a/tests/examples/c/prepare.c
+++ b/tests/examples/c/prepare.c
@@ -184,6 +184,10 @@ void verify_prepare(TAOS* taos) {
    taos_stmt_close(stmt);
    exit(EXIT_FAILURE);
  }
+
+  int affectedRows = taos_stmt_affected_rows(stmt);
+  printf("sucessfully inserted %d rows\n", affectedRows);
+
  taos_stmt_close(stmt);

  // query the records
@@ -400,6 +404,9 @@ void verify_prepare2(TAOS* taos) {
    exit(EXIT_FAILURE);
  }

+  int affectedRows = taos_stmt_affected_rows(stmt);
+  printf("sucessfully inserted %d rows\n", affectedRows);
+
  taos_stmt_close(stmt);

  // query the records
@@ -784,6 +791,10 @@ void verify_prepare3(TAOS* taos) {
    taos_stmt_close(stmt);
    exit(EXIT_FAILURE);
  }
+  
+  int affectedRows = taos_stmt_affected_rows(stmt);
+  printf("successfully inserted %d rows\n", affectedRows);
+
  taos_stmt_close(stmt);

  // query the records

--- a/tests/pytest/fulltest.sh
+++ b/tests/pytest/fulltest.sh
@@ -391,6 +391,7 @@ python3 ./test.py -f tag_lite/alter_tag.py
 python3 test.py -f tools/taosdemoAllTest/TD-4985/query-limit-offset.py
 python3 test.py -f tools/taosdemoAllTest/TD-5213/insert4096columns_not_use_taosdemo.py
 python3 test.py -f tools/taosdemoAllTest/TD-5213/insertSigcolumnsNum4096.py
+python3 test.py -f tools/taosdemoAllTest/TD-10539/create_taosdemo.py
 python3 ./test.py -f tag_lite/drop_auto_create.py
 python3 test.py -f insert/insert_before_use_db.py
 python3 test.py -f alter/alter_keep.py

--- a/tests/pytest/insert/insertTelnetLines.py
+++ b/tests/pytest/insert/insertTelnetLines.py
@@ -29,7 +29,6 @@ class TDTestCase:
        tdSql.execute("create database if not exists test precision 'us'")
        tdSql.execute('use test')

-
        ### metric ###
        print("============= step1 : test metric  ================")
        lines0 = [
@@ -215,7 +214,7 @@ class TDTestCase:

        #binary
        lines2_7 = [
-                        "stb2_7 1626006833610ms \"binary_val.!@#$%^&*\" host=\"host0\"",
+                        "stb2_7 1626006833610ms \"  binary_val  .!@#$%^&*  \" host=\"host0\"",
                        "stb2_7 1626006833620ms \"binary_val.:;,./?|+-=\" host=\"host0\"",
                        "stb2_7 1626006833630ms \"binary_val.()[]{}<>\" host=\"host0\""
                     ]
@@ -232,7 +231,7 @@ class TDTestCase:

        #nchar
        lines2_8 = [
-                        "stb2_8 1626006833610ms L\"nchar_val数值一\" host=\"host0\"",
+                        "stb2_8 1626006833610ms L\"  nchar_val  数值一  \" host=\"host0\"",
                        "stb2_8 1626006833620ms L\"nchar_val数值二\" host=\"host0\""
                     ]


--- a/tests/pytest/insert/line_insert.py
+++ b/tests/pytest/insert/line_insert.py
@@ -31,9 +31,9 @@ class TDTestCase:

        tdSql.execute('create stable ste(ts timestamp, f int) tags(t1 bigint)')

-        lines = [   "st,t1=3i64,t2=4f64,t3=\"t3\" c1=3i64,c3=L\"passit\",c2=false,c4=4f64 1626006833639000000",
+        lines = [   "st,t1=3i64,t2=4f64,t3=\"t3\" c1=3i64,c3=L\"\"\"a    pa,\"s   si,t \"\"\",c2=false,c4=4f64 1626006833639000000",
                    "st,t1=4i64,t3=\"t4\",t2=5f64,t4=5f64 c1=3i64,c3=L\"passitagin\",c2=true,c4=5f64,c5=5f64 1626006833640000000",
-                    "ste,t2=5f64,t3=L\"ste\" c1=true,c2=4i64,c3=\"iam\" 1626056811823316532",
+                    "ste,t2=5f64,t3=L\"ste\" c1=true,c2=4i64,c3=\" i,\"a \"m,\"\"\" 1626056811823316532",
                    "stf,t1=4i64,t3=\"t4\",t2=5f64,t4=5f64 c1=3i64,c3=L\"passitagin\",c2=true,c4=5f64,c5=5f64,c6=7u64 1626006933640000000",
                    "st,t1=4i64,t2=5f64,t3=\"t4\" c1=3i64,c3=L\"passitagain\",c2=true,c4=5f64 1626006833642000000",
                    "ste,t2=5f64,t3=L\"ste2\" c3=\"iamszhou\",c4=false 1626056811843316532",

--- a/tests/pytest/table/create.py
+++ b/tests/pytest/table/create.py
@@ -52,7 +52,7 @@ class TDTestCase:
            tdLog.info("taosd found in %s" % buildPath)
        binPath = buildPath+ "/build/bin/"

-        os.system("rm -rf table/create1.py.sql")
+        os.system("rm -rf table/create.py.sql")
        tdSql.prepare()

        print("==============step1")
@@ -298,7 +298,6 @@ class TDTestCase:

        print("==============step3,#create regular_table; insert regular_table; show regular_table; select regular_table; drop regular_table")
        self.regular_table = "regular_table~!@#$%^&*()-_+=[]{}';:,<.>/?stST24680~!@#$%^&*()-_+=[]{}"
-        #self.regular_table = "regular_table~!@#$%^&*()-_+=[]{}';:,<.>/?stST24680~!@#$%^&*()-_+=[]{}"
        
        tdSql.execute("create table `%s` (ts timestamp,i int) ;" %self.regular_table)
        tdSql.query("describe `%s` ; "%self.regular_table)
@@ -328,9 +327,9 @@ class TDTestCase:
        tdSql.checkRows(1)

        self.crr_tb = "create_r_table~!@#$%^&*()-_+=[]{}';:,<.>/?stST24680~!@#$%^&*()-_+=[]{}"
-        # tdSql.execute("create table `%s` as select * from `%s` ;" %(self.crr_tb,self.regular_table))
-        # tdSql.query("show db.tables like 'create_r_table%' ")
-        # tdSql.checkRows(1)
+        tdSql.execute("create table `%s` as select * from `%s` ;" %(self.crr_tb,self.regular_table))
+        tdSql.query("show db2.tables like 'create_r_table%' ")
+        tdSql.checkRows(1)

        print("==============drop table\stable")
        try:
@@ -341,15 +340,6 @@ class TDTestCase:
        tdSql.error("select * from `%s`" %self.regular_table)
        
   
-        #表名：192个字符，还要包含前面的数据库名
-        #taosdemo 建数据库表 # 单独放
-        # self.tsdemo = "tsdemo~!@#$%^&*()-_+=[]{}"
-        # os.system("%staosdemo -d test -m `%s` -t 10 -n 100 -l 10 -y " % (binPath,self.tsdemo))
-        # tdSql.execute("use #!#!#!")
-        # tdSql.query("select count (tbname) from #!#!#!")
-        # tdSql.checkData(0, 0, 1000)
-
-   


    def stop(self):

--- a/tests/pytest/tools/taosdemoAllTest/TD-10539/create_taosdemo.py
+++ b/tests/pytest/tools/taosdemoAllTest/TD-10539/create_taosdemo.py
+###################################################################
+#           Copyright (c) 2016 by TAOS Technologies, Inc.
+#                     All rights reserved.
+#
+#  This file is proprietary and confidential to TAOS Technologies.
+#  No part of this file may be reproduced, stored, transmitted,
+#  disclosed or used in any form or by any means other than as
+#  expressly provided by the written permission from Jianhui Tao
+#
+###################################################################
+
+# -*- coding: utf-8 -*-
+
+import sys
+import taos
+import time
+import os
+from util.log import tdLog
+from util.cases import tdCases
+from util.sql import tdSql
+
+
+class TDTestCase:
+    def init(self, conn, logSql):
+        tdLog.debug("start to execute %s" % __file__)
+        tdSql.init(conn.cursor(), logSql)
+
+
+    def getBuildPath(self):
+        selfPath = os.path.dirname(os.path.realpath(__file__))
+
+        if ("community" in selfPath):
+            projPath = selfPath[:selfPath.find("community")]
+        else:
+            projPath = selfPath[:selfPath.find("tests")]
+
+        for root, dirs, files in os.walk(projPath):
+            if ("taosd" in files):
+                rootRealPath = os.path.dirname(os.path.realpath(root))
+                if ("packaging" not in rootRealPath):
+                    buildPath = root[:len(root)-len("/build/bin")]
+                    break
+        return buildPath
+
+    def run(self):
+        buildPath = self.getBuildPath()
+        if (buildPath == ""):
+            tdLog.exit("taosd not found!")
+        else:
+            tdLog.info("taosd found in %s" % buildPath)
+        binPath = buildPath+ "/build/bin/"
+
+        os.system("rm -rf tools/taosdemoAllTest/TD-10539/create_taosdemo.py.sql")
+        tdSql.prepare()
+
+        #print("==============taosdemo,#create stable,table; insert table; show table; select table; drop table")
+        self.tsdemo = "tsdemo~!.@#$%^*[]-_=+{,?.}"
+        #this escape character is not support in shell  . include  & () <> | /
+        os.system("%staosdemo -d test -E -m %s -t 10 -n 100 -l 10 -y " % (binPath,self.tsdemo))
+        tdSql.execute("use test ;" )
+        tdSql.query("select count(*) from meters")
+        tdSql.checkData(0, 0, 1000)
+        tdSql.query("show test.tables like 'tsdemo%'" )
+        tdSql.checkRows(10)
+        tdSql.query("show test.tables like '%s_'" %self.tsdemo)
+        tdSql.checkRows(10)
+        tdSql.query("select _block_dist() from `%s1`" %self.tsdemo)
+        tdSql.checkRows(1)
+        tdSql.query("describe test.`%s1` ; " %self.tsdemo)
+        tdSql.checkRows(13)
+        tdSql.query("show create table test.`%s1` ; " %self.tsdemo)
+        tdSql.checkData(0, 0, self.tsdemo+str(1))
+        tdSql.checkData(0, 1, "CREATE TABLE `%s1` USING `meters` TAGS (1,\"beijing\")" %self.tsdemo)
+
+        print("==============drop table\stable")
+        try:
+            tdSql.execute("drop table test.`%s1` ; " %self.tsdemo)
+        except Exception as e:
+            tdLog.exit(e)
+
+        tdSql.error("select * from test.`%s1` ; " %self.tsdemo)
+        tdSql.query("show test.tables like '%s_'" %self.tsdemo)
+        tdSql.checkRows(9)
+
+        try:
+            tdSql.execute("drop table test.meters ")
+        except Exception as e:
+            tdLog.exit(e)
+
+        tdSql.error("select * from test.meters ")
+        tdSql.error("select * from test.`%s2` ; " %self.tsdemo)
+
+        # Exception
+        os.system("%staosdemo -d test -m %s -t 10 -n 100 -l 10 -y " % (binPath,self.tsdemo))
+        tdSql.query("show test.tables ")
+        tdSql.checkRows(0)
+
+        #print("==============taosdemo,#create regular table; insert table; show table; select table; drop table")
+        self.tsdemo = "tsdemo~!.@#$%^*[]-_=+{,?.}"
+        #this escape character is not support in shell  . include  & () <> | /
+        os.system("%staosdemo -N -E -m %s -t 10 -n 100 -l 10 -y " % (binPath,self.tsdemo))
+        tdSql.execute("use test ;" )
+        tdSql.query("select count(*) from `%s1`" %self.tsdemo)
+        tdSql.checkData(0, 0, 100)
+        tdSql.query("show test.tables like 'tsdemo%'" )
+        tdSql.checkRows(10)
+        tdSql.query("show test.tables like '%s_'" %self.tsdemo)
+        tdSql.checkRows(10)
+        tdSql.query("select _block_dist() from `%s1`" %self.tsdemo)
+        tdSql.checkRows(1)
+        tdSql.query("describe test.`%s1` ; " %self.tsdemo)
+        tdSql.checkRows(11)
+        tdSql.query("show create table test.`%s1` ; " %self.tsdemo)
+        tdSql.checkData(0, 0, self.tsdemo+str(1))
+        tdSql.checkData(0, 1, "create table `%s1` (ts TIMESTAMP,c0 FLOAT,c1 INT,c2 INT,c3 INT,c4 INT,c5 INT,c6 INT,c7 INT,c8 INT,c9 INT)" %self.tsdemo)
+
+        print("==============drop table\stable")
+        try:
+            tdSql.execute("drop table test.`%s1` ; " %self.tsdemo)
+        except Exception as e:
+            tdLog.exit(e)
+
+        tdSql.error("select * from test.`%s1` ; " %self.tsdemo)
+        tdSql.query("show test.tables like '%s_'" %self.tsdemo)
+        tdSql.checkRows(9)
+
+        # Exception
+        os.system("%staosdemo -N -m %s -t 10 -n 100 -l 10 -y " % (binPath,self.tsdemo))
+        tdSql.query("show test.tables ")
+        tdSql.checkRows(0)
+
+
+        #print("==============taosdemo——json_yes,#create stable,table; insert table; show table; select table; drop table")
+        os.system("%staosdemo -f tools/taosdemoAllTest/TD-10539/create_taosdemo_yes.json -y " % binPath)
+        tdSql.execute("use dbyes")
+
+        self.tsdemo_stable = "tsdemo_stable~!.@#$%^*[]-_=+{,?.}"
+        self.tsdemo = "tsdemo~!.@#$%^*[]-_=+{,?.}"
+
+        tdSql.query("select count(*) from dbyes.`%s`" %self.tsdemo_stable)
+        tdSql.checkData(0, 0, 1000)
+        tdSql.query("show dbyes.tables like 'tsdemo%'" )
+        tdSql.checkRows(10)
+        tdSql.query("show dbyes.tables like '%s_'" %self.tsdemo)
+        tdSql.checkRows(10)
+        tdSql.query("select _block_dist() from `%s1`" %self.tsdemo)
+        tdSql.checkRows(1)
+        tdSql.query("describe dbyes.`%s1` ; " %self.tsdemo)
+        tdSql.checkRows(13)
+        tdSql.query("show create table dbyes.`%s1` ; " %self.tsdemo)
+        tdSql.checkData(0, 0, self.tsdemo+str(1))
+        tdSql.checkData(0, 1, "CREATE TABLE `%s1` USING `%s` TAGS (1,1)" %(self.tsdemo,self.tsdemo_stable))
+
+        print("==============drop table\stable")
+        try:
+            tdSql.execute("drop table dbyes.`%s1` ; " %self.tsdemo)
+        except Exception as e:
+            tdLog.exit(e)
+
+        tdSql.error("select * from dbyes.`%s1` ; " %self.tsdemo)
+        tdSql.query("show dbyes.tables like '%s_'" %self.tsdemo)
+        tdSql.checkRows(9)
+
+        try:
+            tdSql.execute("drop table dbyes.`%s` ; " %self.tsdemo_stable)
+        except Exception as e:
+            tdLog.exit(e)
+
+        tdSql.error("select * from dbyes.`%s` ; " %self.tsdemo_stable)
+        tdSql.error("select * from dbyes.`%s2` ; " %self.tsdemo)
+       
+        #print("==============taosdemo——json_no,#create stable,table; insert table; show table; select table; drop table")
+
+        assert os.system("%staosdemo -f tools/taosdemoAllTest/TD-10539/create_taosdemo_no.json -y " % binPath) == 0
+        tdSql.query("show dbno.tables ")
+        tdSql.checkRows(0)
+        
+
+    def stop(self):
+        tdSql.close()
+        tdLog.success("%s successfully executed" % __file__)
+
+
+tdCases.addWindows(__file__, TDTestCase())
+tdCases.addLinux(__file__, TDTestCase())
--- a/tests/pytest/tools/taosdemoAllTest/TD-10539/create_taosdemo_no.json
+++ b/tests/pytest/tools/taosdemoAllTest/TD-10539/create_taosdemo_no.json
+{
+    "filetype": "insert",
+    "cfgdir": "/etc/taos",
+    "host": "127.0.0.1",
+    "port": 6030,
+    "user": "root",
+    "password": "taosdata",
+    "thread_count": 10,
+    "thread_count_create_tbl": 10,
+    "result_file": "./insert_res.txt",
+    "confirm_parameter_prompt": "no",
+    "insert_interval": 0,
+    "interlace_rows": 10,
+    "num_of_records_per_req": 1,
+    "max_sql_len": 1024000,
+    "databases": [{
+        "dbinfo": {
+            "name": "dbno",
+            "drop": "yes",
+            "replica": 1,
+            "days": 10,
+            "cache": 50,
+            "blocks": 8,
+            "precision": "ms",
+            "keep": 36500,
+            "minRows": 100,
+            "maxRows": 4096,
+            "comp":2,
+            "walLevel":1,
+            "cachelast":0,
+            "quorum":1,
+            "fsync":3000,
+            "update": 0
+        },
+        "super_tables": [{
+            "name": "meters",
+            "child_table_exists":"no",
+            "childtable_count": 10,
+            "childtable_prefix": "tsdemo~!.@#$%^*[]-_=+{,?.}",
+            "escape_character": "no",
+            "auto_create_table": "no",
+            "batch_create_tbl_num": 1,
+            "data_source": "rand",
+            "insert_mode": "taosc",
+            "insert_rows": 100,
+            "childtable_limit": 0,
+            "childtable_offset":0,
+            "multi_thread_write_one_tbl": "no",
+            "interlace_rows": 0,
+            "insert_interval":0,
+            "max_sql_len": 1024000,
+            "disorder_ratio": 0,
+            "disorder_range": 1000,
+            "timestamp_step": 1,
+            "start_timestamp": "2020-10-01 00:00:00.000",
+            "sample_format": "csv",
+            "sample_file": "",
+            "tags_file": "",
+            "columns": [{"type": "INT","count":9}, {"type": "BINARY", "len": 16, "count":1}],
+            "tags": [{"type": "INT", "count":2}]
+        }]
+    }]
+}
--- a/tests/pytest/tools/taosdemoAllTest/TD-10539/create_taosdemo_yes.json
+++ b/tests/pytest/tools/taosdemoAllTest/TD-10539/create_taosdemo_yes.json
+{
+    "filetype": "insert",
+    "cfgdir": "/etc/taos",
+    "host": "127.0.0.1",
+    "port": 6030,
+    "user": "root",
+    "password": "taosdata",
+    "thread_count": 5,
+    "thread_count_create_tbl": 10,
+    "result_file": "./insert_res.txt",
+    "confirm_parameter_prompt": "no",
+    "insert_interval": 0,
+    "interlace_rows": 10,
+    "num_of_records_per_req": 1,
+    "max_sql_len": 1024000,
+    "databases": [{
+        "dbinfo": {
+            "name": "dbyes",
+            "drop": "yes",
+            "replica": 1,
+            "days": 10,
+            "cache": 50,
+            "blocks": 8,
+            "precision": "ms",
+            "keep": 36500,
+            "minRows": 100,
+            "maxRows": 4096,
+            "comp":2,
+            "walLevel":1,
+            "cachelast":0,
+            "quorum":1,
+            "fsync":3000,
+            "update": 0
+        },
+        "super_tables": [{
+            "name": "tsdemo_stable~!.@#$%^*[]-_=+{,?.}",
+            "child_table_exists":"no",
+            "childtable_count": 10,
+            "childtable_prefix": "tsdemo~!.@#$%^*[]-_=+{,?.}",
+            "escape_character": "yes",
+            "auto_create_table": "no",
+            "batch_create_tbl_num": 1,
+            "data_source": "rand",
+            "insert_mode": "taosc",
+            "insert_rows": 100,
+            "childtable_limit": 0,
+            "childtable_offset":0,
+            "multi_thread_write_one_tbl": "no",
+            "interlace_rows": 0,
+            "insert_interval":0,
+            "max_sql_len": 1024000,
+            "disorder_ratio": 0,
+            "disorder_range": 1000,
+            "timestamp_step": 1,
+            "start_timestamp": "2020-10-01 00:00:00.000",
+            "sample_format": "csv",
+            "sample_file": "",
+            "tags_file": "",
+            "columns": [{"type": "INT","count":9}, {"type": "BINARY", "len": 16, "count":1}],
+            "tags": [{"type": "INT", "count":2}]
+        }]
+    }]
+}
--- a/tests/pytest/tools/taosdemoAllTest/stmt/nsertColumnsAndTagNumLarge4096-stmt.json
+++ b/tests/pytest/tools/taosdemoAllTest/stmt/nsertColumnsAndTagNumLarge4096-stmt.json
--- a/tests/pytest/tools/taosdumpTest3.py
+++ b/tests/pytest/tools/taosdumpTest3.py
@@ -62,8 +62,12 @@ class TDTestCase:
            os.makedirs("./taosdumptest/tmp3")
        if not os.path.exists("./taosdumptest/tmp4"):
            os.makedirs("./taosdumptest/tmp4")
-
-
+        if not os.path.exists("./taosdumptest/tmp5"):
+            os.makedirs("./taosdumptest/tmp5")
+        if not os.path.exists("./taosdumptest/tmp6"):
+            os.makedirs("./taosdumptest/tmp6")
+        if not os.path.exists("./taosdumptest/tmp7"):
+            os.makedirs("./taosdumptest/tmp7")
        buildPath = self.getBuildPath()
        if (buildPath == ""):
            tdLog.exit("taosdump not found!")
@@ -72,6 +76,8 @@ class TDTestCase:
        binPath = buildPath + "/build/bin/"

        # create db1 , one stables and one table ;  create general tables
+        tdSql.execute("drop database if  exists dp1")
+        tdSql.execute("drop database  if exists dp2")
        tdSql.execute("create database if not exists dp1")
        tdSql.execute("use dp1")
        tdSql.execute("create stable st0(ts timestamp, c1 int, c2 nchar(10)) tags(t1 int)")
@@ -82,9 +88,10 @@ class TDTestCase:
        tdSql.execute("create table  if not exists gt1 (ts timestamp, c0 int, c1 double)  ")
        tdSql.execute("insert into gt0 values(1614218412000,637,8.861)")
        tdSql.execute("insert into gt1 values(1614218413000,638,8.862)")
+
        # create db1 , three stables:stb0,include ctables stb0_0 \ stb0_1,stb1 include ctables stb1_0 and stb1_1 
        # \stb3,include ctables stb3_0 and stb3_1 
-        # ; create general three tables gt0 gt1 gt2
+        # create general three tables gt0 gt1 gt2
        tdSql.execute("create database if not  exists dp2")
        tdSql.execute("use dp2")
        tdSql.execute("create stable st0(ts timestamp, c01 int, c02 nchar(10)) tags(t1 int)")
@@ -102,94 +109,188 @@ class TDTestCase:
        tdSql.execute("create table  if not exists gt0 (ts timestamp, c00 int, c01 float)  ")
        tdSql.execute("create table  if not exists gt1 (ts timestamp, c10 int, c11 double)  ")
        tdSql.execute("create table  if not exists gt2 (ts timestamp, c20 int, c21 float)  ")
-        tdSql.execute("insert into gt0 values(1614218412000,8637,78.86155)")
-        tdSql.execute("insert into gt1 values(1614218413000,8638,78.862020199)")
-        tdSql.execute("insert into gt2 values(1614218413000,8639,78.863)")
+        tdSql.execute("insert into gt0 values(1614218412700,8637,78.86155)")
+        tdSql.execute("insert into gt1 values(1614218413800,8638,78.862020199)")
+        tdSql.execute("insert into gt2 values(1614218413900,8639,78.863)")
+
+        # create 
+        tdSql.execute("create database if not exists dp3 precision 'ns'")
+        tdSql.execute("use dp3")
+        tdSql.execute("create stable st0(ts timestamp, c01 int, c02 nchar(10)) tags(t1 int)")
+        tdSql.execute("create table st0_0 using st0 tags(0) st0_1 using st0 tags(1) ")
+        tdSql.execute("insert into st0_0 values(1614218412000000001,8600,'R')(1614218422000000002,8600,'E')")
+        tdSql.execute("insert into st0_1 values(1614218413000000001,8601,'A')(1614218423000000002,8601,'D')")
+

        # tdSql.execute("insert into t0 values(1614218422000,8638,'R')")
        os.system("rm -rf ./taosdumptest/tmp1/*")
        os.system("rm -rf ./taosdumptest/tmp2/*")
        os.system("rm -rf ./taosdumptest/tmp3/*")
        os.system("rm -rf ./taosdumptest/tmp4/*")
+        os.system("rm -rf ./taosdumptest/tmp5/*")
+
        # #  taosdump stable and  general table
-        # os.system("%staosdump  -o ./taosdumptest/tmp1 -D dp1 dp2  " % binPath)
-        # os.system("%staosdump  -o ./taosdumptest/tmp2 dp1 st0 gt0  " % binPath)
-        # os.system("%staosdump  -o ./taosdumptest/tmp3 dp2 st0 st1_0 gt0" % binPath)
-        # os.system("%staosdump  -o ./taosdumptest/tmp4 dp2 st0 st2 gt0 gt2" % binPath)、
+        os.system("%staosdump  -o ./taosdumptest/tmp1 -D dp1,dp2  " % binPath)
+        os.system("%staosdump  -o ./taosdumptest/tmp2 dp1 st0 gt0  " % binPath)
+        os.system("%staosdump  -o ./taosdumptest/tmp3 dp2 st0 st1_0 gt0" % binPath)
+        os.system("%staosdump  -o ./taosdumptest/tmp4 dp2 st0 st2 gt0 gt2" % binPath)
+
+        # verify ns 
+        os.system("%staosdump  -o ./taosdumptest/tmp6 dp3 st0_0" % binPath)
+        assert os.system("%staosdump  -o ./taosdumptest/tmp6 dp3 st0_0 -C ns " % binPath) != 0
+
        # verify -D:--database
-        # os.system("%staosdump --databases dp1 -o ./taosdumptest/tmp3 dp2 st0 st1_0 gt0" % binPath)
-        # os.system("%staosdump --databases dp1,dp2 -o ./taosdumptest/tmp3 " % binPath)
-
-        # #check taosdumptest/tmp1
-        # tdSql.execute("drop database  dp1")
-        # tdSql.execute("drop database  dp2")
-        # os.system("%staosdump -i ./taosdumptest/tmp1 -T 2   " % binPath)
-        # tdSql.execute("use dp1")
-        # tdSql.query("show stables")
-        # tdSql.checkRows(1)
-        # tdSql.query("show tables")
-        # tdSql.checkRows(4)
-        # tdSql.execute("use dp2")
-        # tdSql.query("show stables")
-        # tdSql.checkRows(3)
-        # tdSql.query("show tables")
-        # tdSql.checkRows(9)
-        # tdSql.query("select c01 from gt0")
-        # tdSql.checkData(0,0,78.86155)
-        # tdSql.query("select c11 from gt1")
-        # tdSql.checkData(0, 0, 78.862020199)
-        # tdSql.query("select c21 from gt2")
-        # tdSql.checkData(0, 0, 78.86300)
-
-        # #check taosdumptest/tmp2
-        # tdSql.execute("drop database  dp1")
-        # tdSql.execute("drop database  dp2")
-        # os.system("%staosdump -i ./taosdumptest/tmp2 -T 2   " % binPath)
-        # tdSql.execute("use dp1")
-        # tdSql.query("show stables")
-        # tdSql.checkRows(1)
-        # tdSql.query("show tables")
-        # tdSql.checkRows(3)
-        # tdSql.error("use dp2")
-        # tdSql.query("select c01 from gt0")
-        # tdSql.checkData(0,0,78.86155)
-
-        # #check taosdumptest/tmp3
-        # tdSql.execute("drop database  dp1")
-        # os.system("%staosdump  -i ./taosdumptest/tmp3 -T 2 " % binPath)  
-        # tdSql.execute("use dp2")
-        # tdSql.query("show stables")
-        # tdSql.checkRows(2)
-        # tdSql.query("show tables")
-        # tdSql.checkRows(4)
-        # tdSql.query("select count(*) from st1_0")
-        # tdSql.query("select c01 from gt0")
-        # tdSql.checkData(0,0,78.86155)
-        # tdSql.error("use dp1")
-        # tdSql.error("select count(*) from st2_0")
-        # tdSql.error("select count(*) from gt2")
-
-        # #check taosdumptest/tmp4
-        # tdSql.execute("drop database  dp2")
-        # os.system("%staosdump  -i ./taosdumptest/tmp4 -T 2 " % binPath)  
-        # tdSql.execute("use dp2")
-        # tdSql.query("show stables")
-        # tdSql.checkRows(2)
-        # tdSql.query("show tables")
-        # tdSql.checkRows(6)
-        # tdSql.query("select c21 from gt2")
-        # tdSql.checkData(0, 0, 78.86300)
-        # tdSql.query("select count(*) from st2_0")
-        # tdSql.error("use dp1")
-        # tdSql.error("select count(*) from st1_0")
-        # tdSql.error("select count(*) from gt3")
-        # tdSql.execute("drop database  dp2")
-
-
-        # os.system("rm -rf ./taosdumptest/tmp1")
-        # os.system("rm -rf ./taosdumptest/tmp2")
-        # os.system("rm -rf ./dump_result.txt")
-        # os.system("rm -rf ./db.csv")
+        os.system("%staosdump  -o ./taosdumptest/tmp5  --databases dp1,dp2 " % binPath)
+        # verify mixed -D:--database and dbname tbname
+        assert os.system("%staosdump --databases dp1 -o ./taosdumptest/tmp5 dp2 st0 st1_0 gt0" % binPath) != 0
+
+        #check taosdumptest/tmp1
+        tdSql.execute("drop database  dp1")
+        tdSql.execute("drop database  dp2")
+        os.system("%staosdump -i ./taosdumptest/tmp1 -T 2   " % binPath)
+        tdSql.execute("use dp1")
+        tdSql.query("show stables")
+        tdSql.checkRows(1)
+        tdSql.query("show tables")
+        tdSql.checkRows(4)
+        tdSql.query("select c1 from st0_0 order by ts")
+        tdSql.checkData(0,0,8537)
+        tdSql.query("select c2 from st0_1 order by ts")
+        tdSql.checkData(1,0,"D")
+        tdSql.query("select * from gt0")
+        tdSql.checkData(0,0,'2021-02-25 10:00:12.000')
+        tdSql.checkData(0,1,637)
+        tdSql.execute("use dp2")
+        tdSql.query("show stables")
+        tdSql.checkRows(3)
+        tdSql.query("show tables")
+        tdSql.checkRows(9)
+        tdSql.query("select ts from gt0")
+        tdSql.checkData(0,0,'2021-02-25 10:00:12.700')
+        tdSql.query("select c10 from gt1")
+        tdSql.checkData(0, 0, 8638)
+        tdSql.query("select c20 from gt2")
+        tdSql.checkData(0, 0, 8639)
+
+
+        #check taosdumptest/tmp2
+        tdSql.execute("drop database  dp1")
+        tdSql.execute("drop database  dp2")
+        os.system("%staosdump -i ./taosdumptest/tmp2 -T 2   " % binPath)
+        tdSql.execute("use dp1")
+        tdSql.query("show stables")
+        tdSql.checkRows(1)
+        tdSql.query("show tables")
+        tdSql.checkRows(3)
+        tdSql.query("select c1 from st0_0 order by ts")
+        tdSql.checkData(0,0,8537)
+        tdSql.query("select c2 from st0_1 order by ts")
+        tdSql.checkData(1,0,"D")
+        tdSql.query("select * from gt0")
+        tdSql.checkData(0,0,'2021-02-25 10:00:12.000')
+        tdSql.checkData(0,1,637)
+        tdSql.error("select count(*) from gt1")
+        tdSql.error("use dp2")
+
+
+        #check taosdumptest/tmp3
+        tdSql.execute("drop database  dp1")
+        os.system("%staosdump  -i ./taosdumptest/tmp3 -T 2 " % binPath)  
+        tdSql.execute("use dp2")
+        tdSql.query("show stables")
+        tdSql.checkRows(2)
+        tdSql.query("show tables")
+        tdSql.checkRows(4)
+        tdSql.query("select count(*) from st1_0")
+        tdSql.checkData(0,0,2)
+        tdSql.query("select ts from gt0")
+        tdSql.checkData(0,0,'2021-02-25 10:00:12.700')
+        tdSql.error("use dp1")
+        tdSql.error("select count(*) from st2_0")
+        tdSql.error("select count(*) from gt2")
+
+        #check taosdumptest/tmp4
+        tdSql.execute("drop database  dp2")
+        os.system("%staosdump  -i ./taosdumptest/tmp4 -T 2 " % binPath)  
+        tdSql.execute("use dp2")
+        tdSql.query("show stables")
+        tdSql.checkRows(2)
+        tdSql.query("show tables")
+        tdSql.checkRows(6)
+        tdSql.query("select c20 from gt2")
+        tdSql.checkData(0, 0, 8639)
+        tdSql.query("select count(*) from st0_0")
+        tdSql.checkData(0, 0, 2)
+        tdSql.query("select count(*) from st0_1")
+        tdSql.checkData(0, 0, 2)
+        tdSql.query("select count(*) from st2_1")
+        tdSql.checkData(0, 0, 2)
+        tdSql.query("select count(*) from st2_0")
+        tdSql.checkData(0, 0, 2)
+        tdSql.error("use dp1")
+        tdSql.error("select count(*) from st1_0")
+        tdSql.error("select count(*) from st1_1")
+        tdSql.error("select count(*) from gt3")
+
+
+        #check taosdumptest/tmp5
+        tdSql.execute("drop database  dp2")
+        os.system("%staosdump  -i ./taosdumptest/tmp5 -T 2 " % binPath)  
+        tdSql.execute("use dp2")
+        tdSql.query("show stables")
+        tdSql.checkRows(3)
+        tdSql.query("show tables")
+        tdSql.checkRows(9)
+        tdSql.query("select c20 from gt2")
+        tdSql.checkData(0, 0, 8639)
+        tdSql.query("select count(*) from st0_0")
+        tdSql.checkData(0, 0, 2)
+        tdSql.query("select count(*) from st0_1")
+        tdSql.checkData(0, 0, 2)
+        tdSql.query("select count(*) from st2_1")
+        tdSql.checkData(0, 0, 2)
+        tdSql.query("select count(*) from st2_0")
+        tdSql.checkData(0, 0, 2)
+        tdSql.query("select count(*) from st1_1")
+        tdSql.checkData(0, 0, 2)
+        tdSql.query("select count(*) from st1_0")
+        tdSql.checkData(0, 0, 2)
+        tdSql.execute("use dp1")
+        tdSql.query("show stables")
+        tdSql.checkRows(1)
+        tdSql.query("show tables")
+        tdSql.checkRows(4)
+        tdSql.query("select c1 from st0_0 order by ts")
+        tdSql.checkData(0,0,8537)
+        tdSql.query("select c2 from st0_1 order by ts")
+        tdSql.checkData(1,0,"D")
+        tdSql.query("select * from gt0")
+        tdSql.checkData(0,0,'2021-02-25 10:00:12.000')
+        tdSql.checkData(0,1,637)
+
+       #check taosdumptest/tmp6
+        tdSql.execute("drop database dp1")
+        tdSql.execute("drop database dp2")
+        tdSql.execute("drop database dp3")
+        os.system("%staosdump  -i ./taosdumptest/tmp6 -T 2 " % binPath)  
+        tdSql.execute("use dp3")
+        tdSql.query("show stables")
+        tdSql.checkRows(1)
+        tdSql.query("show tables")
+        tdSql.checkRows(1)
+        tdSql.query("select count(*) from st0_0")
+        tdSql.checkData(0, 0, 2)        
+        tdSql.query("select * from st0 order by ts")
+        tdSql.checkData(0,0,'2021-02-25 10:00:12.000000001')
+        tdSql.checkData(0,1,8600)
+
+        os.system("rm -rf ./taosdumptest/tmp1")
+        os.system("rm -rf ./taosdumptest/tmp2")
+        os.system("rm -rf ./taosdumptest/tmp3")
+        os.system("rm -rf ./taosdumptest/tmp4")
+        os.system("rm -rf ./taosdumptest/tmp5")
+        os.system("rm -rf ./dump_result.txt")
+        os.system("rm -rf ./db.csv")

    def stop(self):
        tdSql.close()

--- a/tests/script/api/batchprepare.c
+++ b/tests/script/api/batchprepare.c
@@ -120,6 +120,10 @@ int stmt_scol_func1(TAOS_STMT *stmt) {
    exit(1);
  }
  
+  int affectedRows = taos_stmt_affected_rows(stmt);
+  if (affectedRows != 100) {
+    printf("failed to insert 100 rows");
+  }
  return 0;
 }


--- a/tests/script/api/stmt.c
+++ b/tests/script/api/stmt.c
@@ -46,6 +46,7 @@ void taos_stmt_init_test() {
  }
  stmt = taos_stmt_init(taos);
  assert(stmt != NULL);
+  assert(taos_stmt_affected_rows(stmt) == 0);
  assert(taos_stmt_close(stmt) == 0);
  printf("finish taos_stmt_init test\n");
 }
@@ -127,6 +128,7 @@ void taos_stmt_set_tbname_test() {
  assert(taos_stmt_set_tbname(stmt, name) == 0);
  free(name);
  free(stmt_sql);
+  assert(taos_stmt_affected_rows(stmt) == 0);
  taos_stmt_close(stmt);
  printf("finish taos_stmt_set_tbname test\n");
 }
@@ -166,6 +168,7 @@ void taos_stmt_set_tbname_tags_test() {
  free(stmt_sql);
  free(name);
  free(tags);
+  assert(taos_stmt_affected_rows(stmt) == 0);
  taos_stmt_close(stmt);
  printf("finish taos_stmt_set_tbname_tags test\n");
 }
@@ -194,8 +197,10 @@ void taos_stmt_set_sub_tbname_test() {
  assert(taos_stmt_set_sub_tbname(stmt, name) != 0);
  sprintf(name, "tb");
  assert(taos_stmt_set_sub_tbname(stmt, name) == 0);
+  assert(taos_stmt_affected_rows(stmt) == 0);
  assert(taos_load_table_info(taos, "super, tb") == 0);
  assert(taos_stmt_set_sub_tbname(stmt, name) == 0);
+  assert(taos_stmt_affected_rows(stmt) == 0);
  free(name);
  free(stmt_sql);
  assert(taos_stmt_close(stmt) == 0);
@@ -238,6 +243,7 @@ void taos_stmt_bind_param_test() {
  assert(taos_stmt_bind_param(stmt, params) != 0);
  assert(taos_stmt_set_tbname(stmt, "super") == 0);
  assert(taos_stmt_bind_param(stmt, params) == 0);
+  assert(taos_stmt_affected_rows(stmt) == 0);
  free(params);
  free(stmt_sql);
  taos_stmt_close(stmt);
@@ -249,6 +255,7 @@ void taos_stmt_bind_single_param_batch_test() {
  TAOS_STMT *      stmt = NULL;
  TAOS_MULTI_BIND *bind = NULL;
  assert(taos_stmt_bind_single_param_batch(stmt, bind, 0) != 0);
+  assert(taos_stmt_affected_rows(stmt) == 0);
  printf("finish taos_stmt_bind_single_param_batch test\n");
 }

@@ -257,6 +264,7 @@ void taos_stmt_bind_param_batch_test() {
  TAOS_STMT *      stmt = NULL;
  TAOS_MULTI_BIND *bind = NULL;
  assert(taos_stmt_bind_param_batch(stmt, bind) != 0);
+  assert(taos_stmt_affected_rows(stmt) == 0);
  printf("finish taos_stmt_bind_param_batch test\n");
 }

@@ -293,10 +301,14 @@ void taos_stmt_add_batch_test() {
  params[1].length = &params[1].buffer_length;
  params[1].is_null = NULL;
  assert(taos_stmt_set_tbname(stmt, "super") == 0);
+  assert(taos_stmt_affected_rows(stmt) == 0);
  assert(taos_stmt_bind_param(stmt, params) == 0);
+  assert(taos_stmt_affected_rows(stmt) == 0);
  assert(taos_stmt_add_batch(stmt) == 0);
+  assert(taos_stmt_affected_rows(stmt) == 0);
  free(params);
  free(stmt_sql);
+  assert(taos_stmt_affected_rows(stmt) == 0);
  assert(taos_stmt_close(stmt) == 0);
  printf("finish taos_stmt_add_batch test\n");
 }
@@ -317,10 +329,13 @@ void taos_stmt_execute_test() {
  stmt = taos_stmt_init(taos);
  assert(stmt != NULL);
  assert(taos_stmt_execute(stmt) != 0);
+  assert(taos_stmt_affected_rows(stmt) == 0);
  char *stmt_sql = calloc(1, 1000);
  sprintf(stmt_sql, "insert into ? values (?,?)");
  assert(taos_stmt_prepare(stmt, stmt_sql, 0) == 0);
+  assert(taos_stmt_affected_rows(stmt) == 0);
  assert(taos_stmt_execute(stmt) != 0);
+  assert(taos_stmt_affected_rows(stmt) == 0);
  TAOS_BIND *params = calloc(2, sizeof(TAOS_BIND));
  int64_t    ts = (int64_t)1591060628000;
  params[0].buffer_type = TSDB_DATA_TYPE_TIMESTAMP;
@@ -335,11 +350,17 @@ void taos_stmt_execute_test() {
  params[1].length = &params[1].buffer_length;
  params[1].is_null = NULL;
  assert(taos_stmt_set_tbname(stmt, "super") == 0);
+  assert(taos_stmt_affected_rows(stmt) == 0);
  assert(taos_stmt_execute(stmt) != 0);
+  assert(taos_stmt_affected_rows(stmt) == 0);
  assert(taos_stmt_bind_param(stmt, params) == 0);
+  assert(taos_stmt_affected_rows(stmt) == 0);
  assert(taos_stmt_execute(stmt) != 0);
+  assert(taos_stmt_affected_rows(stmt) == 0);
  assert(taos_stmt_add_batch(stmt) == 0);
+  assert(taos_stmt_affected_rows(stmt) == 0);
  assert(taos_stmt_execute(stmt) == 0);
+  assert(taos_stmt_affected_rows(stmt) == 1);
  free(params);
  free(stmt_sql);
  assert(taos_stmt_close(stmt) == 0);

--- a/tests/script/api/stmtTest.c
+++ b/tests/script/api/stmtTest.c
@@ -229,6 +229,14 @@ int main(int argc, char *argv[]) {
    PRINT_SUCCESS
    printf("Successfully execute insert statement.\n");

+    int affectedRows = taos_stmt_affected_rows(stmt);
+    printf("Successfully inserted %d rows\n", affectedRows);
+    if (affectedRows != 10) {
+      PRINT_ERROR
+      printf("failed to insert 10 rows\n");
+      exit(EXIT_FAILURE);
+    }
+
    taos_stmt_close(stmt);
    for (int i = 0; i < 10; i++) {
        check_result(taos, i, 1);