diff --git a/CMakeLists.txt b/CMakeLists.txt
index 6f50aca0797cb2d3a2b61796d43137fb417f76ee..584debd75676962df90c73b941fa14d6ed1e641f 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -43,11 +43,14 @@ INCLUDE(cmake/version.inc)
 INCLUDE(cmake/install.inc)
 
 IF (CMAKE_SYSTEM_NAME MATCHES "Linux")
-  SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -pipe -Wall -Wshadow -Werror")
-  SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pipe -Wall -Wshadow -Werror")
+  SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -pipe -Wall")
+  SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pipe -Wall")
 ENDIF ()
 MESSAGE(STATUS "CMAKE_C_FLAGS: ${CMAKE_C_FLAGS}")
 MESSAGE(STATUS "CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}")
+MESSAGE(STATUS "COMMON_FLAGS: ${COMMON_FLAGS}")
+
+
 
 ADD_SUBDIRECTORY(deps)
 ADD_SUBDIRECTORY(src)
diff --git a/cmake/define.inc b/cmake/define.inc
index a15e0aecbb2d30ad2ec7aa1c5761c9d2a40f3323..26c12aeb7660f8364fd826fcd9a6c161c26964a0 100755
--- a/cmake/define.inc
+++ b/cmake/define.inc
@@ -57,7 +57,7 @@ IF (TD_LINUX_64)
   ADD_DEFINITIONS(-D_M_X64)
   ADD_DEFINITIONS(-D_TD_LINUX_64)
   MESSAGE(STATUS "linux64 is defined")
-  SET(COMMON_FLAGS "-std=gnu99 -Wall -Werror -fPIC -gdwarf-2 -msse4.2 -D_FILE_OFFSET_BITS=64 -D_LARGE_FILE")
+  SET(COMMON_FLAGS "-std=gnu99 -Wall -fPIC -gdwarf-2 -msse4.2 -D_FILE_OFFSET_BITS=64 -D_LARGE_FILE")
   ADD_DEFINITIONS(-DUSE_LIBICONV)
 
   IF (JEMALLOC_ENABLED)
@@ -70,7 +70,7 @@ IF (TD_LINUX_32)
   ADD_DEFINITIONS(-D_TD_LINUX_32)
   ADD_DEFINITIONS(-DUSE_LIBICONV)
   MESSAGE(STATUS "linux32 is defined")
-  SET(COMMON_FLAGS "-std=gnu99 -Wall -Werror -fPIC -fsigned-char -munaligned-access -fpack-struct=8 -D_FILE_OFFSET_BITS=64 -D_LARGE_FILE")
+  SET(COMMON_FLAGS "-std=gnu99 -Wall -fPIC -fsigned-char -munaligned-access -fpack-struct=8 -D_FILE_OFFSET_BITS=64 -D_LARGE_FILE")
 ENDIF ()
 
 IF (TD_ARM_64)
@@ -78,7 +78,7 @@ IF (TD_ARM_64)
   ADD_DEFINITIONS(-D_TD_ARM_)
   ADD_DEFINITIONS(-DUSE_LIBICONV)
   MESSAGE(STATUS "arm64 is defined")
-  SET(COMMON_FLAGS "-std=gnu99 -Wall -Werror -fPIC -fsigned-char -fpack-struct=8 -D_FILE_OFFSET_BITS=64 -D_LARGE_FILE")
+  SET(COMMON_FLAGS "-std=gnu99 -Wall -fPIC -fsigned-char -fpack-struct=8 -D_FILE_OFFSET_BITS=64 -D_LARGE_FILE")
 ENDIF ()
 
 IF (TD_ARM_32)
@@ -86,7 +86,7 @@ IF (TD_ARM_32)
   ADD_DEFINITIONS(-D_TD_ARM_)
   ADD_DEFINITIONS(-DUSE_LIBICONV)
   MESSAGE(STATUS "arm32 is defined")
-  SET(COMMON_FLAGS "-std=gnu99 -Wall -Werror -fPIC -fsigned-char -fpack-struct=8 -D_FILE_OFFSET_BITS=64 -D_LARGE_FILE -Wno-pointer-to-int-cast -Wno-int-to-pointer-cast -Wno-incompatible-pointer-types ")
+  SET(COMMON_FLAGS "-std=gnu99 -Wall -fPIC -fsigned-char -fpack-struct=8 -D_FILE_OFFSET_BITS=64 -D_LARGE_FILE -Wno-pointer-to-int-cast -Wno-int-to-pointer-cast -Wno-incompatible-pointer-types ")
 ENDIF ()
 
 IF (TD_MIPS_64)
@@ -94,7 +94,7 @@ IF (TD_MIPS_64)
   ADD_DEFINITIONS(-D_TD_MIPS_64)
   ADD_DEFINITIONS(-DUSE_LIBICONV)
   MESSAGE(STATUS "mips64 is defined")
-  SET(COMMON_FLAGS "-std=gnu99 -Wall -Werror -fPIC -fsigned-char -fpack-struct=8 -D_FILE_OFFSET_BITS=64 -D_LARGE_FILE")
+  SET(COMMON_FLAGS "-std=gnu99 -Wall -fPIC -fsigned-char -fpack-struct=8 -D_FILE_OFFSET_BITS=64 -D_LARGE_FILE")
 ENDIF ()
 
 IF (TD_MIPS_32)
@@ -102,7 +102,7 @@ IF (TD_MIPS_32)
   ADD_DEFINITIONS(-D_TD_MIPS_32)
   ADD_DEFINITIONS(-DUSE_LIBICONV)
   MESSAGE(STATUS "mips32 is defined")
-  SET(COMMON_FLAGS "-std=gnu99 -Wall -Werror -fPIC -D_FILE_OFFSET_BITS=64 -D_LARGE_FILE")
+  SET(COMMON_FLAGS "-std=gnu99 -Wall -fPIC -D_FILE_OFFSET_BITS=64 -D_LARGE_FILE")
 ENDIF ()
 
 IF (TD_APLHINE)
@@ -147,7 +147,7 @@ IF (TD_DARWIN_64)
   ADD_DEFINITIONS(-D_REENTRANT -D__USE_POSIX -D_LIBC_REENTRANT)
   ADD_DEFINITIONS(-DUSE_LIBICONV)
   MESSAGE(STATUS "darwin64 is defined")
-  SET(COMMON_FLAGS "-std=gnu99 -Wall -Werror -Wno-missing-braces -fPIC -msse4.2 -D_FILE_OFFSET_BITS=64 -D_LARGE_FILE")
+  SET(COMMON_FLAGS "-std=gnu99 -Wall -Wno-missing-braces -fPIC -msse4.2 -D_FILE_OFFSET_BITS=64 -D_LARGE_FILE")
   IF (TD_MEMORY_SANITIZER)
     SET(DEBUG_FLAGS "-fsanitize=address -fsanitize=undefined -fno-sanitize-recover=all -fsanitize=float-divide-by-zero -fsanitize=float-cast-overflow -fno-sanitize=null -fno-sanitize=alignment -O0 -g3 -DDEBUG")
   ELSE ()
diff --git a/deps/CMakeLists.txt b/deps/CMakeLists.txt
index 99152c6ce365768b3b782809cca5aacbec1ef7fd..c6d186dd1fc17d2a73b95ad64171a3d2070c3fba 100644
--- a/deps/CMakeLists.txt
+++ b/deps/CMakeLists.txt
@@ -10,6 +10,8 @@ ADD_SUBDIRECTORY(cJson)
 ADD_SUBDIRECTORY(wepoll)
 ADD_SUBDIRECTORY(MsvcLibX)
 ADD_SUBDIRECTORY(rmonotonic)
+ADD_SUBDIRECTORY(SZ)
+
 
 IF (TD_LINUX AND TD_MQTT)
   ADD_SUBDIRECTORY(MQTT-C)
diff --git a/deps/SZ/.dockerignore b/deps/SZ/.dockerignore
new file mode 100644
index 0000000000000000000000000000000000000000..a31aedb8b96ec0b35b11793897a2e845ec1e3bb9
--- /dev/null
+++ b/deps/SZ/.dockerignore
@@ -0,0 +1 @@
+build.*
diff --git a/deps/SZ/.gitignore b/deps/SZ/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..c20ca922a33b81c8ef63aa0e1d5af134be5cdea3
--- /dev/null
+++ b/deps/SZ/.gitignore
@@ -0,0 +1,6 @@
+build
+compile_commands.json
+tags
+CMakeCache.txt
+cmake-build-debug/
+CMakeFiles/
diff --git a/deps/SZ/.travis.yml b/deps/SZ/.travis.yml
new file mode 100644
index 0000000000000000000000000000000000000000..0f058c74b20980adbeab1dbb87149b278957fdc7
--- /dev/null
+++ b/deps/SZ/.travis.yml
@@ -0,0 +1,45 @@
+sudo: false
+
+language: c
+
+before_install:
+- cd test/travis-ci && ./getData.sh && cd -
+
+matrix:
+  include:
+    - dist: xenial
+      os: linux
+      addons:
+        apt:
+          sources:
+            - ubuntu-toolchain-r-test # For gcc 4.9, 5 and 7
+          packages:
+            - gcc-7
+            - gfortran-7
+            - zstd
+            - libzstd1-dev
+            - exuberant-ctags
+            - libcunit1-dev
+            - libnetcdf-dev
+    - osx_image: xcode11
+      os:
osx
+      env: PATH=/usr/local/bin:$PATH
+install:
+  - mkdir build
+  - cd build
+  - |
+    if [[ "${TRAVIS_OS_NAME}" != "linux" ]]; then
+      brew install ctags
+      brew install cunit
+      brew upgrade pkg-config
+    fi
+  - cmake -DCMAKE_INSTALL_PREFIX=$HOME -DBUILD_TESTS=ON -DBUILD_INTEGRATION_TESTS=ON ..
+  - make
+  - make install
+  - make test
+
+script:
+- cd ..
+- ./configure && make
+- cd example && ./test.sh && cd -
+- cd test/travis-ci && ./test.sh && cd -
diff --git a/deps/SZ/CMakeLists.txt b/deps/SZ/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..a265ad1728e0b8166a11a962657434cc4f0c7de7
--- /dev/null
+++ b/deps/SZ/CMakeLists.txt
@@ -0,0 +1,23 @@
+CMAKE_MINIMUM_REQUIRED(VERSION 2.8)
+PROJECT(TDengine)
+
+# Header search paths (directory scope) for the bundled SZ, zlib and zstd sources.
+INCLUDE_DIRECTORIES(sz/include)
+INCLUDE_DIRECTORIES(zlib/)
+INCLUDE_DIRECTORIES(zstd/)
+
+# Collect every source file found in each listed directory into SRC1..SRC8.
+AUX_SOURCE_DIRECTORY(sz/src SRC1)
+AUX_SOURCE_DIRECTORY(zlib/ SRC2)
+AUX_SOURCE_DIRECTORY(zstd/common SRC3)
+AUX_SOURCE_DIRECTORY(zstd/compress SRC4)
+AUX_SOURCE_DIRECTORY(zstd/decompress SRC5)
+AUX_SOURCE_DIRECTORY(zstd/deprecated SRC6)
+AUX_SOURCE_DIRECTORY(zstd/legacy SRC7)
+AUX_SOURCE_DIRECTORY(zstd/dictBuilder SRC8)
+
+# Build all collected sources into a single static library named SZ.
+ADD_LIBRARY(SZ STATIC ${SRC1} ${SRC2} ${SRC3} ${SRC4} ${SRC5} ${SRC6} ${SRC7} ${SRC8})
+
+
+
diff --git a/deps/SZ/sz/CMakeLists.txt b/deps/SZ/sz/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..2c5a9f909922da8221ecaed37707c5fd90a8aaf0
--- /dev/null
+++ b/deps/SZ/sz/CMakeLists.txt
@@ -0,0 +1,94 @@
+add_library (SZ
+  src/ArithmeticCoding.c
+  src/ByteToolkit.c
+  src/CacheTable.c
+  src/callZlib.c
+  src/CompressElement.c
+  src/conf.c
+  src/dataCompression.c
+  src/dictionary.c
+  src/DynamicByteArray.c
+  src/DynamicDoubleArray.c
+  src/DynamicFloatArray.c
+  src/DynamicIntArray.c
+  src/Huffman.c
+  src/iniparser.c
+  src/MultiLevelCacheTable.c
+  src/MultiLevelCacheTableWideInterval.c
+  src/pastri.c
+  src/exafelSZ.c
+  src/rw.c
+  src/rwf.c
+  src/sz.c
+  src/szd_double.c
+  src/szd_double_pwr.c
+  src/szd_double_ts.c
+  src/szd_float.c
+  src/szd_float_pwr.c
+  src/szd_float_ts.c
+  src/szd_int16.c
+  src/szd_int32.c
+  src/szd_int64.c
+  src/szd_int8.c
+  src/sz_double.c
+  src/sz_double_pwr.c
+  src/sz_double_ts.c
+  src/szd_uint16.c
+  src/szd_uint32.c
+  src/szd_uint64.c
+  src/szd_uint8.c
+  src/szf.c
+  src/sz_float.c
+  src/sz_float_pwr.c
+  src/sz_float_ts.c
+  src/sz_int16.c
+  src/sz_int32.c
+  src/sz_int64.c
+  src/sz_int8.c
+  src/sz_omp.c
+  src/sz_uint16.c
+  src/sz_uint32.c
+  src/sz_uint64.c
+  src/sz_uint8.c
+  src/TightDataPointStorageD.c
+  src/TightDataPointStorageF.c
+  src/TightDataPointStorageI.c
+  src/TypeManager.c
+  src/utility.c
+  src/VarSet.c
+  src/sz_stats.c
+)
+
+# Use the in-tree headers while building and the installed ones afterwards.
+# NOTE(review): both generator expressions were reconstructed -- confirm the paths.
+target_include_directories(SZ
+  PUBLIC
+  $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
+  $<INSTALL_INTERFACE:include>
+  )
+
+
+# Extra warnings; NOTE(review): the Debug-only condition was reconstructed -- confirm.
+target_compile_options(SZ
+  PRIVATE "$<$<CONFIG:Debug>:-Wall;-Wextra;-Wpedantic;-Wno-unused-parameter>"
+  )
+
+if(BUILD_PASTRI)
+  target_compile_definitions(SZ PUBLIC HAVE_PASTRI)
+endif()
+if(BUILD_TIMECMPR)
+  target_compile_definitions(SZ PUBLIC HAVE_TIMECMPR)
+endif()
+if(BUILD_RANDOMACCESS)
+  target_compile_definitions(SZ PUBLIC HAVE_RANDOMACCESS)
+endif()
+if(BUILD_FORTRAN)
+  enable_language(Fortran)
+  target_sources(SZ PRIVATE
+    ${CMAKE_CURRENT_SOURCE_DIR}/src/rw_interface.F90
+    ${CMAKE_CURRENT_SOURCE_DIR}/src/sz_interface.F90
+    )
+endif()
+if(BUILD_STATS)
+  target_compile_definitions(SZ PUBLIC HAVE_WRITESTATS)
+endif()
diff --git a/deps/SZ/sz/Makefile.am b/deps/SZ/sz/Makefile.am
new file mode 100644
index 0000000000000000000000000000000000000000..fa784116589529632fb9e8f1f8bd1e68dd64a222
--- /dev/null
+++ b/deps/SZ/sz/Makefile.am
@@ -0,0 +1,93 @@
+#AM_CFLAGS = -I./include -I../zlib
+#LDFLAGS=-fPIC -shared
+
+AUTOMAKE_OPTIONS=foreign
+if FORTRAN
+include_HEADERS=include/MultiLevelCacheTable.h include/MultiLevelCacheTableWideInterval.h include/CacheTable.h include/defines.h\
+	include/CompressElement.h include/DynamicDoubleArray.h include/rw.h include/conf.h include/dataCompression.h\
+	include/dictionary.h include/DynamicFloatArray.h
include/VarSet.h include/sz.h include/Huffman.h include/ByteToolkit.h include/szf.h\
+	include/sz_float.h include/sz_double.h include/callZlib.h include/iniparser.h include/TypeManager.h\
+	include/sz_int8.h include/sz_int16.h include/sz_int32.h include/sz_int64.h include/szd_int8.h include/szd_int16.h include/szd_int32.h include/szd_int64.h\
+	include/sz_uint8.h include/sz_uint16.h include/sz_uint32.h include/sz_uint64.h include/szd_uint8.h include/szd_uint16.h include/szd_uint32.h include/szd_uint64.h\
+	include/sz_float_pwr.h include/sz_double_pwr.h include/szd_float.h include/szd_double.h include/szd_float_pwr.h include/szd_double_pwr.h\
+	include/sz_float_ts.h include/szd_float_ts.h include/sz_double_ts.h include/szd_double_ts.h include/utility.h include/sz_opencl.h\
+	include/DynamicByteArray.h include/DynamicIntArray.h include/TightDataPointStorageI.h include/TightDataPointStorageD.h include/TightDataPointStorageF.h\
+	include/pastriD.h include/pastriF.h include/pastriGeneral.h include/pastri.h include/exafelSZ.h include/ArithmeticCoding.h include/sz_omp.h include/sz_stats.h sz.mod rw.mod
+lib_LTLIBRARIES=libSZ.la
+libSZ_la_CFLAGS=-I./include -I../zlib/ -I../zstd/
+if TIMECMPR
+libSZ_la_CFLAGS+=-DHAVE_TIMECMPR
+endif
+if RANDOMACCESS
+libSZ_la_CFLAGS+=-DHAVE_RANDOMACCESS
+endif
+if OPENMP
+libSZ_la_CFLAGS+=-fopenmp
+endif
+libSZ_la_LDFLAGS = -version-info 2:1:0
+libSZ_la_LIDADD=../zlib/.libs/libzlib.a ../zstd/.libs/libzstd.a
+libSZ_la_SOURCES=src/MultiLevelCacheTable.c src/MultiLevelCacheTableWideInterval.c \
+	src/ByteToolkit.c src/dataCompression.c src/DynamicIntArray.c src/iniparser.c src/szf.c \
+	src/CompressElement.c src/DynamicByteArray.c src/rw.c src/utility.c\
+	src/TightDataPointStorageI.c src/TightDataPointStorageD.c src/TightDataPointStorageF.c \
+	src/conf.c src/DynamicDoubleArray.c src/rwf.c src/TypeManager.c \
+	src/dictionary.c src/DynamicFloatArray.c src/VarSet.c src/callZlib.c src/Huffman.c \
+	src/sz_float.c src/sz_double.c src/sz_int8.c src/sz_int16.c src/sz_int32.c src/sz_int64.c\
+	src/sz_uint8.c src/sz_uint16.c src/sz_uint32.c src/sz_uint64.c src/szd_uint8.c src/szd_uint16.c src/szd_uint32.c src/szd_uint64.c\
+	src/szd_float.c src/szd_double.c src/szd_int8.c src/szd_int16.c src/szd_int32.c src/szd_int64.c src/sz.c\
+	src/sz_float_pwr.c src/sz_double_pwr.c src/szd_float_pwr.c src/szd_double_pwr.c src/ArithmeticCoding.c src/CacheTable.c\
+	src/sz_interface.F90 src/rw_interface.F90 src/exafelSZ.c
+libSZ_la_LINK=$(AM_V_CC)$(LIBTOOL) --tag=FC --mode=link $(FCLD) $(libSZ_la_CFLAGS) -O3 $(libSZ_la_LDFLAGS) -o $(lib_LTLIBRARIES)
+else
+include_HEADERS=include/MultiLevelCacheTable.h include/MultiLevelCacheTableWideInterval.h include/CacheTable.h include/defines.h\
+	include/CompressElement.h include/DynamicDoubleArray.h include/rw.h include/conf.h include/dataCompression.h\
+	include/dictionary.h include/DynamicFloatArray.h include/VarSet.h include/sz.h include/Huffman.h include/ByteToolkit.h\
+	include/sz_float.h include/sz_double.h include/callZlib.h include/iniparser.h include/TypeManager.h\
+	include/sz_int8.h include/sz_int16.h include/sz_int32.h include/sz_int64.h include/szd_int8.h include/szd_int16.h include/szd_int32.h include/szd_int64.h\
+	include/sz_uint8.h include/sz_uint16.h include/sz_uint32.h include/sz_uint64.h include/szd_uint8.h include/szd_uint16.h include/szd_uint32.h include/szd_uint64.h\
+	include/sz_float_pwr.h include/sz_double_pwr.h include/szd_float.h include/szd_double.h include/szd_float_pwr.h include/szd_double_pwr.h\
+	include/sz_float_ts.h include/szd_float_ts.h include/sz_double_ts.h include/szd_double_ts.h include/utility.h include/sz_opencl.h\
+	include/DynamicByteArray.h include/DynamicIntArray.h include/TightDataPointStorageI.h include/TightDataPointStorageD.h include/TightDataPointStorageF.h\
+	include/pastriD.h include/pastriF.h include/pastriGeneral.h include/pastri.h include/exafelSZ.h include/ArithmeticCoding.h include/sz_omp.h include/sz_stats.h
+
+lib_LTLIBRARIES=libSZ.la
+libSZ_la_CFLAGS=-I./include -I../zlib -I../zstd/
+if WRITESTATS
+libSZ_la_CFLAGS+=-DHAVE_WRITESTATS
+endif
+if TIMECMPR
+libSZ_la_CFLAGS+=-DHAVE_TIMECMPR
+endif
+if RANDOMACCESS
+libSZ_la_CFLAGS+=-DHAVE_RANDOMACCESS
+endif
+if OPENMP
+libSZ_la_CFLAGS+=-fopenmp
+endif
+libSZ_la_LDFLAGS = -version-info 1:4:0
+libSZ_la_LIDADD=../zlib/.libs/libzlib.a ../zstd/.libs/libzstd.a
+libSZ_la_SOURCES=src/MultiLevelCacheTable.c src/MultiLevelCacheTableWideInterval.c \
+	src/ByteToolkit.c src/dataCompression.c src/DynamicIntArray.c src/iniparser.c\
+	src/CompressElement.c src/DynamicByteArray.c src/rw.c src/utility.c\
+	src/TightDataPointStorageI.c src/TightDataPointStorageD.c src/TightDataPointStorageF.c \
+	src/conf.c src/DynamicDoubleArray.c src/TypeManager.c \
+	src/dictionary.c src/DynamicFloatArray.c src/VarSet.c src/callZlib.c src/Huffman.c \
+	src/sz_float.c src/sz_double.c src/sz_int8.c src/sz_int16.c src/sz_int32.c src/sz_int64.c\
+	src/sz_uint8.c src/sz_uint16.c src/sz_uint32.c src/sz_uint64.c src/szd_uint8.c src/szd_uint16.c src/szd_uint32.c src/szd_uint64.c\
+	src/szd_float.c src/szd_double.c src/szd_int8.c src/szd_int16.c src/szd_int32.c src/szd_int64.c src/sz.c\
+	src/sz_float_pwr.c src/sz_double_pwr.c src/szd_float_pwr.c src/szd_double_pwr.c src/ArithmeticCoding.c src/exafelSZ.c src/CacheTable.c
+if PASTRI
+libSZ_la_SOURCES+=src/pastri.c
+endif
+if OPENMP
+libSZ_la_SOURCES+=src/sz_omp.c
+endif
+if TIMECMPR
+libSZ_la_SOURCES+=src/sz_float_ts.c src/szd_float_ts.c src/sz_double_ts.c src/szd_double_ts.c
+endif
+if WRITESTATS
+libSZ_la_SOURCES+=src/sz_stats.c
+endif
+
+libSZ_la_LINK= $(AM_V_CC)$(LIBTOOL) --tag=CC --mode=link $(CCLD) $(libSZ_la_CFLAGS) -O3 $(libSZ_la_LDFLAGS) -o $(lib_LTLIBRARIES)
+endif
diff --git a/deps/SZ/sz/Makefile.in b/deps/SZ/sz/Makefile.in
new file mode 100644
index 0000000000000000000000000000000000000000..8209df4a6f40865ef987ce9c0b64710ebfd9e98f
--- /dev/null
+++ b/deps/SZ/sz/Makefile.in
@@ -0,0 +1,1729 @@
+#
Makefile.in generated by automake 1.16.2 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2020 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +#AM_CFLAGS = -I./include -I../zlib +#LDFLAGS=-fPIC -shared + + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + 
done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +@FORTRAN_TRUE@@TIMECMPR_TRUE@am__append_1 = -DHAVE_TIMECMPR +@FORTRAN_TRUE@@RANDOMACCESS_TRUE@am__append_2 = -DHAVE_RANDOMACCESS +@FORTRAN_TRUE@@OPENMP_TRUE@am__append_3 = -fopenmp +@FORTRAN_FALSE@@WRITESTATS_TRUE@am__append_4 = -DHAVE_WRITESTATS +@FORTRAN_FALSE@@TIMECMPR_TRUE@am__append_5 = -DHAVE_TIMECMPR +@FORTRAN_FALSE@@RANDOMACCESS_TRUE@am__append_6 = -DHAVE_RANDOMACCESS +@FORTRAN_FALSE@@OPENMP_TRUE@am__append_7 = -fopenmp +@FORTRAN_FALSE@@PASTRI_TRUE@am__append_8 = src/pastri.c +@FORTRAN_FALSE@@OPENMP_TRUE@am__append_9 = src/sz_omp.c +@FORTRAN_FALSE@@TIMECMPR_TRUE@am__append_10 = src/sz_float_ts.c src/szd_float_ts.c src/sz_double_ts.c src/szd_double_ts.c +@FORTRAN_FALSE@@WRITESTATS_TRUE@am__append_11 = src/sz_stats.c +subdir = sz +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(am__include_HEADERS_DIST) \ + $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) 
f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(libdir)" "$(DESTDIR)$(includedir)" +LTLIBRARIES = $(lib_LTLIBRARIES) +libSZ_la_LIBADD = +am__libSZ_la_SOURCES_DIST = src/MultiLevelCacheTable.c \ + src/MultiLevelCacheTableWideInterval.c src/ByteToolkit.c \ + src/dataCompression.c src/DynamicIntArray.c src/iniparser.c \ + src/CompressElement.c src/DynamicByteArray.c src/rw.c \ + src/utility.c src/TightDataPointStorageI.c \ + src/TightDataPointStorageD.c src/TightDataPointStorageF.c \ + src/conf.c src/DynamicDoubleArray.c src/TypeManager.c \ + src/dictionary.c src/DynamicFloatArray.c src/VarSet.c \ + src/callZlib.c src/Huffman.c src/sz_float.c src/sz_double.c \ + src/sz_int8.c src/sz_int16.c src/sz_int32.c src/sz_int64.c \ + src/sz_uint8.c src/sz_uint16.c src/sz_uint32.c src/sz_uint64.c \ + src/szd_uint8.c src/szd_uint16.c src/szd_uint32.c \ + src/szd_uint64.c src/szd_float.c src/szd_double.c \ + src/szd_int8.c src/szd_int16.c 
src/szd_int32.c src/szd_int64.c \ + src/sz.c src/sz_float_pwr.c src/sz_double_pwr.c \ + src/szd_float_pwr.c src/szd_double_pwr.c \ + src/ArithmeticCoding.c src/exafelSZ.c src/CacheTable.c \ + src/pastri.c src/sz_omp.c src/sz_float_ts.c src/szd_float_ts.c \ + src/sz_double_ts.c src/szd_double_ts.c src/sz_stats.c \ + src/szf.c src/rwf.c src/sz_interface.F90 src/rw_interface.F90 +am__dirstamp = $(am__leading_dot)dirstamp +@FORTRAN_FALSE@@PASTRI_TRUE@am__objects_1 = src/libSZ_la-pastri.lo +@FORTRAN_FALSE@@OPENMP_TRUE@am__objects_2 = src/libSZ_la-sz_omp.lo +@FORTRAN_FALSE@@TIMECMPR_TRUE@am__objects_3 = \ +@FORTRAN_FALSE@@TIMECMPR_TRUE@ src/libSZ_la-sz_float_ts.lo \ +@FORTRAN_FALSE@@TIMECMPR_TRUE@ src/libSZ_la-szd_float_ts.lo \ +@FORTRAN_FALSE@@TIMECMPR_TRUE@ src/libSZ_la-sz_double_ts.lo \ +@FORTRAN_FALSE@@TIMECMPR_TRUE@ src/libSZ_la-szd_double_ts.lo +@FORTRAN_FALSE@@WRITESTATS_TRUE@am__objects_4 = \ +@FORTRAN_FALSE@@WRITESTATS_TRUE@ src/libSZ_la-sz_stats.lo +@FORTRAN_FALSE@am_libSZ_la_OBJECTS = \ +@FORTRAN_FALSE@ src/libSZ_la-MultiLevelCacheTable.lo \ +@FORTRAN_FALSE@ src/libSZ_la-MultiLevelCacheTableWideInterval.lo \ +@FORTRAN_FALSE@ src/libSZ_la-ByteToolkit.lo \ +@FORTRAN_FALSE@ src/libSZ_la-dataCompression.lo \ +@FORTRAN_FALSE@ src/libSZ_la-DynamicIntArray.lo \ +@FORTRAN_FALSE@ src/libSZ_la-iniparser.lo \ +@FORTRAN_FALSE@ src/libSZ_la-CompressElement.lo \ +@FORTRAN_FALSE@ src/libSZ_la-DynamicByteArray.lo \ +@FORTRAN_FALSE@ src/libSZ_la-rw.lo src/libSZ_la-utility.lo \ +@FORTRAN_FALSE@ src/libSZ_la-TightDataPointStorageI.lo \ +@FORTRAN_FALSE@ src/libSZ_la-TightDataPointStorageD.lo \ +@FORTRAN_FALSE@ src/libSZ_la-TightDataPointStorageF.lo \ +@FORTRAN_FALSE@ src/libSZ_la-conf.lo \ +@FORTRAN_FALSE@ src/libSZ_la-DynamicDoubleArray.lo \ +@FORTRAN_FALSE@ src/libSZ_la-TypeManager.lo \ +@FORTRAN_FALSE@ src/libSZ_la-dictionary.lo \ +@FORTRAN_FALSE@ src/libSZ_la-DynamicFloatArray.lo \ +@FORTRAN_FALSE@ src/libSZ_la-VarSet.lo src/libSZ_la-callZlib.lo \ +@FORTRAN_FALSE@ 
src/libSZ_la-Huffman.lo \ +@FORTRAN_FALSE@ src/libSZ_la-sz_float.lo \ +@FORTRAN_FALSE@ src/libSZ_la-sz_double.lo \ +@FORTRAN_FALSE@ src/libSZ_la-sz_int8.lo \ +@FORTRAN_FALSE@ src/libSZ_la-sz_int16.lo \ +@FORTRAN_FALSE@ src/libSZ_la-sz_int32.lo \ +@FORTRAN_FALSE@ src/libSZ_la-sz_int64.lo \ +@FORTRAN_FALSE@ src/libSZ_la-sz_uint8.lo \ +@FORTRAN_FALSE@ src/libSZ_la-sz_uint16.lo \ +@FORTRAN_FALSE@ src/libSZ_la-sz_uint32.lo \ +@FORTRAN_FALSE@ src/libSZ_la-sz_uint64.lo \ +@FORTRAN_FALSE@ src/libSZ_la-szd_uint8.lo \ +@FORTRAN_FALSE@ src/libSZ_la-szd_uint16.lo \ +@FORTRAN_FALSE@ src/libSZ_la-szd_uint32.lo \ +@FORTRAN_FALSE@ src/libSZ_la-szd_uint64.lo \ +@FORTRAN_FALSE@ src/libSZ_la-szd_float.lo \ +@FORTRAN_FALSE@ src/libSZ_la-szd_double.lo \ +@FORTRAN_FALSE@ src/libSZ_la-szd_int8.lo \ +@FORTRAN_FALSE@ src/libSZ_la-szd_int16.lo \ +@FORTRAN_FALSE@ src/libSZ_la-szd_int32.lo \ +@FORTRAN_FALSE@ src/libSZ_la-szd_int64.lo src/libSZ_la-sz.lo \ +@FORTRAN_FALSE@ src/libSZ_la-sz_float_pwr.lo \ +@FORTRAN_FALSE@ src/libSZ_la-sz_double_pwr.lo \ +@FORTRAN_FALSE@ src/libSZ_la-szd_float_pwr.lo \ +@FORTRAN_FALSE@ src/libSZ_la-szd_double_pwr.lo \ +@FORTRAN_FALSE@ src/libSZ_la-ArithmeticCoding.lo \ +@FORTRAN_FALSE@ src/libSZ_la-exafelSZ.lo \ +@FORTRAN_FALSE@ src/libSZ_la-CacheTable.lo $(am__objects_1) \ +@FORTRAN_FALSE@ $(am__objects_2) $(am__objects_3) \ +@FORTRAN_FALSE@ $(am__objects_4) +@FORTRAN_TRUE@am_libSZ_la_OBJECTS = \ +@FORTRAN_TRUE@ src/libSZ_la-MultiLevelCacheTable.lo \ +@FORTRAN_TRUE@ src/libSZ_la-MultiLevelCacheTableWideInterval.lo \ +@FORTRAN_TRUE@ src/libSZ_la-ByteToolkit.lo \ +@FORTRAN_TRUE@ src/libSZ_la-dataCompression.lo \ +@FORTRAN_TRUE@ src/libSZ_la-DynamicIntArray.lo \ +@FORTRAN_TRUE@ src/libSZ_la-iniparser.lo src/libSZ_la-szf.lo \ +@FORTRAN_TRUE@ src/libSZ_la-CompressElement.lo \ +@FORTRAN_TRUE@ src/libSZ_la-DynamicByteArray.lo \ +@FORTRAN_TRUE@ src/libSZ_la-rw.lo src/libSZ_la-utility.lo \ +@FORTRAN_TRUE@ src/libSZ_la-TightDataPointStorageI.lo \ +@FORTRAN_TRUE@ 
src/libSZ_la-TightDataPointStorageD.lo \ +@FORTRAN_TRUE@ src/libSZ_la-TightDataPointStorageF.lo \ +@FORTRAN_TRUE@ src/libSZ_la-conf.lo \ +@FORTRAN_TRUE@ src/libSZ_la-DynamicDoubleArray.lo \ +@FORTRAN_TRUE@ src/libSZ_la-rwf.lo src/libSZ_la-TypeManager.lo \ +@FORTRAN_TRUE@ src/libSZ_la-dictionary.lo \ +@FORTRAN_TRUE@ src/libSZ_la-DynamicFloatArray.lo \ +@FORTRAN_TRUE@ src/libSZ_la-VarSet.lo src/libSZ_la-callZlib.lo \ +@FORTRAN_TRUE@ src/libSZ_la-Huffman.lo src/libSZ_la-sz_float.lo \ +@FORTRAN_TRUE@ src/libSZ_la-sz_double.lo \ +@FORTRAN_TRUE@ src/libSZ_la-sz_int8.lo src/libSZ_la-sz_int16.lo \ +@FORTRAN_TRUE@ src/libSZ_la-sz_int32.lo \ +@FORTRAN_TRUE@ src/libSZ_la-sz_int64.lo \ +@FORTRAN_TRUE@ src/libSZ_la-sz_uint8.lo \ +@FORTRAN_TRUE@ src/libSZ_la-sz_uint16.lo \ +@FORTRAN_TRUE@ src/libSZ_la-sz_uint32.lo \ +@FORTRAN_TRUE@ src/libSZ_la-sz_uint64.lo \ +@FORTRAN_TRUE@ src/libSZ_la-szd_uint8.lo \ +@FORTRAN_TRUE@ src/libSZ_la-szd_uint16.lo \ +@FORTRAN_TRUE@ src/libSZ_la-szd_uint32.lo \ +@FORTRAN_TRUE@ src/libSZ_la-szd_uint64.lo \ +@FORTRAN_TRUE@ src/libSZ_la-szd_float.lo \ +@FORTRAN_TRUE@ src/libSZ_la-szd_double.lo \ +@FORTRAN_TRUE@ src/libSZ_la-szd_int8.lo \ +@FORTRAN_TRUE@ src/libSZ_la-szd_int16.lo \ +@FORTRAN_TRUE@ src/libSZ_la-szd_int32.lo \ +@FORTRAN_TRUE@ src/libSZ_la-szd_int64.lo src/libSZ_la-sz.lo \ +@FORTRAN_TRUE@ src/libSZ_la-sz_float_pwr.lo \ +@FORTRAN_TRUE@ src/libSZ_la-sz_double_pwr.lo \ +@FORTRAN_TRUE@ src/libSZ_la-szd_float_pwr.lo \ +@FORTRAN_TRUE@ src/libSZ_la-szd_double_pwr.lo \ +@FORTRAN_TRUE@ src/libSZ_la-ArithmeticCoding.lo \ +@FORTRAN_TRUE@ src/libSZ_la-CacheTable.lo src/sz_interface.lo \ +@FORTRAN_TRUE@ src/rw_interface.lo src/libSZ_la-exafelSZ.lo \ +@FORTRAN_TRUE@ $(am__objects_1) $(am__objects_2) \ +@FORTRAN_TRUE@ $(am__objects_3) $(am__objects_4) +libSZ_la_OBJECTS = $(am_libSZ_la_OBJECTS) +@FORTRAN_FALSE@am_libSZ_la_rpath = -rpath $(libdir) +@FORTRAN_TRUE@am_libSZ_la_rpath = -rpath $(libdir) +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = 
$(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__maybe_remake_depfiles = depfiles +am__depfiles_remade = src/$(DEPDIR)/libSZ_la-ArithmeticCoding.Plo \ + src/$(DEPDIR)/libSZ_la-ByteToolkit.Plo \ + src/$(DEPDIR)/libSZ_la-CacheTable.Plo \ + src/$(DEPDIR)/libSZ_la-CompressElement.Plo \ + src/$(DEPDIR)/libSZ_la-DynamicByteArray.Plo \ + src/$(DEPDIR)/libSZ_la-DynamicDoubleArray.Plo \ + src/$(DEPDIR)/libSZ_la-DynamicFloatArray.Plo \ + src/$(DEPDIR)/libSZ_la-DynamicIntArray.Plo \ + src/$(DEPDIR)/libSZ_la-Huffman.Plo \ + src/$(DEPDIR)/libSZ_la-MultiLevelCacheTable.Plo \ + src/$(DEPDIR)/libSZ_la-MultiLevelCacheTableWideInterval.Plo \ + src/$(DEPDIR)/libSZ_la-TightDataPointStorageD.Plo \ + src/$(DEPDIR)/libSZ_la-TightDataPointStorageF.Plo \ + src/$(DEPDIR)/libSZ_la-TightDataPointStorageI.Plo \ + src/$(DEPDIR)/libSZ_la-TypeManager.Plo \ + src/$(DEPDIR)/libSZ_la-VarSet.Plo \ + src/$(DEPDIR)/libSZ_la-callZlib.Plo \ + src/$(DEPDIR)/libSZ_la-conf.Plo \ + src/$(DEPDIR)/libSZ_la-dataCompression.Plo \ + src/$(DEPDIR)/libSZ_la-dictionary.Plo \ + src/$(DEPDIR)/libSZ_la-exafelSZ.Plo \ + src/$(DEPDIR)/libSZ_la-iniparser.Plo \ + src/$(DEPDIR)/libSZ_la-pastri.Plo \ + src/$(DEPDIR)/libSZ_la-rw.Plo src/$(DEPDIR)/libSZ_la-rwf.Plo \ + src/$(DEPDIR)/libSZ_la-sz.Plo \ + src/$(DEPDIR)/libSZ_la-sz_double.Plo \ + src/$(DEPDIR)/libSZ_la-sz_double_pwr.Plo \ + src/$(DEPDIR)/libSZ_la-sz_double_ts.Plo \ + src/$(DEPDIR)/libSZ_la-sz_float.Plo \ + src/$(DEPDIR)/libSZ_la-sz_float_pwr.Plo \ + src/$(DEPDIR)/libSZ_la-sz_float_ts.Plo \ + src/$(DEPDIR)/libSZ_la-sz_int16.Plo \ + src/$(DEPDIR)/libSZ_la-sz_int32.Plo \ + src/$(DEPDIR)/libSZ_la-sz_int64.Plo \ + 
src/$(DEPDIR)/libSZ_la-sz_int8.Plo \ + src/$(DEPDIR)/libSZ_la-sz_omp.Plo \ + src/$(DEPDIR)/libSZ_la-sz_stats.Plo \ + src/$(DEPDIR)/libSZ_la-sz_uint16.Plo \ + src/$(DEPDIR)/libSZ_la-sz_uint32.Plo \ + src/$(DEPDIR)/libSZ_la-sz_uint64.Plo \ + src/$(DEPDIR)/libSZ_la-sz_uint8.Plo \ + src/$(DEPDIR)/libSZ_la-szd_double.Plo \ + src/$(DEPDIR)/libSZ_la-szd_double_pwr.Plo \ + src/$(DEPDIR)/libSZ_la-szd_double_ts.Plo \ + src/$(DEPDIR)/libSZ_la-szd_float.Plo \ + src/$(DEPDIR)/libSZ_la-szd_float_pwr.Plo \ + src/$(DEPDIR)/libSZ_la-szd_float_ts.Plo \ + src/$(DEPDIR)/libSZ_la-szd_int16.Plo \ + src/$(DEPDIR)/libSZ_la-szd_int32.Plo \ + src/$(DEPDIR)/libSZ_la-szd_int64.Plo \ + src/$(DEPDIR)/libSZ_la-szd_int8.Plo \ + src/$(DEPDIR)/libSZ_la-szd_uint16.Plo \ + src/$(DEPDIR)/libSZ_la-szd_uint32.Plo \ + src/$(DEPDIR)/libSZ_la-szd_uint64.Plo \ + src/$(DEPDIR)/libSZ_la-szd_uint8.Plo \ + src/$(DEPDIR)/libSZ_la-szf.Plo \ + src/$(DEPDIR)/libSZ_la-utility.Plo +am__mv = mv -f +PPFCCOMPILE = $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +LTPPFCCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_FCFLAGS) $(FCFLAGS) +AM_V_PPFC = $(am__v_PPFC_@AM_V@) +am__v_PPFC_ = $(am__v_PPFC_@AM_DEFAULT_V@) +am__v_PPFC_0 = @echo " PPFC " $@; +am__v_PPFC_1 = +FCLD = $(FC) +FCLINK = $(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(FCLD) $(AM_FCFLAGS) $(FCFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_FCLD = $(am__v_FCLD_@AM_V@) +am__v_FCLD_ = $(am__v_FCLD_@AM_DEFAULT_V@) +am__v_FCLD_0 = @echo " FCLD " $@; +am__v_FCLD_1 = +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ 
+ $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(libSZ_la_SOURCES) +DIST_SOURCES = $(am__libSZ_la_SOURCES_DIST) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__include_HEADERS_DIST = include/MultiLevelCacheTable.h \ + include/MultiLevelCacheTableWideInterval.h \ + include/CacheTable.h include/defines.h \ + include/CompressElement.h include/DynamicDoubleArray.h \ + include/rw.h include/conf.h include/dataCompression.h \ + include/dictionary.h include/DynamicFloatArray.h \ + include/VarSet.h include/sz.h include/Huffman.h \ + include/ByteToolkit.h include/sz_float.h include/sz_double.h \ + include/callZlib.h include/iniparser.h include/TypeManager.h \ + include/sz_int8.h include/sz_int16.h include/sz_int32.h \ + include/sz_int64.h include/szd_int8.h include/szd_int16.h \ + include/szd_int32.h include/szd_int64.h include/sz_uint8.h \ + include/sz_uint16.h include/sz_uint32.h include/sz_uint64.h \ + include/szd_uint8.h include/szd_uint16.h include/szd_uint32.h \ + include/szd_uint64.h include/sz_float_pwr.h \ + include/sz_double_pwr.h include/szd_float.h \ + include/szd_double.h include/szd_float_pwr.h \ + include/szd_double_pwr.h include/sz_float_ts.h \ + include/szd_float_ts.h include/sz_double_ts.h \ + include/szd_double_ts.h include/utility.h include/sz_opencl.h \ + include/DynamicByteArray.h include/DynamicIntArray.h \ + include/TightDataPointStorageI.h \ + 
include/TightDataPointStorageD.h \ + include/TightDataPointStorageF.h include/pastriD.h \ + include/pastriF.h include/pastriGeneral.h include/pastri.h \ + include/exafelSZ.h include/ArithmeticCoding.h include/sz_omp.h \ + include/sz_stats.h include/szf.h sz.mod rw.mod +HEADERS = $(include_HEADERS) +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. +am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/depcomp +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +FC = @FC@ +FCFLAGS = @FCFLAGS@ +FGREP = @FGREP@ +GREP = @GREP@ +GSL_CFLAGS = @GSL_CFLAGS@ +GSL_CONFIG = @GSL_CONFIG@ +GSL_HDR = @GSL_HDR@ +GSL_LIB = @GSL_LIB@ +GSL_LIBS = @GSL_LIBS@ +GSL_STATIC = @GSL_STATIC@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = 
@INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +NM = @NM@ +NMEDIT = @NMEDIT@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENMP_FLAGS = @OPENMP_FLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PASTRI_FLAGS = @PASTRI_FLAGS@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +RANDOMACCESS_FLAGS = @RANDOMACCESS_FLAGS@ +RANLIB = @RANLIB@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +STRIP = @STRIP@ +TIMECMPR_FLAGS = @TIMECMPR_FLAGS@ +VERSION = @VERSION@ +WRITESTATS_FLAGS = @WRITESTATS_FLAGS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_FC = @ac_ct_FC@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir 
= @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +AUTOMAKE_OPTIONS = foreign +@FORTRAN_FALSE@include_HEADERS = include/MultiLevelCacheTable.h include/MultiLevelCacheTableWideInterval.h include/CacheTable.h include/defines.h\ +@FORTRAN_FALSE@ include/CompressElement.h include/DynamicDoubleArray.h include/rw.h include/conf.h include/dataCompression.h\ +@FORTRAN_FALSE@ include/dictionary.h include/DynamicFloatArray.h include/VarSet.h include/sz.h include/Huffman.h include/ByteToolkit.h\ +@FORTRAN_FALSE@ include/sz_float.h include/sz_double.h include/callZlib.h include/iniparser.h include/TypeManager.h\ +@FORTRAN_FALSE@ include/sz_int8.h include/sz_int16.h include/sz_int32.h include/sz_int64.h include/szd_int8.h include/szd_int16.h include/szd_int32.h include/szd_int64.h\ +@FORTRAN_FALSE@ include/sz_uint8.h include/sz_uint16.h include/sz_uint32.h include/sz_uint64.h include/szd_uint8.h include/szd_uint16.h include/szd_uint32.h include/szd_uint64.h\ +@FORTRAN_FALSE@ include/sz_float_pwr.h include/sz_double_pwr.h include/szd_float.h include/szd_double.h include/szd_float_pwr.h include/szd_double_pwr.h\ +@FORTRAN_FALSE@ include/sz_float_ts.h include/szd_float_ts.h include/sz_double_ts.h include/szd_double_ts.h include/utility.h include/sz_opencl.h\ +@FORTRAN_FALSE@ include/DynamicByteArray.h include/DynamicIntArray.h include/TightDataPointStorageI.h include/TightDataPointStorageD.h include/TightDataPointStorageF.h\ +@FORTRAN_FALSE@ include/pastriD.h include/pastriF.h include/pastriGeneral.h include/pastri.h include/exafelSZ.h include/ArithmeticCoding.h include/sz_omp.h include/sz_stats.h + 
+@FORTRAN_TRUE@include_HEADERS = include/MultiLevelCacheTable.h include/MultiLevelCacheTableWideInterval.h include/CacheTable.h include/defines.h\ +@FORTRAN_TRUE@ include/CompressElement.h include/DynamicDoubleArray.h include/rw.h include/conf.h include/dataCompression.h\ +@FORTRAN_TRUE@ include/dictionary.h include/DynamicFloatArray.h include/VarSet.h include/sz.h include/Huffman.h include/ByteToolkit.h include/szf.h\ +@FORTRAN_TRUE@ include/sz_float.h include/sz_double.h include/callZlib.h include/iniparser.h include/TypeManager.h\ +@FORTRAN_TRUE@ include/sz_int8.h include/sz_int16.h include/sz_int32.h include/sz_int64.h include/szd_int8.h include/szd_int16.h include/szd_int32.h include/szd_int64.h\ +@FORTRAN_TRUE@ include/sz_uint8.h include/sz_uint16.h include/sz_uint32.h include/sz_uint64.h include/szd_uint8.h include/szd_uint16.h include/szd_uint32.h include/szd_uint64.h\ +@FORTRAN_TRUE@ include/sz_float_pwr.h include/sz_double_pwr.h include/szd_float.h include/szd_double.h include/szd_float_pwr.h include/szd_double_pwr.h\ +@FORTRAN_TRUE@ include/sz_float_ts.h include/szd_float_ts.h include/sz_double_ts.h include/szd_double_ts.h include/utility.h include/sz_opencl.h\ +@FORTRAN_TRUE@ include/DynamicByteArray.h include/DynamicIntArray.h include/TightDataPointStorageI.h include/TightDataPointStorageD.h include/TightDataPointStorageF.h\ +@FORTRAN_TRUE@ include/pastriD.h include/pastriF.h include/pastriGeneral.h include/pastri.h include/exafelSZ.h include/ArithmeticCoding.h include/sz_omp.h include/sz_stats.h sz.mod rw.mod + +@FORTRAN_FALSE@lib_LTLIBRARIES = libSZ.la +@FORTRAN_TRUE@lib_LTLIBRARIES = libSZ.la +@FORTRAN_FALSE@libSZ_la_CFLAGS = -I./include -I../zlib -I../zstd/ \ +@FORTRAN_FALSE@ $(am__append_4) $(am__append_5) $(am__append_6) \ +@FORTRAN_FALSE@ $(am__append_7) +@FORTRAN_TRUE@libSZ_la_CFLAGS = -I./include -I../zlib/ -I../zstd/ \ +@FORTRAN_TRUE@ $(am__append_1) $(am__append_2) $(am__append_3) \ +@FORTRAN_TRUE@ $(am__append_4) $(am__append_5) 
$(am__append_6) \ +@FORTRAN_TRUE@ $(am__append_7) +@FORTRAN_FALSE@libSZ_la_LDFLAGS = -version-info 1:4:0 +@FORTRAN_TRUE@libSZ_la_LDFLAGS = -version-info 2:1:0 +@FORTRAN_FALSE@libSZ_la_LIDADD = ../zlib/.libs/libzlib.a ../zlib/.libs/libzstd.a +@FORTRAN_TRUE@libSZ_la_LIDADD = ../zlib/.libs/libzlib.a ../zstd/.libs/libzstd.a +@FORTRAN_FALSE@libSZ_la_SOURCES = src/MultiLevelCacheTable.c \ +@FORTRAN_FALSE@ src/MultiLevelCacheTableWideInterval.c \ +@FORTRAN_FALSE@ src/ByteToolkit.c src/dataCompression.c \ +@FORTRAN_FALSE@ src/DynamicIntArray.c src/iniparser.c \ +@FORTRAN_FALSE@ src/CompressElement.c src/DynamicByteArray.c \ +@FORTRAN_FALSE@ src/rw.c src/utility.c \ +@FORTRAN_FALSE@ src/TightDataPointStorageI.c \ +@FORTRAN_FALSE@ src/TightDataPointStorageD.c \ +@FORTRAN_FALSE@ src/TightDataPointStorageF.c src/conf.c \ +@FORTRAN_FALSE@ src/DynamicDoubleArray.c src/TypeManager.c \ +@FORTRAN_FALSE@ src/dictionary.c src/DynamicFloatArray.c \ +@FORTRAN_FALSE@ src/VarSet.c src/callZlib.c src/Huffman.c \ +@FORTRAN_FALSE@ src/sz_float.c src/sz_double.c src/sz_int8.c \ +@FORTRAN_FALSE@ src/sz_int16.c src/sz_int32.c src/sz_int64.c \ +@FORTRAN_FALSE@ src/sz_uint8.c src/sz_uint16.c src/sz_uint32.c \ +@FORTRAN_FALSE@ src/sz_uint64.c src/szd_uint8.c \ +@FORTRAN_FALSE@ src/szd_uint16.c src/szd_uint32.c \ +@FORTRAN_FALSE@ src/szd_uint64.c src/szd_float.c \ +@FORTRAN_FALSE@ src/szd_double.c src/szd_int8.c src/szd_int16.c \ +@FORTRAN_FALSE@ src/szd_int32.c src/szd_int64.c src/sz.c \ +@FORTRAN_FALSE@ src/sz_float_pwr.c src/sz_double_pwr.c \ +@FORTRAN_FALSE@ src/szd_float_pwr.c src/szd_double_pwr.c \ +@FORTRAN_FALSE@ src/ArithmeticCoding.c src/exafelSZ.c \ +@FORTRAN_FALSE@ src/CacheTable.c $(am__append_8) \ +@FORTRAN_FALSE@ $(am__append_9) $(am__append_10) \ +@FORTRAN_FALSE@ $(am__append_11) +@FORTRAN_TRUE@libSZ_la_SOURCES = src/MultiLevelCacheTable.c \ +@FORTRAN_TRUE@ src/MultiLevelCacheTableWideInterval.c \ +@FORTRAN_TRUE@ src/ByteToolkit.c src/dataCompression.c \ +@FORTRAN_TRUE@ 
src/DynamicIntArray.c src/iniparser.c src/szf.c \ +@FORTRAN_TRUE@ src/CompressElement.c src/DynamicByteArray.c \ +@FORTRAN_TRUE@ src/rw.c src/utility.c \ +@FORTRAN_TRUE@ src/TightDataPointStorageI.c \ +@FORTRAN_TRUE@ src/TightDataPointStorageD.c \ +@FORTRAN_TRUE@ src/TightDataPointStorageF.c src/conf.c \ +@FORTRAN_TRUE@ src/DynamicDoubleArray.c src/rwf.c \ +@FORTRAN_TRUE@ src/TypeManager.c src/dictionary.c \ +@FORTRAN_TRUE@ src/DynamicFloatArray.c src/VarSet.c \ +@FORTRAN_TRUE@ src/callZlib.c src/Huffman.c src/sz_float.c \ +@FORTRAN_TRUE@ src/sz_double.c src/sz_int8.c src/sz_int16.c \ +@FORTRAN_TRUE@ src/sz_int32.c src/sz_int64.c src/sz_uint8.c \ +@FORTRAN_TRUE@ src/sz_uint16.c src/sz_uint32.c src/sz_uint64.c \ +@FORTRAN_TRUE@ src/szd_uint8.c src/szd_uint16.c \ +@FORTRAN_TRUE@ src/szd_uint32.c src/szd_uint64.c \ +@FORTRAN_TRUE@ src/szd_float.c src/szd_double.c src/szd_int8.c \ +@FORTRAN_TRUE@ src/szd_int16.c src/szd_int32.c src/szd_int64.c \ +@FORTRAN_TRUE@ src/sz.c src/sz_float_pwr.c src/sz_double_pwr.c \ +@FORTRAN_TRUE@ src/szd_float_pwr.c src/szd_double_pwr.c \ +@FORTRAN_TRUE@ src/ArithmeticCoding.c src/CacheTable.c \ +@FORTRAN_TRUE@ src/sz_interface.F90 src/rw_interface.F90 \ +@FORTRAN_TRUE@ src/exafelSZ.c $(am__append_8) $(am__append_9) \ +@FORTRAN_TRUE@ $(am__append_10) $(am__append_11) +@FORTRAN_FALSE@libSZ_la_LINK = $(AM_V_CC)$(LIBTOOL) --tag=CC --mode=link $(CCLD) $(libSZ_la_CFLAGS) -O3 $(libSZ_la_LDFLAGS) -o $(lib_LTLIBRARIES) +@FORTRAN_TRUE@libSZ_la_LINK = $(AM_V_CC)$(LIBTOOL) --tag=FC --mode=link $(FCLD) $(libSZ_la_CFLAGS) -O3 $(libSZ_la_LDFLAGS) -o $(lib_LTLIBRARIES) +all: all-am + +.SUFFIXES: +.SUFFIXES: .F90 .c .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd 
$(top_srcdir) && $(AUTOMAKE) --foreign sz/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign sz/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +install-libLTLIBRARIES: $(lib_LTLIBRARIES) + @$(NORMAL_INSTALL) + @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \ + list2=; for p in $$list; do \ + if test -f $$p; then \ + list2="$$list2 $$p"; \ + else :; fi; \ + done; \ + test -z "$$list2" || { \ + echo " $(MKDIR_P) '$(DESTDIR)$(libdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(libdir)" || exit 1; \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(libdir)'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(libdir)"; \ + } + +uninstall-libLTLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(libdir)/$$f'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(libdir)/$$f"; \ + done + +clean-libLTLIBRARIES: + -test -z "$(lib_LTLIBRARIES)" || rm -f 
$(lib_LTLIBRARIES) + @list='$(lib_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } +src/$(am__dirstamp): + @$(MKDIR_P) src + @: > src/$(am__dirstamp) +src/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) src/$(DEPDIR) + @: > src/$(DEPDIR)/$(am__dirstamp) +src/libSZ_la-MultiLevelCacheTable.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libSZ_la-MultiLevelCacheTableWideInterval.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libSZ_la-ByteToolkit.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libSZ_la-dataCompression.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libSZ_la-DynamicIntArray.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libSZ_la-iniparser.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libSZ_la-CompressElement.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libSZ_la-DynamicByteArray.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libSZ_la-rw.lo: src/$(am__dirstamp) src/$(DEPDIR)/$(am__dirstamp) +src/libSZ_la-utility.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libSZ_la-TightDataPointStorageI.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libSZ_la-TightDataPointStorageD.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libSZ_la-TightDataPointStorageF.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libSZ_la-conf.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libSZ_la-DynamicDoubleArray.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libSZ_la-TypeManager.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libSZ_la-dictionary.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libSZ_la-DynamicFloatArray.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) 
+src/libSZ_la-VarSet.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libSZ_la-callZlib.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libSZ_la-Huffman.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libSZ_la-sz_float.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libSZ_la-sz_double.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libSZ_la-sz_int8.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libSZ_la-sz_int16.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libSZ_la-sz_int32.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libSZ_la-sz_int64.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libSZ_la-sz_uint8.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libSZ_la-sz_uint16.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libSZ_la-sz_uint32.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libSZ_la-sz_uint64.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libSZ_la-szd_uint8.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libSZ_la-szd_uint16.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libSZ_la-szd_uint32.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libSZ_la-szd_uint64.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libSZ_la-szd_float.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libSZ_la-szd_double.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libSZ_la-szd_int8.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libSZ_la-szd_int16.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libSZ_la-szd_int32.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libSZ_la-szd_int64.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libSZ_la-sz.lo: src/$(am__dirstamp) src/$(DEPDIR)/$(am__dirstamp) +src/libSZ_la-sz_float_pwr.lo: src/$(am__dirstamp) \ 
+ src/$(DEPDIR)/$(am__dirstamp) +src/libSZ_la-sz_double_pwr.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libSZ_la-szd_float_pwr.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libSZ_la-szd_double_pwr.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libSZ_la-ArithmeticCoding.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libSZ_la-exafelSZ.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libSZ_la-CacheTable.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libSZ_la-pastri.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libSZ_la-sz_omp.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libSZ_la-sz_float_ts.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libSZ_la-szd_float_ts.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libSZ_la-sz_double_ts.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libSZ_la-szd_double_ts.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libSZ_la-sz_stats.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libSZ_la-szf.lo: src/$(am__dirstamp) src/$(DEPDIR)/$(am__dirstamp) +src/libSZ_la-rwf.lo: src/$(am__dirstamp) src/$(DEPDIR)/$(am__dirstamp) +src/sz_interface.lo: src/$(am__dirstamp) src/$(DEPDIR)/$(am__dirstamp) +src/rw_interface.lo: src/$(am__dirstamp) src/$(DEPDIR)/$(am__dirstamp) + +libSZ.la: $(libSZ_la_OBJECTS) $(libSZ_la_DEPENDENCIES) $(EXTRA_libSZ_la_DEPENDENCIES) + $(AM_V_GEN)$(libSZ_la_LINK) $(am_libSZ_la_rpath) $(libSZ_la_OBJECTS) $(libSZ_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + -rm -f src/*.$(OBJEXT) + -rm -f src/*.lo + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libSZ_la-ArithmeticCoding.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libSZ_la-ByteToolkit.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ 
@am__quote@src/$(DEPDIR)/libSZ_la-CacheTable.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libSZ_la-CompressElement.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libSZ_la-DynamicByteArray.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libSZ_la-DynamicDoubleArray.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libSZ_la-DynamicFloatArray.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libSZ_la-DynamicIntArray.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libSZ_la-Huffman.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libSZ_la-MultiLevelCacheTable.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libSZ_la-MultiLevelCacheTableWideInterval.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libSZ_la-TightDataPointStorageD.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libSZ_la-TightDataPointStorageF.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libSZ_la-TightDataPointStorageI.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libSZ_la-TypeManager.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libSZ_la-VarSet.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libSZ_la-callZlib.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libSZ_la-conf.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libSZ_la-dataCompression.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libSZ_la-dictionary.Plo@am__quote@ # 
am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libSZ_la-exafelSZ.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libSZ_la-iniparser.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libSZ_la-pastri.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libSZ_la-rw.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libSZ_la-rwf.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libSZ_la-sz.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libSZ_la-sz_double.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libSZ_la-sz_double_pwr.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libSZ_la-sz_double_ts.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libSZ_la-sz_float.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libSZ_la-sz_float_pwr.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libSZ_la-sz_float_ts.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libSZ_la-sz_int16.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libSZ_la-sz_int32.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libSZ_la-sz_int64.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libSZ_la-sz_int8.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libSZ_la-sz_omp.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libSZ_la-sz_stats.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ 
@am__quote@src/$(DEPDIR)/libSZ_la-sz_uint16.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libSZ_la-sz_uint32.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libSZ_la-sz_uint64.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libSZ_la-sz_uint8.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libSZ_la-szd_double.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libSZ_la-szd_double_pwr.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libSZ_la-szd_double_ts.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libSZ_la-szd_float.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libSZ_la-szd_float_pwr.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libSZ_la-szd_float_ts.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libSZ_la-szd_int16.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libSZ_la-szd_int32.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libSZ_la-szd_int64.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libSZ_la-szd_int8.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libSZ_la-szd_uint16.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libSZ_la-szd_uint32.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libSZ_la-szd_uint64.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libSZ_la-szd_uint8.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libSZ_la-szf.Plo@am__quote@ # 
am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libSZ_la-utility.Plo@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.F90.o: + $(AM_V_PPFC)$(PPFCCOMPILE) -c -o $@ $< + +.F90.obj: + $(AM_V_PPFC)$(PPFCCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.F90.lo: + $(AM_V_PPFC)$(LTPPFCCOMPILE) -c -o $@ $< + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + 
+src/libSZ_la-MultiLevelCacheTable.lo: src/MultiLevelCacheTable.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -MT src/libSZ_la-MultiLevelCacheTable.lo -MD -MP -MF src/$(DEPDIR)/libSZ_la-MultiLevelCacheTable.Tpo -c -o src/libSZ_la-MultiLevelCacheTable.lo `test -f 'src/MultiLevelCacheTable.c' || echo '$(srcdir)/'`src/MultiLevelCacheTable.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libSZ_la-MultiLevelCacheTable.Tpo src/$(DEPDIR)/libSZ_la-MultiLevelCacheTable.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/MultiLevelCacheTable.c' object='src/libSZ_la-MultiLevelCacheTable.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -c -o src/libSZ_la-MultiLevelCacheTable.lo `test -f 'src/MultiLevelCacheTable.c' || echo '$(srcdir)/'`src/MultiLevelCacheTable.c + +src/libSZ_la-MultiLevelCacheTableWideInterval.lo: src/MultiLevelCacheTableWideInterval.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -MT src/libSZ_la-MultiLevelCacheTableWideInterval.lo -MD -MP -MF src/$(DEPDIR)/libSZ_la-MultiLevelCacheTableWideInterval.Tpo -c -o src/libSZ_la-MultiLevelCacheTableWideInterval.lo `test -f 'src/MultiLevelCacheTableWideInterval.c' || echo '$(srcdir)/'`src/MultiLevelCacheTableWideInterval.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libSZ_la-MultiLevelCacheTableWideInterval.Tpo 
src/$(DEPDIR)/libSZ_la-MultiLevelCacheTableWideInterval.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/MultiLevelCacheTableWideInterval.c' object='src/libSZ_la-MultiLevelCacheTableWideInterval.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -c -o src/libSZ_la-MultiLevelCacheTableWideInterval.lo `test -f 'src/MultiLevelCacheTableWideInterval.c' || echo '$(srcdir)/'`src/MultiLevelCacheTableWideInterval.c + +src/libSZ_la-ByteToolkit.lo: src/ByteToolkit.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -MT src/libSZ_la-ByteToolkit.lo -MD -MP -MF src/$(DEPDIR)/libSZ_la-ByteToolkit.Tpo -c -o src/libSZ_la-ByteToolkit.lo `test -f 'src/ByteToolkit.c' || echo '$(srcdir)/'`src/ByteToolkit.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libSZ_la-ByteToolkit.Tpo src/$(DEPDIR)/libSZ_la-ByteToolkit.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/ByteToolkit.c' object='src/libSZ_la-ByteToolkit.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -c -o src/libSZ_la-ByteToolkit.lo `test -f 'src/ByteToolkit.c' || echo '$(srcdir)/'`src/ByteToolkit.c + +src/libSZ_la-dataCompression.lo: src/dataCompression.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) 
$(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -MT src/libSZ_la-dataCompression.lo -MD -MP -MF src/$(DEPDIR)/libSZ_la-dataCompression.Tpo -c -o src/libSZ_la-dataCompression.lo `test -f 'src/dataCompression.c' || echo '$(srcdir)/'`src/dataCompression.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libSZ_la-dataCompression.Tpo src/$(DEPDIR)/libSZ_la-dataCompression.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/dataCompression.c' object='src/libSZ_la-dataCompression.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -c -o src/libSZ_la-dataCompression.lo `test -f 'src/dataCompression.c' || echo '$(srcdir)/'`src/dataCompression.c + +src/libSZ_la-DynamicIntArray.lo: src/DynamicIntArray.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -MT src/libSZ_la-DynamicIntArray.lo -MD -MP -MF src/$(DEPDIR)/libSZ_la-DynamicIntArray.Tpo -c -o src/libSZ_la-DynamicIntArray.lo `test -f 'src/DynamicIntArray.c' || echo '$(srcdir)/'`src/DynamicIntArray.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libSZ_la-DynamicIntArray.Tpo src/$(DEPDIR)/libSZ_la-DynamicIntArray.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/DynamicIntArray.c' object='src/libSZ_la-DynamicIntArray.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) 
$(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -c -o src/libSZ_la-DynamicIntArray.lo `test -f 'src/DynamicIntArray.c' || echo '$(srcdir)/'`src/DynamicIntArray.c + +src/libSZ_la-iniparser.lo: src/iniparser.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -MT src/libSZ_la-iniparser.lo -MD -MP -MF src/$(DEPDIR)/libSZ_la-iniparser.Tpo -c -o src/libSZ_la-iniparser.lo `test -f 'src/iniparser.c' || echo '$(srcdir)/'`src/iniparser.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libSZ_la-iniparser.Tpo src/$(DEPDIR)/libSZ_la-iniparser.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/iniparser.c' object='src/libSZ_la-iniparser.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -c -o src/libSZ_la-iniparser.lo `test -f 'src/iniparser.c' || echo '$(srcdir)/'`src/iniparser.c + +src/libSZ_la-CompressElement.lo: src/CompressElement.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -MT src/libSZ_la-CompressElement.lo -MD -MP -MF src/$(DEPDIR)/libSZ_la-CompressElement.Tpo -c -o src/libSZ_la-CompressElement.lo `test -f 'src/CompressElement.c' || echo '$(srcdir)/'`src/CompressElement.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libSZ_la-CompressElement.Tpo src/$(DEPDIR)/libSZ_la-CompressElement.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ 
$(AM_V_CC)source='src/CompressElement.c' object='src/libSZ_la-CompressElement.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -c -o src/libSZ_la-CompressElement.lo `test -f 'src/CompressElement.c' || echo '$(srcdir)/'`src/CompressElement.c + +src/libSZ_la-DynamicByteArray.lo: src/DynamicByteArray.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -MT src/libSZ_la-DynamicByteArray.lo -MD -MP -MF src/$(DEPDIR)/libSZ_la-DynamicByteArray.Tpo -c -o src/libSZ_la-DynamicByteArray.lo `test -f 'src/DynamicByteArray.c' || echo '$(srcdir)/'`src/DynamicByteArray.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libSZ_la-DynamicByteArray.Tpo src/$(DEPDIR)/libSZ_la-DynamicByteArray.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/DynamicByteArray.c' object='src/libSZ_la-DynamicByteArray.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -c -o src/libSZ_la-DynamicByteArray.lo `test -f 'src/DynamicByteArray.c' || echo '$(srcdir)/'`src/DynamicByteArray.c + +src/libSZ_la-rw.lo: src/rw.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -MT 
src/libSZ_la-rw.lo -MD -MP -MF src/$(DEPDIR)/libSZ_la-rw.Tpo -c -o src/libSZ_la-rw.lo `test -f 'src/rw.c' || echo '$(srcdir)/'`src/rw.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libSZ_la-rw.Tpo src/$(DEPDIR)/libSZ_la-rw.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/rw.c' object='src/libSZ_la-rw.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -c -o src/libSZ_la-rw.lo `test -f 'src/rw.c' || echo '$(srcdir)/'`src/rw.c + +src/libSZ_la-utility.lo: src/utility.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -MT src/libSZ_la-utility.lo -MD -MP -MF src/$(DEPDIR)/libSZ_la-utility.Tpo -c -o src/libSZ_la-utility.lo `test -f 'src/utility.c' || echo '$(srcdir)/'`src/utility.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libSZ_la-utility.Tpo src/$(DEPDIR)/libSZ_la-utility.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/utility.c' object='src/libSZ_la-utility.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -c -o src/libSZ_la-utility.lo `test -f 'src/utility.c' || echo '$(srcdir)/'`src/utility.c + +src/libSZ_la-TightDataPointStorageI.lo: src/TightDataPointStorageI.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) 
--mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -MT src/libSZ_la-TightDataPointStorageI.lo -MD -MP -MF src/$(DEPDIR)/libSZ_la-TightDataPointStorageI.Tpo -c -o src/libSZ_la-TightDataPointStorageI.lo `test -f 'src/TightDataPointStorageI.c' || echo '$(srcdir)/'`src/TightDataPointStorageI.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libSZ_la-TightDataPointStorageI.Tpo src/$(DEPDIR)/libSZ_la-TightDataPointStorageI.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/TightDataPointStorageI.c' object='src/libSZ_la-TightDataPointStorageI.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -c -o src/libSZ_la-TightDataPointStorageI.lo `test -f 'src/TightDataPointStorageI.c' || echo '$(srcdir)/'`src/TightDataPointStorageI.c + +src/libSZ_la-TightDataPointStorageD.lo: src/TightDataPointStorageD.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -MT src/libSZ_la-TightDataPointStorageD.lo -MD -MP -MF src/$(DEPDIR)/libSZ_la-TightDataPointStorageD.Tpo -c -o src/libSZ_la-TightDataPointStorageD.lo `test -f 'src/TightDataPointStorageD.c' || echo '$(srcdir)/'`src/TightDataPointStorageD.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libSZ_la-TightDataPointStorageD.Tpo src/$(DEPDIR)/libSZ_la-TightDataPointStorageD.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/TightDataPointStorageD.c' object='src/libSZ_la-TightDataPointStorageD.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) 
$(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -c -o src/libSZ_la-TightDataPointStorageD.lo `test -f 'src/TightDataPointStorageD.c' || echo '$(srcdir)/'`src/TightDataPointStorageD.c + +src/libSZ_la-TightDataPointStorageF.lo: src/TightDataPointStorageF.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -MT src/libSZ_la-TightDataPointStorageF.lo -MD -MP -MF src/$(DEPDIR)/libSZ_la-TightDataPointStorageF.Tpo -c -o src/libSZ_la-TightDataPointStorageF.lo `test -f 'src/TightDataPointStorageF.c' || echo '$(srcdir)/'`src/TightDataPointStorageF.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libSZ_la-TightDataPointStorageF.Tpo src/$(DEPDIR)/libSZ_la-TightDataPointStorageF.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/TightDataPointStorageF.c' object='src/libSZ_la-TightDataPointStorageF.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -c -o src/libSZ_la-TightDataPointStorageF.lo `test -f 'src/TightDataPointStorageF.c' || echo '$(srcdir)/'`src/TightDataPointStorageF.c + +src/libSZ_la-conf.lo: src/conf.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -MT src/libSZ_la-conf.lo -MD -MP -MF 
src/$(DEPDIR)/libSZ_la-conf.Tpo -c -o src/libSZ_la-conf.lo `test -f 'src/conf.c' || echo '$(srcdir)/'`src/conf.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libSZ_la-conf.Tpo src/$(DEPDIR)/libSZ_la-conf.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/conf.c' object='src/libSZ_la-conf.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -c -o src/libSZ_la-conf.lo `test -f 'src/conf.c' || echo '$(srcdir)/'`src/conf.c + +src/libSZ_la-DynamicDoubleArray.lo: src/DynamicDoubleArray.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -MT src/libSZ_la-DynamicDoubleArray.lo -MD -MP -MF src/$(DEPDIR)/libSZ_la-DynamicDoubleArray.Tpo -c -o src/libSZ_la-DynamicDoubleArray.lo `test -f 'src/DynamicDoubleArray.c' || echo '$(srcdir)/'`src/DynamicDoubleArray.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libSZ_la-DynamicDoubleArray.Tpo src/$(DEPDIR)/libSZ_la-DynamicDoubleArray.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/DynamicDoubleArray.c' object='src/libSZ_la-DynamicDoubleArray.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -c -o src/libSZ_la-DynamicDoubleArray.lo `test -f 'src/DynamicDoubleArray.c' || echo '$(srcdir)/'`src/DynamicDoubleArray.c + 
+src/libSZ_la-TypeManager.lo: src/TypeManager.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -MT src/libSZ_la-TypeManager.lo -MD -MP -MF src/$(DEPDIR)/libSZ_la-TypeManager.Tpo -c -o src/libSZ_la-TypeManager.lo `test -f 'src/TypeManager.c' || echo '$(srcdir)/'`src/TypeManager.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libSZ_la-TypeManager.Tpo src/$(DEPDIR)/libSZ_la-TypeManager.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/TypeManager.c' object='src/libSZ_la-TypeManager.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -c -o src/libSZ_la-TypeManager.lo `test -f 'src/TypeManager.c' || echo '$(srcdir)/'`src/TypeManager.c + +src/libSZ_la-dictionary.lo: src/dictionary.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -MT src/libSZ_la-dictionary.lo -MD -MP -MF src/$(DEPDIR)/libSZ_la-dictionary.Tpo -c -o src/libSZ_la-dictionary.lo `test -f 'src/dictionary.c' || echo '$(srcdir)/'`src/dictionary.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libSZ_la-dictionary.Tpo src/$(DEPDIR)/libSZ_la-dictionary.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/dictionary.c' object='src/libSZ_la-dictionary.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC 
$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -c -o src/libSZ_la-dictionary.lo `test -f 'src/dictionary.c' || echo '$(srcdir)/'`src/dictionary.c + +src/libSZ_la-DynamicFloatArray.lo: src/DynamicFloatArray.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -MT src/libSZ_la-DynamicFloatArray.lo -MD -MP -MF src/$(DEPDIR)/libSZ_la-DynamicFloatArray.Tpo -c -o src/libSZ_la-DynamicFloatArray.lo `test -f 'src/DynamicFloatArray.c' || echo '$(srcdir)/'`src/DynamicFloatArray.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libSZ_la-DynamicFloatArray.Tpo src/$(DEPDIR)/libSZ_la-DynamicFloatArray.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/DynamicFloatArray.c' object='src/libSZ_la-DynamicFloatArray.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -c -o src/libSZ_la-DynamicFloatArray.lo `test -f 'src/DynamicFloatArray.c' || echo '$(srcdir)/'`src/DynamicFloatArray.c + +src/libSZ_la-VarSet.lo: src/VarSet.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -MT src/libSZ_la-VarSet.lo -MD -MP -MF src/$(DEPDIR)/libSZ_la-VarSet.Tpo -c -o src/libSZ_la-VarSet.lo `test -f 'src/VarSet.c' || echo '$(srcdir)/'`src/VarSet.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libSZ_la-VarSet.Tpo src/$(DEPDIR)/libSZ_la-VarSet.Plo 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/VarSet.c' object='src/libSZ_la-VarSet.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -c -o src/libSZ_la-VarSet.lo `test -f 'src/VarSet.c' || echo '$(srcdir)/'`src/VarSet.c + +src/libSZ_la-callZlib.lo: src/callZlib.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -MT src/libSZ_la-callZlib.lo -MD -MP -MF src/$(DEPDIR)/libSZ_la-callZlib.Tpo -c -o src/libSZ_la-callZlib.lo `test -f 'src/callZlib.c' || echo '$(srcdir)/'`src/callZlib.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libSZ_la-callZlib.Tpo src/$(DEPDIR)/libSZ_la-callZlib.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/callZlib.c' object='src/libSZ_la-callZlib.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -c -o src/libSZ_la-callZlib.lo `test -f 'src/callZlib.c' || echo '$(srcdir)/'`src/callZlib.c + +src/libSZ_la-Huffman.lo: src/Huffman.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -MT src/libSZ_la-Huffman.lo -MD -MP -MF src/$(DEPDIR)/libSZ_la-Huffman.Tpo -c -o src/libSZ_la-Huffman.lo `test -f 
'src/Huffman.c' || echo '$(srcdir)/'`src/Huffman.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libSZ_la-Huffman.Tpo src/$(DEPDIR)/libSZ_la-Huffman.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/Huffman.c' object='src/libSZ_la-Huffman.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -c -o src/libSZ_la-Huffman.lo `test -f 'src/Huffman.c' || echo '$(srcdir)/'`src/Huffman.c + +src/libSZ_la-sz_float.lo: src/sz_float.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -MT src/libSZ_la-sz_float.lo -MD -MP -MF src/$(DEPDIR)/libSZ_la-sz_float.Tpo -c -o src/libSZ_la-sz_float.lo `test -f 'src/sz_float.c' || echo '$(srcdir)/'`src/sz_float.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libSZ_la-sz_float.Tpo src/$(DEPDIR)/libSZ_la-sz_float.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/sz_float.c' object='src/libSZ_la-sz_float.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -c -o src/libSZ_la-sz_float.lo `test -f 'src/sz_float.c' || echo '$(srcdir)/'`src/sz_float.c + +src/libSZ_la-sz_double.lo: src/sz_double.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) 
$(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -MT src/libSZ_la-sz_double.lo -MD -MP -MF src/$(DEPDIR)/libSZ_la-sz_double.Tpo -c -o src/libSZ_la-sz_double.lo `test -f 'src/sz_double.c' || echo '$(srcdir)/'`src/sz_double.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libSZ_la-sz_double.Tpo src/$(DEPDIR)/libSZ_la-sz_double.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/sz_double.c' object='src/libSZ_la-sz_double.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -c -o src/libSZ_la-sz_double.lo `test -f 'src/sz_double.c' || echo '$(srcdir)/'`src/sz_double.c + +src/libSZ_la-sz_int8.lo: src/sz_int8.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -MT src/libSZ_la-sz_int8.lo -MD -MP -MF src/$(DEPDIR)/libSZ_la-sz_int8.Tpo -c -o src/libSZ_la-sz_int8.lo `test -f 'src/sz_int8.c' || echo '$(srcdir)/'`src/sz_int8.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libSZ_la-sz_int8.Tpo src/$(DEPDIR)/libSZ_la-sz_int8.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/sz_int8.c' object='src/libSZ_la-sz_int8.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -c -o src/libSZ_la-sz_int8.lo `test -f 'src/sz_int8.c' || echo '$(srcdir)/'`src/sz_int8.c + +src/libSZ_la-sz_int16.lo: 
src/sz_int16.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -MT src/libSZ_la-sz_int16.lo -MD -MP -MF src/$(DEPDIR)/libSZ_la-sz_int16.Tpo -c -o src/libSZ_la-sz_int16.lo `test -f 'src/sz_int16.c' || echo '$(srcdir)/'`src/sz_int16.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libSZ_la-sz_int16.Tpo src/$(DEPDIR)/libSZ_la-sz_int16.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/sz_int16.c' object='src/libSZ_la-sz_int16.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -c -o src/libSZ_la-sz_int16.lo `test -f 'src/sz_int16.c' || echo '$(srcdir)/'`src/sz_int16.c + +src/libSZ_la-sz_int32.lo: src/sz_int32.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -MT src/libSZ_la-sz_int32.lo -MD -MP -MF src/$(DEPDIR)/libSZ_la-sz_int32.Tpo -c -o src/libSZ_la-sz_int32.lo `test -f 'src/sz_int32.c' || echo '$(srcdir)/'`src/sz_int32.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libSZ_la-sz_int32.Tpo src/$(DEPDIR)/libSZ_la-sz_int32.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/sz_int32.c' object='src/libSZ_la-sz_int32.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) 
$(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -c -o src/libSZ_la-sz_int32.lo `test -f 'src/sz_int32.c' || echo '$(srcdir)/'`src/sz_int32.c + +src/libSZ_la-sz_int64.lo: src/sz_int64.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -MT src/libSZ_la-sz_int64.lo -MD -MP -MF src/$(DEPDIR)/libSZ_la-sz_int64.Tpo -c -o src/libSZ_la-sz_int64.lo `test -f 'src/sz_int64.c' || echo '$(srcdir)/'`src/sz_int64.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libSZ_la-sz_int64.Tpo src/$(DEPDIR)/libSZ_la-sz_int64.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/sz_int64.c' object='src/libSZ_la-sz_int64.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -c -o src/libSZ_la-sz_int64.lo `test -f 'src/sz_int64.c' || echo '$(srcdir)/'`src/sz_int64.c + +src/libSZ_la-sz_uint8.lo: src/sz_uint8.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -MT src/libSZ_la-sz_uint8.lo -MD -MP -MF src/$(DEPDIR)/libSZ_la-sz_uint8.Tpo -c -o src/libSZ_la-sz_uint8.lo `test -f 'src/sz_uint8.c' || echo '$(srcdir)/'`src/sz_uint8.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libSZ_la-sz_uint8.Tpo src/$(DEPDIR)/libSZ_la-sz_uint8.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/sz_uint8.c' object='src/libSZ_la-sz_uint8.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) 
@AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -c -o src/libSZ_la-sz_uint8.lo `test -f 'src/sz_uint8.c' || echo '$(srcdir)/'`src/sz_uint8.c + +src/libSZ_la-sz_uint16.lo: src/sz_uint16.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -MT src/libSZ_la-sz_uint16.lo -MD -MP -MF src/$(DEPDIR)/libSZ_la-sz_uint16.Tpo -c -o src/libSZ_la-sz_uint16.lo `test -f 'src/sz_uint16.c' || echo '$(srcdir)/'`src/sz_uint16.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libSZ_la-sz_uint16.Tpo src/$(DEPDIR)/libSZ_la-sz_uint16.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/sz_uint16.c' object='src/libSZ_la-sz_uint16.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -c -o src/libSZ_la-sz_uint16.lo `test -f 'src/sz_uint16.c' || echo '$(srcdir)/'`src/sz_uint16.c + +src/libSZ_la-sz_uint32.lo: src/sz_uint32.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -MT src/libSZ_la-sz_uint32.lo -MD -MP -MF src/$(DEPDIR)/libSZ_la-sz_uint32.Tpo -c -o src/libSZ_la-sz_uint32.lo `test -f 'src/sz_uint32.c' || echo '$(srcdir)/'`src/sz_uint32.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libSZ_la-sz_uint32.Tpo src/$(DEPDIR)/libSZ_la-sz_uint32.Plo 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/sz_uint32.c' object='src/libSZ_la-sz_uint32.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -c -o src/libSZ_la-sz_uint32.lo `test -f 'src/sz_uint32.c' || echo '$(srcdir)/'`src/sz_uint32.c + +src/libSZ_la-sz_uint64.lo: src/sz_uint64.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -MT src/libSZ_la-sz_uint64.lo -MD -MP -MF src/$(DEPDIR)/libSZ_la-sz_uint64.Tpo -c -o src/libSZ_la-sz_uint64.lo `test -f 'src/sz_uint64.c' || echo '$(srcdir)/'`src/sz_uint64.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libSZ_la-sz_uint64.Tpo src/$(DEPDIR)/libSZ_la-sz_uint64.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/sz_uint64.c' object='src/libSZ_la-sz_uint64.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -c -o src/libSZ_la-sz_uint64.lo `test -f 'src/sz_uint64.c' || echo '$(srcdir)/'`src/sz_uint64.c + +src/libSZ_la-szd_uint8.lo: src/szd_uint8.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -MT src/libSZ_la-szd_uint8.lo -MD -MP -MF src/$(DEPDIR)/libSZ_la-szd_uint8.Tpo -c -o 
src/libSZ_la-szd_uint8.lo `test -f 'src/szd_uint8.c' || echo '$(srcdir)/'`src/szd_uint8.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libSZ_la-szd_uint8.Tpo src/$(DEPDIR)/libSZ_la-szd_uint8.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/szd_uint8.c' object='src/libSZ_la-szd_uint8.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -c -o src/libSZ_la-szd_uint8.lo `test -f 'src/szd_uint8.c' || echo '$(srcdir)/'`src/szd_uint8.c + +src/libSZ_la-szd_uint16.lo: src/szd_uint16.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -MT src/libSZ_la-szd_uint16.lo -MD -MP -MF src/$(DEPDIR)/libSZ_la-szd_uint16.Tpo -c -o src/libSZ_la-szd_uint16.lo `test -f 'src/szd_uint16.c' || echo '$(srcdir)/'`src/szd_uint16.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libSZ_la-szd_uint16.Tpo src/$(DEPDIR)/libSZ_la-szd_uint16.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/szd_uint16.c' object='src/libSZ_la-szd_uint16.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -c -o src/libSZ_la-szd_uint16.lo `test -f 'src/szd_uint16.c' || echo '$(srcdir)/'`src/szd_uint16.c + +src/libSZ_la-szd_uint32.lo: src/szd_uint32.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC 
$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -MT src/libSZ_la-szd_uint32.lo -MD -MP -MF src/$(DEPDIR)/libSZ_la-szd_uint32.Tpo -c -o src/libSZ_la-szd_uint32.lo `test -f 'src/szd_uint32.c' || echo '$(srcdir)/'`src/szd_uint32.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libSZ_la-szd_uint32.Tpo src/$(DEPDIR)/libSZ_la-szd_uint32.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/szd_uint32.c' object='src/libSZ_la-szd_uint32.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -c -o src/libSZ_la-szd_uint32.lo `test -f 'src/szd_uint32.c' || echo '$(srcdir)/'`src/szd_uint32.c + +src/libSZ_la-szd_uint64.lo: src/szd_uint64.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -MT src/libSZ_la-szd_uint64.lo -MD -MP -MF src/$(DEPDIR)/libSZ_la-szd_uint64.Tpo -c -o src/libSZ_la-szd_uint64.lo `test -f 'src/szd_uint64.c' || echo '$(srcdir)/'`src/szd_uint64.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libSZ_la-szd_uint64.Tpo src/$(DEPDIR)/libSZ_la-szd_uint64.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/szd_uint64.c' object='src/libSZ_la-szd_uint64.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) 
$(libSZ_la_CFLAGS) $(CFLAGS) -c -o src/libSZ_la-szd_uint64.lo `test -f 'src/szd_uint64.c' || echo '$(srcdir)/'`src/szd_uint64.c + +src/libSZ_la-szd_float.lo: src/szd_float.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -MT src/libSZ_la-szd_float.lo -MD -MP -MF src/$(DEPDIR)/libSZ_la-szd_float.Tpo -c -o src/libSZ_la-szd_float.lo `test -f 'src/szd_float.c' || echo '$(srcdir)/'`src/szd_float.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libSZ_la-szd_float.Tpo src/$(DEPDIR)/libSZ_la-szd_float.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/szd_float.c' object='src/libSZ_la-szd_float.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -c -o src/libSZ_la-szd_float.lo `test -f 'src/szd_float.c' || echo '$(srcdir)/'`src/szd_float.c + +src/libSZ_la-szd_double.lo: src/szd_double.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -MT src/libSZ_la-szd_double.lo -MD -MP -MF src/$(DEPDIR)/libSZ_la-szd_double.Tpo -c -o src/libSZ_la-szd_double.lo `test -f 'src/szd_double.c' || echo '$(srcdir)/'`src/szd_double.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libSZ_la-szd_double.Tpo src/$(DEPDIR)/libSZ_la-szd_double.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/szd_double.c' object='src/libSZ_la-szd_double.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) 
$(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -c -o src/libSZ_la-szd_double.lo `test -f 'src/szd_double.c' || echo '$(srcdir)/'`src/szd_double.c + +src/libSZ_la-szd_int8.lo: src/szd_int8.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -MT src/libSZ_la-szd_int8.lo -MD -MP -MF src/$(DEPDIR)/libSZ_la-szd_int8.Tpo -c -o src/libSZ_la-szd_int8.lo `test -f 'src/szd_int8.c' || echo '$(srcdir)/'`src/szd_int8.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libSZ_la-szd_int8.Tpo src/$(DEPDIR)/libSZ_la-szd_int8.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/szd_int8.c' object='src/libSZ_la-szd_int8.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -c -o src/libSZ_la-szd_int8.lo `test -f 'src/szd_int8.c' || echo '$(srcdir)/'`src/szd_int8.c + +src/libSZ_la-szd_int16.lo: src/szd_int16.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -MT src/libSZ_la-szd_int16.lo -MD -MP -MF src/$(DEPDIR)/libSZ_la-szd_int16.Tpo -c -o src/libSZ_la-szd_int16.lo `test -f 'src/szd_int16.c' || echo '$(srcdir)/'`src/szd_int16.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libSZ_la-szd_int16.Tpo src/$(DEPDIR)/libSZ_la-szd_int16.Plo 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/szd_int16.c' object='src/libSZ_la-szd_int16.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -c -o src/libSZ_la-szd_int16.lo `test -f 'src/szd_int16.c' || echo '$(srcdir)/'`src/szd_int16.c + +src/libSZ_la-szd_int32.lo: src/szd_int32.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -MT src/libSZ_la-szd_int32.lo -MD -MP -MF src/$(DEPDIR)/libSZ_la-szd_int32.Tpo -c -o src/libSZ_la-szd_int32.lo `test -f 'src/szd_int32.c' || echo '$(srcdir)/'`src/szd_int32.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libSZ_la-szd_int32.Tpo src/$(DEPDIR)/libSZ_la-szd_int32.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/szd_int32.c' object='src/libSZ_la-szd_int32.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -c -o src/libSZ_la-szd_int32.lo `test -f 'src/szd_int32.c' || echo '$(srcdir)/'`src/szd_int32.c + +src/libSZ_la-szd_int64.lo: src/szd_int64.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -MT src/libSZ_la-szd_int64.lo -MD -MP -MF src/$(DEPDIR)/libSZ_la-szd_int64.Tpo -c -o 
src/libSZ_la-szd_int64.lo `test -f 'src/szd_int64.c' || echo '$(srcdir)/'`src/szd_int64.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libSZ_la-szd_int64.Tpo src/$(DEPDIR)/libSZ_la-szd_int64.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/szd_int64.c' object='src/libSZ_la-szd_int64.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -c -o src/libSZ_la-szd_int64.lo `test -f 'src/szd_int64.c' || echo '$(srcdir)/'`src/szd_int64.c + +src/libSZ_la-sz.lo: src/sz.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -MT src/libSZ_la-sz.lo -MD -MP -MF src/$(DEPDIR)/libSZ_la-sz.Tpo -c -o src/libSZ_la-sz.lo `test -f 'src/sz.c' || echo '$(srcdir)/'`src/sz.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libSZ_la-sz.Tpo src/$(DEPDIR)/libSZ_la-sz.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/sz.c' object='src/libSZ_la-sz.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -c -o src/libSZ_la-sz.lo `test -f 'src/sz.c' || echo '$(srcdir)/'`src/sz.c + +src/libSZ_la-sz_float_pwr.lo: src/sz_float_pwr.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) 
$(libSZ_la_CFLAGS) $(CFLAGS) -MT src/libSZ_la-sz_float_pwr.lo -MD -MP -MF src/$(DEPDIR)/libSZ_la-sz_float_pwr.Tpo -c -o src/libSZ_la-sz_float_pwr.lo `test -f 'src/sz_float_pwr.c' || echo '$(srcdir)/'`src/sz_float_pwr.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libSZ_la-sz_float_pwr.Tpo src/$(DEPDIR)/libSZ_la-sz_float_pwr.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/sz_float_pwr.c' object='src/libSZ_la-sz_float_pwr.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -c -o src/libSZ_la-sz_float_pwr.lo `test -f 'src/sz_float_pwr.c' || echo '$(srcdir)/'`src/sz_float_pwr.c + +src/libSZ_la-sz_double_pwr.lo: src/sz_double_pwr.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -MT src/libSZ_la-sz_double_pwr.lo -MD -MP -MF src/$(DEPDIR)/libSZ_la-sz_double_pwr.Tpo -c -o src/libSZ_la-sz_double_pwr.lo `test -f 'src/sz_double_pwr.c' || echo '$(srcdir)/'`src/sz_double_pwr.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libSZ_la-sz_double_pwr.Tpo src/$(DEPDIR)/libSZ_la-sz_double_pwr.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/sz_double_pwr.c' object='src/libSZ_la-sz_double_pwr.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -c -o src/libSZ_la-sz_double_pwr.lo `test 
-f 'src/sz_double_pwr.c' || echo '$(srcdir)/'`src/sz_double_pwr.c + +src/libSZ_la-szd_float_pwr.lo: src/szd_float_pwr.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -MT src/libSZ_la-szd_float_pwr.lo -MD -MP -MF src/$(DEPDIR)/libSZ_la-szd_float_pwr.Tpo -c -o src/libSZ_la-szd_float_pwr.lo `test -f 'src/szd_float_pwr.c' || echo '$(srcdir)/'`src/szd_float_pwr.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libSZ_la-szd_float_pwr.Tpo src/$(DEPDIR)/libSZ_la-szd_float_pwr.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/szd_float_pwr.c' object='src/libSZ_la-szd_float_pwr.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -c -o src/libSZ_la-szd_float_pwr.lo `test -f 'src/szd_float_pwr.c' || echo '$(srcdir)/'`src/szd_float_pwr.c + +src/libSZ_la-szd_double_pwr.lo: src/szd_double_pwr.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -MT src/libSZ_la-szd_double_pwr.lo -MD -MP -MF src/$(DEPDIR)/libSZ_la-szd_double_pwr.Tpo -c -o src/libSZ_la-szd_double_pwr.lo `test -f 'src/szd_double_pwr.c' || echo '$(srcdir)/'`src/szd_double_pwr.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libSZ_la-szd_double_pwr.Tpo src/$(DEPDIR)/libSZ_la-szd_double_pwr.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/szd_double_pwr.c' object='src/libSZ_la-szd_double_pwr.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ 
DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -c -o src/libSZ_la-szd_double_pwr.lo `test -f 'src/szd_double_pwr.c' || echo '$(srcdir)/'`src/szd_double_pwr.c + +src/libSZ_la-ArithmeticCoding.lo: src/ArithmeticCoding.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -MT src/libSZ_la-ArithmeticCoding.lo -MD -MP -MF src/$(DEPDIR)/libSZ_la-ArithmeticCoding.Tpo -c -o src/libSZ_la-ArithmeticCoding.lo `test -f 'src/ArithmeticCoding.c' || echo '$(srcdir)/'`src/ArithmeticCoding.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libSZ_la-ArithmeticCoding.Tpo src/$(DEPDIR)/libSZ_la-ArithmeticCoding.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/ArithmeticCoding.c' object='src/libSZ_la-ArithmeticCoding.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -c -o src/libSZ_la-ArithmeticCoding.lo `test -f 'src/ArithmeticCoding.c' || echo '$(srcdir)/'`src/ArithmeticCoding.c + +src/libSZ_la-exafelSZ.lo: src/exafelSZ.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -MT src/libSZ_la-exafelSZ.lo -MD -MP -MF src/$(DEPDIR)/libSZ_la-exafelSZ.Tpo -c -o src/libSZ_la-exafelSZ.lo `test -f 'src/exafelSZ.c' || echo 
'$(srcdir)/'`src/exafelSZ.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libSZ_la-exafelSZ.Tpo src/$(DEPDIR)/libSZ_la-exafelSZ.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/exafelSZ.c' object='src/libSZ_la-exafelSZ.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -c -o src/libSZ_la-exafelSZ.lo `test -f 'src/exafelSZ.c' || echo '$(srcdir)/'`src/exafelSZ.c + +src/libSZ_la-CacheTable.lo: src/CacheTable.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -MT src/libSZ_la-CacheTable.lo -MD -MP -MF src/$(DEPDIR)/libSZ_la-CacheTable.Tpo -c -o src/libSZ_la-CacheTable.lo `test -f 'src/CacheTable.c' || echo '$(srcdir)/'`src/CacheTable.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libSZ_la-CacheTable.Tpo src/$(DEPDIR)/libSZ_la-CacheTable.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/CacheTable.c' object='src/libSZ_la-CacheTable.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -c -o src/libSZ_la-CacheTable.lo `test -f 'src/CacheTable.c' || echo '$(srcdir)/'`src/CacheTable.c + +src/libSZ_la-pastri.lo: src/pastri.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) 
$(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -MT src/libSZ_la-pastri.lo -MD -MP -MF src/$(DEPDIR)/libSZ_la-pastri.Tpo -c -o src/libSZ_la-pastri.lo `test -f 'src/pastri.c' || echo '$(srcdir)/'`src/pastri.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libSZ_la-pastri.Tpo src/$(DEPDIR)/libSZ_la-pastri.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pastri.c' object='src/libSZ_la-pastri.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -c -o src/libSZ_la-pastri.lo `test -f 'src/pastri.c' || echo '$(srcdir)/'`src/pastri.c + +src/libSZ_la-sz_omp.lo: src/sz_omp.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -MT src/libSZ_la-sz_omp.lo -MD -MP -MF src/$(DEPDIR)/libSZ_la-sz_omp.Tpo -c -o src/libSZ_la-sz_omp.lo `test -f 'src/sz_omp.c' || echo '$(srcdir)/'`src/sz_omp.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libSZ_la-sz_omp.Tpo src/$(DEPDIR)/libSZ_la-sz_omp.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/sz_omp.c' object='src/libSZ_la-sz_omp.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -c -o src/libSZ_la-sz_omp.lo `test -f 'src/sz_omp.c' || echo '$(srcdir)/'`src/sz_omp.c + +src/libSZ_la-sz_float_ts.lo: src/sz_float_ts.c 
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -MT src/libSZ_la-sz_float_ts.lo -MD -MP -MF src/$(DEPDIR)/libSZ_la-sz_float_ts.Tpo -c -o src/libSZ_la-sz_float_ts.lo `test -f 'src/sz_float_ts.c' || echo '$(srcdir)/'`src/sz_float_ts.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libSZ_la-sz_float_ts.Tpo src/$(DEPDIR)/libSZ_la-sz_float_ts.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/sz_float_ts.c' object='src/libSZ_la-sz_float_ts.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -c -o src/libSZ_la-sz_float_ts.lo `test -f 'src/sz_float_ts.c' || echo '$(srcdir)/'`src/sz_float_ts.c + +src/libSZ_la-szd_float_ts.lo: src/szd_float_ts.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -MT src/libSZ_la-szd_float_ts.lo -MD -MP -MF src/$(DEPDIR)/libSZ_la-szd_float_ts.Tpo -c -o src/libSZ_la-szd_float_ts.lo `test -f 'src/szd_float_ts.c' || echo '$(srcdir)/'`src/szd_float_ts.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libSZ_la-szd_float_ts.Tpo src/$(DEPDIR)/libSZ_la-szd_float_ts.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/szd_float_ts.c' object='src/libSZ_la-szd_float_ts.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) 
$(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -c -o src/libSZ_la-szd_float_ts.lo `test -f 'src/szd_float_ts.c' || echo '$(srcdir)/'`src/szd_float_ts.c + +src/libSZ_la-sz_double_ts.lo: src/sz_double_ts.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -MT src/libSZ_la-sz_double_ts.lo -MD -MP -MF src/$(DEPDIR)/libSZ_la-sz_double_ts.Tpo -c -o src/libSZ_la-sz_double_ts.lo `test -f 'src/sz_double_ts.c' || echo '$(srcdir)/'`src/sz_double_ts.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libSZ_la-sz_double_ts.Tpo src/$(DEPDIR)/libSZ_la-sz_double_ts.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/sz_double_ts.c' object='src/libSZ_la-sz_double_ts.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -c -o src/libSZ_la-sz_double_ts.lo `test -f 'src/sz_double_ts.c' || echo '$(srcdir)/'`src/sz_double_ts.c + +src/libSZ_la-szd_double_ts.lo: src/szd_double_ts.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -MT src/libSZ_la-szd_double_ts.lo -MD -MP -MF src/$(DEPDIR)/libSZ_la-szd_double_ts.Tpo -c -o src/libSZ_la-szd_double_ts.lo `test -f 'src/szd_double_ts.c' || echo '$(srcdir)/'`src/szd_double_ts.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libSZ_la-szd_double_ts.Tpo src/$(DEPDIR)/libSZ_la-szd_double_ts.Plo 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/szd_double_ts.c' object='src/libSZ_la-szd_double_ts.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -c -o src/libSZ_la-szd_double_ts.lo `test -f 'src/szd_double_ts.c' || echo '$(srcdir)/'`src/szd_double_ts.c + +src/libSZ_la-sz_stats.lo: src/sz_stats.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -MT src/libSZ_la-sz_stats.lo -MD -MP -MF src/$(DEPDIR)/libSZ_la-sz_stats.Tpo -c -o src/libSZ_la-sz_stats.lo `test -f 'src/sz_stats.c' || echo '$(srcdir)/'`src/sz_stats.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libSZ_la-sz_stats.Tpo src/$(DEPDIR)/libSZ_la-sz_stats.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/sz_stats.c' object='src/libSZ_la-sz_stats.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -c -o src/libSZ_la-sz_stats.lo `test -f 'src/sz_stats.c' || echo '$(srcdir)/'`src/sz_stats.c + +src/libSZ_la-szf.lo: src/szf.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -MT src/libSZ_la-szf.lo -MD -MP -MF src/$(DEPDIR)/libSZ_la-szf.Tpo -c -o src/libSZ_la-szf.lo 
`test -f 'src/szf.c' || echo '$(srcdir)/'`src/szf.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libSZ_la-szf.Tpo src/$(DEPDIR)/libSZ_la-szf.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/szf.c' object='src/libSZ_la-szf.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -c -o src/libSZ_la-szf.lo `test -f 'src/szf.c' || echo '$(srcdir)/'`src/szf.c + +src/libSZ_la-rwf.lo: src/rwf.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -MT src/libSZ_la-rwf.lo -MD -MP -MF src/$(DEPDIR)/libSZ_la-rwf.Tpo -c -o src/libSZ_la-rwf.lo `test -f 'src/rwf.c' || echo '$(srcdir)/'`src/rwf.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libSZ_la-rwf.Tpo src/$(DEPDIR)/libSZ_la-rwf.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/rwf.c' object='src/libSZ_la-rwf.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libSZ_la_CFLAGS) $(CFLAGS) -c -o src/libSZ_la-rwf.lo `test -f 'src/rwf.c' || echo '$(srcdir)/'`src/rwf.c + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + -rm -rf src/.libs src/_libs +install-includeHEADERS: $(include_HEADERS) + @$(NORMAL_INSTALL) + @list='$(include_HEADERS)'; test -n "$(includedir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) 
'$(DESTDIR)$(includedir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(includedir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(includedir)'"; \ + $(INSTALL_HEADER) $$files "$(DESTDIR)$(includedir)" || exit $$?; \ + done + +uninstall-includeHEADERS: + @$(NORMAL_UNINSTALL) + @list='$(include_HEADERS)'; test -n "$(includedir)" || list=; \ + files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ + dir='$(DESTDIR)$(includedir)'; $(am__uninstall_files_from_dir) + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(BUILT_SOURCES) + $(MAKE) 
$(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(LTLIBRARIES) $(HEADERS) +installdirs: + for dir in "$(DESTDIR)$(libdir)" "$(DESTDIR)$(includedir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + 
"INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + -rm -f src/$(DEPDIR)/$(am__dirstamp) + -rm -f src/$(am__dirstamp) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-am + +clean-am: clean-generic clean-libLTLIBRARIES clean-libtool \ + mostlyclean-am + +distclean: distclean-am + -rm -f src/$(DEPDIR)/libSZ_la-ArithmeticCoding.Plo + -rm -f src/$(DEPDIR)/libSZ_la-ByteToolkit.Plo + -rm -f src/$(DEPDIR)/libSZ_la-CacheTable.Plo + -rm -f src/$(DEPDIR)/libSZ_la-CompressElement.Plo + -rm -f src/$(DEPDIR)/libSZ_la-DynamicByteArray.Plo + -rm -f src/$(DEPDIR)/libSZ_la-DynamicDoubleArray.Plo + -rm -f src/$(DEPDIR)/libSZ_la-DynamicFloatArray.Plo + -rm -f src/$(DEPDIR)/libSZ_la-DynamicIntArray.Plo + -rm -f src/$(DEPDIR)/libSZ_la-Huffman.Plo + -rm -f src/$(DEPDIR)/libSZ_la-MultiLevelCacheTable.Plo + -rm -f src/$(DEPDIR)/libSZ_la-MultiLevelCacheTableWideInterval.Plo + -rm -f src/$(DEPDIR)/libSZ_la-TightDataPointStorageD.Plo + -rm -f src/$(DEPDIR)/libSZ_la-TightDataPointStorageF.Plo + -rm -f src/$(DEPDIR)/libSZ_la-TightDataPointStorageI.Plo + -rm -f src/$(DEPDIR)/libSZ_la-TypeManager.Plo + -rm -f src/$(DEPDIR)/libSZ_la-VarSet.Plo + -rm -f src/$(DEPDIR)/libSZ_la-callZlib.Plo + -rm -f src/$(DEPDIR)/libSZ_la-conf.Plo + -rm -f src/$(DEPDIR)/libSZ_la-dataCompression.Plo + -rm -f src/$(DEPDIR)/libSZ_la-dictionary.Plo + -rm -f src/$(DEPDIR)/libSZ_la-exafelSZ.Plo + -rm -f src/$(DEPDIR)/libSZ_la-iniparser.Plo + -rm -f src/$(DEPDIR)/libSZ_la-pastri.Plo + -rm -f src/$(DEPDIR)/libSZ_la-rw.Plo + -rm -f src/$(DEPDIR)/libSZ_la-rwf.Plo + -rm -f src/$(DEPDIR)/libSZ_la-sz.Plo + -rm -f src/$(DEPDIR)/libSZ_la-sz_double.Plo + -rm -f 
src/$(DEPDIR)/libSZ_la-sz_double_pwr.Plo + -rm -f src/$(DEPDIR)/libSZ_la-sz_double_ts.Plo + -rm -f src/$(DEPDIR)/libSZ_la-sz_float.Plo + -rm -f src/$(DEPDIR)/libSZ_la-sz_float_pwr.Plo + -rm -f src/$(DEPDIR)/libSZ_la-sz_float_ts.Plo + -rm -f src/$(DEPDIR)/libSZ_la-sz_int16.Plo + -rm -f src/$(DEPDIR)/libSZ_la-sz_int32.Plo + -rm -f src/$(DEPDIR)/libSZ_la-sz_int64.Plo + -rm -f src/$(DEPDIR)/libSZ_la-sz_int8.Plo + -rm -f src/$(DEPDIR)/libSZ_la-sz_omp.Plo + -rm -f src/$(DEPDIR)/libSZ_la-sz_stats.Plo + -rm -f src/$(DEPDIR)/libSZ_la-sz_uint16.Plo + -rm -f src/$(DEPDIR)/libSZ_la-sz_uint32.Plo + -rm -f src/$(DEPDIR)/libSZ_la-sz_uint64.Plo + -rm -f src/$(DEPDIR)/libSZ_la-sz_uint8.Plo + -rm -f src/$(DEPDIR)/libSZ_la-szd_double.Plo + -rm -f src/$(DEPDIR)/libSZ_la-szd_double_pwr.Plo + -rm -f src/$(DEPDIR)/libSZ_la-szd_double_ts.Plo + -rm -f src/$(DEPDIR)/libSZ_la-szd_float.Plo + -rm -f src/$(DEPDIR)/libSZ_la-szd_float_pwr.Plo + -rm -f src/$(DEPDIR)/libSZ_la-szd_float_ts.Plo + -rm -f src/$(DEPDIR)/libSZ_la-szd_int16.Plo + -rm -f src/$(DEPDIR)/libSZ_la-szd_int32.Plo + -rm -f src/$(DEPDIR)/libSZ_la-szd_int64.Plo + -rm -f src/$(DEPDIR)/libSZ_la-szd_int8.Plo + -rm -f src/$(DEPDIR)/libSZ_la-szd_uint16.Plo + -rm -f src/$(DEPDIR)/libSZ_la-szd_uint32.Plo + -rm -f src/$(DEPDIR)/libSZ_la-szd_uint64.Plo + -rm -f src/$(DEPDIR)/libSZ_la-szd_uint8.Plo + -rm -f src/$(DEPDIR)/libSZ_la-szf.Plo + -rm -f src/$(DEPDIR)/libSZ_la-utility.Plo + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: install-includeHEADERS + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: install-libLTLIBRARIES + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + 
+maintainer-clean: maintainer-clean-am + -rm -f src/$(DEPDIR)/libSZ_la-ArithmeticCoding.Plo + -rm -f src/$(DEPDIR)/libSZ_la-ByteToolkit.Plo + -rm -f src/$(DEPDIR)/libSZ_la-CacheTable.Plo + -rm -f src/$(DEPDIR)/libSZ_la-CompressElement.Plo + -rm -f src/$(DEPDIR)/libSZ_la-DynamicByteArray.Plo + -rm -f src/$(DEPDIR)/libSZ_la-DynamicDoubleArray.Plo + -rm -f src/$(DEPDIR)/libSZ_la-DynamicFloatArray.Plo + -rm -f src/$(DEPDIR)/libSZ_la-DynamicIntArray.Plo + -rm -f src/$(DEPDIR)/libSZ_la-Huffman.Plo + -rm -f src/$(DEPDIR)/libSZ_la-MultiLevelCacheTable.Plo + -rm -f src/$(DEPDIR)/libSZ_la-MultiLevelCacheTableWideInterval.Plo + -rm -f src/$(DEPDIR)/libSZ_la-TightDataPointStorageD.Plo + -rm -f src/$(DEPDIR)/libSZ_la-TightDataPointStorageF.Plo + -rm -f src/$(DEPDIR)/libSZ_la-TightDataPointStorageI.Plo + -rm -f src/$(DEPDIR)/libSZ_la-TypeManager.Plo + -rm -f src/$(DEPDIR)/libSZ_la-VarSet.Plo + -rm -f src/$(DEPDIR)/libSZ_la-callZlib.Plo + -rm -f src/$(DEPDIR)/libSZ_la-conf.Plo + -rm -f src/$(DEPDIR)/libSZ_la-dataCompression.Plo + -rm -f src/$(DEPDIR)/libSZ_la-dictionary.Plo + -rm -f src/$(DEPDIR)/libSZ_la-exafelSZ.Plo + -rm -f src/$(DEPDIR)/libSZ_la-iniparser.Plo + -rm -f src/$(DEPDIR)/libSZ_la-pastri.Plo + -rm -f src/$(DEPDIR)/libSZ_la-rw.Plo + -rm -f src/$(DEPDIR)/libSZ_la-rwf.Plo + -rm -f src/$(DEPDIR)/libSZ_la-sz.Plo + -rm -f src/$(DEPDIR)/libSZ_la-sz_double.Plo + -rm -f src/$(DEPDIR)/libSZ_la-sz_double_pwr.Plo + -rm -f src/$(DEPDIR)/libSZ_la-sz_double_ts.Plo + -rm -f src/$(DEPDIR)/libSZ_la-sz_float.Plo + -rm -f src/$(DEPDIR)/libSZ_la-sz_float_pwr.Plo + -rm -f src/$(DEPDIR)/libSZ_la-sz_float_ts.Plo + -rm -f src/$(DEPDIR)/libSZ_la-sz_int16.Plo + -rm -f src/$(DEPDIR)/libSZ_la-sz_int32.Plo + -rm -f src/$(DEPDIR)/libSZ_la-sz_int64.Plo + -rm -f src/$(DEPDIR)/libSZ_la-sz_int8.Plo + -rm -f src/$(DEPDIR)/libSZ_la-sz_omp.Plo + -rm -f src/$(DEPDIR)/libSZ_la-sz_stats.Plo + -rm -f src/$(DEPDIR)/libSZ_la-sz_uint16.Plo + -rm -f src/$(DEPDIR)/libSZ_la-sz_uint32.Plo + -rm -f 
src/$(DEPDIR)/libSZ_la-sz_uint64.Plo + -rm -f src/$(DEPDIR)/libSZ_la-sz_uint8.Plo + -rm -f src/$(DEPDIR)/libSZ_la-szd_double.Plo + -rm -f src/$(DEPDIR)/libSZ_la-szd_double_pwr.Plo + -rm -f src/$(DEPDIR)/libSZ_la-szd_double_ts.Plo + -rm -f src/$(DEPDIR)/libSZ_la-szd_float.Plo + -rm -f src/$(DEPDIR)/libSZ_la-szd_float_pwr.Plo + -rm -f src/$(DEPDIR)/libSZ_la-szd_float_ts.Plo + -rm -f src/$(DEPDIR)/libSZ_la-szd_int16.Plo + -rm -f src/$(DEPDIR)/libSZ_la-szd_int32.Plo + -rm -f src/$(DEPDIR)/libSZ_la-szd_int64.Plo + -rm -f src/$(DEPDIR)/libSZ_la-szd_int8.Plo + -rm -f src/$(DEPDIR)/libSZ_la-szd_uint16.Plo + -rm -f src/$(DEPDIR)/libSZ_la-szd_uint32.Plo + -rm -f src/$(DEPDIR)/libSZ_la-szd_uint64.Plo + -rm -f src/$(DEPDIR)/libSZ_la-szd_uint8.Plo + -rm -f src/$(DEPDIR)/libSZ_la-szf.Plo + -rm -f src/$(DEPDIR)/libSZ_la-utility.Plo + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-includeHEADERS uninstall-libLTLIBRARIES + +.MAKE: install-am install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am am--depfiles check check-am clean \ + clean-generic clean-libLTLIBRARIES clean-libtool cscopelist-am \ + ctags ctags-am distclean distclean-compile distclean-generic \ + distclean-libtool distclean-tags distdir dvi dvi-am html \ + html-am info info-am install install-am install-data \ + install-data-am install-dvi install-dvi-am install-exec \ + install-exec-am install-html install-html-am \ + install-includeHEADERS install-info install-info-am \ + install-libLTLIBRARIES install-man install-pdf install-pdf-am \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags tags-am uninstall uninstall-am 
uninstall-includeHEADERS \ + uninstall-libLTLIBRARIES + +.PRECIOUS: Makefile + + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/deps/SZ/sz/include/ArithmeticCoding.h b/deps/SZ/sz/include/ArithmeticCoding.h new file mode 100644 index 0000000000000000000000000000000000000000..010a2513215982e239cb0b421697985fd67f6a71 --- /dev/null +++ b/deps/SZ/sz/include/ArithmeticCoding.h @@ -0,0 +1,62 @@ +/** + * @file ArithmeticCoding.h + * @author Sheng Di + * @date Dec, 2018 + * @brief Header file for the ArithmeticCoding.c. + * (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. + */ + +#ifndef _ArithmeticCoding_H +#define _ArithmeticCoding_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +#define ONE_FOURTH (0x40000000000) //44 bits are absolutely enough to deal with a large dataset (support at most 16TB per process) +#define ONE_HALF (0x80000000000) +#define THREE_FOURTHS (0xC0000000000) +#define MAX_CODE (0xFFFFFFFFFFF) +#define MAX_INTERVALS 1048576 //the limit to the arithmetic coding (at most 2^(20) intervals) + +typedef struct Prob { + size_t low; + size_t high; + int state; +} Prob; + +typedef struct AriCoder +{ + int numOfRealStates; //the # real states means the number of states after the optimization of # intervals + int numOfValidStates; //the # valid states means the number of non-zero frequency cells (some states/codes actually didn't appear) + size_t total_frequency; + Prob* cumulative_frequency; //used to encode data more efficiently +} AriCoder; + +void output_bit_1(unsigned int* buf); +void output_bit_0(unsigned int* buf); +unsigned int output_bit_1_plus_pending(int pending_bits); +unsigned int output_bit_0_plus_pending(int pending_bits); + +AriCoder *createAriCoder(int numOfStates, int *s, size_t length); +void freeAriCoder(AriCoder *ariCoder); +void ari_init(AriCoder 
*ariCoder, int *s, size_t length); +unsigned int pad_ariCoder(AriCoder* ariCoder, unsigned char** out); +int unpad_ariCoder(AriCoder** ariCoder, unsigned char* bytes); + +unsigned char get_bit(unsigned char* p, int offset); + +void ari_encode(AriCoder *ariCoder, int *s, size_t length, unsigned char *out, size_t *outSize); +void ari_decode(AriCoder *ariCoder, unsigned char *s, size_t s_len, size_t targetLength, int *out); + +Prob* getCode(AriCoder *ariCoder, size_t scaled_value); + +#ifdef __cplusplus +} +#endif + +#endif /* ----- #ifndef _ArithmeticCoding_H ----- */ + diff --git a/deps/SZ/sz/include/ByteToolkit.h b/deps/SZ/sz/include/ByteToolkit.h new file mode 100644 index 0000000000000000000000000000000000000000..e88bf020f56c92c36777e58b199f9f0450e3461b --- /dev/null +++ b/deps/SZ/sz/include/ByteToolkit.h @@ -0,0 +1,81 @@ +/** + * @file ByteToolkit.h + * @author Sheng Di + * @date July, 2017 + * @brief Header file for the ByteToolkit.c. + * (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. 
+ */ + +#ifndef _ByteToolkit_H +#define _ByteToolkit_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +//ByteToolkit.c + +unsigned short bytesToUInt16_bigEndian(unsigned char* bytes); +unsigned int bytesToUInt32_bigEndian(unsigned char* bytes); +unsigned long bytesToUInt64_bigEndian(unsigned char* b); + +short bytesToInt16_bigEndian(unsigned char* bytes); +int bytesToInt32_bigEndian(unsigned char* bytes); +long bytesToInt64_bigEndian(unsigned char* b); +int bytesToInt_bigEndian(unsigned char* bytes); + +void intToBytes_bigEndian(unsigned char *b, unsigned int num); + +void int64ToBytes_bigEndian(unsigned char *b, uint64_t num); +void int32ToBytes_bigEndian(unsigned char *b, uint32_t num); +void int16ToBytes_bigEndian(unsigned char *b, uint16_t num); + +long bytesToLong_bigEndian(unsigned char* b); +void longToBytes_bigEndian(unsigned char *b, unsigned long num); +long doubleToOSEndianLong(double value); +int floatToOSEndianInt(float value); +short getExponent_float(float value); +short getPrecisionReqLength_float(float precision); +short getExponent_double(double value); +short getPrecisionReqLength_double(double precision); +unsigned char numberOfLeadingZeros_Int(int i); +unsigned char numberOfLeadingZeros_Long(long i); +unsigned char getLeadingNumbers_Int(int v1, int v2); +unsigned char getLeadingNumbers_Long(long v1, long v2); +short bytesToShort(unsigned char* bytes); +void shortToBytes(unsigned char* b, short value); +int bytesToInt(unsigned char* bytes); +long bytesToLong(unsigned char* bytes); +float bytesToFloat(unsigned char* bytes); +void floatToBytes(unsigned char *b, float num); +double bytesToDouble(unsigned char* bytes); +void doubleToBytes(unsigned char *b, double num); +int extractBytes(unsigned char* byteArray, size_t k, int validLength); +int getMaskRightCode(int m); +int getLeftMovingCode(int kMod8); +int getRightMovingSteps(int kMod8, int resiBitLength); +int getRightMovingCode(int kMod8, int resiBitLength); +short* 
convertByteDataToShortArray(unsigned char* bytes, size_t byteLength); +unsigned short* convertByteDataToUShortArray(unsigned char* bytes, size_t byteLength); + +void convertShortArrayToBytes(short* states, size_t stateLength, unsigned char* bytes); +void convertUShortArrayToBytes(unsigned short* states, size_t stateLength, unsigned char* bytes); +void convertIntArrayToBytes(int* states, size_t stateLength, unsigned char* bytes); +void convertUIntArrayToBytes(unsigned int* states, size_t stateLength, unsigned char* bytes); +void convertLongArrayToBytes(int64_t* states, size_t stateLength, unsigned char* bytes); +void convertULongArrayToBytes(uint64_t* states, size_t stateLength, unsigned char* bytes); + +size_t bytesToSize(unsigned char* bytes); +void sizeToBytes(unsigned char* outBytes, size_t size); + +void put_codes_to_output(unsigned int buf, int bitSize, unsigned char** p, int* lackBits, size_t *outSize); + +#ifdef __cplusplus +} +#endif + +#endif /* ----- #ifndef _ByteToolkit_H ----- */ + diff --git a/deps/SZ/sz/include/CacheTable.h b/deps/SZ/sz/include/CacheTable.h new file mode 100644 index 0000000000000000000000000000000000000000..f98c8bc75a49eb8e0e275cb3d8938b11f9653dc9 --- /dev/null +++ b/deps/SZ/sz/include/CacheTable.h @@ -0,0 +1,40 @@ +/** + * @file CacheTable.h + * @author Xiangyu Zou, Tao Lu, Wen Xia, Xuan Wang, Weizhe Zhang, Sheng Di, Dingwen Tao + * @date Jan, 2019 + * @brief Header file. + * (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. 
+ */ + +#ifndef SZ_MASTER_CACHETABLE_H +#define SZ_MASTER_CACHETABLE_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include "stdio.h" +#include "stdint.h" +#include + +extern double* g_CacheTable; +extern uint32_t * g_InverseTable; +extern uint32_t baseIndex; +extern uint32_t topIndex; +extern int bits; + +int doubleGetExpo(double d); +int CacheTableGetRequiredBits(double precision, int quantization_intervals); +uint32_t CacheTableGetIndex(float value, int bits); +uint64_t CacheTableGetIndexDouble(double value, int bits); +int CacheTableIsInBoundary(uint32_t index); +void CacheTableBuild(double * table, int count, double smallest, double largest, double precision, int quantization_intervals); +uint32_t CacheTableFind(uint32_t index); +void CacheTableFree(); + +#ifdef __cplusplus +} +#endif + +#endif //SZ_MASTER_CACHETABLE_H diff --git a/deps/SZ/sz/include/CompressElement.h b/deps/SZ/sz/include/CompressElement.h new file mode 100644 index 0000000000000000000000000000000000000000..1d388c1218bc7928d80aad8ffb7ef506d95b04f9 --- /dev/null +++ b/deps/SZ/sz/include/CompressElement.h @@ -0,0 +1,76 @@ +/** + * @file CompressElement.h + * @author Sheng Di + * @date April, 2016 + * @brief Header file for Compress Elements such as DoubleValueCompressElement. + * (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. 
+ */ + +#include + +#ifndef _CompressElement_H +#define _CompressElement_H + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct DoubleValueCompressElement +{ + double data; + long curValue; + unsigned char curBytes[8]; //big_endian + int reqBytesLength; + int resiBitsLength; +} DoubleValueCompressElement; + +typedef struct FloatValueCompressElement +{ + float data; + int curValue; + unsigned char curBytes[4]; //big_endian + int reqBytesLength; + int resiBitsLength; +} FloatValueCompressElement; + +typedef struct LossyCompressionElement +{ + int leadingZeroBytes; //0,1,2,or 3 + unsigned char integerMidBytes[8]; + int integerMidBytes_Length; //they are mid_bits actually + //char curBytes[8]; + //int curBytes_Length; //4 for single_precision or 8 for double_precision + int resMidBitsLength; + int residualMidBits; +} LossyCompressionElement; + +char* decompressGroupIDArray(unsigned char* bytes, size_t dataLength); + +short computeGroupNum_float(float value); +short computeGroupNum_double(double value); + +void listAdd_double(double last3CmprsData[3], double value); +void listAdd_float(float last3CmprsData[3], float value); +void listAdd_int(int64_t last3CmprsData[3], int64_t value); +void listAdd_int32(int32_t last3CmprsData[3], int32_t value); +void listAdd_float_group(float *groups, int *flags, char groupNum, float oriValue, float decValue, char* curGroupID); +void listAdd_double_group(double *groups, int *flags, char groupNum, double oriValue, double decValue, char* curGroupID); + +int validPrediction_double(double minErr, double precision); +int validPrediction_float(float minErr, float precision); +double* generateGroupErrBounds(int errorBoundMode, double realPrecision, double pwrErrBound); +int generateGroupMaxIntervalCount(double* groupErrBounds); + +void new_LossyCompressionElement(LossyCompressionElement *lce, int leadingNum, unsigned char* intMidBytes, + int intMidBytes_Length, int resiMidBitsLength, int resiBits); +void 
updateLossyCompElement_Double(unsigned char* curBytes, unsigned char* preBytes, + int reqBytesLength, int resiBitsLength, LossyCompressionElement *lce); +void updateLossyCompElement_Float(unsigned char* curBytes, unsigned char* preBytes, + int reqBytesLength, int resiBitsLength, LossyCompressionElement *lce); + +#ifdef __cplusplus +} +#endif + +#endif /* ----- #ifndef _CompressElement_H ----- */ diff --git a/deps/SZ/sz/include/DynamicByteArray.h b/deps/SZ/sz/include/DynamicByteArray.h new file mode 100644 index 0000000000000000000000000000000000000000..717097940fc9f7772382d7b7ebc8934b697df34b --- /dev/null +++ b/deps/SZ/sz/include/DynamicByteArray.h @@ -0,0 +1,36 @@ +/** + * @file DynamicByteArray.h + * @author Sheng Di + * @date April, 2016 + * @brief Header file for Dynamic Byte Array. + * (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. + */ + +#ifndef _DynamicByteArray_H +#define _DynamicByteArray_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include +typedef struct DynamicByteArray +{ + unsigned char* array; + size_t size; + size_t capacity; +} DynamicByteArray; + +void new_DBA(DynamicByteArray **dba, size_t cap); +void convertDBAtoBytes(DynamicByteArray *dba, unsigned char** bytes); +void free_DBA(DynamicByteArray *dba); +unsigned char getDBA_Data(DynamicByteArray *dba, size_t pos); +void addDBA_Data(DynamicByteArray *dba, unsigned char value); +void memcpyDBA_Data(DynamicByteArray *dba, unsigned char* data, size_t length); + +#ifdef __cplusplus +} +#endif + +#endif /* ----- #ifndef _DynamicByteArray_H ----- */ diff --git a/deps/SZ/sz/include/DynamicDoubleArray.h b/deps/SZ/sz/include/DynamicDoubleArray.h new file mode 100644 index 0000000000000000000000000000000000000000..9a3ef4b6a4d74fa2a54f6b6ea5ceb82b2bed6e53 --- /dev/null +++ b/deps/SZ/sz/include/DynamicDoubleArray.h @@ -0,0 +1,36 @@ +/** + * @file DynamicDoubleArray.h + * @author Sheng Di + * @date April, 2016 + * @brief Header 
file for Dynamic Double Array. + * (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. + */ + +#ifndef _DynamicDoubleArray_H +#define _DynamicDoubleArray_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +typedef struct DynamicDoubleArray +{ + double* array; + size_t size; + double capacity; +} DynamicDoubleArray; + +void new_DDA(DynamicDoubleArray **dda, size_t cap); +void convertDDAtoDoubles(DynamicDoubleArray *dba, double **data); +void free_DDA(DynamicDoubleArray *dda); +double getDDA_Data(DynamicDoubleArray *dda, size_t pos); +void addDDA_Data(DynamicDoubleArray *dda, double value); + +#ifdef __cplusplus +} +#endif + +#endif /* ----- #ifndef _DynamicDoubleArray_H ----- */ diff --git a/deps/SZ/sz/include/DynamicFloatArray.h b/deps/SZ/sz/include/DynamicFloatArray.h new file mode 100644 index 0000000000000000000000000000000000000000..2770f7860bc1da9eab4a478599537e7c29ec4a7f --- /dev/null +++ b/deps/SZ/sz/include/DynamicFloatArray.h @@ -0,0 +1,35 @@ +/** + * @file DynamicFloatArray.h + * @author Sheng Di + * @date April, 2016 + * @brief Header file for Dynamic Float Array. + * (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. 
+ */ + +#ifndef _DynamicFloatArray_H +#define _DynamicFloatArray_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include +typedef struct DynamicFloatArray +{ + float* array; + size_t size; + size_t capacity; +} DynamicFloatArray; + +void new_DFA(DynamicFloatArray **dfa, size_t cap); +void convertDFAtoFloats(DynamicFloatArray *dfa, float **data); +void free_DFA(DynamicFloatArray *dfa); +float getDFA_Data(DynamicFloatArray *dfa, size_t pos); +void addDFA_Data(DynamicFloatArray *dfa, float value); + +#ifdef __cplusplus +} +#endif + +#endif /* ----- #ifndef _DynamicFloatArray_H ----- */ diff --git a/deps/SZ/sz/include/DynamicIntArray.h b/deps/SZ/sz/include/DynamicIntArray.h new file mode 100644 index 0000000000000000000000000000000000000000..c821c5712728ff816ad0643d9bd1a2bbfaff8e85 --- /dev/null +++ b/deps/SZ/sz/include/DynamicIntArray.h @@ -0,0 +1,35 @@ +/** + * @file DynamicIntArray.h + * @author Sheng Di + * @date April, 2016 + * @brief Header file for Dynamic Int Array. + * (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. 
+ */ + +#ifndef _DynamicIntArray_H +#define _DynamicIntArray_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include +typedef struct DynamicIntArray +{ + unsigned char* array; //char* (one byte) is enough, don't have to be int* + size_t size; + size_t capacity; +} DynamicIntArray; + +void new_DIA(DynamicIntArray **dia, size_t cap); +void convertDIAtoInts(DynamicIntArray *dia, unsigned char **data); +void free_DIA(DynamicIntArray *dia); +int getDIA_Data(DynamicIntArray *dia, size_t pos); +void addDIA_Data(DynamicIntArray *dia, int value); + +#ifdef __cplusplus +} +#endif + +#endif /* ----- #ifndef _DynamicIntArray_H ----- */ diff --git a/deps/SZ/sz/include/Huffman.h b/deps/SZ/sz/include/Huffman.h new file mode 100644 index 0000000000000000000000000000000000000000..650d6dd7e9956e0f53b51dee41154ff5bda2229e --- /dev/null +++ b/deps/SZ/sz/include/Huffman.h @@ -0,0 +1,75 @@ +/** + * @file Huffman.h + * @author Sheng Di + * @date Aug., 2016 + * @brief Header file for the Huffman tree and its encode/decode routines. + * (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. + */ + +#ifndef _Huffman_H +#define _Huffman_H + +#ifdef __cplusplus +extern "C" { +#endif + +//Note: when changing the following settings, intvCapacity in sz.h should be changed as well. 
+//#define allNodes 131072 +//#define stateNum 65536 + +typedef struct node_t { + struct node_t *left, *right; + size_t freq; + char t; //in_node:0; otherwise:1 + unsigned int c; +} *node; + +typedef struct HuffmanTree { + unsigned int stateNum; + unsigned int allNodes; + struct node_t* pool; + node *qqq, *qq; //the root node of the HuffmanTree is qq[1] + int n_nodes; //n_nodes is for compression + int qend; + unsigned long **code; + unsigned char *cout; + int n_inode; //n_inode is for decompression + int maxBitCount; +} HuffmanTree; + +HuffmanTree* createHuffmanTree(int stateNum); +HuffmanTree* createDefaultHuffmanTree(); + +node new_node(HuffmanTree *huffmanTree, size_t freq, unsigned int c, node a, node b); +node new_node2(HuffmanTree *huffmanTree, unsigned int c, unsigned char t); +void qinsert(HuffmanTree *huffmanTree, node n); +node qremove(HuffmanTree *huffmanTree); +void build_code(HuffmanTree *huffmanTree, node n, int len, unsigned long out1, unsigned long out2); +void init(HuffmanTree *huffmanTree, int *s, size_t length); +void init_static(HuffmanTree *huffmanTree, int *s, size_t length); +void encode(HuffmanTree *huffmanTree, int *s, size_t length, unsigned char *out, size_t *outSize); + +void decode(unsigned char *s, size_t targetLength, node t, int *out); +void decode_MSST19(unsigned char *s, size_t targetLength, node t, int *out, int maxBits); + +void pad_tree_uchar(HuffmanTree* huffmanTree, unsigned char* L, unsigned char* R, unsigned int* C, unsigned char* t, unsigned int i, node root); +void pad_tree_ushort(HuffmanTree* huffmanTree, unsigned short* L, unsigned short* R, unsigned int* C, unsigned char* t, unsigned int i, node root); +void pad_tree_uint(HuffmanTree* huffmanTree, unsigned int* L, unsigned int* R, unsigned int* C, unsigned char* t, unsigned int i, node root); +unsigned int convert_HuffTree_to_bytes_anyStates(HuffmanTree* huffmanTree, int nodeCount, unsigned char** out); +void unpad_tree_uchar(HuffmanTree* huffmanTree, unsigned char* L, 
unsigned char* R, unsigned int* C, unsigned char *t, unsigned int i, node root); +void unpad_tree_ushort(HuffmanTree* huffmanTree, unsigned short* L, unsigned short* R, unsigned int* C, unsigned char* t, unsigned int i, node root); +void unpad_tree_uint(HuffmanTree* huffmanTree, unsigned int* L, unsigned int* R, unsigned int* C, unsigned char* t, unsigned int i, node root); +node reconstruct_HuffTree_from_bytes_anyStates(HuffmanTree *huffmanTree, unsigned char* bytes, int nodeCount); + +void encode_withTree(HuffmanTree* huffmanTree, int *s, size_t length, unsigned char **out, size_t *outSize); +int encode_withTree_MSST19(HuffmanTree* huffmanTree, int *s, size_t length, unsigned char **out, size_t *outSize); +void decode_withTree(HuffmanTree* huffmanTree, unsigned char *s, size_t targetLength, int *out); +void decode_withTree_MSST19(HuffmanTree* huffmanTree, unsigned char *s, size_t targetLength, int *out, int maxBits); +void SZ_ReleaseHuffman(HuffmanTree* huffmanTree); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/deps/SZ/sz/include/MultiLevelCacheTable.h b/deps/SZ/sz/include/MultiLevelCacheTable.h new file mode 100644 index 0000000000000000000000000000000000000000..3ecc931fdeafc8ceaa8085bcc4e062b047ddbf1c --- /dev/null +++ b/deps/SZ/sz/include/MultiLevelCacheTable.h @@ -0,0 +1,50 @@ +/** + * @file MultiLevelCacheTable.h + * @author Xiangyu Zou, Tao Lu, Wen Xia, Xuan Wang, Weizhe Zhang, Sheng Di, Dingwen Tao + * @date Jan, 2019 + * @brief Header file. + * (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. 
+ */ + +#ifndef _MULTILEVELCACHETABLE_H +#define _MULTILEVELCACHETABLE_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include +#include "stdio.h" + +typedef struct SubLevelTable{ + uint32_t baseIndex; + uint32_t topIndex; + uint32_t* table; + uint8_t expoIndex; +} SubLevelTable; + +typedef struct TopLevelTable{ + uint8_t bits; + uint8_t baseIndex; + uint8_t topIndex; + struct SubLevelTable* subTables; + float bottomBoundary; + float topBoundary; +} TopLevelTable; + +uint8_t MLCT_GetExpoIndex(float value); +uint8_t MLCT_GetRequiredBits(float precision); +uint32_t MLCT_GetMantiIndex(float value, int bits); +float MLTC_RebuildFloat(uint8_t expo, uint32_t manti, int bits); +void MultiLevelCacheTableBuild(struct TopLevelTable* topTable, float* precisionTable, int count, float precision); +uint32_t MultiLevelCacheTableGetIndex(float value, struct TopLevelTable* topLevelTable); +void MultiLevelCacheTableFree(struct TopLevelTable* table); + +#ifdef __cplusplus +} +#endif + +#endif //_MULTILEVELCACHETABLE_H diff --git a/deps/SZ/sz/include/MultiLevelCacheTableWideInterval.h b/deps/SZ/sz/include/MultiLevelCacheTableWideInterval.h new file mode 100644 index 0000000000000000000000000000000000000000..853d14bcee6f3b6f97998891149103292ce0ef9e --- /dev/null +++ b/deps/SZ/sz/include/MultiLevelCacheTableWideInterval.h @@ -0,0 +1,54 @@ +/** + * @file MultiLevelCacheTableWideInterval.h + * @author Xiangyu Zou, Tao Lu, Wen Xia, Xuan Wang, Weizhe Zhang, Sheng Di, Dingwen Tao + * @date Jan, 2019 + * @brief Header file for MultiLevelCacheTableWideInterval.c. + * (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. 
+ */ + + +#ifndef _MULTILEVELCACHETABLEWIDEINTERVAL_H +#define _MULTILEVELCACHETABLEWIDEINTERVAL_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include +#include "stdio.h" + +typedef struct SubLevelTableWideInterval{ + uint64_t baseIndex; + uint64_t topIndex; + uint16_t* table; + uint16_t expoIndex; +} SubLevelTableWideInterval; + +typedef struct TopLevelTableWideInterval{ + uint16_t bits; + uint16_t baseIndex; + uint16_t topIndex; + struct SubLevelTableWideInterval* subTables; + double bottomBoundary; + double topBoundary; +} TopLevelTableWideInterval; + +void freeTopLevelTableWideInterval(struct TopLevelTableWideInterval* topTable); + +uint16_t MLCTWI_GetExpoIndex(double value); +uint16_t MLCTWI_GetRequiredBits(double precision); +uint64_t MLCTWI_GetMantiIndex(double value, int bits); + +double MLTCWI_RebuildDouble(uint16_t expo, uint64_t manti, int bits); +void MultiLevelCacheTableWideIntervalBuild(struct TopLevelTableWideInterval* topTable, double* precisionTable, int count, double precision, int plus_bits); +uint32_t MultiLevelCacheTableWideIntervalGetIndex(double value, struct TopLevelTableWideInterval* topLevelTable); +void MultiLevelCacheTableWideIntervalFree(struct TopLevelTableWideInterval* table); + +#ifdef __cplusplus +} +#endif + +#endif //_MULTILEVELCACHETABLEWIDEINTERVAL_H diff --git a/deps/SZ/sz/include/TightDataPointStorageD.h b/deps/SZ/sz/include/TightDataPointStorageD.h new file mode 100644 index 0000000000000000000000000000000000000000..188dfe1556cfb2903fa5dc2ddb8a8c8ed768960c --- /dev/null +++ b/deps/SZ/sz/include/TightDataPointStorageD.h @@ -0,0 +1,99 @@ +/** + * @file TightDataPointStorageD.h + * @author Sheng Di + * @date April, 2016 + * @brief Header file for the tight data point storage (TDPS). + * (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. 
+ */ + +#ifndef _TightDataPointStorageD_H +#define _TightDataPointStorageD_H + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct TightDataPointStorageD +{ + size_t dataSeriesLength; + int allSameData; + double realPrecision; + double medianValue; + char reqLength; + char radExpo; //used to compute reqLength based on segmented precisions in "pw_rel_compression" + + double minLogValue; + + int stateNum; + int allNodes; + + size_t exactDataNum; + double reservedValue; + + unsigned char* rtypeArray; + size_t rtypeArray_size; + + unsigned char* typeArray; //its size is dataSeriesLength/4 (or xxx/4+1) + size_t typeArray_size; + + unsigned char* leadNumArray; //its size is exactDataNum/4 (or exactDataNum/4+1) + size_t leadNumArray_size; + + unsigned char* exactMidBytes; + size_t exactMidBytes_size; + + unsigned char* residualMidBits; + size_t residualMidBits_size; + + unsigned int intervals; + + unsigned char isLossless; //a mark to denote whether it's lossless compression (1 is yes, 0 is no) + + size_t segment_size; + + unsigned char* pwrErrBoundBytes; + int pwrErrBoundBytes_size; + + unsigned char* raBytes; + size_t raBytes_size; + + unsigned char plus_bits; + unsigned char max_bits; + +} TightDataPointStorageD; + +void new_TightDataPointStorageD_Empty(TightDataPointStorageD **self); +int new_TightDataPointStorageD_fromFlatBytes(TightDataPointStorageD **self, unsigned char* flatBytes, size_t flatBytesLength); + +void new_TightDataPointStorageD(TightDataPointStorageD **self, + size_t dataSeriesLength, size_t exactDataNum, + int* type, unsigned char* exactMidBytes, size_t exactMidBytes_size, + unsigned char* leadNumIntArray, //leadNumIntArray contains readable numbers.... 
+ unsigned char* resiMidBits, size_t resiMidBits_size, + unsigned char resiBitLength, + double realPrecision, double medianValue, char reqLength, unsigned int intervals, + unsigned char* pwrErrBoundBytes, size_t pwrErrBoundBytes_size, unsigned char radExpo); + +void new_TightDataPointStorageD2(TightDataPointStorageD **self, + size_t dataSeriesLength, size_t exactDataNum, + int* type, unsigned char* exactMidBytes, size_t exactMidBytes_size, + unsigned char* leadNumIntArray, //leadNumIntArray contains readable numbers.... + unsigned char* resiMidBits, size_t resiMidBits_size, + unsigned char* resiBitLength, size_t resiBitLengthSize, + double realPrecision, double medianValue, char reqLength, unsigned int intervals, + unsigned char* pwrErrBoundBytes, size_t pwrErrBoundBytes_size, unsigned char radExpo); + +void convertTDPStoBytes_double(TightDataPointStorageD* tdps, unsigned char* bytes, unsigned char* dsLengthBytes, unsigned char sameByte); +void convertTDPStoBytes_double_reserve(TightDataPointStorageD* tdps, unsigned char* bytes, unsigned char* dsLengthBytes, unsigned char sameByte); +void convertTDPStoFlatBytes_double(TightDataPointStorageD *tdps, unsigned char** bytes, size_t *size); +void convertTDPStoFlatBytes_double_args(TightDataPointStorageD *tdps, unsigned char* bytes, size_t *size); + +void free_TightDataPointStorageD(TightDataPointStorageD *tdps); +void free_TightDataPointStorageD2(TightDataPointStorageD *tdps); + +#ifdef __cplusplus +} +#endif + +#endif /* ----- #ifndef _TightDataPointStorageD_H ----- */ diff --git a/deps/SZ/sz/include/TightDataPointStorageF.h b/deps/SZ/sz/include/TightDataPointStorageF.h new file mode 100644 index 0000000000000000000000000000000000000000..0b61c02e22941984a06c0a069c78b081812dd842 --- /dev/null +++ b/deps/SZ/sz/include/TightDataPointStorageF.h @@ -0,0 +1,105 @@ +/** + * @file TightDataPointStorageF.h + * @author Sheng Di and Dingwen Tao + * @date Aug, 2016 + * @brief Header file for the tight data point storage (TDPS). 
+ * (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. + */ + +#ifndef _TightDataPointStorageF_H +#define _TightDataPointStorageF_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +typedef struct TightDataPointStorageF +{ + size_t dataSeriesLength; + int allSameData; + double realPrecision; //it's used as the pwrErrBoundRatio when errBoundMode==PW_REL + float medianValue; + char reqLength; + char radExpo; //used to compute reqLength based on segmented precisions in "pw_rel_compression" + + int stateNum; + int allNodes; + + size_t exactDataNum; + float reservedValue; + + unsigned char* rtypeArray; + size_t rtypeArray_size; + + float minLogValue; + + unsigned char* typeArray; //its size is dataSeriesLength/4 (or xxx/4+1) + size_t typeArray_size; + + unsigned char* leadNumArray; //its size is exactDataNum/4 (or exactDataNum/4+1) + size_t leadNumArray_size; + + unsigned char* exactMidBytes; + size_t exactMidBytes_size; + + unsigned char* residualMidBits; + size_t residualMidBits_size; + + unsigned int intervals; //quantization_intervals + + unsigned char isLossless; //a mark to denote whether it's lossless compression (1 is yes, 0 is no) + + size_t segment_size; + + unsigned char* pwrErrBoundBytes; + int pwrErrBoundBytes_size; + + unsigned char* raBytes; + size_t raBytes_size; + + unsigned char plus_bits; + unsigned char max_bits; + +} TightDataPointStorageF; + +void new_TightDataPointStorageF_Empty(TightDataPointStorageF **self); +int new_TightDataPointStorageF_fromFlatBytes(TightDataPointStorageF **self, unsigned char* flatBytes, size_t flatBytesLength); + +void new_TightDataPointStorageF(TightDataPointStorageF **self, + size_t dataSeriesLength, size_t exactDataNum, + int* type, unsigned char* exactMidBytes, size_t exactMidBytes_size, + unsigned char* leadNumIntArray, //leadNumIntArray contains readable numbers.... 
+ unsigned char* resiMidBits, size_t resiMidBits_size, + unsigned char resiBitLength, + double realPrecision, float medianValue, char reqLength, unsigned int intervals, + unsigned char* pwrErrBoundBytes, size_t pwrErrBoundBytes_size, unsigned char radExpo); + +/** + * This function is designed for first-version of the point-wise relative error bound (developed by Sheng Di for TPDS18 paper) + * + * */ +void new_TightDataPointStorageF2(TightDataPointStorageF **self, + size_t dataSeriesLength, size_t exactDataNum, + int* type, unsigned char* exactMidBytes, size_t exactMidBytes_size, + unsigned char* leadNumIntArray, //leadNumIntArray contains readable numbers.... + unsigned char* resiMidBits, size_t resiMidBits_size, + unsigned char* resiBitLength, size_t resiBitLengthSize, + double realPrecision, float medianValue, char reqLength, unsigned int intervals, + unsigned char* pwrErrBoundBytes, size_t pwrErrBoundBytes_size, unsigned char radExpo); + +void convertTDPStoBytes_float(TightDataPointStorageF* tdps, unsigned char* bytes, unsigned char* dsLengthBytes, unsigned char sameByte); +void convertTDPStoBytes_float_reserve(TightDataPointStorageF* tdps, unsigned char* bytes, unsigned char* dsLengthBytes, unsigned char sameByte); +void convertTDPStoFlatBytes_float(TightDataPointStorageF *tdps, unsigned char** bytes, size_t *size); +void convertTDPStoFlatBytes_float_args(TightDataPointStorageF *tdps, unsigned char* bytes, size_t *size); + +void free_TightDataPointStorageF(TightDataPointStorageF *tdps); +void free_TightDataPointStorageF2(TightDataPointStorageF *tdps); + +#ifdef __cplusplus +} +#endif + +#endif /* ----- #ifndef _TightDataPointStorageF_H ----- */ diff --git a/deps/SZ/sz/include/TightDataPointStorageI.h b/deps/SZ/sz/include/TightDataPointStorageI.h new file mode 100644 index 0000000000000000000000000000000000000000..466a753e0e7a751d9a8d544adcaf8ac63fab9131 --- /dev/null +++ b/deps/SZ/sz/include/TightDataPointStorageI.h @@ -0,0 +1,65 @@ +/** + * @file 
TightDataPointStorageI.h + * @author Sheng Di and Dingwen Tao + * @date Aug, 2017 + * @brief Header file for the tight data point storage (TDPS). + * (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. + */ + +#ifndef _TightDataPointStorageI_H +#define _TightDataPointStorageI_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +typedef struct TightDataPointStorageI +{ + size_t dataSeriesLength; + int allSameData; + double realPrecision; //it's used as the pwrErrBoundRatio when errBoundMode==PW_REL + size_t exactDataNum; + long minValue; + int exactByteSize; + int dataTypeSize; //the size of data type, e.g., it's 4 when data type is int32_t + + int stateNum; + int allNodes; + + unsigned char* typeArray; //its size is dataSeriesLength/4 (or xxx/4+1) + size_t typeArray_size; + + unsigned char* exactDataBytes; + size_t exactDataBytes_size; + + unsigned int intervals; //quantization_intervals + + unsigned char isLossless; //a mark to denote whether it's lossless compression (1 is yes, 0 is no) + +} TightDataPointStorageI; + +int computeRightShiftBits(int exactByteSize, int dataType); +int convertDataTypeSizeCode(int dataTypeSizeCode); +int convertDataTypeSize(int dataTypeSize); + +void new_TightDataPointStorageI_Empty(TightDataPointStorageI **self); +int new_TightDataPointStorageI_fromFlatBytes(TightDataPointStorageI **self, unsigned char* flatBytes, size_t flatBytesLength); +void new_TightDataPointStorageI(TightDataPointStorageI **self, + size_t dataSeriesLength, size_t exactDataNum, int byteSize, + int* type, unsigned char* exactDataBytes, size_t exactDataBytes_size, + double realPrecision, long minValue, int intervals, int dataType); + +void convertTDPStoBytes_int(TightDataPointStorageI* tdps, unsigned char* bytes, unsigned char sameByte); +void convertTDPStoFlatBytes_int(TightDataPointStorageI *tdps, unsigned char** bytes, size_t *size); +void 
convertTDPStoFlatBytes_int_args(TightDataPointStorageI *tdps, unsigned char* bytes, size_t *size); +void free_TightDataPointStorageI(TightDataPointStorageI *tdps); +void free_TightDataPointStorageI2(TightDataPointStorageI *tdps); + +#ifdef __cplusplus +} +#endif + +#endif /* ----- #ifndef _TightDataPointStorageI_H ----- */ diff --git a/deps/SZ/sz/include/TypeManager.h b/deps/SZ/sz/include/TypeManager.h new file mode 100644 index 0000000000000000000000000000000000000000..7c543d3f82aab34cad5ad8eb44e779872f02cf86 --- /dev/null +++ b/deps/SZ/sz/include/TypeManager.h @@ -0,0 +1,40 @@ +/** + * @file TypeManager.h + * @author Sheng Di + * @date July, 2017 + * @brief Header file for the TypeManager.c. + * (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. + */ + +#ifndef _TypeManager_H +#define _TypeManager_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include + +//TypeManager.c +size_t convertIntArray2ByteArray_fast_1b(unsigned char* intArray, size_t intArrayLength, unsigned char **result); +size_t convertIntArray2ByteArray_fast_1b_to_result(unsigned char* intArray, size_t intArrayLength, unsigned char *result); +void convertByteArray2IntArray_fast_1b(size_t intArrayLength, unsigned char* byteArray, size_t byteArrayLength, unsigned char **intArray); +size_t convertIntArray2ByteArray_fast_2b(unsigned char* timeStepType, size_t timeStepTypeLength, unsigned char **result); +size_t convertIntArray2ByteArray_fast_2b_inplace(unsigned char* timeStepType, size_t timeStepTypeLength, unsigned char *result); +void convertByteArray2IntArray_fast_2b(size_t stepLength, unsigned char* byteArray, size_t byteArrayLength, unsigned char **intArray); +size_t convertIntArray2ByteArray_fast_3b(unsigned char* timeStepType, size_t timeStepTypeLength, unsigned char **result); +void convertByteArray2IntArray_fast_3b(size_t stepLength, unsigned char* byteArray, size_t byteArrayLength, unsigned char **intArray); 
+int getLeftMovingSteps(size_t k, unsigned char resiBitLength); +size_t convertIntArray2ByteArray_fast_dynamic(unsigned char* timeStepType, unsigned char resiBitLength, size_t nbEle, unsigned char **bytes); +size_t convertIntArray2ByteArray_fast_dynamic2(unsigned char* timeStepType, unsigned char* resiBitLength, size_t resiBitLengthLength, unsigned char **bytes); +int computeBitNumRequired(size_t dataLength); +void decompressBitArraybySimpleLZ77(int** result, unsigned char* bytes, size_t bytesLength, size_t totalLength, int validLength); + +#ifdef __cplusplus +} +#endif + +#endif /* ----- #ifndef _TypeManager_H ----- */ + diff --git a/deps/SZ/sz/include/VarSet.h b/deps/SZ/sz/include/VarSet.h new file mode 100644 index 0000000000000000000000000000000000000000..c991d72bfc1cbe1c0d184f386b6b9b8aa8bddb0b --- /dev/null +++ b/deps/SZ/sz/include/VarSet.h @@ -0,0 +1,84 @@ +/** + * @file VarSet.h + * @author Sheng Di + * @date July, 2016 + * @brief Header file for the Variable.c. + * (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. 
+ */ + +#ifndef _VarSet_H +#define _VarSet_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +typedef struct sz_multisteps +{ + char compressionType; + int predictionMode; + int lastSnapshotStep; //the previous snapshot step + unsigned int currentStep; //current time step of the execution/simulation + + //void* ori_data; //original data pointer, which serve as the key for retrieving hist_data + void* hist_data; //historical data in past time steps +} sz_multisteps; + +typedef struct SZ_Variable +{ + unsigned char var_id; + char* varName; + char compressType; //102 means HZ; 101 means SZ + int dataType; //SZ_FLOAT or SZ_DOUBLE + size_t r5; + size_t r4; + size_t r3; + size_t r2; + size_t r1; + int errBoundMode; + double absErrBound; + double relBoundRatio; + double pwRelBoundRatio; + void* data; + sz_multisteps *multisteps; + unsigned char* compressedBytes; + size_t compressedSize; + struct SZ_Variable* next; +} SZ_Variable; + +typedef struct SZ_VarSet +{ + unsigned short count; + struct SZ_Variable *header; + struct SZ_Variable *lastVar; +} SZ_VarSet; + +void free_Variable_keepOriginalData(SZ_Variable* v); +void free_Variable_keepCompressedBytes(SZ_Variable* v); +void free_Variable_all(SZ_Variable* v); +void SZ_batchAddVar(int var_id, char* varName, int dataType, void* data, + int errBoundMode, double absErrBound, double relBoundRatio, double pwRelBoundRatio, + size_t r5, size_t r4, size_t r3, size_t r2, size_t r1); +int SZ_batchDelVar_vset(SZ_VarSet* vset, char* varName); +int SZ_batchDelVar(char* varName); +int SZ_batchDelVar_ID_vset(SZ_VarSet* vset, int var_id); +int SZ_batchDelVar_ID(int var_id); + +SZ_Variable* SZ_searchVar(char* varName); +void* SZ_getVarData(char* varName, size_t *r5, size_t *r4, size_t *r3, size_t *r2, size_t *r1); + +void free_VarSet_vset(SZ_VarSet *vset, int mode); +void SZ_freeVarSet(int mode); + +void free_multisteps(sz_multisteps* multisteps); +int checkVarID(unsigned char cur_var_id, unsigned char* var_ids, int var_count); 
+SZ_Variable* SZ_getVariable(int var_id); + +#ifdef __cplusplus +} +#endif + +#endif /* ----- #ifndef _VarSet_H ----- */ diff --git a/deps/SZ/sz/include/callZlib.h b/deps/SZ/sz/include/callZlib.h new file mode 100644 index 0000000000000000000000000000000000000000..1aede548c6e5d7aa30475799d1af994a3ccddad4 --- /dev/null +++ b/deps/SZ/sz/include/callZlib.h @@ -0,0 +1,44 @@ +/** + * @file callZlib.h + * @author Sheng Di + * @date July, 2017 + * @brief Header file for the callZlib.c. + * (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. + */ + +#ifndef _CallZlib_H +#define _CallZlib_H + +#ifdef __cplusplus +extern "C" { +#endif + +//#define SZ_ZLIB_BUFFER_SIZE 1048576 +#define SZ_ZLIB_BUFFER_SIZE 65536 + +#include + +int isZlibFormat(unsigned char magic1, unsigned char magic2); + +//callZlib.c +unsigned long zlib_compress(unsigned char* data, unsigned long dataLength, unsigned char** compressBytes, int level); +unsigned long zlib_compress2(unsigned char* data, unsigned long dataLength, unsigned char** compressBytes, int level); +unsigned long zlib_compress3(unsigned char* data, unsigned long dataLength, unsigned char* compressBytes, int level); +unsigned long zlib_compress4(unsigned char* data, unsigned long dataLength, unsigned char** compressBytes, int level); +unsigned long zlib_compress5(unsigned char* data, unsigned long dataLength, unsigned char** compressBytes, int level); + +unsigned long zlib_uncompress4(unsigned char* compressBytes, unsigned long cmpSize, unsigned char** oriData, unsigned long targetOriSize); +unsigned long zlib_uncompress5(unsigned char* compressBytes, unsigned long cmpSize, unsigned char** oriData, unsigned long targetOriSize); +unsigned long zlib_uncompress(unsigned char* compressBytes, unsigned long cmpSize, unsigned char** oriData, unsigned long targetOriSize); +unsigned long zlib_uncompress2(unsigned char* compressBytes, unsigned long cmpSize, unsigned char** 
oriData, unsigned long targetOriSize); +unsigned long zlib_uncompress3(unsigned char* compressBytes, unsigned long cmpSize, unsigned char** oriData, unsigned long targetOriSize); + +unsigned long zlib_uncompress65536bytes(unsigned char* compressBytes, unsigned long cmpSize, unsigned char** oriData); + +#ifdef __cplusplus +} +#endif + +#endif /* ----- #ifndef _CallZlib_H ----- */ + diff --git a/deps/SZ/sz/include/conf.h b/deps/SZ/sz/include/conf.h new file mode 100644 index 0000000000000000000000000000000000000000..0f65403194f72a3f2c9a6114db85b6d3d3ecb87f --- /dev/null +++ b/deps/SZ/sz/include/conf.h @@ -0,0 +1,37 @@ +/** + * @file conf.h + * @author Sheng Di + * @date July, 2017 + * @brief Header file for the conf.c. + * (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. + */ + +#ifndef _Conf_H +#define _Conf_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +//conf.c +void updateQuantizationInfo(int quant_intervals); +int SZ_ReadConf(const char* sz_cfgFile); +int SZ_LoadConf(const char* sz_cfgFile); +int checkVersion(char* version); +int computeVersion(int major, int minor, int revision); +int checkVersion2(char* version); + +void initSZ_TSC(); +unsigned int roundUpToPowerOf2(unsigned int base); +double computeABSErrBoundFromPSNR(double psnr, double threshold, double value_range); +double computeABSErrBoundFromNORM_ERR(double normErr, size_t nbEle); + +#ifdef __cplusplus +} +#endif + +#endif /* ----- #ifndef _Conf_H ----- */ + diff --git a/deps/SZ/sz/include/dataCompression.h b/deps/SZ/sz/include/dataCompression.h new file mode 100644 index 0000000000000000000000000000000000000000..0e4993958984970c22db72d11c455544e41e6a1a --- /dev/null +++ b/deps/SZ/sz/include/dataCompression.h @@ -0,0 +1,104 @@ +/** + * @file dataCompression.h + * @author Sheng Di + * @date July, 2017 + * @brief Header file for the dataCompression.c. 
+ * (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. + */ + +#ifndef _DataCompression_H +#define _DataCompression_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include "sz.h" +#include +#include + +#define computeMinMax(data) \ + for(i=1;idata_)\ + min = data_;\ + else if(max +#include +#include +#include + +/*--------------------------------------------------------------------------- + New types + ---------------------------------------------------------------------------*/ + +#ifdef __cplusplus +extern "C" { +#endif + +/*-------------------------------------------------------------------------*/ +/** + @brief Dictionary object + + This object contains a list of string/string associations. Each + association is identified by a unique string key. Looking up values + in the dictionary is speeded up by the use of a (hopefully collision-free) + hash function. + */ +/*-------------------------------------------------------------------------*/ +typedef struct _dictionary_ { + int n ; /** Number of entries in dictionary */ + int size ; /** Storage size */ + char ** val ; /** List of string values */ + char ** key ; /** List of string keys */ + unsigned * hash ; /** List of hash values for keys */ +} dictionary ; + + +/*--------------------------------------------------------------------------- + Function prototypes + ---------------------------------------------------------------------------*/ + +/*-------------------------------------------------------------------------*/ +/** + @brief Compute the hash key for a string. + @param key Character string to use for key. + @return 1 unsigned int on at least 32 bits. + + This hash function has been taken from an Article in Dr Dobbs Journal. + This is normally a collision-free function, distributing keys evenly. + The key is stored anyway in the struct so that collision can be avoided + by comparing the key itself in last resort. 
+ */ +/*--------------------------------------------------------------------------*/ +unsigned dictionary_hash(const char * key); + +/*-------------------------------------------------------------------------*/ +/** + @brief Create a new dictionary object. + @param size Optional initial size of the dictionary. + @return 1 newly allocated dictionary objet. + + This function allocates a new dictionary object of given size and returns + it. If you do not know in advance (roughly) the number of entries in the + dictionary, give size=0. + */ +/*--------------------------------------------------------------------------*/ +dictionary * dictionary_new(int size); + +/*-------------------------------------------------------------------------*/ +/** + @brief Delete a dictionary object + @param d dictionary object to deallocate. + @return void + + Deallocate a dictionary object and all memory associated to it. + */ +/*--------------------------------------------------------------------------*/ +void dictionary_del(dictionary * vd); + +/*-------------------------------------------------------------------------*/ +/** + @brief Get a value from a dictionary. + @param d dictionary object to search. + @param key Key to look for in the dictionary. + @param def Default value to return if key not found. + @return 1 pointer to internally allocated character string. + + This function locates a key in a dictionary and returns a pointer to its + value, or the passed 'def' pointer if no such key can be found in + dictionary. The returned character pointer points to data internal to the + dictionary object, you should not try to free it or modify it. + */ +/*--------------------------------------------------------------------------*/ +char * dictionary_get(dictionary * d, const char * key, char * def); + + +/*-------------------------------------------------------------------------*/ +/** + @brief Set a value in a dictionary. + @param d dictionary object to modify. 
+ @param key Key to modify or add. + @param val Value to add. + @return int 0 if Ok, anything else otherwise + + If the given key is found in the dictionary, the associated value is + replaced by the provided one. If the key cannot be found in the + dictionary, it is added to it. + + It is Ok to provide a NULL value for val, but NULL values for the dictionary + or the key are considered as errors: the function will return immediately + in such a case. + + Notice that if you dictionary_set a variable to NULL, a call to + dictionary_get will return a NULL value: the variable will be found, and + its value (NULL) is returned. In other words, setting the variable + content to NULL is equivalent to deleting the variable from the + dictionary. It is not possible (in this implementation) to have a key in + the dictionary without value. + + This function returns non-zero in case of failure. + */ +/*--------------------------------------------------------------------------*/ +int dictionary_set(dictionary * vd, const char * key, const char * val); + +/*-------------------------------------------------------------------------*/ +/** + @brief Delete a key in a dictionary + @param d dictionary object to modify. + @param key Key to remove. + @return void + + This function deletes a key in a dictionary. Nothing is done if the + key cannot be found. + */ +/*--------------------------------------------------------------------------*/ +void dictionary_unset(dictionary * d, const char * key); + + +/*-------------------------------------------------------------------------*/ +/** + @brief Dump a dictionary to an opened file pointer. + @param d Dictionary to dump + @param f Opened file pointer. + @return void + + Dumps a dictionary onto an opened file pointer. Key pairs are printed out + as @c [Key]=[Value], one per line. It is Ok to provide stdout or stderr as + output file pointers. 
+ */ +/*--------------------------------------------------------------------------*/ +void dictionary_dump(dictionary * d, FILE * out); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/deps/SZ/sz/include/exafelSZ.h b/deps/SZ/sz/include/exafelSZ.h new file mode 100644 index 0000000000000000000000000000000000000000..06e9921a2842ccd7f2caf4a7f9ef6abe274b9b99 --- /dev/null +++ b/deps/SZ/sz/include/exafelSZ.h @@ -0,0 +1,57 @@ +#ifndef EXAFELSZ_H +#define EXAFELSZ_H +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include + +typedef struct exafelSZ_params{ + //uint8_t *peaks; + uint16_t *peaksSegs; + uint16_t *peaksRows; + uint16_t *peaksCols; + uint64_t numPeaks; + + uint8_t *calibPanel; + + uint8_t binSize; //Binning: (pr->binSize x pr->binSize) to (1 x 1) + double tolerance; //SZ pr->tolerance + uint8_t szDim; //1D/2D/3D compression/decompression + //uint8_t szBlockSize; //Currently unused + uint8_t peakSize; //MUST BE ODD AND NOT EVEN! Each peak will have size of: (peakSize x peakSize) + + // uint64_t nEvents; + // uint64_t panels; + // uint64_t rows; + // uint64_t cols; + + //CALCULATED VARIBALES: + uint64_t binnedRows; + uint64_t binnedCols; + uint8_t peakRadius; //Will be calculated using peakSize + +} exafelSZ_params; + + +void exafelSZ_params_process(exafelSZ_params*pr, size_t panels, size_t rows, size_t cols); +void exafelSZ_params_checkDecomp(exafelSZ_params*pr, size_t panels, size_t rows, size_t cols); +void exafelSZ_params_checkComp(exafelSZ_params*pr, size_t panels, size_t rows, size_t cols); + +unsigned char * exafelSZ_Compress(void* _pr, + void* _origData, + size_t events, size_t panels, size_t rows, size_t cols, + size_t *compressedSize); + +void* exafelSZ_Decompress(void *_pr, + unsigned char*_compressedBuffer, + size_t events, size_t panels, size_t rows, size_t cols, + size_t compressedSize); + +#ifdef __cplusplus +} +#endif + +#endif /* ----- #ifndef _EXAFELSZ_H ----- */ + diff --git a/deps/SZ/sz/include/iniparser.h 
b/deps/SZ/sz/include/iniparser.h new file mode 100644 index 0000000000000000000000000000000000000000..4ddb907c2e4af917e82b6503db0c9a00032ad38f --- /dev/null +++ b/deps/SZ/sz/include/iniparser.h @@ -0,0 +1,321 @@ + +/*-------------------------------------------------------------------------*/ +/** + @file iniparser.h + @author N. Devillard + @brief Parser for ini files. +*/ +/*--------------------------------------------------------------------------*/ + +#ifndef _INIPARSER_H_ +#define _INIPARSER_H_ + +/*--------------------------------------------------------------------------- + Includes + ---------------------------------------------------------------------------*/ + +#include +#include +#include + +/* + * The following #include is necessary on many Unixes but not Linux. + * It is not needed for Windows platforms. + * Uncomment it if needed. + */ +/* #include */ + +#include "dictionary.h" + +/*-------------------------------------------------------------------------*/ +/** + @brief Get number of sections in a dictionary + @param d Dictionary to examine + @return int Number of sections found in dictionary + + This function returns the number of sections found in a dictionary. + The test to recognize sections is done on the string stored in the + dictionary: a section name is given as "section" whereas a key is + stored as "section:key", thus the test looks for entries that do not + contain a colon. + + This clearly fails in the case a section name contains a colon, but + this should simply be avoided. + + This function returns -1 in case of error. + */ +/*--------------------------------------------------------------------------*/ + +int iniparser_getnsec(dictionary * d); + + +/*-------------------------------------------------------------------------*/ +/** + @brief Get name for section n in a dictionary. + @param d Dictionary to examine + @param n Section number (from 0 to nsec-1). 
+ @return Pointer to char string + + This function locates the n-th section in a dictionary and returns + its name as a pointer to a string statically allocated inside the + dictionary. Do not free or modify the returned string! + + This function returns NULL in case of error. + */ +/*--------------------------------------------------------------------------*/ + +char * iniparser_getsecname(dictionary * d, int n); + + +/*-------------------------------------------------------------------------*/ +/** + @brief Save a dictionary to a loadable ini file + @param d Dictionary to dump + @param f Opened file pointer to dump to + @return void + + This function dumps a given dictionary into a loadable ini file. + It is Ok to specify @c stderr or @c stdout as output files. + */ +/*--------------------------------------------------------------------------*/ + +void iniparser_dump_ini(dictionary * d, FILE * f); + +/*-------------------------------------------------------------------------*/ +/** + @brief Save a dictionary section to a loadable ini file + @param d Dictionary to dump + @param s Section name of dictionary to dump + @param f Opened file pointer to dump to + @return void + + This function dumps a given section of a given dictionary into a loadable ini + file. It is Ok to specify @c stderr or @c stdout as output files. + */ +/*--------------------------------------------------------------------------*/ + +void iniparser_dumpsection_ini(dictionary * d, char * s, FILE * f); + +/*-------------------------------------------------------------------------*/ +/** + @brief Dump a dictionary to an opened file pointer. + @param d Dictionary to dump. + @param f Opened file pointer to dump to. + @return void + + This function prints out the contents of a dictionary, one element by + line, onto the provided file pointer. It is OK to specify @c stderr + or @c stdout as output files. This function is meant for debugging + purposes mostly. 
+ */ +/*--------------------------------------------------------------------------*/ +void iniparser_dump(dictionary * d, FILE * f); + +/*-------------------------------------------------------------------------*/ +/** + @brief Get the number of keys in a section of a dictionary. + @param d Dictionary to examine + @param s Section name of dictionary to examine + @return Number of keys in section + */ +/*--------------------------------------------------------------------------*/ +int iniparser_getsecnkeys(dictionary * d, char * s); + +/*-------------------------------------------------------------------------*/ +/** + @brief Get the number of keys in a section of a dictionary. + @param d Dictionary to examine + @param s Section name of dictionary to examine + @return pointer to statically allocated character strings + + This function queries a dictionary and finds all keys in a given section. + Each pointer in the returned char pointer-to-pointer is pointing to + a string allocated in the dictionary; do not free or modify them. + + This function returns NULL in case of error. + */ +/*--------------------------------------------------------------------------*/ +char ** iniparser_getseckeys(dictionary * d, char * s); + +/*-------------------------------------------------------------------------*/ +/** + @brief Get the string associated to a key + @param d Dictionary to search + @param key Key string to look for + @param def Default value to return if key not found. + @return pointer to statically allocated character string + + This function queries a dictionary for a key. A key as read from an + ini file is given as "section:key". If the key cannot be found, + the pointer passed as 'def' is returned. + The returned char pointer is pointing to a string allocated in + the dictionary, do not free or modify it. 
+ */ +/*--------------------------------------------------------------------------*/ +char * iniparser_getstring(dictionary * d, const char * key, char * def); + +/*-------------------------------------------------------------------------*/ +/** + @brief Get the string associated to a key, convert to an int + @param d Dictionary to search + @param key Key string to look for + @param notfound Value to return in case of error + @return integer + + This function queries a dictionary for a key. A key as read from an + ini file is given as "section:key". If the key cannot be found, + the notfound value is returned. + + Supported values for integers include the usual C notation + so decimal, octal (starting with 0) and hexadecimal (starting with 0x) + are supported. Examples: + + - "42" -> 42 + - "042" -> 34 (octal -> decimal) + - "0x42" -> 66 (hexa -> decimal) + + Warning: the conversion may overflow in various ways. Conversion is + totally outsourced to strtol(), see the associated man page for overflow + handling. + + Credits: Thanks to A. Becker for suggesting strtol() + */ +/*--------------------------------------------------------------------------*/ +int iniparser_getint(dictionary * d, const char * key, int notfound); + +/*-------------------------------------------------------------------------*/ +/** + @brief Get the string associated to a key, convert to a long + @param d Dictionary to search + @param key Key string to look for + @param notfound Value to return in case of error + @return long + + Credits: This function bases completely on int iniparser_getint and was + slightly modified to return long instead of int. 
+ */ +/*--------------------------------------------------------------------------*/ +long iniparser_getlint(dictionary * d, const char * key, int notfound); + +/*-------------------------------------------------------------------------*/ +/** + @brief Get the string associated to a key, convert to a double + @param d Dictionary to search + @param key Key string to look for + @param notfound Value to return in case of error + @return double + + This function queries a dictionary for a key. A key as read from an + ini file is given as "section:key". If the key cannot be found, + the notfound value is returned. + */ +/*--------------------------------------------------------------------------*/ +double iniparser_getdouble(dictionary * d, const char * key, double notfound); + +/*-------------------------------------------------------------------------*/ +/** + @brief Get the string associated to a key, convert to a boolean + @param d Dictionary to search + @param key Key string to look for + @param notfound Value to return in case of error + @return integer + + This function queries a dictionary for a key. A key as read from an + ini file is given as "section:key". If the key cannot be found, + the notfound value is returned. + + A true boolean is found if one of the following is matched: + + - A string starting with 'y' + - A string starting with 'Y' + - A string starting with 't' + - A string starting with 'T' + - A string starting with '1' + + A false boolean is found if one of the following is matched: + + - A string starting with 'n' + - A string starting with 'N' + - A string starting with 'f' + - A string starting with 'F' + - A string starting with '0' + + The notfound value returned if no boolean is identified, does not + necessarily have to be 0 or 1. 
+ */ +/*--------------------------------------------------------------------------*/ +int iniparser_getboolean(dictionary * d, const char * key, int notfound); + + +/*-------------------------------------------------------------------------*/ +/** + @brief Set an entry in a dictionary. + @param ini Dictionary to modify. + @param entry Entry to modify (entry name) + @param val New value to associate to the entry. + @return int 0 if Ok, -1 otherwise. + + If the given entry can be found in the dictionary, it is modified to + contain the provided value. If it cannot be found, -1 is returned. + It is Ok to set val to NULL. + */ +/*--------------------------------------------------------------------------*/ +int iniparser_set(dictionary * ini, const char * entry, const char * val); + + +/*-------------------------------------------------------------------------*/ +/** + @brief Delete an entry in a dictionary + @param ini Dictionary to modify + @param entry Entry to delete (entry name) + @return void + + If the given entry can be found, it is deleted from the dictionary. + */ +/*--------------------------------------------------------------------------*/ +void iniparser_unset(dictionary * ini, const char * entry); + +/*-------------------------------------------------------------------------*/ +/** + @brief Finds out if a given entry exists in a dictionary + @param ini Dictionary to search + @param entry Name of the entry to look for + @return integer 1 if entry exists, 0 otherwise + + Finds out if a given entry exists in the dictionary. Since sections + are stored as keys with NULL associated values, this is the only way + of querying for the presence of sections in a dictionary. 
+ */ +/*--------------------------------------------------------------------------*/ +int iniparser_find_entry(dictionary * ini, const char * entry) ; + +/*-------------------------------------------------------------------------*/ +/** + @brief Parse an ini file and return an allocated dictionary object + @param ininame Name of the ini file to read. + @return Pointer to newly allocated dictionary + + This is the parser for ini files. This function is called, providing + the name of the file to be read. It returns a dictionary object that + should not be accessed directly, but through accessor functions + instead. + + The returned dictionary must be freed using iniparser_freedict(). + */ +/*--------------------------------------------------------------------------*/ +dictionary * iniparser_load(const char * ininame); + +/*-------------------------------------------------------------------------*/ +/** + @brief Free all memory associated to an ini dictionary + @param d Dictionary to free + @return void + + Free all memory associated to an ini dictionary. + It is mandatory to call this function before the dictionary object + gets out of the current context. + */ +/*--------------------------------------------------------------------------*/ +void iniparser_freedict(dictionary * d); + +#endif diff --git a/deps/SZ/sz/include/pastri.h b/deps/SZ/sz/include/pastri.h new file mode 100644 index 0000000000000000000000000000000000000000..b5f2e90ddef1084ff262f88de1556967235913b2 --- /dev/null +++ b/deps/SZ/sz/include/pastri.h @@ -0,0 +1,140 @@ +//CHECK: +//What happens when ECQBits==1, or ECQBits==0 or ECQBits<0? +//Rounding? Scale originalEb by 0.99? 
+ +//Possible improvement: Change GAMESS format: {i i i i d} -> {i}{i}{i}{i}{d} +//Possible improvement: Optimize bookkeeping bits +//Possible improvement: Guess the type (C/UC, Sparse/Not) +//Possible improvement: Get rid of writing/reading some of the indexes to in/out buffers +//Possible improvement: Get rid of all debug stuff, including Makefile debug flags +//Possible improvement: Get rid of "compressedBytes" +//Possible improvement: SparseCompressed, ECQBits=2: 1's and -1's can be represented by just 0 and 1, instead 10 and 11. +//Possible improvement: SparseCompressed, ECQBits>2: Again: 1: 10, -1:11, Others: 0XX...XX +//Possible improvement: WriteBitsFast: maybe remove some masks? +//Possible improvement: WriteBitsFast: Get rid of multiple calls! +//Possible improvement: UCSparse: Indexes use 64 bits. It can be lowered to _1DIdxBits +//Possible improvement: Parameters: Smaller data sizes may be possible! + + + +#ifndef PASTRI_H +#define PASTRI_H + +#include +#include +#include +#include +#include +#include //Just for debugging purposes! + +//#define DATASIZE 8 //Bytes per input data point. +//We have only 1 double per data point, so it is 8 bytes. + +#define MAX_PS_SIZE 100 +#define MAX_BLOCK_SIZE 10000 +#define MAX_BUFSIZE 160000 //Should be a multiple of 8 +#define D_W 0 //Debug switch: Write (input block) +#define D_R 0 //Debug switch: Read (compressed block) +#define D_G 0 //Debug switch: General +#define D_G2 0 //Debug switch: General 2 (a little more detail) +#define D_C 0 //Debug switch: C +//#define DEBUG 1 //Debug switch + +//#define BOOKKEEPINGBITS 0 //Currently unused +//#define BOOKKEEPINGBITS 120 //Includes: mode, indexOffsets, compressedBytes, Pb_, ECQBits_ (8+64+32+8+8) +//BOOKKEEPINGBITS is defined here, because if P & S is going to be used, they appear just after the bookkeeping part. +//This allows us to write P and S directly onto using outBuf. + + +// IMPORTANT NOTE: +//Read/Write up to 56 bits. +//More than that is not supported! 
+ + +/********************************************************************/ +//Datatype Declarations: +/********************************************************************/ +typedef struct pastri_params{ + double originalEb; //Error Bound entered by the user + double usedEb; //Error Bound used during compression/deceompression + + int numBlocks; //Number of blocks to be compressed + int dataSize; //8(=Double) or 4(=Float) + + int bf[4]; //Orbital types (basis function types). Typically in range [0,3] + int idxRange[4]; //Ranges of indexes. idxRange[i]=(bf[i]+1)*(bf[i]+2)/2; + + int sbSize; //=idxRange[2]*idxRange[3]; + int sbNum; //=idxRange[0]*idxRange[1]; + int bSize; //=sbSize*sbNum; + + //uint16_t idxOffset[4]; //Index offset values. No longer used. + +}pastri_params; + +//Block-specific stuff: +typedef struct pastri_blockParams{ + uint16_t nonZeros; + //int ECQ0s; //= p->bSize - numOutliers //OR: p->bSize=ECQ0s+ECQ1s+ECQOthers + int ECQ1s; + int ECQOthers; + int numOutliers; //=ECQ1s+ECQOthers + int patternBits; + int scaleBits; + double binSize; + double scalesBinSize; + uint64_t ECQExt; + int ECQBits; + int _1DIdxBits; +}pastri_blockParams; + +typedef union u_UI64I64D{ + uint64_t ui64; + int64_t i64; + double d; +} u_UI64I64D; + +/********************************************************************/ +//Function Prototypes: +/********************************************************************/ +void SZ_pastriReadParameters(char paramsFilename[512],pastri_params *paramsPtr); +//Read the basic PaSTRI parameters from a file, speficied by paramsFilename. + +void SZ_pastriPreprocessParameters(pastri_params *p); +//Using basic PaSTRI parameters, generate the others. +//For example, block and sub-block sizes are generated by using basis function types. 
+ +void SZ_pastriCompressBatch(pastri_params *p,unsigned char *originalBuf, unsigned char** compressedBufP,size_t *compressedBytes); +//INPUTS: p, originalBuf +//OUTPUTS: compressedBufP, compressedBytes +//Using the inputs, compressedBufP is allocated and populated by the compressed data. Compressed size is written into compressedBytes. +//Parameters are also stored at the beginning part of the compressedBuf + +void SZ_pastriDecompressBatch(unsigned char*compressedBuf, pastri_params *p, unsigned char** decompressedBufP ,size_t *decompressedBytes); +//INPUTS: compressedBuf +//OUTPUTS: p, decompressedBufP, decompressedBytes +//First, parameters are read from compressedBuf and written into p. +//Then, decompressedBufP is allocated and populated by the decompressed data. Decompressed size is written into decompressedBytes. + +void SZ_pastriCheckBatch(pastri_params *p,unsigned char*originalBuf,unsigned char*decompressedBuf); +//INPUTS: p, originalBuf, decompressedBuf +//OUTPUTS: None (Just some on-screen messages) +//Compares originalBuf with decompressedBuf. Checks whether the absolute error condition is satisfied or not. 
+ +/********************************************************************/ +//Other Includes: +/********************************************************************/ + + + +#include "pastriGeneral.h" //General tools +#include "pastriD.h" //Compression/Decompression for Double data +#include "pastriF.h" //Compression/Decompression for Float data + + +#endif + + + + + diff --git a/deps/SZ/sz/include/pastriD.h b/deps/SZ/sz/include/pastriD.h new file mode 100644 index 0000000000000000000000000000000000000000..3ee2813997b308a3c85c13eda317269017e6657a --- /dev/null +++ b/deps/SZ/sz/include/pastriD.h @@ -0,0 +1,911 @@ +#ifndef PASTRID_H +#define PASTRID_H + +static inline int64_t pastri_double_quantize(double x, double binSize){ + //Add or sub 0.5, depending on the sign: + x=x/binSize; + + u_UI64I64D u1,half; + u1.d=x; + + half.d=0.5; + +// //printf("pastri_double_quantize:\nx=%lf x=0x%lx\n",x,(*((uint64_t *)(&x)))); +// //printf("sign(x):0x%lx\n", x); +// //printf("0.5:0x%lx\n", (*((uint64_t *)(&half)))); + half.ui64 |= (u1.ui64 & (uint64_t)0x8000000000000000); +// //printf("sign(x)*0.5:0x%lx\n", (*((uint64_t *)(&half)))); + return (int64_t)(x + half.d); +} + +static inline void pastri_double_PatternMatch(double*data,pastri_params* p,pastri_blockParams* bp,int64_t* patternQ,int64_t *scalesQ, int64_t* ECQ){ + //Find the pattern. 
+ //First, find the extremum point: + double absExt=0; //Absolute value of Extremum + int extIdx=-1; //Index of Extremum + bp->nonZeros=0; + int i,sb; + for(i=0;ibSize;i++){ +// //printf("data[%d] = %.16lf\n",i,data[i]);//DEBUG + if(abs_FastD(data[i])>p->usedEb){ + bp->nonZeros++; + ////if(DEBUG)printf("data[%d]:%.6e\n",i,data[i]); //DEBUG + } + if(abs_FastD(data[i])>absExt){ + absExt=abs_FastD(data[i]); + extIdx=i; + } + } + int patternIdx; //Starting Index of Pattern + patternIdx=(extIdx/p->sbSize)*p->sbSize; + + double patternExt=data[extIdx]; + bp->binSize=2*p->usedEb; + + ////if(DEBUG){printf("Extremum : data[%d] = %.6e\n",extIdx,patternExt);} //DEBUG + ////if(DEBUG){printf("patternIdx: %d\n",patternIdx);} //DEBUG + + ////if(DEBUG){for(i=0;isbSize;i++){printf("pattern[%d]=data[%d]=%.6e Quantized:%d\n",i,patternIdx+i,data[patternIdx+i],pastri_double_quantize(data[patternIdx+i]/binSize) );} }//DEBUG + + //int64_t *patternQ=(int64_t*)(outBuf+15); //Possible Improvement! + + + for(i=0;isbSize;i++){ + patternQ[i]=pastri_double_quantize(data[patternIdx+i],bp->binSize); + //if(D_W){printf("patternQ[%d]=%ld\n",i,patternQ[i]);} + } + + bp->patternBits=bitsNeeded_double((abs_FastD(patternExt)/bp->binSize)+1)+1; + bp->scaleBits=bp->patternBits; + bp->scalesBinSize=1/(double)(((uint64_t)1<<(bp->scaleBits-1))-1); + ////if(DEBUG){printf("(patternExt/binSize)+1: %.6e\n",(patternExt/binSize)+1);} //DEBUG + ////if(DEBUG){printf("scaleBits=patternBits: %d\n",scaleBits);} //DEBUG + //if(D_W){printf("scalesBinSize: %.6e\n",bp->scalesBinSize);} //DEBUG + + //Calculate Scales. + //The index part of the input buffer will be reused to hold Scale, Pattern, etc. values. + int localExtIdx=extIdx%p->sbSize; //Local extremum index. This is not the actual extremum of the current sb, but rather the index that correspond to the global (block) extremum. + //int64_t *scalesQ=(int64_t*)(outBuf+15+p->sbSize*8); //Possible Improvement! 
+ int patternExtZero=(patternExt==0); + ////if(DEBUG){printf("patternExtZero: %d\n",patternExtZero);} //DEBUG + for(sb=0;sbsbNum;sb++){ + //scales[sb]=data[sb*p->sbSize+localExtIdx]/patternExt; + //scales[sb]=patternExtZero ? 0 : data[sb*p->sbSize+localExtIdx]/patternExt; + //assert(scales[sb]<=1); + scalesQ[sb]=pastri_double_quantize((patternExtZero ? 0 : data[sb*p->sbSize+localExtIdx]/patternExt),bp->scalesBinSize); + //if(D_W){printf("scalesQ[%d]=%ld\n",sb,scalesQ[sb]);} + } + ////if(DEBUG){for(i=0;isbSize;i++){printf("scalesQ[%d]=%ld \n",i,scalesQ[i]);}} //DEBUG + + //int64_t *ECQ=(int64_t*)(outBuf+p->bSize*8); //ECQ is written into outBuf, just be careful when handling it. + + //uint64_t wVal; + bp->ECQExt=0; + int _1DIdx; + bp->ECQ1s=0; + bp->ECQOthers=0; + double PS_binSize=bp->scalesBinSize*bp->binSize; + for(sb=0;sbsbNum;sb++){ + for(i=0;isbSize;i++){ + _1DIdx=sb*p->sbSize+i; + ECQ[_1DIdx]=pastri_double_quantize( (scalesQ[sb]*patternQ[i]*PS_binSize-data[_1DIdx]),bp->binSize ); + double absECQ=abs_FastD(ECQ[_1DIdx]); + if(absECQ > bp->ECQExt) + bp->ECQExt=absECQ; + ////if(DEBUG){printf("EC[%d]: %.6e Quantized:%ld \n",_1DIdx,(scalesQ[sb]*patternQ[i]*scalesBinSize*binSize-data[_1DIdx]),ECQ[_1DIdx]);} //DEBUG + switch (ECQ[_1DIdx]){ + case 0: + //ECQ0s++; //Currently not needed + break; + case 1: + bp->ECQ1s++; + break; + case -1: + bp->ECQ1s++; + break; + default: + bp->ECQOthers++; + break; + } + } + } + + /* + //DEBUG: Self-check. Remove this later. 
+ for(sb=0;sbsbNum;sb++){ + for(i=0;isbSize;i++){ + _1DIdx=sb*p->sbSize+i; + double decompressed=scalesQ[sb]*patternQ[i]*scalesBinSize*binSize-ECQ[_1DIdx]*binSize; + if(abs_FastD(decompressed-data[_1DIdx])>(p->usedEb)){ + //printf("p->usedEb=%.6e\n",p->usedEb); + //printf("data[%d]=%.6e decompressed[%d]=%.6e diff=%.6e\n",_1DIdx,data[_1DIdx],_1DIdx,decompressed,abs_FastD(data[_1DIdx]-decompressed)); + assert(0); + } + } + } + */ +} + +static inline void pastri_double_Encode(double *data,int64_t* patternQ,int64_t* scalesQ,int64_t* ECQ,pastri_params *p,pastri_blockParams* bp,unsigned char* outBuf,int *numOutBytes){ + bp->ECQBits=bitsNeeded_UI64(bp->ECQExt)+1; + bp->_1DIdxBits=bitsNeeded_UI64(p->bSize); + //(*numOutBytes)=0; + + int i; + + //Encode: 3 options: + //Compressed, Sparse ECQ + //Compressed, Non-Sparse ECQ + //Uncompressed, Sparse Data + //Uncompressed, Non-spsarse Data + + unsigned int UCSparseBits; //Uncompressed, Sparse bits. Just like the original GAMESS data. Includes: mode, nonZeros, {indexes, data} + unsigned int UCNonSparseBits; //Uncompressed, NonSparse bits. Includes: mode, data + unsigned int CSparseBits; //Includes: mode, compressedBytes, patternBits, ECQBits,numOutliers,P, S, {Indexes(Sparse), ECQ} + unsigned int CNonSparseBits; //Includes: mode, compressedBytes, patternBits, ECQBits,P, S, {ECQ} + //int BOOKKEEPINGBITS=120; //Includes: mode, compressedBytes, patternBits, ECQBits (8+64+32+8+8) //Moved to much earlier! + + //Consider: ECQ0s, ECQ1s, ECQOthers. Number of following values in ECQ: {0}, {1,-1}, { val<=-2, val>=2} + //ECQ0s is actually not needed, but others are needed. + + UCSparseBits = p->dataSize*(1 + 2 + bp->nonZeros*16); //64 bits for 4 indexes, 64 bit for data. 
+ UCNonSparseBits = p->dataSize*(1 + p->bSize*8); + bp->numOutliers=bp->ECQ1s+bp->ECQOthers; + if(bp->ECQBits==2){ + CSparseBits = p->dataSize*(1+4+1+1+2) + bp->patternBits*p->sbSize + bp->scaleBits*p->sbNum + bp->ECQ1s*(1+bp->_1DIdxBits); + CNonSparseBits = p->dataSize*(1+4+1+1) + bp->patternBits*p->sbSize + bp->scaleBits*p->sbNum + p->bSize + bp->ECQ1s ; //Or: ECQ0s+ECQ1s*2; + }else{ //ECQBits>2 + CSparseBits = p->dataSize*(1+4+1+1+2) + bp->patternBits*p->sbSize + bp->scaleBits*p->sbNum + bp->ECQ1s*(2+bp->_1DIdxBits) + bp->ECQOthers*(1+bp->_1DIdxBits+bp->ECQBits); + //CNonSparseBits = 8+32+8+8+ patternBits*p->sbSize + scaleBits*p->sbNum + p->bSize + ECQ0s + ECQ1s*3 + ECQOthers*(2+ECQBits); + CNonSparseBits = p->dataSize*(1+4+1+1)+ bp->patternBits*p->sbSize + bp->scaleBits*p->sbNum + p->bSize + bp->ECQ1s*2 + bp->ECQOthers*(1+bp->ECQBits); + } + + int UCSparseBytes=(UCSparseBits+7)/8; + int UCNonSparseBytes=(UCNonSparseBits+7)/8; + int CSparseBytes=(CSparseBits+7)/8; + int CNonSparseBytes=(CNonSparseBits+7)/8; + uint64_t bitPos=0; + uint64_t bytePos=0; + int i0,i1,i2,i3; + int _1DIdx; + + //*(uint16_t*)(&outBuf[1])=p->idxOffset[0]; + //*(uint16_t*)(&outBuf[3])=p->idxOffset[1]; + //*(uint16_t*)(&outBuf[5])=p->idxOffset[2]; + //*(uint16_t*)(&outBuf[7])=p->idxOffset[3]; + + //if(D_W){printf("ECQ0s:%d ECQ1s:%d ECQOthers:%d Total:%d\n",p->bSize-bp->ECQ1s-bp->ECQOthers,bp->ECQ1s,bp->ECQOthers,p->bSize);} //DEBUG + //if(D_W){printf("numOutliers:%d\n",bp->numOutliers);} //DEBUG + + //**************************************************************************************** + //if(0){ //DEBUG + //W:UCSparse + if((UCSparseBytesECQBits); //DEBUG + outBuf[0]=0; //mode + + //*(uint16_t*)(&outBuf[9])=nonZeros; + //bytePos=11;//0:mode, 1-8:indexOffsets 9-10:NonZeros. So start from 11. + *(uint16_t*)(&outBuf[1])=bp->nonZeros; + bytePos=3;//0:mode, 2-3:NonZeros. So start from 3. 
+ + for(i0=0;i0idxRange[0];i0++) + for(i1=0;i1idxRange[1];i1++) + for(i2=0;i2idxRange[2];i2++) + for(i3=0;i3idxRange[3];i3++){ + _1DIdx=p->idxRange[3]*(i2+p->idxRange[2]*(i1+i0*p->idxRange[1]))+i3; + if(abs_FastD(data[_1DIdx])>p->usedEb){ + //*(uint16_t*)(&outBuf[bytePos])=i0+1+p->idxOffset[0]; + *(uint16_t*)(&outBuf[bytePos])=i0; + bytePos+=2; + //*(uint16_t*)(&outBuf[bytePos])=i1+1+p->idxOffset[1]; + *(uint16_t*)(&outBuf[bytePos])=i1; + bytePos+=2; + //*(uint16_t*)(&outBuf[bytePos])=i2+1+p->idxOffset[2]; + *(uint16_t*)(&outBuf[bytePos])=i2; + bytePos+=2; + //*(uint16_t*)(&outBuf[bytePos])=i3+1+p->idxOffset[3]; + *(uint16_t*)(&outBuf[bytePos])=i3; + bytePos+=2; + + *(double*)(&outBuf[bytePos])=data[_1DIdx]; + bytePos+=p->dataSize; + } + } + + //if(D_G)printf("UCSparseBytes:%d \n",UCSparseBytes); //DEBUG + + //**************************************************************************************** + //}else if(0){ //DEBUG + //W:UCNonSparse + }else if((UCNonSparseBytesECQBits); //DEBUG + outBuf[0]=1; //mode + + //memcpy(&outBuf[9], &inBuf[p->bSize*8], UCNonSparseBytes-9); + memcpy(&outBuf[1], data, p->bSize*p->dataSize); + + //if(D_G)printf("UCNonSparseBytes:%d \n",UCNonSparseBytes); //DEBUG + /* + for(i=0;ibSize*8+i]); + } + //printf("\n"); + for(i=0;iECQBits); //DEBUG + ////if(DEBUG){printf("patternBits:%d _1DIdxBits:%d\n",patternBits,_1DIdxBits);} //DEBUG + outBuf[0]=2; //mode + + ////outBuf bytes [1:8] are indexOffsets, which are already written. outBuf bytes [9:12] are reserved for compressedBytes. + //outBuf[13]=patternBits; + //outBuf[14]=ECQBits; + ////Currently, we are at the end of 15th byte. + //*(uint16_t*)(&outBuf[15])=numOutliers; + //bitPos=17*8; //Currently, we are at the end of 17th byte. + + //outBuf bytes [1:4] are reserved for compressedBytes. + outBuf[5]=bp->patternBits; + outBuf[6]=bp->ECQBits; + //Currently, we are at the end of 7th byte. + + *(uint16_t*)(&outBuf[7])=bp->numOutliers; + //Now, we are at the end of 9th byte. 
+ bitPos=9*8; + + ////if(DEBUG){printf("bitPos_B:%ld\n",bitPos);} //DEBUG + + for(i=0;isbSize;i++){ + writeBits_Fast(outBuf,&bitPos,bp->patternBits,patternQ[i]);//Pattern point + } + ////if(DEBUG){printf("bitPos_P:%ld\n",bitPos);} //DEBUG + for(i=0;isbNum;i++){ + writeBits_Fast(outBuf,&bitPos,bp->scaleBits,scalesQ[i]);//Scale + } + ////if(DEBUG){printf("bitPos_S:%ld\n",bitPos);} //DEBUG + ////if(DEBUG)printf("ECQBits:%d\n",ECQBits); + switch(bp->ECQBits){ + case 2: + for(i=0;ibSize;i++){ + switch(ECQ[i]){ + case 0: + break; + case 1: + ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x0\n",i,ECQ[i]); //DEBUG + writeBits_Fast(outBuf,&bitPos,bp->_1DIdxBits,i); + //writeBits_Fast(outBuf,&bitPos,2,0x10); + //writeBits_Fast(outBuf,&bitPos,2,0);//0x00 + //writeBits_Fast(outBuf,&bitPos,2,0);//0x00 + writeBits_Fast(outBuf,&bitPos,1,0);//0x00 + break; + case -1: + ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x1\n",i,ECQ[i]); //DEBUG + writeBits_Fast(outBuf,&bitPos,bp->_1DIdxBits,i); + //writeBits_Fast(outBuf,&bitPos,2,0x11); + //writeBits_Fast(outBuf,&bitPos,2,1);//0x01 + //writeBits_Fast(outBuf,&bitPos,1,0); + writeBits_Fast(outBuf,&bitPos,1,1); + break; + default: + assert(0); + break; + } + } + break; + default: //ECQBits>2 + for(i=0;ibSize;i++){ + switch(ECQ[i]){ + case 0: + break; + case 1: + ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x00\n",i,ECQ[i]); //DEBUG + writeBits_Fast(outBuf,&bitPos,bp->_1DIdxBits,i); + //writeBits_Fast(outBuf,&bitPos,3,0);//0x000 + //writeBits_Fast(outBuf,&bitPos,1,0); + writeBits_Fast(outBuf,&bitPos,1,0); + writeBits_Fast(outBuf,&bitPos,1,0); + break; + case -1: + ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x01\n",i,ECQ[i]); //DEBUG + writeBits_Fast(outBuf,&bitPos,bp->_1DIdxBits,i); + //writeBits_Fast(outBuf,&bitPos,3,1);//0x001 + //writeBits_Fast(outBuf,&bitPos,1,0); + writeBits_Fast(outBuf,&bitPos,1,0); + writeBits_Fast(outBuf,&bitPos,1,1); + break; + default: + ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x1 
0x%lx\n",i,ECQ[i],ECQ[i]); //DEBUG + writeBits_Fast(outBuf,&bitPos,bp->_1DIdxBits,i); + //writeBits_Fast(outBuf,&bitPos,2+ECQBits,((uint64_t)0x11<ECQBits,ECQ[i]); + break; + } + } + break; + } + + ////if(DEBUG){printf("bitPos_E:%ld\n",bitPos);} //DEBUG + //if(D_C){if(!((bp->ECQBits>=2)||((bp->ECQBits==1) && (bp->numOutliers==0)))){printf("ERROR: ECQBits:%d numOutliers:%d This should not have happened!\n",bp->ECQBits,bp->numOutliers);assert(0);}} //DEBUG + + + uint32_t bytePos=(bitPos+7)/8; + //*(uint32_t*)(&outBuf[9])=bytePos; + *(uint32_t*)(&outBuf[1])=bytePos; + + //if(D_G)printf("bitPos:%ld CSparseBits:%d bytePos:%d CSparseBytes:%d\n",bitPos,CSparseBits,bytePos,CSparseBytes); //DEBUG + if(D_G){assert(bitPos==CSparseBits);} + + //**************************************************************************************** + //W:CNonSparse + }else { + //Includes: mode, indexOffsets, compressedBytes, patternBits, ECQBits,P, S, {ECQ} + *numOutBytes=CNonSparseBytes; + //if(D_G){printf("CNonSparse\n");} //DEBUG + //if(D_G)printf("ECQBits:%d\n",bp->ECQBits); //DEBUG + ////if(DEBUG){printf("patternBits:%d _1DIdxBits:%d\n",patternBits,_1DIdxBits);} //DEBUG + outBuf[0]=3; //mode + + ////outBuf bytes [1:8] are indexOffsets, which are already written. outBuf bytes [9:12] are reserved for compressedBytes. + //outBuf[13]=patternBits; + //outBuf[14]=ECQBits; + //bitPos=15*8; //Currently, we are at the end of 15th byte. + + //outBuf bytes [1:4] are reserved for compressedBytes. + outBuf[5]=bp->patternBits; + outBuf[6]=bp->ECQBits; + bitPos=7*8; //Currently, we are at the end of 7th byte. 
+ + ////if(DEBUG){printf("bitPos_B:%ld\n",bitPos);} //DEBUG + + for(i=0;isbSize;i++){ + writeBits_Fast(outBuf,&bitPos,bp->patternBits,patternQ[i]);//Pattern point + } + ////if(DEBUG){printf("bitPos_P:%ld\n",bitPos);} //DEBUG + for(i=0;isbNum;i++){ + writeBits_Fast(outBuf,&bitPos,bp->scaleBits,scalesQ[i]);//Scale + } + ////if(DEBUG){printf("bitPos_S:%ld\n",bitPos);} //DEBUG + ////if(DEBUG)printf("ECQBits:%d\n",ECQBits); + switch(bp->ECQBits){ + case 2: + for(i=0;ibSize;i++){ + switch(ECQ[i]){ + case 0: + ////if(DEBUG)printf("Index:%d ECQ:%d Written:0x1\n",i,ECQ[i]); //DEBUG + writeBits_Fast(outBuf,&bitPos,1,1);//0x1 + break; + case 1: + ////if(DEBUG)printf("Index:%d ECQ:%d Written:0x00\n",i,ECQ[i]); //DEBUG + //writeBits_Fast(outBuf,&bitPos,2,0);//0x00 + writeBits_Fast(outBuf,&bitPos,1,0); + writeBits_Fast(outBuf,&bitPos,1,0); + break; + case -1: + ////if(DEBUG)printf("Index:%d ECQ:%d Written:0x01\n",i,ECQ[i]); //DEBUG + //writeBits_Fast(outBuf,&bitPos,2,2); //0x01 + writeBits_Fast(outBuf,&bitPos,1,0); + writeBits_Fast(outBuf,&bitPos,1,1); + break; + default: + assert(0); + break; + } + } + break; + default: //ECQBits>2 + ////if(DEBUG) printf("AMG_W1:bitPos:%ld\n",bitPos); //DEBUG + for(i=0;ibSize;i++){ + ////if(DEBUG){printf("AMG_W3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG + ////if(DEBUG) printf("AMG_W2:bitPos:%ld\n",bitPos); //DEBUG + ////if(DEBUG) printf("ECQ[%d]:%ld\n",i,ECQ[i]); //DEBUG + switch(ECQ[i]){ + case 0: + ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x1\n",i,ECQ[i]); //DEBUG + ////if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG + //temp1=bitPos; + writeBits_Fast(outBuf,&bitPos,1,1); //0x1 + //wVal=1; writeBits_Fast(outBuf,&bitPos,1,wVal); //0x1 + ////if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; //DEBUG + break; + case 1: + ////if(DEBUG)printf("Index:%d ECQ:%ld 
Written:0x000\n",i,ECQ[i]); //DEBUG + ////if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG + //temp1=bitPos; + //writeBits_Fast(outBuf,&bitPos,3,0); //0x000 + writeBits_Fast(outBuf,&bitPos,1,0); + writeBits_Fast(outBuf,&bitPos,1,0); + writeBits_Fast(outBuf,&bitPos,1,0); + //wVal=0; writeBits_Fast(outBuf,&bitPos,3,wVal); //0x000 + ////if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; //DEBUG + break; + case -1: + ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x001\n",i,ECQ[i]); //DEBUG + ////if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG + //temp1=bitPos; + //writeBits_Fast(outBuf,&bitPos,3,8); //0x001 + writeBits_Fast(outBuf,&bitPos,1,0); + writeBits_Fast(outBuf,&bitPos,1,0); + writeBits_Fast(outBuf,&bitPos,1,1); + //wVal=8; writeBits_Fast(outBuf,&bitPos,3,wVal); //0x001 + ////if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; //DEBUG + break; + default: + ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x01 0x%lx\n",i,ECQ[i]); //DEBUG + ////if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG + //temp1=bitPos; + //writeBits_Fast(outBuf,&bitPos,2,2); //0x01 + writeBits_Fast(outBuf,&bitPos,1,0); + writeBits_Fast(outBuf,&bitPos,1,1); + //wVal=2; writeBits_Fast(outBuf,&bitPos,2,wVal); //0x01 + writeBits_Fast(outBuf,&bitPos,bp->ECQBits,ECQ[i]); + ////if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; //DEBUG + break; + } + } + break; + } + + ////if(DEBUG){printf("bitPos_E:%ld\n",bitPos);} //DEBUG + //if(D_C){if(!((bp->ECQBits>=2)||((bp->ECQBits==1) && (bp->numOutliers==0)))){printf("ERROR: ECQBits:%d numOutliers:%d This should not have happened!\n",bp->ECQBits,bp->numOutliers);assert(0);}} //DEBUG + + + 
+ uint32_t bytePos=(bitPos+7)/8; + //*(uint32_t*)(&outBuf[9])=bytePos; + *(uint32_t*)(&outBuf[1])=bytePos; + + //if(D_G)printf("bitPos:%ld CNonSparseBits:%d bytePos:%d CNonSparseBytes:%d\n",bitPos,CNonSparseBits,bytePos,CNonSparseBytes); //DEBUG + if(D_G){assert(bitPos==CNonSparseBits);} + + } + ////for(i=213;i<233;i++)if(DEBUG)printf("AMG_WE:bitPos:%d buffer[%d]=0x%lx\n",i*8,i,*(uint64_t*)(&outBuf[i])); //DEBUG + +} +static inline int pastri_double_Compress(unsigned char*inBuf,pastri_params *p,unsigned char*outBuf,int *numOutBytes){ + pastri_blockParams bp; + + //if(D_G2){printf("Parameters: dataSize:%d\n",p->dataSize);} //DEBUG + //if(D_G2){printf("Parameters: bfs:%d %d %d %d originalEb:%.3e\n",p->bf[0],p->bf[1],p->bf[2],p->bf[3],p->usedEb);} //DEBUG + //if(D_G2){printf("Parameters: idxRanges:%d %d %d %d\n",p->idxRange[0],p->idxRange[1],p->idxRange[2],p->idxRange[3]);} //DEBUG + //if(D_G2){printf("Parameters: sbSize:%d sbNum:%d bSize:%d\n",p->sbSize,p->sbNum,p->bSize); }//DEBUG + + int64_t patternQ[MAX_PS_SIZE]; + int64_t scalesQ[MAX_PS_SIZE]; + int64_t ECQ[MAX_BLOCK_SIZE]; + + double *data; + data=(double*)inBuf; + + //STEP 0: PREPROCESSING: + //This step can include flattening the block, determining the period, etc. + //Currently not needed. 
+ + //STEP 1: PATTERN MATCH + pastri_double_PatternMatch(data,p,&bp,patternQ,scalesQ,ECQ); + + //STEP 2: ENCODING(Include QUANTIZE) + pastri_double_Encode(data,patternQ,scalesQ,ECQ,p,&bp,outBuf,numOutBytes); + + + return 0; +} + +static inline double pastri_double_InverseQuantization(int64_t q, double binSize){ + return q*binSize; +} + +static inline void pastri_double_PredictData(pastri_params *p,pastri_blockParams *bp,double *data,int64_t* patternQ,int64_t* scalesQ,int64_t* ECQ){ + int j; + double PS_binSize=bp->scalesBinSize*bp->binSize; + for(j=0;jbSize;j++){ + //data[j]=scalesQ[j/p->sbSize]*patternQ[j%p->sbSize]*PS_binSize - ECQ[j]*bp->binSize; + data[j]=pastri_double_InverseQuantization(scalesQ[j/p->sbSize]*patternQ[j%p->sbSize],PS_binSize) - pastri_double_InverseQuantization(ECQ[j],bp->binSize); + } +} + +static inline void pastri_double_Decode(unsigned char*inBuf,pastri_params *p,pastri_blockParams *bp,unsigned char*outBuf,int *numReadBytes,int64_t* patternQ,int64_t* scalesQ,int64_t* ECQ){ + int j; + bp->_1DIdxBits=bitsNeeded_UI64(p->bSize); + //double *data=(double*)(outBuf+p->bSize*8); + double *data=(double*)(outBuf); + int i0,i1,i2,i3; + //uint16_t *idx0,*idx1,*idx2,*idx3; + int _1DIdx; + + int64_t ECQTemp; + uint64_t bytePos=0; + uint64_t bitPos=0; + uint64_t temp,temp2; + //int sb,localIdx; + + + //idx0=(uint16_t*)(outBuf ); + //idx1=(uint16_t*)(outBuf+p->bSize*2); + //idx2=(uint16_t*)(outBuf+p->bSize*4); + //idx3=(uint16_t*)(outBuf+p->bSize*6); + //p->idxOffset[0]=*(uint32_t*)(&inBuf[1]); + //p->idxOffset[1]=*(uint32_t*)(&inBuf[3]); + //p->idxOffset[2]=*(uint32_t*)(&inBuf[5]); + //p->idxOffset[3]=*(uint32_t*)(&inBuf[7]); + /* + for(i0=0;i0idxRange[0];i0++) + for(i1=0;i1idxRange[1];i1++) + for(i2=0;i2idxRange[2];i2++) + for(i3=0;i3idxRange[3];i3++){ + //_1DIdx=i0*p->idxRange[1]*p->idxRange[2]*p->idxRange[3]+i1*p->idxRange[2]*p->idxRange[3]+i2*p->idxRange[3]+i3; + _1DIdx=p->idxRange[3]*(i2+p->idxRange[2]*(i1+i0*p->idxRange[1]))+i3; + 
idx0[_1DIdx]=i0+1+p->idxOffset[0]; + idx1[_1DIdx]=i1+1+p->idxOffset[1]; + idx2[_1DIdx]=i2+1+p->idxOffset[2]; + idx3[_1DIdx]=i3+1+p->idxOffset[3]; + } + */ + + //*numOutBytes=p->bSize*16; + + //inBuf[0] is "mode" + switch(inBuf[0]){ + //R:UCSparse + case 0: + //if(D_G){printf("\nDC:UCSparse\n");} //DEBUG + //bp->nonZeros=*(uint16_t*)(&inBuf[9]); + //bytePos=11; + bp->nonZeros=*(uint16_t*)(&inBuf[1]); + bytePos=3; + for(j=0;jbSize;j++){ + data[j]=0; + } + for(j=0;jnonZeros;j++){ + //i0=*(uint16_t*)(&inBuf[bytePos])-1-p->idxOffset[0]; //i0 + i0=*(uint16_t*)(&inBuf[bytePos]); //i0 + bytePos+=2; + //i1=*(uint16_t*)(&inBuf[bytePos])-1-p->idxOffset[1]; //i1 + i1=*(uint16_t*)(&inBuf[bytePos]); //i1 + bytePos+=2; + //i2=*(uint16_t*)(&inBuf[bytePos])-1-p->idxOffset[2]; //i2 + i2=*(uint16_t*)(&inBuf[bytePos]); //i2 + bytePos+=2; + //i3=*(uint16_t*)(&inBuf[bytePos])-1-p->idxOffset[3]; //i3 + i3=*(uint16_t*)(&inBuf[bytePos]); //i3 + bytePos+=2; + _1DIdx=p->idxRange[3]*(i2+p->idxRange[2]*(i1+i0*p->idxRange[1]))+i3; + data[_1DIdx]=*(double*)(&inBuf[bytePos]); + bytePos+=8; + } + //if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG + break; + //R:UCNonSparse + case 1: + //if(D_G){printf("\nDC:UCNonSparse\n");} //DEBUG + //memcpy(&outBuf[p->bSize*8], &inBuf[9], p->bSize*8); + memcpy(data, &inBuf[1], p->bSize*8); + bytePos=p->bSize*8; + //if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG + break; + //R:CSparse + case 2: + //if(D_G){printf("\nDC:CSparse\n");} //DEBUG + //for(j=0;jbSize;j++){ + // data[j]=0; + //} + + //bp->patternBits=inBuf[13]; + //bp->ECQBits=inBuf[14]; + + bp->patternBits=inBuf[5]; + bp->ECQBits=inBuf[6]; + + //if(D_R){printf("bp->patternBits:%d bp->ECQBits:%d bp->_1DIdxBits:%d\n",bp->patternBits,bp->ECQBits,bp->_1DIdxBits);} //DEBUG + + //bp->numOutliers=*(uint16_t*)(&inBuf[15]); + //bitPos=17*8; + bp->numOutliers=*(uint16_t*)(&inBuf[7]); + bitPos=9*8; + //if(D_R){printf("bp->numOutliers:%d\n",bp->numOutliers);} //DEBUG + + 
bp->scalesBinSize=1/(double)(((uint64_t)1<<(bp->patternBits-1))-1); + + bp->binSize=p->usedEb*2; + + //if(D_R){printf("bp->scalesBinSize:%.6e bp->binSize:%.6e bp->scalesBinSize*bp->binSize:%.6e\n",bp->scalesBinSize,bp->binSize,bp->scalesBinSize*bp->binSize);} //DEBUG + + for(j=0;jsbSize;j++){ + patternQ[j]=readBits_I64(inBuf,&bitPos,bp->patternBits);//Pattern point + //if(D_R){printf("R:patternQ[%d]=%ld\n",j,patternQ[j]);} + } + for(j=0;jsbNum;j++){ + scalesQ[j]=readBits_I64(inBuf,&bitPos,bp->patternBits);//Scale + //if(D_R){printf("R:scalesQ[%d]=%ld\n",j,scalesQ[j]);} + } + + /* //Splitting + for(j=0;jbSize;j++){ + data[j]=scalesQ[j/p->sbSize]*patternQ[j%p->sbSize]*bp->scalesBinSize*bp->binSize; + } + */ + for(j=0;jbSize;j++){ + ECQ[j]=0; + } + switch(bp->ECQBits){ + case 2: + for(j=0;jnumOutliers;j++){ + ////if(DEBUG){printf("readBits_UI64:%ld\n",readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits));} //DEBUG + ////if(DEBUG){printf("readBits_UI64:%ld\n",readBits_I64(inBuf,&bitPos,2));} //DEBUG + + _1DIdx=readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits); + ECQTemp=readBits_I64(inBuf,&bitPos,1); + ECQTemp= ((ECQTemp<<63)>>63)|(uint64_t)0x1; + ////if(D_R)printf("R:ECQ[%d]: %ld \n",_1DIdx,ECQTemp); + //continue; + //sb=_1DIdx/p->sbSize; + //localIdx=_1DIdx%p->sbSize; + + ////data[_1DIdx]-=ECQTemp*bp->binSize;//Splitting + ECQ[_1DIdx]=ECQTemp; + + ////if(DEBUG){printf("decompressed[%d]:%.6e\n",_1DIdx,data[_1DIdx]);} //DEBUG + } + break; + default: //bp->ECQBits>2 + //if(D_C){if(!((bp->ECQBits>=2)||((bp->ECQBits==1) && (bp->numOutliers==0)))){printf("ERROR: bp->ECQBits:%d bp->numOutliers:%d This should not have happened!\n",bp->ECQBits,bp->numOutliers);assert(0);}} //DEBUG + + for(j=0;jnumOutliers;j++){ + _1DIdx=readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits); + //sb=_1DIdx/p->sbSize; + //localIdx=_1DIdx%p->sbSize; + temp=readBits_UI64(inBuf,&bitPos,1); + ////if(DEBUG){printf("temp:%ld\n",temp);} //DEBUG + switch(temp){ + case 0: //+-1 + ECQTemp=readBits_I64(inBuf,&bitPos,1); + ECQTemp= 
((ECQTemp<<63)>>63)|(uint64_t)0x1; + ////if(DEBUG){printf("_1DIdx:%ld ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG + ////if(D_R)printf("R:ECQ[%d]: %ld \n",_1DIdx,ECQTemp); + break; + case 1: //Others + ECQTemp=readBits_I64(inBuf,&bitPos,bp->ECQBits); + ////if(DEBUG){printf("_1DIdx:%ld ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG + ////if(D_R)printf("R:ECQ[%d]: %ld \n",_1DIdx,ECQTemp); + break; + //default: + //// printf("ERROR: Bad 2-bit value: 0x%lx",temp); + // assert(0); //AMG + // break; + } + + //data[_1DIdx]-=ECQTemp*bp->binSize;//Splitting + ECQ[_1DIdx]=ECQTemp; + + ////if(DEBUG){printf("decompressed[%d]:%.6e\n",_1DIdx,data[_1DIdx]);} //DEBUG + } + break; + } + //static inline uint64_t readBits_UI64(unsigned char* buffer,uint64_t *bitPosPtr,uint64_t numBits){ // numBits must be in range [0:56] + //patternQ=(int64_t*)(inBuf+15); + //scalesQ=(int64_t*)(inBuf+15+p->sbSize*8); + + bytePos=(bitPos+7)/8; + //if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG + + //STEP 2: PREDICT DATA(Includes INVERSE QUANTIZATION) + pastri_double_PredictData(p,bp,data,patternQ,scalesQ,ECQ); + + break; + //R:CNonSparse + case 3: + //if(D_G){printf("\nDC:CNonSparse\n");} //DEBUG + + //for(j=0;jbSize;j++){ + // data[j]=0; + //} + + //bp->patternBits=inBuf[13]; + //bp->ECQBits=inBuf[14]; + + bp->patternBits=inBuf[5]; + bp->ECQBits=inBuf[6]; + + //if(D_R){printf("bp->patternBits:%d bp->ECQBits:%d bp->_1DIdxBits:%d\n",bp->patternBits,bp->ECQBits,bp->_1DIdxBits);} //DEBUG + + //bitPos=15*8; + bitPos=7*8; + + bp->scalesBinSize=1/(double)(((uint64_t)1<<(bp->patternBits-1))-1); + bp->binSize=p->usedEb*2; + + //if(D_R){printf("bp->scalesBinSize:%.6e bp->binSize:%.6e bp->scalesBinSize*bp->binSize:%.6e\n",bp->scalesBinSize,bp->binSize,bp->scalesBinSize*bp->binSize);} //DEBUG + + for(j=0;jsbSize;j++){ + patternQ[j]=readBits_I64(inBuf,&bitPos,bp->patternBits);//Pattern point + //if(D_R){printf("R:patternQ[%d]=%ld\n",j,patternQ[j]);} + } + for(j=0;jsbNum;j++){ + 
scalesQ[j]=readBits_I64(inBuf,&bitPos,bp->patternBits);//Scale + //if(D_R){printf("R:scalesQ[%d]=%ld\n",j,scalesQ[j]);} + } + /* //Splitting + for(j=0;jbSize;j++){ + data[j]=scalesQ[j/p->sbSize]*patternQ[j%p->sbSize]*bp->scalesBinSize*bp->binSize; + ////if(DEBUG){printf("DC:PS[%d]=%.6e\n",j,data[j]);} + } + */ + switch(bp->ECQBits){ + case 2: + for(j=0;jbSize;j++){ + ////if(DEBUG){printf("readBits_UI64:%ld\n",readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits));} //DEBUG + ////if(DEBUG){printf("readBits_UI64:%ld\n",readBits_I64(inBuf,&bitPos,2));} //DEBUG + //_1DIdx=readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits); + temp=readBits_UI64(inBuf,&bitPos,1); + switch(temp){ + case 0: + ECQTemp=readBits_I64(inBuf,&bitPos,1); + ECQTemp= ((ECQTemp<<63)>>63)|(uint64_t)0x1; + break; + case 1: + ECQTemp=0; + break; + default: + assert(0); + break; + } + + ////if(DEBUG){printf("_1DIdx:%ld ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG + //continue; + //sb=_1DIdx/p->sbSize; + //localIdx=_1DIdx%p->sbSize; + + //data[j]-=ECQTemp*bp->binSize; //Splitting + ECQ[j]=ECQTemp; + + ////if(DEBUG){printf("decompressed[%d]:%.6e\n",_1DIdx,data[_1DIdx]);} //DEBUG + } + break; + default: //bp->ECQBits>2 + ////if(DEBUG)printf("AMG_R1:bitPos: %ld\n",bitPos); + + for(j=0;jbSize;j++){ + ////if(DEBUG){printf("AMG_R3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG + ////if(DEBUG)printf("AMG_R2:bitPos: %ld\n",bitPos); + + ////if(DEBUG){printf("readBits_UI64:%ld\n",readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits));} //DEBUG + ////if(DEBUG){printf("readBits_UI64:%ld\n",readBits_I64(inBuf,&bitPos,2));} //DEBUG + //_1DIdx=readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits); + temp=readBits_UI64(inBuf,&bitPos,1); + ////if(DEBUG){printf("AMG_R3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG + switch(temp){ + case 0: + ////if(DEBUG)printf("Read:0"); + temp2=readBits_UI64(inBuf,&bitPos,1); + switch(temp2){ + case 0: + ////if(DEBUG)printf("0"); + 
ECQTemp=readBits_I64(inBuf,&bitPos,1); + ////if(DEBUG){printf("AMG_R3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG + ////if(DEBUG)printf("R:ECQTemp:%ld\n",ECQTemp); + ECQTemp= ((ECQTemp<<63)>>63)|(uint64_t)0x1; + ////if(DEBUG)printf("R:ECQ[%d]: %ld\n",j,ECQTemp); + break; + case 1: + ////if(DEBUG)printf("1\n"); + ECQTemp=readBits_I64(inBuf,&bitPos,bp->ECQBits); + ////if(DEBUG){printf("AMG_R3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG + ////if(DEBUG)printf("R:ECQ[%d]: %ld\n",j,ECQTemp); + break; + default: + assert(0); + break; + } + break; + case 1: + ////if(DEBUG)printf("Read:1\n"); + ECQTemp=0; + ////if(DEBUG)printf("R:ECQ[%d]: %ld\n",j,ECQTemp); + break; + default: + assert(0); + break; + } + + ////if(DEBUG){printf("_1DIdx:%ld ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG + //continue; + //sb=_1DIdx/p->sbSize; + //localIdx=_1DIdx%p->sbSize; + + //data[j]-=ECQTemp*bp->binSize; //Splitting + ECQ[j]=ECQTemp; + + ////if(DEBUG){printf("DC:data[%d]:%.6e\n",j,data[j]);} //DEBUG + } + break; + } + //static inline uint64_t readBits_UI64(unsigned char* buffer,uint64_t *bitPosPtr,uint64_t numBits){ // numBits must be in range [0:56] + //patternQ=(int64_t*)(inBuf+15); + //scalesQ=(int64_t*)(inBuf+15+p->sbSize*8); + bytePos=(bitPos+7)/8; + //if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG + + //STEP 2: PREDICT DATA(Includes INVERSE QUANTIZATION) + pastri_double_PredictData(p,bp,data,patternQ,scalesQ,ECQ); + break; + + default: + assert(0); + break; + } + (*numReadBytes)=bytePos; +} + +static inline void pastri_double_Decompress(unsigned char*inBuf,int dataSize,pastri_params *p,unsigned char*outBuf,int *numReadBytes){ + int64_t patternQ[MAX_PS_SIZE]; + int64_t scalesQ[MAX_PS_SIZE]; + int64_t ECQ[MAX_BLOCK_SIZE]; + + pastri_blockParams bp; + + //STEP 1: DECODE (Includes PREDICT DATA(Includes INVERSE QUANTIZATION)) + //(Further steps are called inside pastri_double_Decode function) 
+ pastri_double_Decode(inBuf,p,&bp,outBuf,numReadBytes,patternQ,scalesQ,ECQ); + + return; +} + +//inBuf vs Decompressed +static inline int pastri_double_Check(unsigned char*inBuf,int dataSize,unsigned char*DC,pastri_params *p){ + int i; + + double *data=(double*)(inBuf); + double *data_dc=(double*)(DC); + + //Comparing Indexes: + /* + for(i=0;ibSize;i++){ + if(idx0[i]!=idx0_dc[i]){ + //printf("idx0[%d]=%d != %d=idx0_dc[%d]",i,idx0[i],idx0_dc[i],i); + assert(0); + } + if(idx1[i]!=idx1_dc[i]){ + //printf("idx1[%d]=%d != %d=idx1_dc[%d]",i,idx1[i],idx1_dc[i],i); + assert(0); + } + if(idx2[i]!=idx2_dc[i]){ + //printf("idx2[%d]=%d != %d=idx2_dc[%d]",i,idx2[i],idx2_dc[i],i); + assert(0); + } + if(idx3[i]!=idx3_dc[i]){ + //printf("idx3[%d]=%d != %d=idx3_dc[%d]",i,idx3[i],idx3_dc[i],i); + assert(0); + } + } + */ + + //Comparing Data: + for(i=0;ibSize;i++){ + if(abs_FastD(data[i]-data_dc[i])>p->usedEb){ + //printf("|data[%d]-data_dc[%d]|>originalEb : %.3e - %.3e = %.3e > %.3e\n",i,i,data[i],data_dc[i],abs_FastD(data[i]-data_dc[i]),p->usedEb); + assert(0); + } + } + return 0; +} + + +#endif diff --git a/deps/SZ/sz/include/pastriF.h b/deps/SZ/sz/include/pastriF.h new file mode 100644 index 0000000000000000000000000000000000000000..5c1d5879649e34636e20b383a4ac7bb818eea0e8 --- /dev/null +++ b/deps/SZ/sz/include/pastriF.h @@ -0,0 +1,911 @@ +#ifndef PASTRIF_H +#define PASTRIF_H + +static inline int64_t pastri_float_quantize(float x, float binSize){ + //Add or sub 0.5, depending on the sign: + x=x/binSize; + + u_UI64I64D u1,half; + u1.d=x; + + half.d=0.5; + + ////printf("pastri_float_quantize:\nx=%lf x=0x%lx\n",x,(*((uint64_t *)(&x)))); + ////printf("sign(x):0x%lx\n", x); + ////printf("0.5:0x%lx\n", (*((uint64_t *)(&half)))); + half.ui64 |= (u1.ui64 & (uint64_t)0x8000000000000000); + ////printf("sign(x)*0.5:0x%lx\n", (*((uint64_t *)(&half)))); + return (int64_t)(x + half.d); +} + +static inline void pastri_float_PatternMatch(float*data,pastri_params* p,pastri_blockParams* 
bp,int64_t* patternQ,int64_t *scalesQ, int64_t* ECQ){ + //Find the pattern. + //First, find the extremum point: + float absExt=0; //Absolute value of Extremum + int extIdx=-1; //Index of Extremum + bp->nonZeros=0; + int i,sb; + for(i=0;ibSize;i++){ + ////printf("data[%d] = %.16lf\n",i,data[i]);//DEBUG + if(abs_FastD(data[i])>p->usedEb){ + bp->nonZeros++; + ////if(DEBUG)printf("data[%d]:%.6e\n",i,data[i]); //DEBUG + } + if(abs_FastD(data[i])>absExt){ + absExt=abs_FastD(data[i]); + extIdx=i; + } + } + int patternIdx; //Starting Index of Pattern + patternIdx=(extIdx/p->sbSize)*p->sbSize; + + float patternExt=data[extIdx]; + bp->binSize=2*p->usedEb; + + ////if(DEBUG){printf("Extremum : data[%d] = %.6e\n",extIdx,patternExt);} //DEBUG + ////if(DEBUG){printf("patternIdx: %d\n",patternIdx);} //DEBUG + + ////if(DEBUG){for(i=0;isbSize;i++){printf("pattern[%d]=data[%d]=%.6e Quantized:%d\n",i,patternIdx+i,data[patternIdx+i],pastri_float_quantize(data[patternIdx+i]/binSize) );} }//DEBUG + + //int64_t *patternQ=(int64_t*)(outBuf+15); //Possible Improvement! + + + for(i=0;isbSize;i++){ + patternQ[i]=pastri_float_quantize(data[patternIdx+i],bp->binSize); + //if(D_W){printf("patternQ[%d]=%ld\n",i,patternQ[i]);} + } + + bp->patternBits=bitsNeeded_float((abs_FastD(patternExt)/bp->binSize)+1)+1; + bp->scaleBits=bp->patternBits; + bp->scalesBinSize=1/(float)(((uint64_t)1<<(bp->scaleBits-1))-1); + ////if(DEBUG){printf("(patternExt/binSize)+1: %.6e\n",(patternExt/binSize)+1);} //DEBUG + ////if(DEBUG){printf("scaleBits=patternBits: %d\n",scaleBits);} //DEBUG + //if(D_W){printf("scalesBinSize: %.6e\n",bp->scalesBinSize);} //DEBUG + + //Calculate Scales. + //The index part of the input buffer will be reused to hold Scale, Pattern, etc. values. + int localExtIdx=extIdx%p->sbSize; //Local extremum index. This is not the actual extremum of the current sb, but rather the index that correspond to the global (block) extremum. 
+ //int64_t *scalesQ=(int64_t*)(outBuf+15+p->sbSize*8); //Possible Improvement! + int patternExtZero=(patternExt==0); + ////if(DEBUG){printf("patternExtZero: %d\n",patternExtZero);} //DEBUG + for(sb=0;sbsbNum;sb++){ + //scales[sb]=data[sb*p->sbSize+localExtIdx]/patternExt; + //scales[sb]=patternExtZero ? 0 : data[sb*p->sbSize+localExtIdx]/patternExt; + //assert(scales[sb]<=1); + scalesQ[sb]=pastri_float_quantize((patternExtZero ? 0 : data[sb*p->sbSize+localExtIdx]/patternExt),bp->scalesBinSize); + //if(D_W){printf("scalesQ[%d]=%ld\n",sb,scalesQ[sb]);} + } + ////if(DEBUG){for(i=0;isbSize;i++){printf("scalesQ[%d]=%ld \n",i,scalesQ[i]);}} //DEBUG + + //int64_t *ECQ=(int64_t*)(outBuf+p->bSize*8); //ECQ is written into outBuf, just be careful when handling it. + + //uint64_t wVal; + bp->ECQExt=0; + int _1DIdx; + bp->ECQ1s=0; + bp->ECQOthers=0; + float PS_binSize=bp->scalesBinSize*bp->binSize; + for(sb=0;sbsbNum;sb++){ + for(i=0;isbSize;i++){ + _1DIdx=sb*p->sbSize+i; + ECQ[_1DIdx]=pastri_float_quantize( (scalesQ[sb]*patternQ[i]*PS_binSize-data[_1DIdx]),bp->binSize ); + float absECQ=abs_FastD(ECQ[_1DIdx]); + if(absECQ > bp->ECQExt) + bp->ECQExt=absECQ; + ////if(DEBUG){printf("EC[%d]: %.6e Quantized:%ld \n",_1DIdx,(scalesQ[sb]*patternQ[i]*scalesBinSize*binSize-data[_1DIdx]),ECQ[_1DIdx]);} //DEBUG + switch (ECQ[_1DIdx]){ + case 0: + //ECQ0s++; //Currently not needed + break; + case 1: + bp->ECQ1s++; + break; + case -1: + bp->ECQ1s++; + break; + default: + bp->ECQOthers++; + break; + } + } + } + + /* + //DEBUG: Self-check. Remove this later. 
+ for(sb=0;sbsbNum;sb++){ + for(i=0;isbSize;i++){ + _1DIdx=sb*p->sbSize+i; + float decompressed=scalesQ[sb]*patternQ[i]*scalesBinSize*binSize-ECQ[_1DIdx]*binSize; + if(abs_FastD(decompressed-data[_1DIdx])>(p->usedEb)){ + //printf("p->usedEb=%.6e\n",p->usedEb); + //printf("data[%d]=%.6e decompressed[%d]=%.6e diff=%.6e\n",_1DIdx,data[_1DIdx],_1DIdx,decompressed,abs_FastD(data[_1DIdx]-decompressed)); + assert(0); + } + } + } + */ +} + +static inline void pastri_float_Encode(float *data,int64_t* patternQ,int64_t* scalesQ,int64_t* ECQ,pastri_params *p,pastri_blockParams* bp,unsigned char* outBuf,int *numOutBytes){ + bp->ECQBits=bitsNeeded_UI64(bp->ECQExt)+1; + bp->_1DIdxBits=bitsNeeded_UI64(p->bSize); + //(*numOutBytes)=0; + + int i; + + //Encode: 3 options: + //Compressed, Sparse ECQ + //Compressed, Non-Sparse ECQ + //Uncompressed, Sparse Data + //Uncompressed, Non-spsarse Data + + unsigned int UCSparseBits; //Uncompressed, Sparse bits. Just like the original GAMESS data. Includes: mode, nonZeros, {indexes, data} + unsigned int UCNonSparseBits; //Uncompressed, NonSparse bits. Includes: mode, data + unsigned int CSparseBits; //Includes: mode, compressedBytes, patternBits, ECQBits,numOutliers,P, S, {Indexes(Sparse), ECQ} + unsigned int CNonSparseBits; //Includes: mode, compressedBytes, patternBits, ECQBits,P, S, {ECQ} + //int BOOKKEEPINGBITS=120; //Includes: mode, compressedBytes, patternBits, ECQBits (8+64+32+8+8) //Moved to much earlier! + + //Consider: ECQ0s, ECQ1s, ECQOthers. Number of following values in ECQ: {0}, {1,-1}, { val<=-2, val>=2} + //ECQ0s is actually not needed, but others are needed. + + UCSparseBits = p->dataSize*(1 + 2 + bp->nonZeros*16); //64 bits for 4 indexes, 64 bit for data. 
+ UCNonSparseBits = p->dataSize*(1 + p->bSize*8); + bp->numOutliers=bp->ECQ1s+bp->ECQOthers; + if(bp->ECQBits==2){ + CSparseBits = p->dataSize*(1+4+1+1+2) + bp->patternBits*p->sbSize + bp->scaleBits*p->sbNum + bp->ECQ1s*(1+bp->_1DIdxBits); + CNonSparseBits = p->dataSize*(1+4+1+1) + bp->patternBits*p->sbSize + bp->scaleBits*p->sbNum + p->bSize + bp->ECQ1s ; //Or: ECQ0s+ECQ1s*2; + }else{ //ECQBits>2 + CSparseBits = p->dataSize*(1+4+1+1+2) + bp->patternBits*p->sbSize + bp->scaleBits*p->sbNum + bp->ECQ1s*(2+bp->_1DIdxBits) + bp->ECQOthers*(1+bp->_1DIdxBits+bp->ECQBits); + //CNonSparseBits = 8+32+8+8+ patternBits*p->sbSize + scaleBits*p->sbNum + p->bSize + ECQ0s + ECQ1s*3 + ECQOthers*(2+ECQBits); + CNonSparseBits = p->dataSize*(1+4+1+1)+ bp->patternBits*p->sbSize + bp->scaleBits*p->sbNum + p->bSize + bp->ECQ1s*2 + bp->ECQOthers*(1+bp->ECQBits); + } + + int UCSparseBytes=(UCSparseBits+7)/8; + int UCNonSparseBytes=(UCNonSparseBits+7)/8; + int CSparseBytes=(CSparseBits+7)/8; + int CNonSparseBytes=(CNonSparseBits+7)/8; + uint64_t bitPos=0; + uint64_t bytePos=0; + int i0,i1,i2,i3; + int _1DIdx; + + //*(uint16_t*)(&outBuf[1])=p->idxOffset[0]; + //*(uint16_t*)(&outBuf[3])=p->idxOffset[1]; + //*(uint16_t*)(&outBuf[5])=p->idxOffset[2]; + //*(uint16_t*)(&outBuf[7])=p->idxOffset[3]; + + //if(D_W){printf("ECQ0s:%d ECQ1s:%d ECQOthers:%d Total:%d\n",p->bSize-bp->ECQ1s-bp->ECQOthers,bp->ECQ1s,bp->ECQOthers,p->bSize);} //DEBUG + //if(D_W){printf("numOutliers:%d\n",bp->numOutliers);} //DEBUG + + //**************************************************************************************** + //if(0){ //DEBUG + //W:UCSparse + if((UCSparseBytesECQBits); //DEBUG + outBuf[0]=0; //mode + + //*(uint16_t*)(&outBuf[9])=nonZeros; + //bytePos=11;//0:mode, 1-8:indexOffsets 9-10:NonZeros. So start from 11. + *(uint16_t*)(&outBuf[1])=bp->nonZeros; + bytePos=3;//0:mode, 2-3:NonZeros. So start from 3. 
+ + for(i0=0;i0idxRange[0];i0++) + for(i1=0;i1idxRange[1];i1++) + for(i2=0;i2idxRange[2];i2++) + for(i3=0;i3idxRange[3];i3++){ + _1DIdx=p->idxRange[3]*(i2+p->idxRange[2]*(i1+i0*p->idxRange[1]))+i3; + if(abs_FastD(data[_1DIdx])>p->usedEb){ + //*(uint16_t*)(&outBuf[bytePos])=i0+1+p->idxOffset[0]; + *(uint16_t*)(&outBuf[bytePos])=i0; + bytePos+=2; + //*(uint16_t*)(&outBuf[bytePos])=i1+1+p->idxOffset[1]; + *(uint16_t*)(&outBuf[bytePos])=i1; + bytePos+=2; + //*(uint16_t*)(&outBuf[bytePos])=i2+1+p->idxOffset[2]; + *(uint16_t*)(&outBuf[bytePos])=i2; + bytePos+=2; + //*(uint16_t*)(&outBuf[bytePos])=i3+1+p->idxOffset[3]; + *(uint16_t*)(&outBuf[bytePos])=i3; + bytePos+=2; + + *(float*)(&outBuf[bytePos])=data[_1DIdx]; + bytePos+=p->dataSize; + } + } + + //if(D_G)printf("UCSparseBytes:%d \n",UCSparseBytes); //DEBUG + + //**************************************************************************************** + //}else if(0){ //DEBUG + //W:UCNonSparse + }else if((UCNonSparseBytesECQBits); //DEBUG + outBuf[0]=1; //mode + + //memcpy(&outBuf[9], &inBuf[p->bSize*8], UCNonSparseBytes-9); + memcpy(&outBuf[1], data, p->bSize*p->dataSize); + + //if(D_G)printf("UCNonSparseBytes:%d \n",UCNonSparseBytes); //DEBUG + /* + for(i=0;ibSize*8+i]); + } + //printf("\n"); + for(i=0;iECQBits); //DEBUG + ////if(DEBUG){printf("patternBits:%d _1DIdxBits:%d\n",patternBits,_1DIdxBits);} //DEBUG + outBuf[0]=2; //mode + + ////outBuf bytes [1:8] are indexOffsets, which are already written. outBuf bytes [9:12] are reserved for compressedBytes. + //outBuf[13]=patternBits; + //outBuf[14]=ECQBits; + ////Currently, we are at the end of 15th byte. + //*(uint16_t*)(&outBuf[15])=numOutliers; + //bitPos=17*8; //Currently, we are at the end of 17th byte. + + //outBuf bytes [1:4] are reserved for compressedBytes. + outBuf[5]=bp->patternBits; + outBuf[6]=bp->ECQBits; + //Currently, we are at the end of 7th byte. + + *(uint16_t*)(&outBuf[7])=bp->numOutliers; + //Now, we are at the end of 9th byte. 
+ bitPos=9*8; + + ////if(DEBUG){printf("bitPos_B:%ld\n",bitPos);} //DEBUG + + for(i=0;isbSize;i++){ + writeBits_Fast(outBuf,&bitPos,bp->patternBits,patternQ[i]);//Pattern point + } + ////if(DEBUG){printf("bitPos_P:%ld\n",bitPos);} //DEBUG + for(i=0;isbNum;i++){ + writeBits_Fast(outBuf,&bitPos,bp->scaleBits,scalesQ[i]);//Scale + } + ////if(DEBUG){printf("bitPos_S:%ld\n",bitPos);} //DEBUG + ////if(DEBUG)printf("ECQBits:%d\n",ECQBits); + switch(bp->ECQBits){ + case 2: + for(i=0;ibSize;i++){ + switch(ECQ[i]){ + case 0: + break; + case 1: + ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x0\n",i,ECQ[i]); //DEBUG + writeBits_Fast(outBuf,&bitPos,bp->_1DIdxBits,i); + //writeBits_Fast(outBuf,&bitPos,2,0x10); + //writeBits_Fast(outBuf,&bitPos,2,0);//0x00 + //writeBits_Fast(outBuf,&bitPos,2,0);//0x00 + writeBits_Fast(outBuf,&bitPos,1,0);//0x00 + break; + case -1: + ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x1\n",i,ECQ[i]); //DEBUG + writeBits_Fast(outBuf,&bitPos,bp->_1DIdxBits,i); + //writeBits_Fast(outBuf,&bitPos,2,0x11); + //writeBits_Fast(outBuf,&bitPos,2,1);//0x01 + //writeBits_Fast(outBuf,&bitPos,1,0); + writeBits_Fast(outBuf,&bitPos,1,1); + break; + default: + assert(0); + break; + } + } + break; + default: //ECQBits>2 + for(i=0;ibSize;i++){ + switch(ECQ[i]){ + case 0: + break; + case 1: + ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x00\n",i,ECQ[i]); //DEBUG + writeBits_Fast(outBuf,&bitPos,bp->_1DIdxBits,i); + //writeBits_Fast(outBuf,&bitPos,3,0);//0x000 + //writeBits_Fast(outBuf,&bitPos,1,0); + writeBits_Fast(outBuf,&bitPos,1,0); + writeBits_Fast(outBuf,&bitPos,1,0); + break; + case -1: + ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x01\n",i,ECQ[i]); //DEBUG + writeBits_Fast(outBuf,&bitPos,bp->_1DIdxBits,i); + //writeBits_Fast(outBuf,&bitPos,3,1);//0x001 + //writeBits_Fast(outBuf,&bitPos,1,0); + writeBits_Fast(outBuf,&bitPos,1,0); + writeBits_Fast(outBuf,&bitPos,1,1); + break; + default: + ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x1 
0x%lx\n",i,ECQ[i],ECQ[i]); //DEBUG + writeBits_Fast(outBuf,&bitPos,bp->_1DIdxBits,i); + //writeBits_Fast(outBuf,&bitPos,2+ECQBits,((uint64_t)0x11<ECQBits,ECQ[i]); + break; + } + } + break; + } + + ////if(DEBUG){printf("bitPos_E:%ld\n",bitPos);} //DEBUG + //if(D_C){if(!((bp->ECQBits>=2)||((bp->ECQBits==1) && (bp->numOutliers==0)))){printf("ERROR: ECQBits:%d numOutliers:%d This should not have happened!\n",bp->ECQBits,bp->numOutliers);assert(0);}} //DEBUG + + + uint32_t bytePos=(bitPos+7)/8; + //*(uint32_t*)(&outBuf[9])=bytePos; + *(uint32_t*)(&outBuf[1])=bytePos; + + //if(D_G)printf("bitPos:%ld CSparseBits:%d bytePos:%d CSparseBytes:%d\n",bitPos,CSparseBits,bytePos,CSparseBytes); //DEBUG + if(D_G){assert(bitPos==CSparseBits);} + + //**************************************************************************************** + //W:CNonSparse + }else { + //Includes: mode, indexOffsets, compressedBytes, patternBits, ECQBits,P, S, {ECQ} + *numOutBytes=CNonSparseBytes; + //if(D_G){printf("CNonSparse\n");} //DEBUG + //if(D_G)printf("ECQBits:%d\n",bp->ECQBits); //DEBUG + ////if(DEBUG){printf("patternBits:%d _1DIdxBits:%d\n",patternBits,_1DIdxBits);} //DEBUG + outBuf[0]=3; //mode + + ////outBuf bytes [1:8] are indexOffsets, which are already written. outBuf bytes [9:12] are reserved for compressedBytes. + //outBuf[13]=patternBits; + //outBuf[14]=ECQBits; + //bitPos=15*8; //Currently, we are at the end of 15th byte. + + //outBuf bytes [1:4] are reserved for compressedBytes. + outBuf[5]=bp->patternBits; + outBuf[6]=bp->ECQBits; + bitPos=7*8; //Currently, we are at the end of 7th byte. 
+ + ////if(DEBUG){printf("bitPos_B:%ld\n",bitPos);} //DEBUG + + for(i=0;isbSize;i++){ + writeBits_Fast(outBuf,&bitPos,bp->patternBits,patternQ[i]);//Pattern point + } + ////if(DEBUG){printf("bitPos_P:%ld\n",bitPos);} //DEBUG + for(i=0;isbNum;i++){ + writeBits_Fast(outBuf,&bitPos,bp->scaleBits,scalesQ[i]);//Scale + } + ////if(DEBUG){printf("bitPos_S:%ld\n",bitPos);} //DEBUG + ////if(DEBUG)printf("ECQBits:%d\n",ECQBits); + switch(bp->ECQBits){ + case 2: + for(i=0;ibSize;i++){ + switch(ECQ[i]){ + case 0: + ////if(DEBUG)printf("Index:%d ECQ:%d Written:0x1\n",i,ECQ[i]); //DEBUG + writeBits_Fast(outBuf,&bitPos,1,1);//0x1 + break; + case 1: + ////if(DEBUG)printf("Index:%d ECQ:%d Written:0x00\n",i,ECQ[i]); //DEBUG + //writeBits_Fast(outBuf,&bitPos,2,0);//0x00 + writeBits_Fast(outBuf,&bitPos,1,0); + writeBits_Fast(outBuf,&bitPos,1,0); + break; + case -1: + ////if(DEBUG)printf("Index:%d ECQ:%d Written:0x01\n",i,ECQ[i]); //DEBUG + //writeBits_Fast(outBuf,&bitPos,2,2); //0x01 + writeBits_Fast(outBuf,&bitPos,1,0); + writeBits_Fast(outBuf,&bitPos,1,1); + break; + default: + assert(0); + break; + } + } + break; + default: //ECQBits>2 + ////if(DEBUG) printf("AMG_W1:bitPos:%ld\n",bitPos); //DEBUG + for(i=0;ibSize;i++){ + ////if(DEBUG){printf("AMG_W3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG + ////if(DEBUG) printf("AMG_W2:bitPos:%ld\n",bitPos); //DEBUG + ////if(DEBUG) printf("ECQ[%d]:%ld\n",i,ECQ[i]); //DEBUG + switch(ECQ[i]){ + case 0: + ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x1\n",i,ECQ[i]); //DEBUG + ////if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG + //temp1=bitPos; + writeBits_Fast(outBuf,&bitPos,1,1); //0x1 + //wVal=1; writeBits_Fast(outBuf,&bitPos,1,wVal); //0x1 + ////if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; //DEBUG + break; + case 1: + ////if(DEBUG)printf("Index:%d ECQ:%ld 
Written:0x000\n",i,ECQ[i]); //DEBUG + ////if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG + //temp1=bitPos; + //writeBits_Fast(outBuf,&bitPos,3,0); //0x000 + writeBits_Fast(outBuf,&bitPos,1,0); + writeBits_Fast(outBuf,&bitPos,1,0); + writeBits_Fast(outBuf,&bitPos,1,0); + //wVal=0; writeBits_Fast(outBuf,&bitPos,3,wVal); //0x000 + ////if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; //DEBUG + break; + case -1: + ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x001\n",i,ECQ[i]); //DEBUG + ////if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG + //temp1=bitPos; + //writeBits_Fast(outBuf,&bitPos,3,8); //0x001 + writeBits_Fast(outBuf,&bitPos,1,0); + writeBits_Fast(outBuf,&bitPos,1,0); + writeBits_Fast(outBuf,&bitPos,1,1); + //wVal=8; writeBits_Fast(outBuf,&bitPos,3,wVal); //0x001 + ////if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; //DEBUG + break; + default: + ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x01 0x%lx\n",i,ECQ[i]); //DEBUG + ////if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG + //temp1=bitPos; + //writeBits_Fast(outBuf,&bitPos,2,2); //0x01 + writeBits_Fast(outBuf,&bitPos,1,0); + writeBits_Fast(outBuf,&bitPos,1,1); + //wVal=2; writeBits_Fast(outBuf,&bitPos,2,wVal); //0x01 + writeBits_Fast(outBuf,&bitPos,bp->ECQBits,ECQ[i]); + ////if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; //DEBUG + break; + } + } + break; + } + + ////if(DEBUG){printf("bitPos_E:%ld\n",bitPos);} //DEBUG + //if(D_C){if(!((bp->ECQBits>=2)||((bp->ECQBits==1) && (bp->numOutliers==0)))){printf("ERROR: ECQBits:%d numOutliers:%d This should not have happened!\n",bp->ECQBits,bp->numOutliers);assert(0);}} //DEBUG + + + 
+ uint32_t bytePos=(bitPos+7)/8; + //*(uint32_t*)(&outBuf[9])=bytePos; + *(uint32_t*)(&outBuf[1])=bytePos; + + //if(D_G)printf("bitPos:%ld CNonSparseBits:%d bytePos:%d CNonSparseBytes:%d\n",bitPos,CNonSparseBits,bytePos,CNonSparseBytes); //DEBUG + if(D_G){assert(bitPos==CNonSparseBits);} + + } + ////for(i=213;i<233;i++)if(DEBUG)printf("AMG_WE:bitPos:%d buffer[%d]=0x%lx\n",i*8,i,*(uint64_t*)(&outBuf[i])); //DEBUG + +} +static inline int pastri_float_Compress(unsigned char*inBuf,pastri_params *p,unsigned char*outBuf,int *numOutBytes){ + pastri_blockParams bp; + + //if(D_G2){printf("Parameters: dataSize:%d\n",p->dataSize);} //DEBUG + //if(D_G2){printf("Parameters: bfs:%d %d %d %d originalEb:%.3e\n",p->bf[0],p->bf[1],p->bf[2],p->bf[3],p->usedEb);} //DEBUG + //if(D_G2){printf("Parameters: idxRanges:%d %d %d %d\n",p->idxRange[0],p->idxRange[1],p->idxRange[2],p->idxRange[3]);} //DEBUG + //if(D_G2){printf("Parameters: sbSize:%d sbNum:%d bSize:%d\n",p->sbSize,p->sbNum,p->bSize); }//DEBUG + + int64_t patternQ[MAX_PS_SIZE]; + int64_t scalesQ[MAX_PS_SIZE]; + int64_t ECQ[MAX_BLOCK_SIZE]; + + float *data; + data=(float*)inBuf; + + //STEP 0: PREPROCESSING: + //This step can include flattening the block, determining the period, etc. + //Currently not needed. 
+ + //STEP 1: PATTERN MATCH + pastri_float_PatternMatch(data,p,&bp,patternQ,scalesQ,ECQ); + + //STEP 2: ENCODING(Include QUANTIZE) + pastri_float_Encode(data,patternQ,scalesQ,ECQ,p,&bp,outBuf,numOutBytes); + + + return 0; +} + +static inline float pastri_float_InverseQuantization(int64_t q, float binSize){ + return q*binSize; +} + +static inline void pastri_float_PredictData(pastri_params *p,pastri_blockParams *bp,float *data,int64_t* patternQ,int64_t* scalesQ,int64_t* ECQ){ + int j; + float PS_binSize=bp->scalesBinSize*bp->binSize; + for(j=0;jbSize;j++){ + //data[j]=scalesQ[j/p->sbSize]*patternQ[j%p->sbSize]*PS_binSize - ECQ[j]*bp->binSize; + data[j]=pastri_float_InverseQuantization(scalesQ[j/p->sbSize]*patternQ[j%p->sbSize],PS_binSize) - pastri_float_InverseQuantization(ECQ[j],bp->binSize); + } +} + +static inline void pastri_float_Decode(unsigned char*inBuf,pastri_params *p,pastri_blockParams *bp,unsigned char*outBuf,int *numReadBytes,int64_t* patternQ,int64_t* scalesQ,int64_t* ECQ){ + int j; + bp->_1DIdxBits=bitsNeeded_UI64(p->bSize); + //float *data=(float*)(outBuf+p->bSize*8); + float *data=(float*)(outBuf); + int i0,i1,i2,i3; + //uint16_t *idx0,*idx1,*idx2,*idx3; + int _1DIdx; + + int64_t ECQTemp; + uint64_t bytePos=0; + uint64_t bitPos=0; + uint64_t temp,temp2; + //int sb,localIdx; + + + //idx0=(uint16_t*)(outBuf ); + //idx1=(uint16_t*)(outBuf+p->bSize*2); + //idx2=(uint16_t*)(outBuf+p->bSize*4); + //idx3=(uint16_t*)(outBuf+p->bSize*6); + //p->idxOffset[0]=*(uint32_t*)(&inBuf[1]); + //p->idxOffset[1]=*(uint32_t*)(&inBuf[3]); + //p->idxOffset[2]=*(uint32_t*)(&inBuf[5]); + //p->idxOffset[3]=*(uint32_t*)(&inBuf[7]); + /* + for(i0=0;i0idxRange[0];i0++) + for(i1=0;i1idxRange[1];i1++) + for(i2=0;i2idxRange[2];i2++) + for(i3=0;i3idxRange[3];i3++){ + //_1DIdx=i0*p->idxRange[1]*p->idxRange[2]*p->idxRange[3]+i1*p->idxRange[2]*p->idxRange[3]+i2*p->idxRange[3]+i3; + _1DIdx=p->idxRange[3]*(i2+p->idxRange[2]*(i1+i0*p->idxRange[1]))+i3; + 
idx0[_1DIdx]=i0+1+p->idxOffset[0]; + idx1[_1DIdx]=i1+1+p->idxOffset[1]; + idx2[_1DIdx]=i2+1+p->idxOffset[2]; + idx3[_1DIdx]=i3+1+p->idxOffset[3]; + } + */ + + //*numOutBytes=p->bSize*16; + + //inBuf[0] is "mode" + switch(inBuf[0]){ + //R:UCSparse + case 0: + //if(D_G){printf("\nDC:UCSparse\n");} //DEBUG + //bp->nonZeros=*(uint16_t*)(&inBuf[9]); + //bytePos=11; + bp->nonZeros=*(uint16_t*)(&inBuf[1]); + bytePos=3; + for(j=0;jbSize;j++){ + data[j]=0; + } + for(j=0;jnonZeros;j++){ + //i0=*(uint16_t*)(&inBuf[bytePos])-1-p->idxOffset[0]; //i0 + i0=*(uint16_t*)(&inBuf[bytePos]); //i0 + bytePos+=2; + //i1=*(uint16_t*)(&inBuf[bytePos])-1-p->idxOffset[1]; //i1 + i1=*(uint16_t*)(&inBuf[bytePos]); //i1 + bytePos+=2; + //i2=*(uint16_t*)(&inBuf[bytePos])-1-p->idxOffset[2]; //i2 + i2=*(uint16_t*)(&inBuf[bytePos]); //i2 + bytePos+=2; + //i3=*(uint16_t*)(&inBuf[bytePos])-1-p->idxOffset[3]; //i3 + i3=*(uint16_t*)(&inBuf[bytePos]); //i3 + bytePos+=2; + _1DIdx=p->idxRange[3]*(i2+p->idxRange[2]*(i1+i0*p->idxRange[1]))+i3; + data[_1DIdx]=*(float*)(&inBuf[bytePos]); + bytePos+=8; + } + //if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG + break; + //R:UCNonSparse + case 1: + //if(D_G){printf("\nDC:UCNonSparse\n");} //DEBUG + //memcpy(&outBuf[p->bSize*8], &inBuf[9], p->bSize*8); + memcpy(data, &inBuf[1], p->bSize*8); + bytePos=p->bSize*8; + //if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG + break; + //R:CSparse + case 2: + //if(D_G){printf("\nDC:CSparse\n");} //DEBUG + //for(j=0;jbSize;j++){ + // data[j]=0; + //} + + //bp->patternBits=inBuf[13]; + //bp->ECQBits=inBuf[14]; + + bp->patternBits=inBuf[5]; + bp->ECQBits=inBuf[6]; + + //if(D_R){printf("bp->patternBits:%d bp->ECQBits:%d bp->_1DIdxBits:%d\n",bp->patternBits,bp->ECQBits,bp->_1DIdxBits);} //DEBUG + + //bp->numOutliers=*(uint16_t*)(&inBuf[15]); + //bitPos=17*8; + bp->numOutliers=*(uint16_t*)(&inBuf[7]); + bitPos=9*8; + //if(D_R){printf("bp->numOutliers:%d\n",bp->numOutliers);} //DEBUG + + 
bp->scalesBinSize=1/(float)(((uint64_t)1<<(bp->patternBits-1))-1); + + bp->binSize=p->usedEb*2; + + //if(D_R){printf("bp->scalesBinSize:%.6e bp->binSize:%.6e bp->scalesBinSize*bp->binSize:%.6e\n",bp->scalesBinSize,bp->binSize,bp->scalesBinSize*bp->binSize);} //DEBUG + + for(j=0;jsbSize;j++){ + patternQ[j]=readBits_I64(inBuf,&bitPos,bp->patternBits);//Pattern point + //if(D_R){printf("R:patternQ[%d]=%ld\n",j,patternQ[j]);} + } + for(j=0;jsbNum;j++){ + scalesQ[j]=readBits_I64(inBuf,&bitPos,bp->patternBits);//Scale + //if(D_R){printf("R:scalesQ[%d]=%ld\n",j,scalesQ[j]);} + } + + /* //Splitting + for(j=0;jbSize;j++){ + data[j]=scalesQ[j/p->sbSize]*patternQ[j%p->sbSize]*bp->scalesBinSize*bp->binSize; + } + */ + for(j=0;jbSize;j++){ + ECQ[j]=0; + } + switch(bp->ECQBits){ + case 2: + for(j=0;jnumOutliers;j++){ + ////if(DEBUG){printf("readBits_UI64:%ld\n",readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits));} //DEBUG + ////if(DEBUG){printf("readBits_UI64:%ld\n",readBits_I64(inBuf,&bitPos,2));} //DEBUG + + _1DIdx=readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits); + ECQTemp=readBits_I64(inBuf,&bitPos,1); + ECQTemp= ((ECQTemp<<63)>>63)|(uint64_t)0x1; + ////if(D_R)printf("R:ECQ[%d]: %ld \n",_1DIdx,ECQTemp); + //continue; + //sb=_1DIdx/p->sbSize; + //localIdx=_1DIdx%p->sbSize; + + ////data[_1DIdx]-=ECQTemp*bp->binSize;//Splitting + ECQ[_1DIdx]=ECQTemp; + + ////if(DEBUG){printf("decompressed[%d]:%.6e\n",_1DIdx,data[_1DIdx]);} //DEBUG + } + break; + default: //bp->ECQBits>2 + //if(D_C){if(!((bp->ECQBits>=2)||((bp->ECQBits==1) && (bp->numOutliers==0)))){printf("ERROR: bp->ECQBits:%d bp->numOutliers:%d This should not have happened!\n",bp->ECQBits,bp->numOutliers);assert(0);}} //DEBUG + + for(j=0;jnumOutliers;j++){ + _1DIdx=readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits); + //sb=_1DIdx/p->sbSize; + //localIdx=_1DIdx%p->sbSize; + temp=readBits_UI64(inBuf,&bitPos,1); + ////if(DEBUG){printf("temp:%ld\n",temp);} //DEBUG + switch(temp){ + case 0: //+-1 + ECQTemp=readBits_I64(inBuf,&bitPos,1); + ECQTemp= 
((ECQTemp<<63)>>63)|(uint64_t)0x1; + ////if(DEBUG){printf("_1DIdx:%ld ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG + ////if(D_R)printf("R:ECQ[%d]: %ld \n",_1DIdx,ECQTemp); + break; + case 1: //Others + ECQTemp=readBits_I64(inBuf,&bitPos,bp->ECQBits); + ////if(DEBUG){printf("_1DIdx:%ld ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG + ////if(D_R)printf("R:ECQ[%d]: %ld \n",_1DIdx,ECQTemp); + break; + //default: + //// printf("ERROR: Bad 2-bit value: 0x%lx",temp); + // assert(0); //AMG + // break; + } + + //data[_1DIdx]-=ECQTemp*bp->binSize;//Splitting + ECQ[_1DIdx]=ECQTemp; + + ////if(DEBUG){printf("decompressed[%d]:%.6e\n",_1DIdx,data[_1DIdx]);} //DEBUG + } + break; + } + //static inline uint64_t readBits_UI64(unsigned char* buffer,uint64_t *bitPosPtr,uint64_t numBits){ // numBits must be in range [0:56] + //patternQ=(int64_t*)(inBuf+15); + //scalesQ=(int64_t*)(inBuf+15+p->sbSize*8); + + bytePos=(bitPos+7)/8; + //if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG + + //STEP 2: PREDICT DATA(Includes INVERSE QUANTIZATION) + pastri_float_PredictData(p,bp,data,patternQ,scalesQ,ECQ); + + break; + //R:CNonSparse + case 3: + //if(D_G){printf("\nDC:CNonSparse\n");} //DEBUG + + //for(j=0;jbSize;j++){ + // data[j]=0; + //} + + //bp->patternBits=inBuf[13]; + //bp->ECQBits=inBuf[14]; + + bp->patternBits=inBuf[5]; + bp->ECQBits=inBuf[6]; + + //if(D_R){printf("bp->patternBits:%d bp->ECQBits:%d bp->_1DIdxBits:%d\n",bp->patternBits,bp->ECQBits,bp->_1DIdxBits);} //DEBUG + + //bitPos=15*8; + bitPos=7*8; + + bp->scalesBinSize=1/(float)(((uint64_t)1<<(bp->patternBits-1))-1); + bp->binSize=p->usedEb*2; + + //if(D_R){printf("bp->scalesBinSize:%.6e bp->binSize:%.6e bp->scalesBinSize*bp->binSize:%.6e\n",bp->scalesBinSize,bp->binSize,bp->scalesBinSize*bp->binSize);} //DEBUG + + for(j=0;jsbSize;j++){ + patternQ[j]=readBits_I64(inBuf,&bitPos,bp->patternBits);//Pattern point + //if(D_R){printf("R:patternQ[%d]=%ld\n",j,patternQ[j]);} + } + for(j=0;jsbNum;j++){ + 
scalesQ[j]=readBits_I64(inBuf,&bitPos,bp->patternBits);//Scale + //if(D_R){printf("R:scalesQ[%d]=%ld\n",j,scalesQ[j]);} + } + /* //Splitting + for(j=0;jbSize;j++){ + data[j]=scalesQ[j/p->sbSize]*patternQ[j%p->sbSize]*bp->scalesBinSize*bp->binSize; + ////if(DEBUG){printf("DC:PS[%d]=%.6e\n",j,data[j]);} + } + */ + switch(bp->ECQBits){ + case 2: + for(j=0;jbSize;j++){ + ////if(DEBUG){printf("readBits_UI64:%ld\n",readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits));} //DEBUG + ////if(DEBUG){printf("readBits_UI64:%ld\n",readBits_I64(inBuf,&bitPos,2));} //DEBUG + //_1DIdx=readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits); + temp=readBits_UI64(inBuf,&bitPos,1); + switch(temp){ + case 0: + ECQTemp=readBits_I64(inBuf,&bitPos,1); + ECQTemp= ((ECQTemp<<63)>>63)|(uint64_t)0x1; + break; + case 1: + ECQTemp=0; + break; + default: + assert(0); + break; + } + + ////if(DEBUG){printf("_1DIdx:%ld ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG + //continue; + //sb=_1DIdx/p->sbSize; + //localIdx=_1DIdx%p->sbSize; + + //data[j]-=ECQTemp*bp->binSize; //Splitting + ECQ[j]=ECQTemp; + + ////if(DEBUG){printf("decompressed[%d]:%.6e\n",_1DIdx,data[_1DIdx]);} //DEBUG + } + break; + default: //bp->ECQBits>2 + ////if(DEBUG)printf("AMG_R1:bitPos: %ld\n",bitPos); + + for(j=0;jbSize;j++){ + ////if(DEBUG){printf("AMG_R3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG + ////if(DEBUG)printf("AMG_R2:bitPos: %ld\n",bitPos); + + ////if(DEBUG){printf("readBits_UI64:%ld\n",readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits));} //DEBUG + ////if(DEBUG){printf("readBits_UI64:%ld\n",readBits_I64(inBuf,&bitPos,2));} //DEBUG + //_1DIdx=readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits); + temp=readBits_UI64(inBuf,&bitPos,1); + ////if(DEBUG){printf("AMG_R3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG + switch(temp){ + case 0: + ////if(DEBUG)printf("Read:0"); + temp2=readBits_UI64(inBuf,&bitPos,1); + switch(temp2){ + case 0: + ////if(DEBUG)printf("0"); + 
ECQTemp=readBits_I64(inBuf,&bitPos,1); + ////if(DEBUG){printf("AMG_R3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG + ////if(DEBUG)printf("R:ECQTemp:%ld\n",ECQTemp); + ECQTemp= ((ECQTemp<<63)>>63)|(uint64_t)0x1; + ////if(DEBUG)printf("R:ECQ[%d]: %ld\n",j,ECQTemp); + break; + case 1: + ////if(DEBUG)printf("1\n"); + ECQTemp=readBits_I64(inBuf,&bitPos,bp->ECQBits); + ////if(DEBUG){printf("AMG_R3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG + ////if(DEBUG)printf("R:ECQ[%d]: %ld\n",j,ECQTemp); + break; + default: + assert(0); + break; + } + break; + case 1: + ////if(DEBUG)printf("Read:1\n"); + ECQTemp=0; + ////if(DEBUG)printf("R:ECQ[%d]: %ld\n",j,ECQTemp); + break; + default: + assert(0); + break; + } + + ////if(DEBUG){printf("_1DIdx:%ld ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG + //continue; + //sb=_1DIdx/p->sbSize; + //localIdx=_1DIdx%p->sbSize; + + //data[j]-=ECQTemp*bp->binSize; //Splitting + ECQ[j]=ECQTemp; + + ////if(DEBUG){printf("DC:data[%d]:%.6e\n",j,data[j]);} //DEBUG + } + break; + } + //static inline uint64_t readBits_UI64(unsigned char* buffer,uint64_t *bitPosPtr,uint64_t numBits){ // numBits must be in range [0:56] + //patternQ=(int64_t*)(inBuf+15); + //scalesQ=(int64_t*)(inBuf+15+p->sbSize*8); + bytePos=(bitPos+7)/8; + //if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG + + //STEP 2: PREDICT DATA(Includes INVERSE QUANTIZATION) + pastri_float_PredictData(p,bp,data,patternQ,scalesQ,ECQ); + break; + + default: + assert(0); + break; + } + (*numReadBytes)=bytePos; +} + +static inline void pastri_float_Decompress(unsigned char*inBuf,int dataSize,pastri_params *p,unsigned char*outBuf,int *numReadBytes){ + int64_t patternQ[MAX_PS_SIZE]; + int64_t scalesQ[MAX_PS_SIZE]; + int64_t ECQ[MAX_BLOCK_SIZE]; + + pastri_blockParams bp; + + //STEP 1: DECODE (Includes PREDICT DATA(Includes INVERSE QUANTIZATION)) + //(Further steps are called inside pastri_float_Decode function) + 
pastri_float_Decode(inBuf,p,&bp,outBuf,numReadBytes,patternQ,scalesQ,ECQ); + + return; +} + +//inBuf vs Decompressed +static inline int pastri_float_Check(unsigned char*inBuf,int dataSize,unsigned char*DC,pastri_params *p){ + int i; + + float *data=(float*)(inBuf); + float *data_dc=(float*)(DC); + + //Comparing Indexes: + /* + for(i=0;ibSize;i++){ + if(idx0[i]!=idx0_dc[i]){ + //printf("idx0[%d]=%d != %d=idx0_dc[%d]",i,idx0[i],idx0_dc[i],i); + assert(0); + } + if(idx1[i]!=idx1_dc[i]){ + //printf("idx1[%d]=%d != %d=idx1_dc[%d]",i,idx1[i],idx1_dc[i],i); + assert(0); + } + if(idx2[i]!=idx2_dc[i]){ + //printf("idx2[%d]=%d != %d=idx2_dc[%d]",i,idx2[i],idx2_dc[i],i); + assert(0); + } + if(idx3[i]!=idx3_dc[i]){ + //printf("idx3[%d]=%d != %d=idx3_dc[%d]",i,idx3[i],idx3_dc[i],i); + assert(0); + } + } + */ + + //Comparing Data: + for(i=0;ibSize;i++){ + if(abs_FastD(data[i]-data_dc[i])>p->usedEb){ + //printf("|data[%d]-data_dc[%d]|>originalEb : %.3e - %.3e = %.3e > %.3e\n",i,i,data[i],data_dc[i],abs_FastD(data[i]-data_dc[i]),p->usedEb); + assert(0); + } + } + return 0; +} + + +#endif diff --git a/deps/SZ/sz/include/pastriGeneral.h b/deps/SZ/sz/include/pastriGeneral.h new file mode 100644 index 0000000000000000000000000000000000000000..81149256d65d05f8626418dc360644202550e44e --- /dev/null +++ b/deps/SZ/sz/include/pastriGeneral.h @@ -0,0 +1,205 @@ +#ifndef PASTRIGENERAL_H +#define PASTRIGENERAL_H + + +static inline double abs_FastD(double x){ + u_UI64I64D u1; + u1.d=x; + //(*((uint64_t *)(&x)))&=(int64_t)0x7FFFFFFFFFFFFFFF; + u1.ui64&=(int64_t)0x7FFFFFFFFFFFFFFF; + return u1.d; +} + +static inline int64_t abs_FastI64(int64_t x){ + return (x^((x&(int64_t)0x8000000000000000)>>63))+((x&(int64_t)0x8000000000000000)!=0); +} +/* +int abs(int x) { + int mask = (x >> (sizeof(int) * CHAR_BIT - 1)); + return (x + mask) ^ mask; +} +*/ + + + + +//Returns the min. bits needed to represent x. 
+//Same as: ceil(log2(abs(x))) +//Actually to be completely safe, it correspond to: ceil(log2(abs(i)+1))+0.1 +//+0.1 was for fixing rounding errors +//REMEMBER: To represent the whole range [-x:x], the number of bits required is bitsNeeded(x)+1 +static inline int bitsNeeded_double(double x){ + u_UI64I64D u1; + u1.d=x; + return (((u1.ui64<<1)>>53)-1022) & (((x!=0)<<31)>>31); +} + +//Returns the min. bits needed to represent x. +//Same as: ceil(log2(abs(x))) +//NEEDS OPTIMIZATION! +static inline int bitsNeeded_float(float x){ + u_UI64I64D u1; + u1.d=x; //Casting to Double! + return (((u1.ui64<<1)>>53)-1022) & (((x!=0)<<31)>>31); +} + +static inline int bitsNeeded_UI64(uint64_t x){ + int shift; + int res=0; + + //Get the absolute value of x: + //x=(x^((x&(int64_t)0x8000000000000000)>>63))+((x&(int64_t)0x8000000000000000)!=0); + //x=abs_FastI64(x); + + //printf("%d\n",(x&(uint64_t)0xFFFFFFFF00000000)!=0); + shift=(((x&(uint64_t)0xFFFFFFFF00000000)!=0)*32); + x>>=shift; + res+=shift; + + //printf("%d\n",(x&(uint64_t)0x00000000FFFF0000)!=0); + shift=(((x&(uint64_t)0x00000000FFFF0000)!=0)*16); + x>>=shift; + res+=shift; + + //printf("%d\n",(x&(uint64_t)0x000000000000FF00)!=0); + shift=(((x&(uint64_t)0x000000000000FF00)!=0)*8); + x>>=shift; + res+=shift; + + //printf("%d\n",(x&(uint64_t)0x00000000000000F0)!=0); + shift=(((x&(uint64_t)0x00000000000000F0)!=0)*4); + x>>=shift; + res+=shift; + + //printf("%d\n",(x&(uint64_t)0x000000000000000C)!=0); + shift=(((x&(uint64_t)0x000000000000000C)!=0)*2); + x>>=shift; + res+=shift; + + //printf("%d\n",(x&(uint64_t)0x0000000000000002)!=0); + shift=((x&(uint64_t)0x0000000000000002)!=0); + x>>=shift; + res+=shift; + + //printf("%d\n",(x&(uint64_t)0x0000000000000001)!=0); + shift=((x&(uint64_t)0x0000000000000001)!=0); + x>>=shift; + res+=shift; + + //printf("BITS NEEDED: %d\n",res); + return res; +} + +static inline int bitsNeeded_I64(int64_t x){ + uint64_t ux; + ux=abs_FastI64(x); + return bitsNeeded_UI64(ux); +} + 
+//Implementations(They are inline, so they should be in this header file) + +static inline int myEndianType(){ //Should work for most cases. May not work at mixed endian systems. + uint64_t n=1; + if (*(unsigned char*)&n == 1){ + //cout<<"Little-Endian"<>3:"<<(*bitPosPtr>>3)<> 3)); + //NOTE: bitPos>>3 is the same as bitPos/8 + temp64b >>= (*bitPosPtr) & (uint64_t)0x0000000000000007; + + //cout<>3:"<<(bitPos>>3)<<" bitPos&0x7:"<<(bitPos & 0x00000007)<<" bitPos%8:"<<(bitPos%8)<>shiftAmount;//Sign correction + return val; +} + +//WARNING: readBits_EndianSafe is not tested on Big-Endian machines +static inline uint64_t readBits_EndianSafe(unsigned char* buffer,uint64_t *bitPosPtr,char numBits){ // numBits must be in range [0:56] + uint64_t mask = ((uint64_t)0x0000000000000001<>3)); + //NOTE: (*bitPosPtr)>>3 is the same as (*bitPosPtr)/8 + if(myEndianType()) + flipBytes_UI64(&temp64b); + temp64b >>= (*bitPosPtr) & (uint64_t)0x0000000000000007; + (*bitPosPtr) += numBits; + return temp64b & mask; +} + +//WARNING: writeBits_Fast works properly only on Little Endian machines! (For Big Endians, some modifications are needed) +//The buffer should be initialized as 0's for this to work! 
+//Also, the range of data is not checked!(If data exceeds numBits, it may be cause problems) +static inline void writeBits_Fast(unsigned char* buffer,uint64_t *bitPosPtr,char numBits,int64_t data){ + //if(DEBUG){printf("writeBits_Fast: data:0x%lx %ld\n",data,data);} //DEBUG + //if(DEBUG){printf("writeBits_Fast: numBits:0x%lx %ld\n",numBits,numBits);} //DEBUG + uint64_t mask = ((uint64_t)0x0000000000000001<>3)));} //DEBUG + *(uint64_t*)(buffer + ((*bitPosPtr)>>3)) |= ((*(uint64_t*)&data)&mask) << ((*bitPosPtr) & (uint64_t)0x0000000000000007); + //if(DEBUG){printf("writeBits_Fast: buffer_N:0x%lx\n",*(uint64_t*)(buffer + ((*bitPosPtr)>>3)));} //DEBUG + + + (*bitPosPtr) += numBits; +} + +//WARNING: writeBits_EndianSafe is not tested on Big-Endian machines +static inline void writeBits_EndianSafe(unsigned char* buffer,uint64_t *bitPosPtr,char numBits,uint64_t data){ + uint64_t mask = ((uint64_t)0x0000000000000001<>3)); + uint64_t temp64b_outBuffer=data << ((*bitPosPtr) & (uint64_t)0x0000000000000007); + if(myEndianType()){ + flipBytes_UI64(&temp64b_inBuffer); + } + temp64b_outBuffer |= temp64b_inBuffer; + if(myEndianType()){ + flipBytes_UI64(&temp64b_outBuffer); + } + *(uint64_t*)(buffer + ((*bitPosPtr)>>3))=temp64b_outBuffer; // "|=" may also work + (*bitPosPtr) += numBits; +} + + +#endif diff --git a/deps/SZ/sz/include/rw.h b/deps/SZ/sz/include/rw.h new file mode 100644 index 0000000000000000000000000000000000000000..846243de5e0fe58a266e8f62f487649b60cb2ebb --- /dev/null +++ b/deps/SZ/sz/include/rw.h @@ -0,0 +1,89 @@ +/** + * @file io.h + * @author Sheng Di + * @date April, 2015 + * @brief Header file for the whole io interface. + * (C) 2015 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. 
+ */ + +#ifndef _IO_H +#define _IO_H + +#include +#include + +#ifdef _WIN32 +#define PATH_SEPARATOR ';' +#else +#define PATH_SEPARATOR ':' +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +int checkFileExistance(char* filePath); + +float** create2DArray_float(size_t m, size_t n); +void free2DArray_float(float** data, size_t m); +float*** create3DArray_float(size_t p, size_t m, size_t n); +void free3DArray_float(float*** data, size_t p, size_t m); +double** create2DArray_double(size_t m, size_t n); +void free2DArray_double(double** data, size_t m); +double*** create3DArray_double(size_t p, size_t m, size_t n); +void free3DArray_double(double*** data, size_t p, size_t m); +size_t checkFileSize(char *srcFilePath, int *status); + +unsigned char *readByteData(char *srcFilePath, size_t *byteLength, int *status); +double *readDoubleData(char *srcFilePath, size_t *nbEle, int *status); +int8_t *readInt8Data(char *srcFilePath, size_t *nbEle, int *status); +int16_t *readInt16Data(char *srcFilePath, size_t *nbEle, int *status); +uint16_t *readUInt16Data(char *srcFilePath, size_t *nbEle, int *status); +int32_t *readInt32Data(char *srcFilePath, size_t *nbEle, int *status); +uint32_t *readUInt32Data(char *srcFilePath, size_t *nbEle, int *status); +int64_t *readInt64Data(char *srcFilePath, size_t *nbEle, int *status); +uint64_t *readUInt64Data(char *srcFilePath, size_t *nbEle, int *status); +float *readFloatData(char *srcFilePath, size_t *nbEle, int *status); +unsigned short* readShortData(char *srcFilePath, size_t *dataLength, int *status); + +double *readDoubleData_systemEndian(char *srcFilePath, size_t *nbEle, int *status); +int8_t *readInt8Data_systemEndian(char *srcFilePath, size_t *nbEle, int *status); +int16_t *readInt16Data_systemEndian(char *srcFilePath, size_t *nbEle, int *status); +uint16_t *readUInt16Data_systemEndian(char *srcFilePath, size_t *nbEle, int *status); +int32_t *readInt32Data_systemEndian(char *srcFilePath, size_t *nbEle, int *status); +uint32_t 
*readUInt32Data_systemEndian(char *srcFilePath, size_t *nbEle, int *status); +int64_t *readInt64Data_systemEndian(char *srcFilePath, size_t *nbEle, int *status); +uint64_t *readUInt64Data_systemEndian(char *srcFilePath, size_t *nbEle, int *status); +float *readFloatData_systemEndian(char *srcFilePath, size_t *nbEle, int *status); + +void writeByteData(unsigned char *bytes, size_t byteLength, char *tgtFilePath, int *status); +void writeDoubleData(double *data, size_t nbEle, char *tgtFilePath, int *status); +void writeFloatData(float *data, size_t nbEle, char *tgtFilePath, int *status); +void writeData(void *data, int dataType, size_t nbEle, char *tgtFilePath, int *status); +void writeFloatData_inBytes(float *data, size_t nbEle, char* tgtFilePath, int *status); +void writeDoubleData_inBytes(double *data, size_t nbEle, char* tgtFilePath, int *status); +void writeShortData_inBytes(short *states, size_t stateLength, char *tgtFilePath, int *status); +void writeUShortData_inBytes(unsigned short *states, size_t stateLength, char *tgtFilePath, int *status); +void writeIntData_inBytes(int *states, size_t stateLength, char *tgtFilePath, int *status); +void writeUIntData_inBytes(unsigned int *states, size_t stateLength, char *tgtFilePath, int *status); +void writeLongData_inBytes(int64_t *states, size_t stateLength, char *tgtFilePath, int *status); +void writeULongData_inBytes(uint64_t *states, size_t stateLength, char *tgtFilePath, int *status); + +void writeStrings(int nbStr, char *str[], char *tgtFilePath, int *status); + +//void convertToPFM_float(float *data, size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, int endianType, char *tgtFilePath, int *status); + +void checkfilesizec_(char *srcFilePath, int *len, size_t *filesize); +void readbytefile_(char *srcFilePath, int *len, unsigned char *bytes, size_t *byteLength); +void readdoublefile_(char *srcFilePath, int *len, double *data, size_t *nbEle); +void readfloatfile_(char *srcFilePath, int *len, float *data, size_t 
*nbEle); +void writebytefile_(unsigned char *bytes, size_t *byteLength, char *tgtFilePath, int *len); +void writedoublefile_(double *data, size_t *nbEle, char *tgtFilePath, int *len); +void writefloatfile_(float *data, size_t *nbEle, char *tgtFilePath, int *len); + +#ifdef __cplusplus +} +#endif + +#endif /* ----- #ifndef _IO_H ----- */ diff --git a/deps/SZ/sz/include/sz.h b/deps/SZ/sz/include/sz.h new file mode 100644 index 0000000000000000000000000000000000000000..8bf7fa357f5597864646fc284e218525e2bfd8e4 --- /dev/null +++ b/deps/SZ/sz/include/sz.h @@ -0,0 +1,337 @@ +/** + * @file sz.h + * @author Sheng Di + * @date April, 2015 + * @brief Header file for the whole compressor. + * (C) 2015 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. + */ + +#ifndef _SZ_H +#define _SZ_H + +#include +#include +#include /* For gettimeofday(), in microseconds */ +#include /* For time(), in seconds */ +#include "CompressElement.h" +#include "DynamicByteArray.h" +#include "DynamicIntArray.h" +#include "VarSet.h" +#include "Huffman.h" +#include "TightDataPointStorageD.h" +#include "TightDataPointStorageF.h" +#include "TightDataPointStorageI.h" +#include "conf.h" +#include "dataCompression.h" +#include "ByteToolkit.h" +#include "TypeManager.h" +#include "sz_int8.h" +#include "sz_int16.h" +#include "sz_int32.h" +#include "sz_int64.h" +#include "sz_uint8.h" +#include "sz_uint16.h" +#include "sz_uint32.h" +#include "sz_uint64.h" +#include "sz_float.h" +#include "sz_double.h" +#include "szd_int8.h" +#include "szd_int16.h" +#include "szd_int32.h" +#include "szd_int64.h" +#include "szd_uint8.h" +#include "szd_uint16.h" +#include "szd_uint32.h" +#include "szd_uint64.h" +#include "szd_float.h" +#include "szd_double.h" +#include "sz_float_pwr.h" +#include "sz_double_pwr.h" +#include "sz_opencl.h" +#include "callZlib.h" +#include "rw.h" +#include "pastri.h" +#include "sz_float_ts.h" +#include "szd_float_ts.h" +#include "utility.h" 
+#include "CacheTable.h" +#include "MultiLevelCacheTable.h" +#include "MultiLevelCacheTableWideInterval.h" +#include "exafelSZ.h" +#include "sz_stats.h" + +#ifdef _WIN32 +#define PATH_SEPARATOR ';' +#else +#define PATH_SEPARATOR ':' +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +//typedef char int8_t; +//typedef unsigned char uint8_t; +//typedef short int16_t; +//typedef unsigned short uint16_t; +//typedef int int32_t; +//typedef unsigned int uint32_t; +//typedef long int64_t; +//typedef unsigned long uint64_t; + +#include "defines.h" + +//Note: the following setting should be consistent with stateNum in Huffman.h +//#define intvCapacity 65536 +//#define intvRadius 32768 +//#define intvCapacity 131072 +//#define intvRadius 65536 + +#define SZ_COMPUTE_1D_NUMBER_OF_BLOCKS( COUNT, NUM_BLOCKS, BLOCK_SIZE ) \ + if (COUNT <= BLOCK_SIZE){ \ + NUM_BLOCKS = 1; \ + } \ + else{ \ + NUM_BLOCKS = COUNT / BLOCK_SIZE; \ + } \ + +#define SZ_COMPUTE_2D_NUMBER_OF_BLOCKS( COUNT, NUM_BLOCKS, BLOCK_SIZE ) \ + if (COUNT <= BLOCK_SIZE){ \ + NUM_BLOCKS = 1; \ + } \ + else{ \ + NUM_BLOCKS = COUNT / BLOCK_SIZE; \ + } \ + +#define SZ_COMPUTE_3D_NUMBER_OF_BLOCKS( COUNT, NUM_BLOCKS, BLOCK_SIZE ) \ + if (COUNT <= BLOCK_SIZE){ \ + NUM_BLOCKS = 1; \ + } \ + else{ \ + NUM_BLOCKS = COUNT / BLOCK_SIZE; \ + } \ + +#define SZ_COMPUTE_BLOCKCOUNT( COUNT, NUM_BLOCKS, SPLIT_INDEX, \ + EARLY_BLOCK_COUNT, LATE_BLOCK_COUNT ) \ + EARLY_BLOCK_COUNT = LATE_BLOCK_COUNT = COUNT / NUM_BLOCKS; \ + SPLIT_INDEX = COUNT % NUM_BLOCKS; \ + if (0 != SPLIT_INDEX) { \ + EARLY_BLOCK_COUNT = EARLY_BLOCK_COUNT + 1; \ + } \ + +//typedef unsigned long unsigned long; +//typedef unsigned int uint; + +typedef union lint16 +{ + unsigned short usvalue; + short svalue; + unsigned char byte[2]; +} lint16; + +typedef union lint32 +{ + int ivalue; + unsigned int uivalue; + unsigned char byte[4]; +} lint32; + +typedef union lint64 +{ + long lvalue; + unsigned long ulvalue; + unsigned char byte[8]; +} lint64; + +typedef union 
ldouble +{ + double value; + unsigned long lvalue; + unsigned char byte[8]; +} ldouble; + +typedef union lfloat +{ + float value; + unsigned int ivalue; + unsigned char byte[4]; +} lfloat; + +/* array meta data and compression parameters for SZ_Init_Params() */ +typedef struct sz_params +{ + int dataType; + unsigned int max_quant_intervals; //max number of quantization intervals for quantization + unsigned int quantization_intervals; + unsigned int maxRangeRadius; + int sol_ID;// it's SZ or SZ_Transpose, unless the setting is PASTRI compression mode (./configure --enable-pastri) + int losslessCompressor; + int sampleDistance; //2 bytes + float predThreshold; // 2 bytes + int szMode; //* 0 (best speed) or 1 (better compression with Zstd/Gzip) or 3 temporal-dimension based compression + int gzipMode; //* four options: Z_NO_COMPRESSION, or Z_BEST_SPEED, Z_BEST_COMPRESSION, Z_DEFAULT_COMPRESSION + int errorBoundMode; //4bits (0.5byte), //ABS, REL, ABS_AND_REL, or ABS_OR_REL, PSNR, or PW_REL, PSNR + double absErrBound; //absolute error bound + double relBoundRatio; //value range based relative error bound ratio + double psnr; //PSNR + double normErr; + double pw_relBoundRatio; //point-wise relative error bound + int segment_size; //only used for 2D/3D data compression with pw_relBoundRatio (deprecated) + int pwr_type; //only used for 2D/3D data compression with pw_relBoundRatio + + int protectValueRange; //0 or 1 + float fmin, fmax; + double dmin, dmax; + + int snapshotCmprStep; //perform single-snapshot-based compression if time_step == snapshotCmprStep + int predictionMode; + + int accelerate_pw_rel_compression; + int plus_bits; + + int randomAccess; + int withRegression; + +} sz_params; + +typedef struct sz_metadata +{ + int versionNumber[3]; //only used for checking the version by calling SZ_GetMetaData() + int isConstant; //only used for checking if the data are constant values by calling SZ_GetMetaData() + int isLossless; //only used for checking if the data 
compression was lossless, used only by calling SZ_GetMetaData() + int sizeType; //only used for checking whether the size type is "int" or "long" in the compression, used only by calling SZ_GetMetaData() + size_t dataSeriesLength; //# number of data points in the dataset + int defactoNBBins; //real number of quantization bins + struct sz_params* conf_params; //configuration parameters +} sz_metadata; + +typedef struct sz_exedata +{ + char optQuantMode; //opt Quantization (0: fixed ; 1: optimized) + int intvCapacity; // the number of intervals for the linear-scaling quantization + int intvRadius; // the number of intervals for the radius of the quantization range (intvRadius=intvCapacity/2) + unsigned int SZ_SIZE_TYPE; //the length (# bytes) of the size_t in the system at runtime //4 or 8: sizeof(size_t) +} sz_exedata; + +/*We use a linked list to maintain time-step meta info for time-step based compression*/ +typedef struct sz_tsc_metainfo +{ + int totalNumOfSteps; + int currentStep; + char metadata_filename[256]; + FILE *metadata_file; + unsigned char* bit_array; //sihuan added + size_t intersect_size; //sihuan added + int64_t* hist_index; //sihuan added: prestep index + +} sz_tsc_metadata; + +extern int versionNumber[4]; + +//-------------------key global variables-------------- +extern int dataEndianType; //*endian type of the data read from disk +extern int sysEndianType; //*sysEndianType is actually set automatically. 
+ +extern sz_params *confparams_cpr; +extern sz_params *confparams_dec; +extern sz_exedata *exe_params; + +//------------------------------------------------ +extern SZ_VarSet* sz_varset; +extern sz_multisteps *multisteps; //compression based on multiple time steps (time-dimension based compression) +extern sz_tsc_metadata *sz_tsc; + +//for pastri +#ifdef PASTRI +extern pastri_params pastri_par; +#endif + +//sz.h +HuffmanTree* SZ_Reset(); + +int SZ_Init(const char *configFilePath); + +int SZ_Init_Params(sz_params *params); + +size_t computeDataLength(size_t r5, size_t r4, size_t r3, size_t r2, size_t r1); + +int computeDimension(size_t r5, size_t r4, size_t r3, size_t r2, size_t r1); + +int SZ_compress_args_float_subblock(unsigned char* compressedBytes, float *oriData, +size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, +size_t s5, size_t s4, size_t s3, size_t s2, size_t s1, +size_t e5, size_t e4, size_t e3, size_t e2, size_t e1, +size_t *outSize, int errBoundMode, double absErr_Bound, double relBoundRatio); + +int SZ_compress_args_double_subblock(unsigned char* compressedBytes, double *oriData, +size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, +size_t s5, size_t s4, size_t s3, size_t s2, size_t s1, +size_t e5, size_t e4, size_t e3, size_t e2, size_t e1, +size_t *outSize, int errBoundMode, double absErr_Bound, double relBoundRatio); + +unsigned char *SZ_compress(int dataType, void *data, size_t *outSize, size_t r5, size_t r4, size_t r3, size_t r2, size_t r1); + +unsigned char* SZ_compress_args(int dataType, void *data, size_t *outSize, int errBoundMode, double absErrBound, +double relBoundRatio, double pwrBoundRatio, size_t r5, size_t r4, size_t r3, size_t r2, size_t r1); + +int SZ_compress_args2(int dataType, void *data, unsigned char* compressed_bytes, size_t *outSize, +int errBoundMode, double absErrBound, double relBoundRatio, double pwrBoundRatio, +size_t r5, size_t r4, size_t r3, size_t r2, size_t r1); + +int SZ_compress_args3(int dataType, void 
*data, unsigned char* compressed_bytes, size_t *outSize, int errBoundMode, double absErrBound, double relBoundRatio, +size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, +size_t s5, size_t s4, size_t s3, size_t s2, size_t s1, +size_t e5, size_t e4, size_t e3, size_t e2, size_t e1); + +unsigned char *SZ_compress_rev_args(int dataType, void *data, void *reservedValue, size_t *outSize, int errBoundMode, double absErrBound, double relBoundRatio, +size_t r5, size_t r4, size_t r3, size_t r2, size_t r1); + +int SZ_compress_rev_args2(int dataType, void *data, void *reservedValue, unsigned char* compressed_bytes, size_t *outSize, int errBoundMode, double absErrBound, double relBoundRatio, +size_t r5, size_t r4, size_t r3, size_t r2, size_t r1); +unsigned char *SZ_compress_rev(int dataType, void *data, void *reservedValue, size_t *outSize, size_t r5, size_t r4, size_t r3, size_t r2, size_t r1); + +void SZ_Create_ParamsExe(sz_params** conf_params, sz_exedata** exe_params); + +void *SZ_decompress(int dataType, unsigned char *bytes, size_t byteLength, size_t r5, size_t r4, size_t r3, size_t r2, size_t r1); +size_t SZ_decompress_args(int dataType, unsigned char *bytes, size_t byteLength, void* decompressed_array, size_t r5, size_t r4, size_t r3, size_t r2, size_t r1); + +sz_metadata* SZ_getMetadata(unsigned char* bytes); +void SZ_printMetadata(sz_metadata* metadata); + + +void filloutDimArray(size_t* dim, size_t r5, size_t r4, size_t r3, size_t r2, size_t r1); + +size_t compute_total_batch_size(); + +void SZ_registerVar(int var_id, char* varName, int dataType, void* data, + int errBoundMode, double absErrBound, double relBoundRatio, double pwRelBoundRatio, + size_t r5, size_t r4, size_t r3, size_t r2, size_t r1); + +int SZ_deregisterVar_ID(int var_id); +int SZ_deregisterVar(char* varName); +int SZ_deregisterAllVars(); + +int SZ_compress_ts_select_var(int cmprType, unsigned char* var_ids, unsigned char var_count, unsigned char** newByteData, size_t *outSize); +int 
SZ_compress_ts(int cmprType, unsigned char** newByteData, size_t *outSize); +void SZ_decompress_ts_select_var(unsigned char* var_ids, unsigned char var_count, unsigned char *bytes, size_t bytesLength); +void SZ_decompress_ts(unsigned char *bytes, size_t byteLength); + +void SZ_Finalize(); + +void convertSZParamsToBytes(sz_params* params, unsigned char* result); +void convertBytesToSZParams(unsigned char* bytes, sz_params* params); + +unsigned char* SZ_compress_customize(const char* appName, void* userPara, int dataType, void* data, size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize, int *status); + +unsigned char* SZ_compress_customize_threadsafe(const char* cmprName, void* userPara, int dataType, void* data, size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize, int *status); + +void* SZ_decompress_customize(const char* appName, void* userPara, int dataType, unsigned char* bytes, size_t byteLength, size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, int* status); + +void* SZ_decompress_customize_threadsafe(const char* cmprName, void* userPara, int dataType, unsigned char* bytes, size_t byteLength, size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, int *status); + +#ifdef __cplusplus +} +#endif + +#endif /* ----- #ifndef _SZ_H ----- */ diff --git a/deps/SZ/sz/include/sz_double.h b/deps/SZ/sz/include/sz_double.h new file mode 100644 index 0000000000000000000000000000000000000000..1004f118502208fe736d019d5774f223571f30a0 --- /dev/null +++ b/deps/SZ/sz/include/sz_double.h @@ -0,0 +1,100 @@ +/** + * @file sz_double.h + * @author Sheng Di + * @date July, 2017 + * @brief Header file for the sz_double.c. + * (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. 
+ */ + +#ifndef _SZ_Double_H +#define _SZ_Double_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include +unsigned char* SZ_skip_compress_double(double* data, size_t dataLength, size_t* outSize); + +void computeReqLength_double(double realPrecision, short radExpo, int* reqLength, double* medianValue); +short computeReqLength_double_MSST19(double realPrecision); + +unsigned int optimize_intervals_double_1D(double *oriData, size_t dataLength, double realPrecision); +unsigned int optimize_intervals_double_2D(double *oriData, size_t r1, size_t r2, double realPrecision); +unsigned int optimize_intervals_double_3D(double *oriData, size_t r1, size_t r2, size_t r3, double realPrecision); +unsigned int optimize_intervals_double_4D(double *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision); + +unsigned int optimize_intervals_double_3D_opt(double *oriData, size_t r1, size_t r2, size_t r3, double realPrecision); +unsigned int optimize_intervals_double_2D_opt(double *oriData, size_t r1, size_t r2, double realPrecision); +unsigned int optimize_intervals_double_1D_opt(double *oriData, size_t dataLength, double realPrecision); + +size_t SZ_compress_double_3D_MDQ_RA_block(double * block_ori_data, double * mean, size_t dim_0, size_t dim_1, size_t dim_2, size_t block_dim_0, size_t block_dim_1, size_t block_dim_2, double realPrecision, double * P0, double * P1, int * type, double * unpredictable_data); + +unsigned int optimize_intervals_double_1D_opt_MSST19(double *oriData, size_t dataLength, double realPrecision); +unsigned int optimize_intervals_double_2D_opt_MSST19(double *oriData, size_t r1, size_t r2, double realPrecision); +unsigned int optimize_intervals_double_3D_opt_MSST19(double *oriData, size_t r1, size_t r2, size_t r3, double realPrecision); +TightDataPointStorageD* SZ_compress_double_1D_MDQ(double *oriData, +size_t dataLength, double realPrecision, double valueRangeSize, double medianValue_d); +void SZ_compress_args_double_StoreOriData(double* 
oriData, size_t dataLength, unsigned char** newByteData, size_t *outSize); + +char SZ_compress_args_double_NoCkRngeNoGzip_1D(int cmprType, unsigned char** newByteData, double *oriData, size_t dataLength, double realPrecision, size_t *outSize, double valueRangeSize, double medianValue_d); + +TightDataPointStorageD* SZ_compress_double_2D_MDQ(double *oriData, size_t r1, size_t r2, double realPrecision, double valueRangeSize, double medianValue_d); +char SZ_compress_args_double_NoCkRngeNoGzip_2D(int cmprType, unsigned char** newByteData, double *oriData, size_t r1, size_t r2, double realPrecision, size_t *outSize, double valueRangeSize, double medianValue_d); + +TightDataPointStorageD* SZ_compress_double_3D_MDQ(double *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, double valueRangeSize, double medianValue_d); +char SZ_compress_args_double_NoCkRngeNoGzip_3D(int cmprType, unsigned char** newByteData, double *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t *outSize, double valueRangeSize, double medianValue_d); + +TightDataPointStorageD* SZ_compress_double_4D_MDQ(double *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision, double valueRangeSize, double medianValue_d); +char SZ_compress_args_double_NoCkRngeNoGzip_4D(unsigned char** newByteData, double *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision, size_t *outSize, double valueRangeSize, double medianValue_d); + +TightDataPointStorageD* SZ_compress_double_1D_MDQ_MSST19(double *oriData, size_t dataLength, double realPrecision, double valueRangeSize, double medianValue_f); +TightDataPointStorageD* SZ_compress_double_2D_MDQ_MSST19(double *oriData, size_t r1, size_t r2, double realPrecision, double valueRangeSize, double medianValue_f); +TightDataPointStorageD* SZ_compress_double_3D_MDQ_MSST19(double *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, double valueRangeSize, double medianValue_f); + +void 
SZ_compress_args_double_withinRange(unsigned char** newByteData, double *oriData, size_t dataLength, size_t *outSize); + +/*int SZ_compress_args_double_wRngeNoGzip(unsigned char** newByteData, double *oriData, +size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize, +int errBoundMode, double absErr_Bound, double relBoundRatio, double pwrErrRatio);*/ + +int SZ_compress_args_double(int cmprType, int withRegression, unsigned char** newByteData, double *oriData, +size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize, +int errBoundMode, double absErr_Bound, double relBoundRatio, double pwRelBoundRatio); + +void SZ_compress_args_double_NoCkRnge_1D_subblock(unsigned char* compressedBytes, double *oriData, double realPrecision, size_t *outSize, double valueRangeSize, double medianValue_d, +size_t r1, size_t s1, size_t e1); +void SZ_compress_args_double_NoCkRnge_2D_subblock(unsigned char* compressedBytes, double *oriData, double realPrecision, size_t *outSize, double valueRangeSize, double medianValue_d, +size_t r2, size_t r1, size_t s2, size_t s1, size_t e2, size_t e1); +void SZ_compress_args_double_NoCkRnge_3D_subblock(unsigned char* compressedBytes, double *oriData, double realPrecision, size_t *outSize, double valueRangeSize, double medianValue_d, +size_t r3, size_t r2, size_t r1, size_t s3, size_t s2, size_t s1, size_t e3, size_t e2, size_t e1); +void SZ_compress_args_double_NoCkRnge_4D_subblock(unsigned char* compressedBytes, double *oriData, double realPrecision, size_t *outSize, double valueRangeSize, double medianValue_d, +size_t r4, size_t r3, size_t r2, size_t r1, size_t s4, size_t s3, size_t s2, size_t s1, size_t e4, size_t e3, size_t e2, size_t e1); + +unsigned int optimize_intervals_double_1D_subblock(double *oriData, double realPrecision, size_t r1, size_t s1, size_t e1); +unsigned int optimize_intervals_double_2D_subblock(double *oriData, double realPrecision, size_t r1, size_t r2, size_t s1, size_t s2, size_t e1, size_t e2); 
+unsigned int optimize_intervals_double_3D_subblock(double *oriData, double realPrecision, size_t r1, size_t r2, size_t r3, size_t s1, size_t s2, size_t s3, size_t e1, size_t e2, size_t e3); +unsigned int optimize_intervals_double_4D_subblock(double *oriData, double realPrecision, size_t r1, size_t r2, size_t r3, size_t r4, size_t s1, size_t s2, size_t s3, size_t s4, size_t e1, size_t e2, size_t e3, size_t e4); + +TightDataPointStorageD* SZ_compress_double_1D_MDQ_subblock(double *oriData, double realPrecision, double valueRangeSize, double medianValue_d, +size_t r1, size_t s1, size_t e1); +TightDataPointStorageD* SZ_compress_double_2D_MDQ_subblock(double *oriData, double realPrecision, double valueRangeSize, double medianValue_d, +size_t r1, size_t r2, size_t s1, size_t s2, size_t e1, size_t e2); +TightDataPointStorageD* SZ_compress_double_3D_MDQ_subblock(double *oriData, double realPrecision, double valueRangeSize, double medianValue_d, +size_t r1, size_t r2, size_t r3, size_t s1, size_t s2, size_t s3, size_t e1, size_t e2, size_t e3); +TightDataPointStorageD* SZ_compress_double_4D_MDQ_subblock(double *oriData, double realPrecision, double valueRangeSize, double medianValue_d, +size_t r1, size_t r2, size_t r3, size_t r4, size_t s1, size_t s2, size_t s3, size_t s4, size_t e1, size_t e2, size_t e3, size_t e4); + +unsigned int optimize_intervals_double_2D_with_freq_and_dense_pos(double *oriData, size_t r1, size_t r2, double realPrecision, double * dense_pos, double * max_freq, double * mean_freq); +unsigned int optimize_intervals_double_3D_with_freq_and_dense_pos(double *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, double * dense_pos, double * max_freq, double * mean_freq); +unsigned char * SZ_compress_double_2D_MDQ_nonblocked_with_blocked_regression(double *oriData, size_t r1, size_t r2, double realPrecision, size_t * comp_size); +unsigned char * SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(double *oriData, size_t r1, size_t r2, 
size_t r3, double realPrecision, size_t * comp_size); + + +#ifdef __cplusplus +} +#endif + +#endif /* ----- #ifndef _SZ_Double_H ----- */ + diff --git a/deps/SZ/sz/include/sz_double_pwr.h b/deps/SZ/sz/include/sz_double_pwr.h new file mode 100644 index 0000000000000000000000000000000000000000..421895adbb467b171f4315338e6addd8540a7c24 --- /dev/null +++ b/deps/SZ/sz/include/sz_double_pwr.h @@ -0,0 +1,57 @@ +/** + * @file sz_double_pwr.h + * @author Sheng Di + * @date July, 2017 + * @brief Header file for the sz_double_pwr.c. + * (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. + */ + +#ifndef _SZ_Double_PWR_H +#define _SZ_Double_PWR_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include <stdint.h> +#include <stdbool.h> + +void compute_segment_precisions_double_1D(double *oriData, size_t dataLength, double* pwrErrBound, unsigned char* pwrErrBoundBytes, double globalPrecision); +unsigned int optimize_intervals_double_1D_pwr(double *oriData, size_t dataLength, double* pwrErrBound); +void compute_segment_precisions_double_2D(double *oriData, double* pwrErrBound, +size_t r1, size_t r2, size_t R2, size_t edgeSize, unsigned char* pwrErrBoundBytes, double Min, double Max, double globalPrecision); +unsigned int optimize_intervals_double_2D_pwr(double *oriData, size_t r1, size_t r2, size_t R2, size_t edgeSize, double* pwrErrBound); +void compute_segment_precisions_double_3D(double *oriData, double* pwrErrBound, +size_t r1, size_t r2, size_t r3, size_t R2, size_t R3, size_t edgeSize, unsigned char* pwrErrBoundBytes, double Min, double Max, double globalPrecision); +unsigned int optimize_intervals_double_3D_pwr(double *oriData, size_t r1, size_t r2, size_t r3, size_t R2, size_t R3, size_t edgeSize, double* pwrErrBound); +void SZ_compress_args_double_NoCkRngeNoGzip_1D_pwr(unsigned char** newByteData, double *oriData, double globalPrecision, size_t dataLength, size_t *outSize, double min, double max); +void
SZ_compress_args_double_NoCkRngeNoGzip_2D_pwr(unsigned char** newByteData, double *oriData, double globalPrecision, size_t r1, size_t r2, +size_t *outSize, double min, double max); +void SZ_compress_args_double_NoCkRngeNoGzip_3D_pwr(unsigned char** newByteData, double *oriData, double globalPrecision, +size_t r1, size_t r2, size_t r3, size_t *outSize, double min, double max); + +void createRangeGroups_double(double** posGroups, double** negGroups, int** posFlags, int** negFlags); +void compressGroupIDArray_double(char* groupID, TightDataPointStorageD* tdps); +TightDataPointStorageD* SZ_compress_double_1D_MDQ_pwrGroup(double* oriData, size_t dataLength, int errBoundMode, +double absErrBound, double relBoundRatio, double pwrErrRatio, double valueRangeSize, double medianValue_f); +void SZ_compress_args_double_NoCkRngeNoGzip_1D_pwrgroup(unsigned char** newByteData, double *oriData, +size_t dataLength, double absErrBound, double relBoundRatio, double pwrErrRatio, double valueRangeSize, double medianValue_f, size_t *outSize); + +void SZ_compress_args_double_NoCkRngeNoGzip_1D_pwr_pre_log(unsigned char** newByteData, double *oriData, double globalPrecision, size_t dataLength, size_t *outSize, double min, double max); +void SZ_compress_args_double_NoCkRngeNoGzip_2D_pwr_pre_log(unsigned char** newByteData, double *oriData, double globalPrecision, size_t r1, size_t r2, size_t *outSize, double min, double max); +void SZ_compress_args_double_NoCkRngeNoGzip_3D_pwr_pre_log(unsigned char** newByteData, double *oriData, double globalPrecision, size_t r1, size_t r2, size_t r3, size_t *outSize, double min, double max); + +void SZ_compress_args_double_NoCkRngeNoGzip_1D_pwr_pre_log_MSST19(unsigned char** newByteData, double *oriData, double pwrErrRatio, size_t dataLength, size_t *outSize, double valueRangeSize, double medianValue_f, + unsigned char* signs, bool* positive, double min, double max, double nearZero); +void 
SZ_compress_args_double_NoCkRngeNoGzip_2D_pwr_pre_log_MSST19(unsigned char** newByteData, double *oriData, double pwrErrRatio, size_t r1, size_t r2, size_t *outSize, double valueRangeSize, + unsigned char* signs, bool* positive, double min, double max, double nearZero); +void SZ_compress_args_double_NoCkRngeNoGzip_3D_pwr_pre_log_MSST19(unsigned char** newByteData, double *oriData, double pwrErrRatio, size_t r1, size_t r2, size_t r3, size_t *outSize, double valueRangeSize, + unsigned char* signs, bool* positive, double min, double max, double nearZero); + +#ifdef __cplusplus +} +#endif + +#endif /* ----- #ifndef _SZ_Double_PWR_H ----- */ + diff --git a/deps/SZ/sz/include/sz_double_ts.h b/deps/SZ/sz/include/sz_double_ts.h new file mode 100644 index 0000000000000000000000000000000000000000..581d20ddf58ba77f61b70bd1042a352f482919ef --- /dev/null +++ b/deps/SZ/sz/include/sz_double_ts.h @@ -0,0 +1,27 @@ +/** + * @file sz_double_ts.h + * @author Sheng Di + * @date May, 2018 + * @brief Header file for the sz_double_ts.c. + * (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. 
+ */ +#include "TightDataPointStorageD.h" + +#ifndef _SZ_Double_TS_H +#define _SZ_Double_TS_H + +#ifdef __cplusplus +extern "C" { +#endif +unsigned int optimize_intervals_double_1D_ts(double *oriData, size_t dataLength, double* preData, double realPrecision); + +TightDataPointStorageD* SZ_compress_double_1D_MDQ_ts(double *oriData, size_t dataLength, sz_multisteps* multisteps, +double realPrecision, double valueRangeSize, double medianValue_d); + +#ifdef __cplusplus +} +#endif + +#endif /* ----- #ifndef _SZ_Double_TS_H ----- */ + diff --git a/deps/SZ/sz/include/sz_float.h b/deps/SZ/sz/include/sz_float.h new file mode 100644 index 0000000000000000000000000000000000000000..9557ee0593ff4b84a9272075828954d8ff9890a4 --- /dev/null +++ b/deps/SZ/sz/include/sz_float.h @@ -0,0 +1,153 @@ +/** + * @file sz_float.h + * @author Sheng Di + * @date July, 2017 + * @brief Header file for the sz_float.c. + * (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. 
+ */ +#include "DynamicFloatArray.h" + +#ifndef _SZ_Float_H +#define _SZ_Float_H + +#ifdef __cplusplus +extern "C" { +#endif +unsigned char* SZ_skip_compress_float(float* data, size_t dataLength, size_t* outSize); + +void computeReqLength_float(double realPrecision, short radExpo, int* reqLength, float* medianValue); +short computeReqLength_float_MSST19(double realPrecision); + +unsigned int optimize_intervals_float_1D(float *oriData, size_t dataLength, double realPrecision); +unsigned int optimize_intervals_float_2D(float *oriData, size_t r1, size_t r2, double realPrecision); +unsigned int optimize_intervals_float_3D(float *oriData, size_t r1, size_t r2, size_t r3, double realPrecision); +unsigned int optimize_intervals_float_4D(float *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision); + +unsigned int optimize_intervals_and_compute_dense_position_float_1D(float *oriData, size_t dataLength, double realPrecision, float * dense_pos); +unsigned int optimize_intervals_and_compute_dense_position_float_3D(float *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, float * dense_pos); +unsigned int optimize_intervals_float_3D_with_freq_and_dense_pos(float *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, float * dense_pos, float * max_freq, float * mean_freq); +unsigned int optimize_intervals_float_3D_opt(float *oriData, size_t r1, size_t r2, size_t r3, double realPrecision); +unsigned int optimize_intervals_float_2D_opt(float *oriData, size_t r1, size_t r2, double realPrecision); +unsigned int optimize_intervals_float_1D_opt(float *oriData, size_t dataLength, double realPrecision); + +unsigned int optimize_intervals_float_1D_opt_MSST19(float *oriData, size_t dataLength, double realPrecision); +unsigned int optimize_intervals_float_2D_opt_MSST19(float *oriData, size_t r1, size_t r2, double realPrecision); +unsigned int optimize_intervals_float_3D_opt_MSST19(float *oriData, size_t r1, size_t r2, size_t r3, double 
realPrecision); + +TightDataPointStorageF* SZ_compress_float_1D_MDQ(float *oriData, +size_t dataLength, float realPrecision, float valueRangeSize, float medianValue_f); + +void SZ_compress_args_float_StoreOriData(float* oriData, size_t dataLength, unsigned char** newByteData, size_t *outSize); + +char SZ_compress_args_float_NoCkRngeNoGzip_1D(int cmprType, unsigned char** newByteData, float *oriData, +size_t dataLength, double realPrecision, size_t *outSize, float valueRangeSize, float medianValue_f); + +TightDataPointStorageF* SZ_compress_float_2D_MDQ(float *oriData, size_t r1, size_t r2, float realPrecision, float valueRangeSize, float medianValue_f); + +char SZ_compress_args_float_NoCkRngeNoGzip_2D(int cmprType, unsigned char** newByteData, float *oriData, size_t r1, size_t r2, double realPrecision, size_t *outSize, float valueRangeSize, float medianValue_f); + +TightDataPointStorageF* SZ_compress_float_3D_MDQ(float *oriData, size_t r1, size_t r2, size_t r3, float realPrecision, float valueRangeSize, float medianValue_f); + +char SZ_compress_args_float_NoCkRngeNoGzip_3D(int cmprType, unsigned char** newByteData, float *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t *outSize, float valueRangeSize, float medianValue_f); + +size_t SZ_compress_float_1D_MDQ_RA_block(float * block_ori_data, float * mean, size_t dim_0, size_t block_dim_0, double realPrecision, int * type, float * unpredictable_data); +size_t SZ_compress_float_2D_MDQ_RA_block(float * block_ori_data, float * mean, size_t dim_0, size_t dim_1, size_t block_dim_0, size_t block_dim_1, double realPrecision, float * P0, float * P1, int * type, float * unpredictable_data); + +size_t SZ_compress_float_1D_MDQ_RA_block_1D_pred(float * block_ori_data, float * mean, float dense_pos, size_t dim_0, size_t block_dim_0, double realPrecision, int * type, DynamicFloatArray * unpredictable_data); +size_t SZ_compress_float_2D_MDQ_RA_block_2D_pred(float * block_ori_data, float * mean, float dense_pos, 
size_t dim_0, size_t dim_1, size_t block_dim_0, size_t block_dim_1, double realPrecision, float * P0, float * P1, int * type, float * unpredictable_data); +size_t SZ_compress_float_3D_MDQ_RA_block(float * block_ori_data, float * mean, size_t dim_0, size_t dim_1, size_t dim_2, size_t block_dim_0, size_t block_dim_1, size_t block_dim_2, float realPrecision, float * P0, float * P1, int * type, float * unpredictable_data); +size_t SZ_compress_float_3D_MDQ_RA_block_3D_pred(float * block_ori_data, float * mean, float dense_pos, size_t dim_0, size_t dim_1, size_t dim_2, size_t block_dim_0, size_t block_dim_1, size_t block_dim_2, double realPrecision, float * P0, float * P1, int * type, float * unpredictable_data); +size_t SZ_compress_float_3D_MDQ_RA_block_adaptive(float * block_ori_data, float * mean, size_t dim_0, size_t dim_1, size_t dim_2, size_t block_dim_0, size_t block_dim_1, size_t block_dim_2, double realPrecision, float * P0, float * P1, int * type, float * unpredictable_data); +//unsigned short SZ_compress_float_3D_MDQ_RA_block_1D_pred(float * block_ori_data, float * mean, float dense_pos, size_t dim_0, size_t dim_1, size_t dim_2, int block_dim_0, int block_dim_1, int block_dim_2, double realPrecision, int * type, float * unpredictable_data); +size_t SZ_compress_float_3D_MDQ_RA_block_3D_pred_flush_after_compare(float * block_ori_data, float * mean, float dense_pos, size_t dim_0, size_t dim_1, size_t dim_2, size_t block_dim_0, size_t block_dim_1, size_t block_dim_2, double realPrecision, float * P0, float * P1, int * type, float * unpredictable_data); +size_t SZ_compress_float_3D_MDQ_RA_block_2_layers(float * block_ori_data, float * mean, size_t dim_0, size_t dim_1, size_t dim_2, size_t block_dim_0, size_t block_dim_1, size_t block_dim_2, double realPrecision, float * P0, float * P1, float * P_, int * type, float * unpredictable_data); +size_t SZ_compress_float_3D_MDQ_pred_by_regression(float * block_ori_data, size_t dim_0, size_t dim_1, size_t dim_2, size_t 
block_dim_0, size_t block_dim_1, size_t block_dim_2, double realPrecision, float * reg_params, int * type, float * unpredictable_data); +void SZ_blocked_regression(float * block_ori_data, size_t dim_0, size_t dim_1, size_t dim_2, size_t block_dim_0, size_t block_dim_1, size_t block_dim_2, float *params); +unsigned char * SZ_compress_float_3D_MDQ_RA_all_by_regression(float *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t * comp_size); +float SZ_compress_float_3D_MDQ_RA_block_no_mean(float * block_ori_data, size_t dim_0, size_t dim_1, size_t dim_2, size_t block_dim_0, size_t block_dim_1, size_t block_dim_2, double realPrecision, float * P0, float * P1, int * type, unsigned short * unpred_count, float * unpredictable_data); +float SZ_compress_float_3D_MDQ_pred_by_regression_with_err(float * block_ori_data, size_t dim_0, size_t dim_1, size_t dim_2, size_t block_dim_0, size_t block_dim_1, size_t block_dim_2, double realPrecision, float * reg_params, int * type, unsigned short * unpred_count, float * unpredictable_data); +unsigned char * SZ_compress_float_3D_MDQ_RA_blocked_with_regression(float *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t * comp_size); +void decompressDataSeries_float_3D_RA_blocked_with_regression(float** data, size_t r1, size_t r2, size_t r3, unsigned char* comp_data); + +unsigned char * SZ_compress_float_1D_MDQ_RA(float *oriData, size_t r1, double realPrecision, size_t * comp_size); +unsigned char * SZ_compress_float_2D_MDQ_RA(float *oriData, size_t r1, size_t r2, double realPrecision, size_t * comp_size); +unsigned char * SZ_compress_float_2D_MDQ_nonblocked(float *oriData, size_t r1, size_t r2, double realPrecision, size_t * comp_size); +unsigned char * SZ_compress_float_3D_MDQ_RA(float *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t * comp_size); +unsigned char * SZ_compress_float_3D_MDQ_nonblocked(float *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t * 
comp_size); +unsigned char * SZ_compress_float_3D_MDQ_nonblocked_ori(float *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t * comp_size); +unsigned char * SZ_compress_float_3D_MDQ_nonblocked_multi_means(float *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t * comp_size); +unsigned char * SZ_compress_float_3D_MDQ_RA_multi_means(float *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t * comp_size); +unsigned char * SZ_compress_float_3D_MDQ_nonblocked_adaptive(float *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t * comp_size); + +unsigned char * SZ_compress_float_2D_MDQ_decompression_random_access_with_blocked_regression(float *oriData, size_t r1, size_t r2, double realPrecision, size_t * comp_size); +unsigned char * SZ_compress_float_1D_MDQ_decompression_random_access_with_blocked_regression(float *oriData, size_t r1, double realPrecision, size_t * comp_size); + +TightDataPointStorageF* SZ_compress_float_4D_MDQ(float *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision, float valueRangeSize, float medianValue_f); + +char SZ_compress_args_float_NoCkRngeNoGzip_4D(unsigned char** newByteData, float *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision, size_t *outSize, float valueRangeSize, float medianValue_f); + +TightDataPointStorageF* SZ_compress_float_1D_MDQ_MSST19(float *oriData, +size_t dataLength, double realPrecision, float valueRangeSize, float medianValue_f); +TightDataPointStorageF* SZ_compress_float_2D_MDQ_MSST19(float *oriData, size_t r1, size_t r2, double realPrecision, float valueRangeSize, float medianValue_f); +TightDataPointStorageF* SZ_compress_float_3D_MDQ_MSST19(float *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, float valueRangeSize, float medianValue_f); + +void SZ_compress_args_float_withinRange(unsigned char** newByteData, float *oriData, size_t dataLength, size_t *outSize); + +/*int 
SZ_compress_args_float_wRngeNoGzip(unsigned char** newByteData, float *oriData, +size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize, +int errBoundMode, double absErr_Bound, double relBoundRatio, double pwrErrRatio);*/ + +int SZ_compress_args_float(int cmprType, int withRegression, unsigned char** newByteData, float *oriData, +size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize, +int errBoundMode, double absErr_Bound, double relBoundRatio, double pwRelBoundRatio); + +int SZ_compress_args_float_subblock(unsigned char* compressedBytes, float *oriData, +size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, +size_t s5, size_t s4, size_t s3, size_t s2, size_t s1, +size_t e5, size_t e4, size_t e3, size_t e2, size_t e1, +size_t *outSize, int errBoundMode, double absErr_Bound, double relBoundRatio); + +void SZ_compress_args_float_NoCkRnge_1D_subblock(unsigned char* compressedBytes, float *oriData, double realPrecision, size_t *outSize, float valueRangeSize, float medianValue_f, +size_t r1, size_t s1, size_t e1); + +void SZ_compress_args_float_NoCkRnge_2D_subblock(unsigned char* compressedBytes, float *oriData, double realPrecision, size_t *outSize, float valueRangeSize, float medianValue_f, +size_t r2, size_t r1, size_t s2, size_t s1, size_t e2, size_t e1); + +void SZ_compress_args_float_NoCkRnge_3D_subblock(unsigned char* compressedBytes, float *oriData, double realPrecision, size_t *outSize, float valueRangeSize, float medianValue_f, +size_t r3, size_t r2, size_t r1, size_t s3, size_t s2, size_t s1, size_t e3, size_t e2, size_t e1); + +void SZ_compress_args_float_NoCkRnge_4D_subblock(unsigned char* compressedBytes, float *oriData, double realPrecision, size_t *outSize, float valueRangeSize, float medianValue_f, +size_t r4, size_t r3, size_t r2, size_t r1, size_t s4, size_t s3, size_t s2, size_t s1, size_t e4, size_t e3, size_t e2, size_t e1); + +unsigned int optimize_intervals_float_1D_subblock(float *oriData, double realPrecision, 
size_t r1, size_t s1, size_t e1); +unsigned int optimize_intervals_float_2D_subblock(float *oriData, double realPrecision, size_t r1, size_t r2, size_t s1, size_t s2, size_t e1, size_t e2); +unsigned int optimize_intervals_float_3D_subblock(float *oriData, double realPrecision, size_t r1, size_t r2, size_t r3, size_t s1, size_t s2, size_t s3, size_t e1, size_t e2, size_t e3); +unsigned int optimize_intervals_float_4D_subblock(float *oriData, double realPrecision, size_t r1, size_t r2, size_t r3, size_t r4, size_t s1, size_t s2, size_t s3, size_t s4, size_t e1, size_t e2, size_t e3, size_t e4); + +TightDataPointStorageF* SZ_compress_float_1D_MDQ_subblock(float *oriData, double realPrecision, float valueRangeSize, float medianValue_f, +size_t r1, size_t s1, size_t e1); + +TightDataPointStorageF* SZ_compress_float_2D_MDQ_subblock(float *oriData, double realPrecision, float valueRangeSize, float medianValue_f, +size_t r1, size_t r2, size_t s1, size_t s2, size_t e1, size_t e2); + +TightDataPointStorageF* SZ_compress_float_3D_MDQ_subblock(float *oriData, double realPrecision, float valueRangeSize, float medianValue_f, +size_t r1, size_t r2, size_t r3, size_t s1, size_t s2, size_t s3, size_t e1, size_t e2, size_t e3); + +TightDataPointStorageF* SZ_compress_float_4D_MDQ_subblock(float *oriData, double realPrecision, float valueRangeSize, float medianValue_f, +size_t r1, size_t r2, size_t r3, size_t r4, size_t s1, size_t s2, size_t s3, size_t s4, size_t e1, size_t e2, size_t e3, size_t e4); + + +unsigned int optimize_intervals_float_2D_with_freq_and_dense_pos(float *oriData, size_t r1, size_t r2, double realPrecision, float * dense_pos, float * max_freq, float * mean_freq); +unsigned int optimize_intervals_float_3D_with_freq_and_dense_pos(float *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, float * dense_pos, float * max_freq, float * mean_freq); + +unsigned char * SZ_compress_float_2D_MDQ_nonblocked_with_blocked_regression(float *oriData, size_t r1, 
size_t r2, float realPrecision, size_t * comp_size); +unsigned char * SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(float *oriData, size_t r1, size_t r2, size_t r3, float realPrecision, size_t * comp_size); +unsigned char * SZ_compress_float_3D_MDQ_random_access_with_blocked_regression(float *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t * comp_size); +unsigned char * SZ_compress_float_3D_MDQ_decompression_random_access_with_blocked_regression(float *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t * comp_size); +#ifdef __cplusplus +} +#endif + +#endif /* ----- #ifndef _SZ_Float_H ----- */ + diff --git a/deps/SZ/sz/include/sz_float_pwr.h b/deps/SZ/sz/include/sz_float_pwr.h new file mode 100644 index 0000000000000000000000000000000000000000..7bbda8dd479504c6603e427e410c0a07d844a53c --- /dev/null +++ b/deps/SZ/sz/include/sz_float_pwr.h @@ -0,0 +1,66 @@ +/** + * @file sz_float_pwr.h + * @author Sheng Di + * @date July, 2017 + * @brief Header file for the sz_float_pwr.c. + * (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory.
+ */ + +#ifndef _SZ_Float_PWR_H +#define _SZ_Float_PWR_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include <stdint.h> +#include <stdbool.h> + +void compute_segment_precisions_float_1D(float *oriData, size_t dataLength, float* pwrErrBound, unsigned char* pwrErrBoundBytes, double globalPrecision); +unsigned int optimize_intervals_float_1D_pwr(float *oriData, size_t dataLength, float* pwrErrBound); + +void compute_segment_precisions_float_2D(float *oriData, float* pwrErrBound, +size_t r1, size_t r2, size_t R2, size_t edgeSize, unsigned char* pwrErrBoundBytes, float Min, float Max, double globalPrecision); + +unsigned int optimize_intervals_float_2D_pwr(float *oriData, size_t r1, size_t r2, size_t R2, size_t edgeSize, float* pwrErrBound); + +void compute_segment_precisions_float_3D(float *oriData, float* pwrErrBound, +size_t r1, size_t r2, size_t r3, size_t R2, size_t R3, size_t edgeSize, unsigned char* pwrErrBoundBytes, float Min, float Max, double globalPrecision); + +unsigned int optimize_intervals_float_3D_pwr(float *oriData, size_t r1, size_t r2, size_t r3, size_t R2, size_t R3, size_t edgeSize, float* pwrErrBound); + +void SZ_compress_args_float_NoCkRngeNoGzip_1D_pwr(unsigned char** newByteData, float *oriData, double globalPrecision, size_t dataLength, size_t *outSize, float min, float max); + +void SZ_compress_args_float_NoCkRngeNoGzip_2D_pwr(unsigned char** newByteData, float *oriData, double globalPrecision, size_t r1, size_t r2, +size_t *outSize, float min, float max); + +void SZ_compress_args_float_NoCkRngeNoGzip_3D_pwr(unsigned char** newByteData, float *oriData, double globalPrecision, size_t r1, size_t r2, +size_t r3, size_t *outSize, float min, float max); + +void createRangeGroups_float(float** posGroups, float** negGroups, int** posFlags, int** negFlags); +void compressGroupIDArray_float(char* groupID, TightDataPointStorageF* tdps); +int* generateGroupLowerBounds(); +TightDataPointStorageF* SZ_compress_float_1D_MDQ_pwrGroup(float* oriData, size_t dataLength, int
errBoundMode, +double absErrBound, double relBoundRatio, double pwrErrRatio, float valueRangeSize, float medianValue_f); + +void SZ_compress_args_float_NoCkRngeNoGzip_1D_pwrgroup(unsigned char** newByteData, float *oriData, +size_t dataLength, double absErrBound, double relBoundRatio, double pwrErrRatio, float valueRangeSize, float medianValue_f, size_t *outSize); + +void SZ_compress_args_float_NoCkRngeNoGzip_1D_pwr_pre_log(unsigned char** newByteData, float *oriData, double pwrErrRatio, size_t dataLength, size_t *outSize, float min, float max); +void SZ_compress_args_float_NoCkRngeNoGzip_2D_pwr_pre_log(unsigned char** newByteData, float *oriData, double pwrErrRatio, size_t r1, size_t r2, size_t *outSize, float min, float max); +void SZ_compress_args_float_NoCkRngeNoGzip_3D_pwr_pre_log(unsigned char** newByteData, float *oriData, double pwrErrRatio, size_t r1, size_t r2, size_t r3, size_t *outSize, float min, float max); + +void SZ_compress_args_float_NoCkRngeNoGzip_1D_pwr_pre_log_MSST19(unsigned char** newByteData, float *oriData, double pwrErrRatio, size_t dataLength, size_t *outSize, float valueRangeSize, float medianValue_f, + unsigned char* signs, bool* positive, float min, float max, float nearZero); +void SZ_compress_args_float_NoCkRngeNoGzip_2D_pwr_pre_log_MSST19(unsigned char** newByteData, float *oriData, double pwrErrRatio, size_t r1, size_t r2, size_t *outSize, float valueRangeSize, + unsigned char* signs, bool* positive, float min, float max, float nearZero); +void SZ_compress_args_float_NoCkRngeNoGzip_3D_pwr_pre_log_MSST19(unsigned char** newByteData, float *oriData, double pwrErrRatio, size_t r1, size_t r2, size_t r3, size_t *outSize, float valueRangeSize, + unsigned char* signs, bool* positive, float min, float max, float nearZero); + +#ifdef __cplusplus +} +#endif + +#endif /* ----- #ifndef _SZ_Float_PWR_H ----- */ + diff --git a/deps/SZ/sz/include/sz_float_ts.h b/deps/SZ/sz/include/sz_float_ts.h new file mode 100644 index 
0000000000000000000000000000000000000000..9f2301da99ebc426c695d7196df44f320f2dd87e --- /dev/null +++ b/deps/SZ/sz/include/sz_float_ts.h @@ -0,0 +1,27 @@ +/** + * @file sz_float_ts.h + * @author Sheng Di + * @date May, 2018 + * @brief Header file for the sz_float_ts.c. + * (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. + */ +#include "TightDataPointStorageF.h" + +#ifndef _SZ_Float_TS_H +#define _SZ_Float_TS_H + +#ifdef __cplusplus +extern "C" { +#endif +unsigned int optimize_intervals_float_1D_ts(float *oriData, size_t dataLength, float* preData, double realPrecision); + +TightDataPointStorageF* SZ_compress_float_1D_MDQ_ts(float *oriData, size_t dataLength, sz_multisteps* multisteps, +double realPrecision, float valueRangeSize, float medianValue_f); + +#ifdef __cplusplus +} +#endif + +#endif /* ----- #ifndef _SZ_Float_TS_H ----- */ + diff --git a/deps/SZ/sz/include/sz_int16.h b/deps/SZ/sz/include/sz_int16.h new file mode 100644 index 0000000000000000000000000000000000000000..0ad62c5b9038b621d940e6cf3926a206a648d5e3 --- /dev/null +++ b/deps/SZ/sz/include/sz_int16.h @@ -0,0 +1,48 @@ +/** + * @file sz_int16.h + * @author Sheng Di + * @date July, 2017 + * @brief Header file for the sz_int16.c. + * (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. 
+ */ + +#ifndef _SZ_Int16_H +#define _SZ_Int16_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include <stdint.h> + +unsigned int optimize_intervals_int16_1D(int16_t *oriData, size_t dataLength, double realPrecision); +unsigned int optimize_intervals_int16_2D(int16_t *oriData, size_t r1, size_t r2, double realPrecision); +unsigned int optimize_intervals_int16_3D(int16_t *oriData, size_t r1, size_t r2, size_t r3, double realPrecision); +unsigned int optimize_intervals_int16_4D(int16_t *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision); +TightDataPointStorageI* SZ_compress_int16_1D_MDQ(int16_t *oriData, size_t dataLength, double realPrecision, int64_t valueRangeSize, int64_t minValue); +void SZ_compress_args_int16_StoreOriData(int16_t* oriData, size_t dataLength, TightDataPointStorageI* tdps, unsigned char** newByteData, size_t *outSize); +void SZ_compress_args_int16_NoCkRngeNoGzip_1D(unsigned char** newByteData, int16_t *oriData, +size_t dataLength, double realPrecision, size_t *outSize, int64_t valueRangeSize, int16_t minValue); +TightDataPointStorageI* SZ_compress_int16_2D_MDQ(int16_t *oriData, size_t r1, size_t r2, double realPrecision, int64_t valueRangeSize, int64_t minValue); +TightDataPointStorageI* SZ_compress_int16_3D_MDQ(int16_t *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, int64_t valueRangeSize, int64_t minValue); +void SZ_compress_args_int16_NoCkRngeNoGzip_3D(unsigned char** newByteData, int16_t *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t *outSize, int64_t valueRangeSize, int64_t minValue); +TightDataPointStorageI* SZ_compress_int16_4D_MDQ(int16_t *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision, int64_t valueRangeSize, int64_t minValue); +void SZ_compress_args_int16_NoCkRngeNoGzip_4D(unsigned char** newByteData, int16_t *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision, +size_t *outSize, int64_t valueRangeSize, int64_t minValue); +void
SZ_compress_args_int16_withinRange(unsigned char** newByteData, int16_t *oriData, size_t dataLength, size_t *outSize); + +int SZ_compress_args_int16_wRngeNoGzip(unsigned char** newByteData, int16_t *oriData, +size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize, +int errBoundMode, double absErr_Bound, double relBoundRatio); + +int SZ_compress_args_int16(unsigned char** newByteData, int16_t *oriData, +size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize, +int errBoundMode, double absErr_Bound, double relBoundRatio); + +#ifdef __cplusplus +} +#endif + +#endif /* ----- #ifndef _SZ_Int16_H ----- */ + diff --git a/deps/SZ/sz/include/sz_int32.h b/deps/SZ/sz/include/sz_int32.h new file mode 100644 index 0000000000000000000000000000000000000000..a87825d0fcfcaa9757581d8ff1f05ea0161ddc7e --- /dev/null +++ b/deps/SZ/sz/include/sz_int32.h @@ -0,0 +1,48 @@ +/** + * @file sz_int32.h + * @author Sheng Di + * @date July, 2017 + * @brief Header file for the sz_int32.c. + * (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. 
+ */ + +#ifndef _SZ_Int32_H +#define _SZ_Int32_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include <stdint.h> + +unsigned int optimize_intervals_int32_1D(int32_t *oriData, size_t dataLength, double realPrecision); +unsigned int optimize_intervals_int32_2D(int32_t *oriData, size_t r1, size_t r2, double realPrecision); +unsigned int optimize_intervals_int32_3D(int32_t *oriData, size_t r1, size_t r2, size_t r3, double realPrecision); +unsigned int optimize_intervals_int32_4D(int32_t *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision); +TightDataPointStorageI* SZ_compress_int32_1D_MDQ(int32_t *oriData, size_t dataLength, double realPrecision, int64_t valueRangeSize, int64_t minValue); +void SZ_compress_args_int32_StoreOriData(int32_t* oriData, size_t dataLength, TightDataPointStorageI* tdps, unsigned char** newByteData, size_t *outSize); +void SZ_compress_args_int32_NoCkRngeNoGzip_1D(unsigned char** newByteData, int32_t *oriData, +size_t dataLength, double realPrecision, size_t *outSize, int64_t valueRangeSize, int32_t minValue); +TightDataPointStorageI* SZ_compress_int32_2D_MDQ(int32_t *oriData, size_t r1, size_t r2, double realPrecision, int64_t valueRangeSize, int64_t minValue); +TightDataPointStorageI* SZ_compress_int32_3D_MDQ(int32_t *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, int64_t valueRangeSize, int64_t minValue); +void SZ_compress_args_int32_NoCkRngeNoGzip_3D(unsigned char** newByteData, int32_t *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t *outSize, int64_t valueRangeSize, int64_t minValue); +TightDataPointStorageI* SZ_compress_int32_4D_MDQ(int32_t *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision, int64_t valueRangeSize, int64_t minValue); +void SZ_compress_args_int32_NoCkRngeNoGzip_4D(unsigned char** newByteData, int32_t *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision, +size_t *outSize, int64_t valueRangeSize, int64_t minValue); +void
SZ_compress_args_int32_withinRange(unsigned char** newByteData, int32_t *oriData, size_t dataLength, size_t *outSize); + +int SZ_compress_args_int32_wRngeNoGzip(unsigned char** newByteData, int32_t *oriData, +size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize, +int errBoundMode, double absErr_Bound, double relBoundRatio); + +int SZ_compress_args_int32(unsigned char** newByteData, int32_t *oriData, +size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize, +int errBoundMode, double absErr_Bound, double relBoundRatio); + +#ifdef __cplusplus +} +#endif + +#endif /* ----- #ifndef _SZ_Int32_H ----- */ + diff --git a/deps/SZ/sz/include/sz_int64.h b/deps/SZ/sz/include/sz_int64.h new file mode 100644 index 0000000000000000000000000000000000000000..b7213b2315b551385c5e0c22d3dbd16a07291746 --- /dev/null +++ b/deps/SZ/sz/include/sz_int64.h @@ -0,0 +1,48 @@ +/** + * @file sz_int64.h + * @author Sheng Di + * @date July, 2017 + * @brief Header file for the sz_int64.c. + * (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. 
+ */ + +#ifndef _SZ_Int64_H +#define _SZ_Int64_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +unsigned int optimize_intervals_int64_1D(int64_t *oriData, size_t dataLength, double realPrecision); +unsigned int optimize_intervals_int64_2D(int64_t *oriData, size_t r1, size_t r2, double realPrecision); +unsigned int optimize_intervals_int64_3D(int64_t *oriData, size_t r1, size_t r2, size_t r3, double realPrecision); +unsigned int optimize_intervals_int64_4D(int64_t *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision); +TightDataPointStorageI* SZ_compress_int64_1D_MDQ(int64_t *oriData, size_t dataLength, double realPrecision, int64_t valueRangeSize, int64_t minValue); +void SZ_compress_args_int64_StoreOriData(int64_t* oriData, size_t dataLength, TightDataPointStorageI* tdps, unsigned char** newByteData, size_t *outSize); +void SZ_compress_args_int64_NoCkRngeNoGzip_1D(unsigned char** newByteData, int64_t *oriData, +size_t dataLength, double realPrecision, size_t *outSize, int64_t valueRangeSize, int64_t minValue); +TightDataPointStorageI* SZ_compress_int64_2D_MDQ(int64_t *oriData, size_t r1, size_t r2, double realPrecision, int64_t valueRangeSize, int64_t minValue); +TightDataPointStorageI* SZ_compress_int64_3D_MDQ(int64_t *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, int64_t valueRangeSize, int64_t minValue); +void SZ_compress_args_int64_NoCkRngeNoGzip_3D(unsigned char** newByteData, int64_t *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t *outSize, int64_t valueRangeSize, int64_t minValue); +TightDataPointStorageI* SZ_compress_int64_4D_MDQ(int64_t *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision, int64_t valueRangeSize, int64_t minValue); +void SZ_compress_args_int64_NoCkRngeNoGzip_4D(unsigned char** newByteData, int64_t *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision, +size_t *outSize, int64_t valueRangeSize, int64_t minValue); +void 
SZ_compress_args_int64_withinRange(unsigned char** newByteData, int64_t *oriData, size_t dataLength, size_t *outSize); + +int SZ_compress_args_int64_wRngeNoGzip(unsigned char** newByteData, int64_t *oriData, +size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize, +int errBoundMode, double absErr_Bound, double relBoundRatio); + +int SZ_compress_args_int64(unsigned char** newByteData, int64_t *oriData, +size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize, +int errBoundMode, double absErr_Bound, double relBoundRatio); + +#ifdef __cplusplus +} +#endif + +#endif /* ----- #ifndef _SZ_Int64_H ----- */ + diff --git a/deps/SZ/sz/include/sz_int8.h b/deps/SZ/sz/include/sz_int8.h new file mode 100644 index 0000000000000000000000000000000000000000..c6ce758a79524ffc25e3803f4e03d31119a4f47d --- /dev/null +++ b/deps/SZ/sz/include/sz_int8.h @@ -0,0 +1,48 @@ +/** + * @file sz_int8.h + * @author Sheng Di + * @date July, 2017 + * @brief Header file for the sz_int8.c. + * (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. 
+ */ + +#ifndef _SZ_Int8_H +#define _SZ_Int8_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +unsigned int optimize_intervals_int8_1D(int8_t *oriData, size_t dataLength, double realPrecision); +unsigned int optimize_intervals_int8_2D(int8_t *oriData, size_t r1, size_t r2, double realPrecision); +unsigned int optimize_intervals_int8_3D(int8_t *oriData, size_t r1, size_t r2, size_t r3, double realPrecision); +unsigned int optimize_intervals_int8_4D(int8_t *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision); +TightDataPointStorageI* SZ_compress_int8_1D_MDQ(int8_t *oriData, size_t dataLength, double realPrecision, int64_t valueRangeSize, int64_t minValue); +void SZ_compress_args_int8_StoreOriData(int8_t* oriData, size_t dataLength, TightDataPointStorageI* tdps, unsigned char** newByteData, size_t *outSize); +void SZ_compress_args_int8_NoCkRngeNoGzip_1D(unsigned char** newByteData, int8_t *oriData, +size_t dataLength, double realPrecision, size_t *outSize, int64_t valueRangeSize, int8_t minValue); +TightDataPointStorageI* SZ_compress_int8_2D_MDQ(int8_t *oriData, size_t r1, size_t r2, double realPrecision, int64_t valueRangeSize, int64_t minValue); +TightDataPointStorageI* SZ_compress_int8_3D_MDQ(int8_t *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, int64_t valueRangeSize, int64_t minValue); +void SZ_compress_args_int8_NoCkRngeNoGzip_3D(unsigned char** newByteData, int8_t *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t *outSize, int64_t valueRangeSize, int64_t minValue); +TightDataPointStorageI* SZ_compress_int8_4D_MDQ(int8_t *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision, int64_t valueRangeSize, int64_t minValue); +void SZ_compress_args_int8_NoCkRngeNoGzip_4D(unsigned char** newByteData, int8_t *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision, +size_t *outSize, int64_t valueRangeSize, int64_t minValue); +void 
SZ_compress_args_int8_withinRange(unsigned char** newByteData, int8_t *oriData, size_t dataLength, size_t *outSize); + +int SZ_compress_args_int8_wRngeNoGzip(unsigned char** newByteData, int8_t *oriData, +size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize, +int errBoundMode, double absErr_Bound, double relBoundRatio); + +int SZ_compress_args_int8(unsigned char** newByteData, int8_t *oriData, +size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize, +int errBoundMode, double absErr_Bound, double relBoundRatio); + +#ifdef __cplusplus +} +#endif + +#endif /* ----- #ifndef _SZ_Int8_H ----- */ + diff --git a/deps/SZ/sz/include/sz_omp.h b/deps/SZ/sz/include/sz_omp.h new file mode 100644 index 0000000000000000000000000000000000000000..cb83acbe9a897967054382fdd8698e5613289913 --- /dev/null +++ b/deps/SZ/sz/include/sz_omp.h @@ -0,0 +1,47 @@ +/** + * @file sz_omp.h + * @author Xin Liang + * @date July, 2017 + * @brief Header file for the sz_omp.c. + * (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. 
+ */ + +#include +#include +#ifdef _OPENMP +#include "omp.h" +#endif +#include "sz.h" + +#ifndef _SZ_OMP_H +#define _SZ_OMP_H + +#ifdef __cplusplus +extern "C" { +#endif + +unsigned char * SZ_compress_float_1D_MDQ_openmp(float *oriData, size_t r1, double realPrecision, size_t * comp_size); +unsigned char * SZ_compress_float_2D_MDQ_openmp(float *oriData, size_t r1, size_t r2, double realPrecision, size_t * comp_size); +unsigned char * SZ_compress_float_3D_MDQ_openmp(float *oriData, size_t r1, size_t r2, size_t r3, float realPrecision, size_t * comp_size); + +void decompressDataSeries_float_1D_openmp(float** data, size_t r1, unsigned char* comp_data); +void decompressDataSeries_float_3D_openmp(float** data, size_t r1, size_t r2, size_t r3, unsigned char* comp_data); +void decompressDataSeries_float_2D_openmp(float** data, size_t r1, size_t r2, unsigned char* comp_data); + +unsigned char * SZ_compress_double_1D_MDQ_openmp(double *oriData, size_t r1, double realPrecision, size_t * comp_size); +unsigned char * SZ_compress_double_2D_MDQ_openmp(double *oriData, size_t r1, size_t r2, double realPrecision, size_t * comp_size); +unsigned char * SZ_compress_double_3D_MDQ_openmp(double *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t * comp_size); + +void decompressDataSeries_double_1D_openmp(double** data, size_t r1, unsigned char* comp_data); +void decompressDataSeries_double_2D_openmp(double** data, size_t r1, size_t r2, unsigned char* comp_data); +void decompressDataSeries_double_3D_openmp(double** data, size_t r1, size_t r2, size_t r3, unsigned char* comp_data); + +//void Huffman_init_openmp(HuffmanTree* huffmanTree, int *s, size_t length, int thread_num); +void Huffman_init_openmp(HuffmanTree* huffmanTree, int *s, size_t length, int thread_num, size_t * freq); + +#ifdef __cplusplus +} +#endif + +#endif /* ----- #ifndef _SZ_OMP_H ----- */ diff --git a/deps/SZ/sz/include/sz_opencl.h b/deps/SZ/sz/include/sz_opencl.h new file mode 100644 index 
0000000000000000000000000000000000000000..693256161c0600c97341033d7c28979eb5c90b9d --- /dev/null +++ b/deps/SZ/sz/include/sz_opencl.h @@ -0,0 +1,68 @@ +//make header C++/C inter-operable +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef SZ_OPENCL_H +#define SZ_OPENCL_H + +#include <stddef.h> + + //opaque pointer for opencl state + struct sz_opencl_state; + + /** + * creates an opencl state for multiple uses of the compressor or + * returns an error code. + * + * \post if return code is SZ_NCES, the state object may only be passed to + * sz_opencl_release or sz_opencl_error_* otherwise it may be used in any + * sz_opencl_* function. + * + * \param[out] state the sz opencl state + * \return SZ_SCES for success or SZ_NCES on error + */ + int sz_opencl_init(struct sz_opencl_state** state); + + /** + * deinitializes an opencl state + * + * \param[in] state the sz opencl state + * \return SZ_SCES + */ + int sz_opencl_release(struct sz_opencl_state** state); + + /** + * returns a human readable error message for the last error received by state + * + * \param[in] state the sz opencl state + * \return a pointer to a string that describes the error + */ + const char* sz_opencl_error_msg(struct sz_opencl_state* state); + + + /** + * returns a numeric code for the last error received by state + * + * \param[in] state the sz opencl state + * \return the numeric error code + */ + int sz_opencl_error_code(struct sz_opencl_state* state); + + /** + * confirms that the sz opencl state is ready to use by performing a vector addition + * + * \param[in] state the sz opencl state + * \return SZ_SCES if the opencl implementation is functioning + */ + int sz_opencl_check(struct sz_opencl_state*); + + unsigned char* sz_compress_float3d_opencl(float* data, size_t r1, size_t r2, size_t r3, double, size_t* out_size); + + +#endif /* SZ_OPENCL_H */ + +//make header C++/C inter-operable +#ifdef __cplusplus +} +#endif diff --git a/deps/SZ/sz/include/sz_stats.h b/deps/SZ/sz/include/sz_stats.h new file 
mode 100644 index 0000000000000000000000000000000000000000..ba0f701ae918a9ac4d0ceb4b7b34f8054c0fea7f --- /dev/null +++ b/deps/SZ/sz/include/sz_stats.h @@ -0,0 +1,58 @@ +/** + * @file sz_stats.h + * @author Sheng Di + * @date July, 2017 + * @brief Declares the sz_stats statistics record and its write/print helper functions. + * (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. + */ + +#ifndef _STATS_H +#define _STATS_H + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct sz_stats +{ + int use_mean; + + size_t blockSize; + + float lorenzoPercent; + float regressionPercent; + size_t lorenzoBlocks; + size_t regressionBlocks; + size_t totalBlocks; + + //size_t huffmanTreeHeight; + size_t huffmanTreeSize; //before the final zstd + size_t huffmanCodingSize; //before the final zstd + float huffmanCompressionRatio; + int huffmanNodeCount; + + size_t unpredictCount; + float unpredictPercent; + + float zstdCompressionRatio; //not available yet + +} sz_stats; + +extern sz_stats sz_stat; + + +void writeBlockInfo(int use_mean, size_t blockSize, size_t regressionBlocks, size_t totalBlocks); +void writeHuffmanInfo(size_t huffmanTreeSize, size_t huffmanCodingSize, size_t totalDataSize, int huffmanNodeCount); +void writeZstdCompressionRatio(float zstdCompressionRatio); +void writeUnpredictDataCounts(size_t unpredictCount, size_t totalNumElements); +void printSZStats(void); + +#ifdef __cplusplus +} +#endif + +#endif /* ----- #ifndef _STATS_H ----- */ diff --git a/deps/SZ/sz/include/sz_uint16.h b/deps/SZ/sz/include/sz_uint16.h new file mode 100644 index 0000000000000000000000000000000000000000..eb2319772faa95eb2bef6d9db0130a3c52472229 --- /dev/null +++ b/deps/SZ/sz/include/sz_uint16.h @@ -0,0 +1,48 @@ +/** + * @file sz_uint16.h + * @author Sheng Di + * @date Nov, 2017 + * @brief Header file for the sz_uint16.c. + * (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory. 
+ * See COPYRIGHT in top-level directory. + */ + +#ifndef _SZ_UInt16_H +#define _SZ_UInt16_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +unsigned int optimize_intervals_uint16_1D(uint16_t *oriData, size_t dataLength, double realPrecision); +unsigned int optimize_intervals_uint16_2D(uint16_t *oriData, size_t r1, size_t r2, double realPrecision); +unsigned int optimize_intervals_uint16_3D(uint16_t *oriData, size_t r1, size_t r2, size_t r3, double realPrecision); +unsigned int optimize_intervals_uint16_4D(uint16_t *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision); +TightDataPointStorageI* SZ_compress_uint16_1D_MDQ(uint16_t *oriData, size_t dataLength, double realPrecision, int64_t valueRangeSize, int64_t minValue); +void SZ_compress_args_uint16_StoreOriData(uint16_t* oriData, size_t dataLength, TightDataPointStorageI* tdps, unsigned char** newByteData, size_t *outSize); +void SZ_compress_args_uint16_NoCkRngeNoGzip_1D(unsigned char** newByteData, uint16_t *oriData, +size_t dataLength, double realPrecision, size_t *outSize, int64_t valueRangeSize, uint16_t minValue); +TightDataPointStorageI* SZ_compress_uint16_2D_MDQ(uint16_t *oriData, size_t r1, size_t r2, double realPrecision, int64_t valueRangeSize, int64_t minValue); +TightDataPointStorageI* SZ_compress_uint16_3D_MDQ(uint16_t *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, int64_t valueRangeSize, int64_t minValue); +void SZ_compress_args_uint16_NoCkRngeNoGzip_3D(unsigned char** newByteData, uint16_t *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t *outSize, int64_t valueRangeSize, int64_t minValue); +TightDataPointStorageI* SZ_compress_uint16_4D_MDQ(uint16_t *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision, int64_t valueRangeSize, int64_t minValue); +void SZ_compress_args_uint16_NoCkRngeNoGzip_4D(unsigned char** newByteData, uint16_t *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision, 
+size_t *outSize, int64_t valueRangeSize, int64_t minValue); +void SZ_compress_args_uint16_withinRange(unsigned char** newByteData, uint16_t *oriData, size_t dataLength, size_t *outSize); + +int SZ_compress_args_uint16_wRngeNoGzip(unsigned char** newByteData, uint16_t *oriData, +size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize, +int errBoundMode, double absErr_Bound, double relBoundRatio); + +int SZ_compress_args_uint16(unsigned char** newByteData, uint16_t *oriData, +size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize, +int errBoundMode, double absErr_Bound, double relBoundRatio); + +#ifdef __cplusplus +} +#endif + +#endif /* ----- #ifndef _SZ_UInt16_H ----- */ + diff --git a/deps/SZ/sz/include/sz_uint32.h b/deps/SZ/sz/include/sz_uint32.h new file mode 100644 index 0000000000000000000000000000000000000000..8adb31d3fc19446fa8b71dcfb6cdc2b2ea8c9556 --- /dev/null +++ b/deps/SZ/sz/include/sz_uint32.h @@ -0,0 +1,48 @@ +/** + * @file sz_uint32.h + * @author Sheng Di + * @date July, 2017 + * @brief Header file for the sz_uint32.c. + * (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. 
+ */ + +#ifndef _SZ_UInt32_H +#define _SZ_UInt32_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +unsigned int optimize_intervals_uint32_1D(uint32_t *oriData, size_t dataLength, double realPrecision); +unsigned int optimize_intervals_uint32_2D(uint32_t *oriData, size_t r1, size_t r2, double realPrecision); +unsigned int optimize_intervals_uint32_3D(uint32_t *oriData, size_t r1, size_t r2, size_t r3, double realPrecision); +unsigned int optimize_intervals_uint32_4D(uint32_t *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision); +TightDataPointStorageI* SZ_compress_uint32_1D_MDQ(uint32_t *oriData, size_t dataLength, double realPrecision, int64_t valueRangeSize, int64_t minValue); +void SZ_compress_args_uint32_StoreOriData(uint32_t* oriData, size_t dataLength, TightDataPointStorageI* tdps, unsigned char** newByteData, size_t *outSize); +void SZ_compress_args_uint32_NoCkRngeNoGzip_1D(unsigned char** newByteData, uint32_t *oriData, +size_t dataLength, double realPrecision, size_t *outSize, int64_t valueRangeSize, uint32_t minValue); +TightDataPointStorageI* SZ_compress_uint32_2D_MDQ(uint32_t *oriData, size_t r1, size_t r2, double realPrecision, int64_t valueRangeSize, int64_t minValue); +TightDataPointStorageI* SZ_compress_uint32_3D_MDQ(uint32_t *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, int64_t valueRangeSize, int64_t minValue); +void SZ_compress_args_uint32_NoCkRngeNoGzip_3D(unsigned char** newByteData, uint32_t *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t *outSize, int64_t valueRangeSize, int64_t minValue); +TightDataPointStorageI* SZ_compress_uint32_4D_MDQ(uint32_t *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision, int64_t valueRangeSize, int64_t minValue); +void SZ_compress_args_uint32_NoCkRngeNoGzip_4D(unsigned char** newByteData, uint32_t *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision, +size_t *outSize, int64_t valueRangeSize, 
int64_t minValue); +void SZ_compress_args_uint32_withinRange(unsigned char** newByteData, uint32_t *oriData, size_t dataLength, size_t *outSize); + +int SZ_compress_args_uint32_wRngeNoGzip(unsigned char** newByteData, uint32_t *oriData, +size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize, +int errBoundMode, double absErr_Bound, double relBoundRatio); + +int SZ_compress_args_uint32(unsigned char** newByteData, uint32_t *oriData, +size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize, +int errBoundMode, double absErr_Bound, double relBoundRatio); + +#ifdef __cplusplus +} +#endif + +#endif /* ----- #ifndef _SZ_UInt32_H ----- */ + diff --git a/deps/SZ/sz/include/sz_uint64.h b/deps/SZ/sz/include/sz_uint64.h new file mode 100644 index 0000000000000000000000000000000000000000..7717aa2d5fa82d9f2415fb50af62d936b9d10bfb --- /dev/null +++ b/deps/SZ/sz/include/sz_uint64.h @@ -0,0 +1,48 @@ +/** + * @file sz_uint64.h + * @author Sheng Di + * @date July, 2017 + * @brief Header file for the sz_uint64.c. + * (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. 
+ */ + +#ifndef _SZ_UInt64_H +#define _SZ_UInt64_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +unsigned int optimize_intervals_uint64_1D(uint64_t *oriData, size_t dataLength, double realPrecision); +unsigned int optimize_intervals_uint64_2D(uint64_t *oriData, size_t r1, size_t r2, double realPrecision); +unsigned int optimize_intervals_uint64_3D(uint64_t *oriData, size_t r1, size_t r2, size_t r3, double realPrecision); +unsigned int optimize_intervals_uint64_4D(uint64_t *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision); +TightDataPointStorageI* SZ_compress_uint64_1D_MDQ(uint64_t *oriData, size_t dataLength, double realPrecision, uint64_t valueRangeSize, uint64_t minValue); +void SZ_compress_args_uint64_StoreOriData(uint64_t* oriData, size_t dataLength, TightDataPointStorageI* tdps, unsigned char** newByteData, size_t *outSize); +void SZ_compress_args_uint64_NoCkRngeNoGzip_1D(unsigned char** newByteData, uint64_t *oriData, +size_t dataLength, double realPrecision, size_t *outSize, uint64_t valueRangeSize, uint64_t minValue); +TightDataPointStorageI* SZ_compress_uint64_2D_MDQ(uint64_t *oriData, size_t r1, size_t r2, double realPrecision, uint64_t valueRangeSize, uint64_t minValue); +TightDataPointStorageI* SZ_compress_uint64_3D_MDQ(uint64_t *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, uint64_t valueRangeSize, uint64_t minValue); +void SZ_compress_args_uint64_NoCkRngeNoGzip_3D(unsigned char** newByteData, uint64_t *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t *outSize, uint64_t valueRangeSize, uint64_t minValue); +TightDataPointStorageI* SZ_compress_uint64_4D_MDQ(uint64_t *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision, uint64_t valueRangeSize, uint64_t minValue); +void SZ_compress_args_uint64_NoCkRngeNoGzip_4D(unsigned char** newByteData, uint64_t *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision, +size_t *outSize, uint64_t 
valueRangeSize, uint64_t minValue); +void SZ_compress_args_uint64_withinRange(unsigned char** newByteData, uint64_t *oriData, size_t dataLength, size_t *outSize); + +int SZ_compress_args_uint64_wRngeNoGzip(unsigned char** newByteData, uint64_t *oriData, +size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize, +int errBoundMode, double absErr_Bound, double relBoundRatio); + +int SZ_compress_args_uint64(unsigned char** newByteData, uint64_t *oriData, +size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize, +int errBoundMode, double absErr_Bound, double relBoundRatio); + +#ifdef __cplusplus +} +#endif + +#endif /* ----- #ifndef _SZ_UInt64_H ----- */ + diff --git a/deps/SZ/sz/include/sz_uint8.h b/deps/SZ/sz/include/sz_uint8.h new file mode 100644 index 0000000000000000000000000000000000000000..9de3a117b557715fed450978e4b54b36f094e239 --- /dev/null +++ b/deps/SZ/sz/include/sz_uint8.h @@ -0,0 +1,48 @@ +/** + * @file sz_uint8.h + * @author Sheng Di + * @date July, 2017 + * @brief Header file for the sz_uint8.c. + * (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. 
+ */ + +#ifndef _SZ_UInt8_H +#define _SZ_UInt8_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +unsigned int optimize_intervals_uint8_1D(uint8_t *oriData, size_t dataLength, double realPrecision); +unsigned int optimize_intervals_uint8_2D(uint8_t *oriData, size_t r1, size_t r2, double realPrecision); +unsigned int optimize_intervals_uint8_3D(uint8_t *oriData, size_t r1, size_t r2, size_t r3, double realPrecision); +unsigned int optimize_intervals_uint8_4D(uint8_t *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision); +TightDataPointStorageI* SZ_compress_uint8_1D_MDQ(uint8_t *oriData, size_t dataLength, double realPrecision, int64_t valueRangeSize, int64_t minValue); +void SZ_compress_args_uint8_StoreOriData(uint8_t* oriData, size_t dataLength, TightDataPointStorageI* tdps, unsigned char** newByteData, size_t *outSize); +void SZ_compress_args_uint8_NoCkRngeNoGzip_1D(unsigned char** newByteData, uint8_t *oriData, +size_t dataLength, double realPrecision, size_t *outSize, int64_t valueRangeSize, uint8_t minValue); +TightDataPointStorageI* SZ_compress_uint8_2D_MDQ(uint8_t *oriData, size_t r1, size_t r2, double realPrecision, int64_t valueRangeSize, int64_t minValue); +TightDataPointStorageI* SZ_compress_uint8_3D_MDQ(uint8_t *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, int64_t valueRangeSize, int64_t minValue); +void SZ_compress_args_uint8_NoCkRngeNoGzip_3D(unsigned char** newByteData, uint8_t *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t *outSize, int64_t valueRangeSize, int64_t minValue); +TightDataPointStorageI* SZ_compress_uint8_4D_MDQ(uint8_t *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision, int64_t valueRangeSize, int64_t minValue); +void SZ_compress_args_uint8_NoCkRngeNoGzip_4D(unsigned char** newByteData, uint8_t *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision, +size_t *outSize, int64_t valueRangeSize, int64_t minValue); +void 
SZ_compress_args_uint8_withinRange(unsigned char** newByteData, uint8_t *oriData, size_t dataLength, size_t *outSize); + +int SZ_compress_args_uint8_wRngeNoGzip(unsigned char** newByteData, uint8_t *oriData, +size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize, +int errBoundMode, double absErr_Bound, double relBoundRatio); + +int SZ_compress_args_uint8(unsigned char** newByteData, uint8_t *oriData, +size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize, +int errBoundMode, double absErr_Bound, double relBoundRatio); + +#ifdef __cplusplus +} +#endif + +#endif /* ----- #ifndef _SZ_UInt8_H ----- */ + diff --git a/deps/SZ/sz/include/szd_double.h b/deps/SZ/sz/include/szd_double.h new file mode 100644 index 0000000000000000000000000000000000000000..3fcf48bc6f957b8b40a98774c35f12276379d2d6 --- /dev/null +++ b/deps/SZ/sz/include/szd_double.h @@ -0,0 +1,43 @@ +/** + * @file szd_double.h + * @author Sheng Di + * @date July, 2017 + * @brief Header file for the szd_double.c. + * (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. 
+ */ + +#ifndef _SZD_Double_H +#define _SZD_Double_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include "TightDataPointStorageD.h" + +void decompressDataSeries_double_1D(double** data, size_t dataSeriesLength, double* hist_data, TightDataPointStorageD* tdps); +void decompressDataSeries_double_2D(double** data, size_t r1, size_t r2, double* hist_data, TightDataPointStorageD* tdps); +void decompressDataSeries_double_3D(double** data, size_t r1, size_t r2, size_t r3, double* hist_data, TightDataPointStorageD* tdps); +void decompressDataSeries_double_4D(double** data, size_t r1, size_t r2, size_t r3, size_t r4, double* hist_data, TightDataPointStorageD* tdps); + +void decompressDataSeries_double_1D_MSST19(double** data, size_t dataSeriesLength, TightDataPointStorageD* tdps); +void decompressDataSeries_double_2D_MSST19(double** data, size_t r1, size_t r2, TightDataPointStorageD* tdps); +void decompressDataSeries_double_3D_MSST19(double** data, size_t r1, size_t r2, size_t r3, TightDataPointStorageD* tdps); + +void getSnapshotData_double_1D(double** data, size_t dataSeriesLength, TightDataPointStorageD* tdps, int errBoundMode, int compressionType, double* hist_data); +void getSnapshotData_double_2D(double** data, size_t r1, size_t r2, TightDataPointStorageD* tdps, int errBoundMode, int compressionType, double* hist_data); +void getSnapshotData_double_3D(double** data, size_t r1, size_t r2, size_t r3, TightDataPointStorageD* tdps, int errBoundMode, int compressionType, double* hist_data); +void getSnapshotData_double_4D(double** data, size_t r1, size_t r2, size_t r3, size_t r4, TightDataPointStorageD* tdps, int errBoundMode, int compressionType, double* hist_data); +void decompressDataSeries_double_2D_nonblocked_with_blocked_regression(double** data, size_t r1, size_t r2, unsigned char* comp_data, double* hist_data); +void decompressDataSeries_double_3D_nonblocked_with_blocked_regression(double** data, size_t r1, size_t r2, size_t r3, unsigned char* comp_data, 
double* hist_data); + +size_t decompressDataSeries_double_3D_RA_block(double * data, double mean, size_t dim_0, size_t dim_1, size_t dim_2, size_t block_dim_0, size_t block_dim_1, size_t block_dim_2, double realPrecision, int * type, double * unpredictable_data); + +int SZ_decompress_args_double(double** newData, size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, unsigned char* cmpBytes, size_t cmpSize, int compressionType, double* hist_data); + +#ifdef __cplusplus +} +#endif + +#endif /* ----- #ifndef _SZD_Double_H ----- */ diff --git a/deps/SZ/sz/include/szd_double_pwr.h b/deps/SZ/sz/include/szd_double_pwr.h new file mode 100644 index 0000000000000000000000000000000000000000..e3bffb5a2d0cf00013518839459c2bc9f544a746 --- /dev/null +++ b/deps/SZ/sz/include/szd_double_pwr.h @@ -0,0 +1,36 @@ +/** + * @file szd_double_pwr.h + * @author Sheng Di + * @date July, 2017 + * @brief Header file for the szd_double_pwr.c. + * (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. 
+ */ + +#ifndef _SZD_Double_PWR_H +#define _SZD_Double_PWR_H + +#ifdef __cplusplus +extern "C" { +#endif + +void decompressDataSeries_double_1D_pwr(double** data, size_t dataSeriesLength, TightDataPointStorageD* tdps); +double* extractRealPrecision_2D_double(size_t R1, size_t R2, int blockSize, TightDataPointStorageD* tdps); +void decompressDataSeries_double_2D_pwr(double** data, size_t r1, size_t r2, TightDataPointStorageD* tdps); +double* extractRealPrecision_3D_double(size_t R1, size_t R2, size_t R3, int blockSize, TightDataPointStorageD* tdps); +void decompressDataSeries_double_3D_pwr(double** data, size_t r1, size_t r2, size_t r3, TightDataPointStorageD* tdps); + +void decompressDataSeries_double_1D_pwrgroup(double** data, size_t dataSeriesLength, TightDataPointStorageD* tdps); +void decompressDataSeries_double_1D_pwr_pre_log(double** data, size_t dataSeriesLength, TightDataPointStorageD* tdps); +void decompressDataSeries_double_2D_pwr_pre_log(double** data, size_t r1, size_t r2, TightDataPointStorageD* tdps); +void decompressDataSeries_double_3D_pwr_pre_log(double** data, size_t r1, size_t r2, size_t r3, TightDataPointStorageD* tdps); + +void decompressDataSeries_double_1D_pwr_pre_log_MSST19(double** data, size_t dataSeriesLength, TightDataPointStorageD* tdps); +void decompressDataSeries_double_2D_pwr_pre_log_MSST19(double** data, size_t r1, size_t r2, TightDataPointStorageD* tdps); +void decompressDataSeries_double_3D_pwr_pre_log_MSST19(double** data, size_t r1, size_t r2, size_t r3, TightDataPointStorageD* tdps); + +#ifdef __cplusplus +} +#endif + +#endif /* ----- #ifndef _SZD_Double_PWR_H ----- */ diff --git a/deps/SZ/sz/include/szd_double_ts.h b/deps/SZ/sz/include/szd_double_ts.h new file mode 100644 index 0000000000000000000000000000000000000000..6f7a768aef85ede04de1cd3b3ae2a0061b3654b9 --- /dev/null +++ b/deps/SZ/sz/include/szd_double_ts.h @@ -0,0 +1,25 @@ +/** + * @file szd_double_ts.h + * @author Sheng Di + * @date July, 2017 + * @brief Header file 
for the szd_double_ts.c. + * (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. + */ + +#ifndef _SZD_Double_TS_H +#define _SZD_Double_TS_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include "TightDataPointStorageD.h" + +void decompressDataSeries_double_1D_ts(double** data, size_t dataSeriesLength, double* hist_data, TightDataPointStorageD* tdps); + +#ifdef __cplusplus +} +#endif + +#endif /* ----- #ifndef _SZD_Double_TS_H ----- */ diff --git a/deps/SZ/sz/include/szd_float.h b/deps/SZ/sz/include/szd_float.h new file mode 100644 index 0000000000000000000000000000000000000000..d11bba4478c6264b39008845d4a7abd994f40e66 --- /dev/null +++ b/deps/SZ/sz/include/szd_float.h @@ -0,0 +1,58 @@ +/** + * @file szd_float.h + * @author Sheng Di + * @date July, 2017 + * @brief Header file for the szd_float.c. + * (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. 
+ */ + +#ifndef _SZD_Float_H +#define _SZD_Float_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include "TightDataPointStorageF.h" + +void decompressDataSeries_float_1D(float** data, size_t dataSeriesLength, float* hist_data, TightDataPointStorageF* tdps); +void decompressDataSeries_float_2D(float** data, size_t r1, size_t r2, float* hist_data, TightDataPointStorageF* tdps); +void decompressDataSeries_float_3D(float** data, size_t r1, size_t r2, size_t r3, float* hist_data, TightDataPointStorageF* tdps); +void decompressDataSeries_float_4D(float** data, size_t r1, size_t r2, size_t r3, size_t r4, float* hist_data, TightDataPointStorageF* tdps); + +void decompressDataSeries_float_1D_MSST19(float** data, size_t dataSeriesLength, TightDataPointStorageF* tdps); +void decompressDataSeries_float_2D_MSST19(float** data, size_t r1, size_t r2, TightDataPointStorageF* tdps); +void decompressDataSeries_float_3D_MSST19(float** data, size_t r1, size_t r2, size_t r3, TightDataPointStorageF* tdps); + +void getSnapshotData_float_1D(float** data, size_t dataSeriesLength, TightDataPointStorageF* tdps, int errBoundMode, int compressionType, float* hist_data); +void getSnapshotData_float_2D(float** data, size_t r1, size_t r2, TightDataPointStorageF* tdps, int errBoundMode, int compressionType, float* hist_data); +void getSnapshotData_float_3D(float** data, size_t r1, size_t r2, size_t r3, TightDataPointStorageF* tdps, int errBoundMode, int compressionType, float* hist_data); +void getSnapshotData_float_4D(float** data, size_t r1, size_t r2, size_t r3, size_t r4, TightDataPointStorageF* tdps, int errBoundMode, int compressionType, float* hist_data); + +size_t decompressDataSeries_float_1D_RA_block(float * data, float mean, size_t dim_0, size_t block_dim_0, double realPrecision, int * type, float * unpredictable_data); +size_t decompressDataSeries_float_2D_RA_block(float * data, float mean, size_t dim_0, size_t dim_1, size_t block_dim_0, size_t block_dim_1, double realPrecision, int 
* type, float * unpredictable_data); + +int SZ_decompress_args_float(float** newData, size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, unsigned char* cmpBytes, size_t cmpSize, int compressionType, float* hist_data); + +size_t decompressDataSeries_float_3D_RA_block(float * data, float mean, size_t dim_0, size_t dim_1, size_t dim_2, size_t block_dim_0, size_t block_dim_1, size_t block_dim_2, double realPrecision, int * type, float * unpredictable_data); + +void decompressDataSeries_float_1D_decompression_given_areas_with_blocked_regression(float** data, size_t r1, size_t s1, size_t e1, unsigned char* comp_data); + +void decompressDataSeries_float_2D_nonblocked_with_blocked_regression(float** data, size_t r1, size_t r2, unsigned char* comp_data, float* hist_data); +void decompressDataSeries_float_2D_decompression_given_areas_with_blocked_regression(float** data, size_t r1, size_t r2, size_t s1, size_t s2, size_t e1, size_t e2, unsigned char* comp_data); +void decompressDataSeries_float_3D_nonblocked_with_blocked_regression(float** data, size_t r1, size_t r2, size_t r3, unsigned char* comp_data, float* hist_data); +void decompressDataSeries_float_3D_random_access_with_blocked_regression(float** data, size_t r1, size_t r2, size_t r3, unsigned char* comp_data); +void decompressDataSeries_float_3D_decompression_random_access_with_blocked_regression(float** data, size_t r1, size_t r2, size_t r3, unsigned char* comp_data); +void decompressDataSeries_float_3D_decompression_given_areas_with_blocked_regression(float** data, size_t r1, size_t r2, size_t r3, size_t s1, size_t s2, size_t s3, size_t e1, size_t e2, size_t e3, unsigned char* comp_data); +int SZ_decompress_args_randomaccess_float(float** newData, +size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, +size_t s5, size_t s4, size_t s3, size_t s2, size_t s1, // start point +size_t e5, size_t e4, size_t e3, size_t e2, size_t e1, // end point +unsigned char* cmpBytes, size_t cmpSize); + +#ifdef __cplusplus +} 
+#endif + +#endif /* ----- #ifndef _SZD_Float_H ----- */ diff --git a/deps/SZ/sz/include/szd_float_pwr.h b/deps/SZ/sz/include/szd_float_pwr.h new file mode 100644 index 0000000000000000000000000000000000000000..35249027f0116b98f3f5771473872db1a87cd9dc --- /dev/null +++ b/deps/SZ/sz/include/szd_float_pwr.h @@ -0,0 +1,38 @@ +/** + * @file szd_float_pwr.h + * @author Sheng Di + * @date July, 2017 + * @brief Header file for the szd_float_pwr.c. + * (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. + */ + +#ifndef _SZD_Float_PWR_H +#define _SZD_Float_PWR_H + +#ifdef __cplusplus +extern "C" { +#endif + +void decompressDataSeries_float_1D_pwr(float** data, size_t dataSeriesLength, TightDataPointStorageF* tdps); +float* extractRealPrecision_2D_float(size_t R1, size_t R2, int blockSize, TightDataPointStorageF* tdps); +void decompressDataSeries_float_2D_pwr(float** data, size_t r1, size_t r2, TightDataPointStorageF* tdps); +float* extractRealPrecision_3D_float(size_t R1, size_t R2, size_t R3, int blockSize, TightDataPointStorageF* tdps); +void decompressDataSeries_float_3D_pwr(float** data, size_t r1, size_t r2, size_t r3, TightDataPointStorageF* tdps); + +char* decompressGroupIDArray(unsigned char* bytes, size_t dataLength); +void decompressDataSeries_float_1D_pwrgroup(float** data, size_t dataSeriesLength, TightDataPointStorageF* tdps); +void decompressDataSeries_float_1D_pwr_pre_log(float** data, size_t dataSeriesLength, TightDataPointStorageF* tdps); +void decompressDataSeries_float_2D_pwr_pre_log(float** data, size_t r1, size_t r2, TightDataPointStorageF* tdps); +void decompressDataSeries_float_3D_pwr_pre_log(float** data, size_t r1, size_t r2, size_t r3, TightDataPointStorageF* tdps); + +void decompressDataSeries_float_1D_pwr_pre_log_MSST19(float** data, size_t dataSeriesLength, TightDataPointStorageF* tdps); +void decompressDataSeries_float_2D_pwr_pre_log_MSST19(float** data, size_t r1, size_t 
r2, TightDataPointStorageF* tdps); +void decompressDataSeries_float_3D_pwr_pre_log_MSST19(float** data, size_t r1, size_t r2, size_t r3, TightDataPointStorageF* tdps); + +#ifdef __cplusplus +} +#endif + +#endif /* ----- #ifndef _SZD_Float_PWR_H ----- */ + diff --git a/deps/SZ/sz/include/szd_float_ts.h b/deps/SZ/sz/include/szd_float_ts.h new file mode 100644 index 0000000000000000000000000000000000000000..88ea07f4a8c09a8e23322315ab02438c2abbeabb --- /dev/null +++ b/deps/SZ/sz/include/szd_float_ts.h @@ -0,0 +1,25 @@ +/** + * @file szd_float_ts.h + * @author Sheng Di + * @date July, 2017 + * @brief Header file for the szd_float_ts.c. + * (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. + */ + +#ifndef _SZD_Float_TS_H +#define _SZD_Float_TS_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include "TightDataPointStorageF.h" + +void decompressDataSeries_float_1D_ts(float** data, size_t dataSeriesLength, float* hist_data, TightDataPointStorageF* tdps); + +#ifdef __cplusplus +} +#endif + +#endif /* ----- #ifndef _SZD_Float_TS_H ----- */ diff --git a/deps/SZ/sz/include/szd_int16.h b/deps/SZ/sz/include/szd_int16.h new file mode 100644 index 0000000000000000000000000000000000000000..a55a3d0487bfb45e6efa553e2dacbcf0a89d9ae2 --- /dev/null +++ b/deps/SZ/sz/include/szd_int16.h @@ -0,0 +1,38 @@ +/** + * @file szd_int16.h + * @author Sheng Di + * @date July, 2017 + * @brief Header file for the szd_int16.c. + * (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. 
+ */ + +#ifndef _SZD_Int16_H +#define _SZD_Int16_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include "TightDataPointStorageI.h" + +#define SZ_INT16_MIN (-32768) /* parenthesized so the signed value stays a single operand wherever the macro expands */ +#define SZ_INT16_MAX 32767 + +void decompressDataSeries_int16_1D(int16_t** data, size_t dataSeriesLength, TightDataPointStorageI* tdps); +void decompressDataSeries_int16_2D(int16_t** data, size_t r1, size_t r2, TightDataPointStorageI* tdps); +void decompressDataSeries_int16_3D(int16_t** data, size_t r1, size_t r2, size_t r3, TightDataPointStorageI* tdps); +void decompressDataSeries_int16_4D(int16_t** data, size_t r1, size_t r2, size_t r3, size_t r4, TightDataPointStorageI* tdps); + +void getSnapshotData_int16_1D(int16_t** data, size_t dataSeriesLength, TightDataPointStorageI* tdps, int errBoundMode); +void getSnapshotData_int16_2D(int16_t** data, size_t r1, size_t r2, TightDataPointStorageI* tdps, int errBoundMode); +void getSnapshotData_int16_3D(int16_t** data, size_t r1, size_t r2, size_t r3, TightDataPointStorageI* tdps, int errBoundMode); +void getSnapshotData_int16_4D(int16_t** data, size_t r1, size_t r2, size_t r3, size_t r4, TightDataPointStorageI* tdps, int errBoundMode); + +int SZ_decompress_args_int16(int16_t** newData, size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, unsigned char* cmpBytes, size_t cmpSize); + +#ifdef __cplusplus +} +#endif + +#endif /* ----- #ifndef _SZD_Int16_H ----- */ diff --git a/deps/SZ/sz/include/szd_int32.h b/deps/SZ/sz/include/szd_int32.h new file mode 100644 index 0000000000000000000000000000000000000000..233901f54e88b1d13586d2533fc16775f9d9f17a --- /dev/null +++ b/deps/SZ/sz/include/szd_int32.h @@ -0,0 +1,38 @@ +/** + * @file szd_int32.h + * @author Sheng Di + * @date July, 2017 + * @brief Header file for the szd_int32.c. + * (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory.
+ */ + +#ifndef _SZD_Int32_H +#define _SZD_Int32_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include "TightDataPointStorageI.h" + +#define SZ_INT32_MIN (-2147483647 - 1) /* 2147483648 does not fit in a 32-bit int, so -2147483648 had a wider (under C89 rules, unsigned) type; this form is a plain int */ +#define SZ_INT32_MAX 2147483647 + +void decompressDataSeries_int32_1D(int32_t** data, size_t dataSeriesLength, TightDataPointStorageI* tdps); +void decompressDataSeries_int32_2D(int32_t** data, size_t r1, size_t r2, TightDataPointStorageI* tdps); +void decompressDataSeries_int32_3D(int32_t** data, size_t r1, size_t r2, size_t r3, TightDataPointStorageI* tdps); +void decompressDataSeries_int32_4D(int32_t** data, size_t r1, size_t r2, size_t r3, size_t r4, TightDataPointStorageI* tdps); + +void getSnapshotData_int32_1D(int32_t** data, size_t dataSeriesLength, TightDataPointStorageI* tdps, int errBoundMode); +void getSnapshotData_int32_2D(int32_t** data, size_t r1, size_t r2, TightDataPointStorageI* tdps, int errBoundMode); +void getSnapshotData_int32_3D(int32_t** data, size_t r1, size_t r2, size_t r3, TightDataPointStorageI* tdps, int errBoundMode); +void getSnapshotData_int32_4D(int32_t** data, size_t r1, size_t r2, size_t r3, size_t r4, TightDataPointStorageI* tdps, int errBoundMode); + +int SZ_decompress_args_int32(int32_t** newData, size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, unsigned char* cmpBytes, size_t cmpSize); + +#ifdef __cplusplus +} +#endif + +#endif /* ----- #ifndef _SZD_Int32_H ----- */ diff --git a/deps/SZ/sz/include/szd_int64.h b/deps/SZ/sz/include/szd_int64.h new file mode 100644 index 0000000000000000000000000000000000000000..5dcb97ac9be5bf6f544f29455189cd50ca878c25 --- /dev/null +++ b/deps/SZ/sz/include/szd_int64.h @@ -0,0 +1,35 @@ +/** + * @file szd_int64.h + * @author Sheng Di + * @date July, 2017 + * @brief Header file for the szd_int64.c. + * (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory.
+ */ + +#ifndef _SZD_Int64_H +#define _SZD_Int64_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include "TightDataPointStorageI.h" + +void decompressDataSeries_int64_1D(int64_t** data, size_t dataSeriesLength, TightDataPointStorageI* tdps); +void decompressDataSeries_int64_2D(int64_t** data, size_t r1, size_t r2, TightDataPointStorageI* tdps); +void decompressDataSeries_int64_3D(int64_t** data, size_t r1, size_t r2, size_t r3, TightDataPointStorageI* tdps); +void decompressDataSeries_int64_4D(int64_t** data, size_t r1, size_t r2, size_t r3, size_t r4, TightDataPointStorageI* tdps); + +void getSnapshotData_int64_1D(int64_t** data, size_t dataSeriesLength, TightDataPointStorageI* tdps, int errBoundMode); +void getSnapshotData_int64_2D(int64_t** data, size_t r1, size_t r2, TightDataPointStorageI* tdps, int errBoundMode); +void getSnapshotData_int64_3D(int64_t** data, size_t r1, size_t r2, size_t r3, TightDataPointStorageI* tdps, int errBoundMode); +void getSnapshotData_int64_4D(int64_t** data, size_t r1, size_t r2, size_t r3, size_t r4, TightDataPointStorageI* tdps, int errBoundMode); + +int SZ_decompress_args_int64(int64_t** newData, size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, unsigned char* cmpBytes, size_t cmpSize); + +#ifdef __cplusplus +} +#endif + +#endif /* ----- #ifndef _SZD_Int64_H ----- */ diff --git a/deps/SZ/sz/include/szd_int8.h b/deps/SZ/sz/include/szd_int8.h new file mode 100644 index 0000000000000000000000000000000000000000..c6186f866d008fa27e2978c89b4d207cd7426a2a --- /dev/null +++ b/deps/SZ/sz/include/szd_int8.h @@ -0,0 +1,38 @@ +/** + * @file szd_int8.h + * @author Sheng Di + * @date July, 2017 + * @brief Header file for the szd_int8.c. + * (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. 
+ */ + +#ifndef _SZD_Int8_H +#define _SZD_Int8_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include "TightDataPointStorageI.h" + +#define SZ_INT8_MIN (-128) /* parenthesized so the signed value stays a single operand wherever the macro expands */ +#define SZ_INT8_MAX 127 + +void decompressDataSeries_int8_1D(int8_t** data, size_t dataSeriesLength, TightDataPointStorageI* tdps); +void decompressDataSeries_int8_2D(int8_t** data, size_t r1, size_t r2, TightDataPointStorageI* tdps); +void decompressDataSeries_int8_3D(int8_t** data, size_t r1, size_t r2, size_t r3, TightDataPointStorageI* tdps); +void decompressDataSeries_int8_4D(int8_t** data, size_t r1, size_t r2, size_t r3, size_t r4, TightDataPointStorageI* tdps); + +void getSnapshotData_int8_1D(int8_t** data, size_t dataSeriesLength, TightDataPointStorageI* tdps, int errBoundMode); +void getSnapshotData_int8_2D(int8_t** data, size_t r1, size_t r2, TightDataPointStorageI* tdps, int errBoundMode); +void getSnapshotData_int8_3D(int8_t** data, size_t r1, size_t r2, size_t r3, TightDataPointStorageI* tdps, int errBoundMode); +void getSnapshotData_int8_4D(int8_t** data, size_t r1, size_t r2, size_t r3, size_t r4, TightDataPointStorageI* tdps, int errBoundMode); + +int SZ_decompress_args_int8(int8_t** newData, size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, unsigned char* cmpBytes, size_t cmpSize); + +#ifdef __cplusplus +} +#endif + +#endif /* ----- #ifndef _SZD_Int8_H ----- */ diff --git a/deps/SZ/sz/include/szd_uint16.h b/deps/SZ/sz/include/szd_uint16.h new file mode 100644 index 0000000000000000000000000000000000000000..dcd3ed830703818dba7fa5b8b71c84ac448b205a --- /dev/null +++ b/deps/SZ/sz/include/szd_uint16.h @@ -0,0 +1,38 @@ +/** + * @file szd_uint16.h + * @author Sheng Di + * @date July, 2017 + * @brief Header file for the szd_uint16.c. + * (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory.
+ */ + +#ifndef _SZD_UInt16_H +#define _SZD_UInt16_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include "TightDataPointStorageI.h" + +#define SZ_UINT16_MIN 0 +#define SZ_UINT16_MAX 65535 + +void decompressDataSeries_uint16_1D(uint16_t** data, size_t dataSeriesLength, TightDataPointStorageI* tdps); +void decompressDataSeries_uint16_2D(uint16_t** data, size_t r1, size_t r2, TightDataPointStorageI* tdps); +void decompressDataSeries_uint16_3D(uint16_t** data, size_t r1, size_t r2, size_t r3, TightDataPointStorageI* tdps); +void decompressDataSeries_uint16_4D(uint16_t** data, size_t r1, size_t r2, size_t r3, size_t r4, TightDataPointStorageI* tdps); + +void getSnapshotData_uint16_1D(uint16_t** data, size_t dataSeriesLength, TightDataPointStorageI* tdps, int errBoundMode); +void getSnapshotData_uint16_2D(uint16_t** data, size_t r1, size_t r2, TightDataPointStorageI* tdps, int errBoundMode); +void getSnapshotData_uint16_3D(uint16_t** data, size_t r1, size_t r2, size_t r3, TightDataPointStorageI* tdps, int errBoundMode); +void getSnapshotData_uint16_4D(uint16_t** data, size_t r1, size_t r2, size_t r3, size_t r4, TightDataPointStorageI* tdps, int errBoundMode); + +int SZ_decompress_args_uint16(uint16_t** newData, size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, unsigned char* cmpBytes, size_t cmpSize); + +#ifdef __cplusplus +} +#endif + +#endif /* ----- #ifndef _SZD_UInt16_H ----- */ diff --git a/deps/SZ/sz/include/szd_uint32.h b/deps/SZ/sz/include/szd_uint32.h new file mode 100644 index 0000000000000000000000000000000000000000..88ff5708a80aea4f28bfd4819b40a39c3f42e36e --- /dev/null +++ b/deps/SZ/sz/include/szd_uint32.h @@ -0,0 +1,38 @@ +/** + * @file szd_uint32.h + * @author Sheng Di + * @date July, 2017 + * @brief Header file for the szd_uint32.c. + * (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory.
+ */ + +#ifndef _SZD_UInt32_H +#define _SZD_UInt32_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include "TightDataPointStorageI.h" + +#define SZ_UINT32_MIN 0 +#define SZ_UINT32_MAX 4294967295 + +void decompressDataSeries_uint32_1D(uint32_t** data, size_t dataSeriesLength, TightDataPointStorageI* tdps); +void decompressDataSeries_uint32_2D(uint32_t** data, size_t r1, size_t r2, TightDataPointStorageI* tdps); +void decompressDataSeries_uint32_3D(uint32_t** data, size_t r1, size_t r2, size_t r3, TightDataPointStorageI* tdps); +void decompressDataSeries_uint32_4D(uint32_t** data, size_t r1, size_t r2, size_t r3, size_t r4, TightDataPointStorageI* tdps); + +void getSnapshotData_uint32_1D(uint32_t** data, size_t dataSeriesLength, TightDataPointStorageI* tdps, int errBoundMode); +void getSnapshotData_uint32_2D(uint32_t** data, size_t r1, size_t r2, TightDataPointStorageI* tdps, int errBoundMode); +void getSnapshotData_uint32_3D(uint32_t** data, size_t r1, size_t r2, size_t r3, TightDataPointStorageI* tdps, int errBoundMode); +void getSnapshotData_uint32_4D(uint32_t** data, size_t r1, size_t r2, size_t r3, size_t r4, TightDataPointStorageI* tdps, int errBoundMode); + +int SZ_decompress_args_uint32(uint32_t** newData, size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, unsigned char* cmpBytes, size_t cmpSize); + +#ifdef __cplusplus +} +#endif + +#endif /* ----- #ifndef _SZD_UInt32_H ----- */ diff --git a/deps/SZ/sz/include/szd_uint64.h b/deps/SZ/sz/include/szd_uint64.h new file mode 100644 index 0000000000000000000000000000000000000000..6992c68ebceaaeae5be46e6a2228457e1ee85cd2 --- /dev/null +++ b/deps/SZ/sz/include/szd_uint64.h @@ -0,0 +1,35 @@ +/** + * @file szd_uint64.h + * @author Sheng Di + * @date July, 2017 + * @brief Header file for the szd_uint64.c. + * (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. 
+ */ + +#ifndef _SZD_UInt64_H +#define _SZD_UInt64_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include "TightDataPointStorageI.h" + +void decompressDataSeries_uint64_1D(uint64_t** data, size_t dataSeriesLength, TightDataPointStorageI* tdps); +void decompressDataSeries_uint64_2D(uint64_t** data, size_t r1, size_t r2, TightDataPointStorageI* tdps); +void decompressDataSeries_uint64_3D(uint64_t** data, size_t r1, size_t r2, size_t r3, TightDataPointStorageI* tdps); +void decompressDataSeries_uint64_4D(uint64_t** data, size_t r1, size_t r2, size_t r3, size_t r4, TightDataPointStorageI* tdps); + +void getSnapshotData_uint64_1D(uint64_t** data, size_t dataSeriesLength, TightDataPointStorageI* tdps, int errBoundMode); +void getSnapshotData_uint64_2D(uint64_t** data, size_t r1, size_t r2, TightDataPointStorageI* tdps, int errBoundMode); +void getSnapshotData_uint64_3D(uint64_t** data, size_t r1, size_t r2, size_t r3, TightDataPointStorageI* tdps, int errBoundMode); +void getSnapshotData_uint64_4D(uint64_t** data, size_t r1, size_t r2, size_t r3, size_t r4, TightDataPointStorageI* tdps, int errBoundMode); + +int SZ_decompress_args_uint64(uint64_t** newData, size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, unsigned char* cmpBytes, size_t cmpSize); + +#ifdef __cplusplus +} +#endif + +#endif /* ----- #ifndef _SZD_UInt64_H ----- */ diff --git a/deps/SZ/sz/include/szd_uint8.h b/deps/SZ/sz/include/szd_uint8.h new file mode 100644 index 0000000000000000000000000000000000000000..2366c7e08e0f048c4634f7689e65d3b48fcc7bf2 --- /dev/null +++ b/deps/SZ/sz/include/szd_uint8.h @@ -0,0 +1,38 @@ +/** + * @file szd_uint8.h + * @author Sheng Di + * @date July, 2017 + * @brief Header file for the szd_uint8.c. + * (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. 
+ */ + +#ifndef _SZD_UInt8_H +#define _SZD_UInt8_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include "TightDataPointStorageI.h" + +#define SZ_UINT8_MIN 0 +#define SZ_UINT8_MAX 255 + +void decompressDataSeries_uint8_1D(uint8_t** data, size_t dataSeriesLength, TightDataPointStorageI* tdps); +void decompressDataSeries_uint8_2D(uint8_t** data, size_t r1, size_t r2, TightDataPointStorageI* tdps); +void decompressDataSeries_uint8_3D(uint8_t** data, size_t r1, size_t r2, size_t r3, TightDataPointStorageI* tdps); +void decompressDataSeries_uint8_4D(uint8_t** data, size_t r1, size_t r2, size_t r3, size_t r4, TightDataPointStorageI* tdps); + +void getSnapshotData_uint8_1D(uint8_t** data, size_t dataSeriesLength, TightDataPointStorageI* tdps, int errBoundMode); +void getSnapshotData_uint8_2D(uint8_t** data, size_t r1, size_t r2, TightDataPointStorageI* tdps, int errBoundMode); +void getSnapshotData_uint8_3D(uint8_t** data, size_t r1, size_t r2, size_t r3, TightDataPointStorageI* tdps, int errBoundMode); +void getSnapshotData_uint8_4D(uint8_t** data, size_t r1, size_t r2, size_t r3, size_t r4, TightDataPointStorageI* tdps, int errBoundMode); + +int SZ_decompress_args_uint8(uint8_t** newData, size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, unsigned char* cmpBytes, size_t cmpSize); + +#ifdef __cplusplus +} +#endif + +#endif /* ----- #ifndef _SZD_UInt8_H ----- */ diff --git a/deps/SZ/sz/include/szf.h b/deps/SZ/sz/include/szf.h new file mode 100644 index 0000000000000000000000000000000000000000..7cf2e838da3e157df1ce9ff8df6424c76b2c2ea8 --- /dev/null +++ b/deps/SZ/sz/include/szf.h @@ -0,0 +1,102 @@ +/** + * @file szf.h + * @author Sheng Di + * @date July, 2017 + * @brief Header file for the szf.c. + * (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. 
+ */ + +#ifndef _SZF_H +#define _SZF_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include <stdio.h> /* supplies size_t for the prototypes below */ + +//szf.c +void sz_init_c_(char *configFile,int *len,int *ierr); +void sz_finalize_c_(); +void SZ_writeData_inBinary_d1_Float_(float* data, char *fileName, int *len); +void sz_compress_d1_float_(float* data, unsigned char *bytes, size_t *outSize, size_t *r1); +void sz_compress_d1_float_rev_(float* data, float *reservedValue, unsigned char *bytes, size_t *outSize, size_t *r1); +void sz_compress_d2_float_(float* data, unsigned char *bytes, size_t *outSize, size_t *r1, size_t *r2); +void sz_compress_d2_float_rev_(float* data, float *reservedValue, unsigned char *bytes, size_t *outSize, size_t *r1, size_t *r2); +void sz_compress_d3_float_(float* data, unsigned char *bytes, size_t *outSize, size_t *r1, size_t *r2, size_t *r3); +void sz_compress_d3_float_rev_(float* data, float *reservedValue, unsigned char *bytes, size_t *outSize, size_t *r1, size_t *r2, size_t *r3); +void sz_compress_d4_float_(float* data, unsigned char *bytes, size_t *outSize, size_t *r1, size_t *r2, size_t *r3, size_t *r4); +void sz_compress_d4_float_rev_(float* data, float *reservedValue, unsigned char *bytes, size_t *outSize, size_t *r1, size_t *r2, size_t *r3, size_t *r4); +void sz_compress_d5_float_(float* data, unsigned char *bytes, size_t *outSize, size_t *r1, size_t *r2, size_t *r3, size_t *r4, size_t *r5); +void sz_compress_d5_float_rev_(float* data, float *reservedValue, unsigned char *bytes, size_t *outSize, size_t *r1, size_t *r2, size_t *r3, size_t *r4, size_t *r5); + +void sz_compress_d1_double_(double* data, unsigned char *bytes, size_t *outSize, size_t *r1); +void sz_compress_d1_double_rev_(double* data, double *reservedValue, unsigned char *bytes, size_t *outSize, size_t *r1); +void sz_compress_d2_double_(double* data, unsigned char *bytes, size_t *outSize, size_t *r1, size_t *r2); +void sz_compress_d2_double_rev_(double* data, double *reservedValue, unsigned char *bytes, size_t *outSize,
size_t *r1, size_t *r2); +void sz_compress_d3_double_(double* data, unsigned char *bytes, size_t *outSize, size_t *r1, size_t *r2, size_t *r3); +void sz_compress_d3_double_rev_(double* data, double *reservedValue, unsigned char *bytes, size_t *outSize, size_t *r1, size_t *r2, size_t *r3); +void sz_compress_d4_double_(double* data, unsigned char *bytes, size_t *outSize, size_t *r1, size_t *r2, size_t *r3, size_t *r4); +void sz_compress_d4_double_rev_(double* data, double *reservedValue, unsigned char *bytes, size_t *outSize, size_t *r1, size_t *r2, size_t *r3, size_t *r4); +void sz_compress_d5_double_(double* data, unsigned char *bytes, size_t *outSize, size_t *r1, size_t *r2, size_t *r3, size_t *r4, size_t *r5); +void sz_compress_d5_double_rev_(double* data, double *reservedValue, unsigned char *bytes, size_t *outSize, size_t *r1, size_t *r2, size_t *r3, size_t *r4, size_t *r5); + +void sz_compress_d1_float_args_(float* data, unsigned char *bytes, size_t *outSize, int *errBoundMode, float *absErrBound, float *relBoundRatio, size_t *r1); +void sz_compress_d2_float_args_(float* data, unsigned char *bytes, size_t *outSize, int *errBoundMode, float *absErrBound, float *relBoundRatio, size_t *r1, size_t *r2); +void sz_compress_d3_float_args_(float* data, unsigned char *bytes, size_t *outSize, int *errBoundMode, float *absErrBound, float *relBoundRatio, size_t *r1, size_t *r2, size_t *r3); +void sz_compress_d4_float_args_(float* data, unsigned char *bytes, size_t *outSize, int *errBoundMode, float *absErrBound, float *relBoundRatio, size_t *r1, size_t *r2, size_t *r3, size_t *r4); +void sz_compress_d5_float_args_(float* data, unsigned char *bytes, size_t *outSize, int *errBoundMode, float *absErrBound, float *relBoundRatio, size_t *r1, size_t *r2, size_t *r3, size_t *r4, size_t *r5); +void sz_compress_d1_double_args_(double* data, unsigned char *bytes, size_t *outSize, int *errBoundMode, double *absErrBound, double *relBoundRatio, size_t *r1); +void 
sz_compress_d2_double_args_(double* data, unsigned char *bytes, size_t *outSize, int *errBoundMode, double *absErrBound, double *relBoundRatio, size_t *r1, size_t *r2); +void sz_compress_d3_double_args_(double* data, unsigned char *bytes, size_t *outSize, int *errBoundMode, double *absErrBound, double *relBoundRatio, size_t *r1, size_t *r2, size_t *r3); +void sz_compress_d4_double_args_(double* data, unsigned char *bytes, size_t *outSize, int *errBoundMode, double *absErrBound, double *relBoundRatio, size_t *r1, size_t *r2, size_t *r3, size_t *r4); +void sz_compress_d5_double_args_(double* data, unsigned char *bytes, size_t *outSize, int *errBoundMode, double *absErrBound, double *relBoundRatio, size_t *r1, size_t *r2, size_t *r3, size_t *r4, size_t *r5); + +void sz_compress_d1_float_rev_args_(float* data, float *reservedValue, unsigned char *bytes, size_t *outSize, int *errBoundMode, float *absErrBound, float *relBoundRatio, size_t *r1); +void sz_compress_d2_float_rev_args_(float* data, float *reservedValue, unsigned char *bytes, size_t *outSize, int *errBoundMode, float *absErrBound, float *relBoundRatio, size_t *r1, size_t *r2); +void sz_compress_d3_float_rev_args_(float* data, float *reservedValue, unsigned char *bytes, size_t *outSize, int *errBoundMode, float *absErrBound, float *relBoundRatio, size_t *r1, size_t *r2, size_t *r3); +void sz_compress_d4_float_rev_args_(float* data, float *reservedValue, unsigned char *bytes, size_t *outSize, int *errBoundMode, float *absErrBound, float *relBoundRatio, size_t *r1, size_t *r2, size_t *r3, size_t *r4); +void sz_compress_d5_float_rev_args_(float* data, float *reservedValue, unsigned char *bytes, size_t *outSize, int *errBoundMode, float *absErrBound, float *relBoundRatio, size_t *r1, size_t *r2, size_t *r3, size_t *r4, size_t *r5); +void sz_compress_d1_double_rev_args_(double* data, float *reservedValue, unsigned char *bytes, size_t *outSize, int *errBoundMode, double *absErrBound, double *relBoundRatio, size_t 
*r1); +void sz_compress_d2_double_rev_args_(double* data, float *reservedValue, unsigned char *bytes, size_t *outSize, int *errBoundMode, double *absErrBound, double *relBoundRatio, size_t *r1, size_t *r2); +void sz_compress_d3_double_rev_args_(double* data, float *reservedValue, unsigned char *bytes, size_t *outSize, int *errBoundMode, double *absErrBound, double *relBoundRatio, size_t *r1, size_t *r2, size_t *r3); /* NOTE(review): the d1-d3 double variants declare reservedValue as float* while d4/d5 below use double* - looks unintended; confirm against the definitions in szf.c before changing */ +void sz_compress_d4_double_rev_args_(double* data, double *reservedValue, unsigned char *bytes, size_t *outSize, int *errBoundMode, double *absErrBound, double *relBoundRatio, size_t *r1, size_t *r2, size_t *r3, size_t *r4); +void sz_compress_d5_double_rev_args_(double* data, double *reservedValue, unsigned char *bytes, size_t *outSize, int *errBoundMode, double *absErrBound, double *relBoundRatio, size_t *r1, size_t *r2, size_t *r3, size_t *r4, size_t *r5); + +void sz_decompress_d1_float_(unsigned char *bytes, size_t *byteLength, float *data, size_t *r1); +void sz_decompress_d2_float_(unsigned char *bytes, size_t *byteLength, float *data, size_t *r1, size_t *r2); +void sz_decompress_d3_float_(unsigned char *bytes, size_t *byteLength, float *data, size_t *r1, size_t *r2, size_t *r3); +void sz_decompress_d4_float_(unsigned char *bytes, size_t *byteLength, float *data, size_t *r1, size_t *r2, size_t *r3, size_t *r4); +void sz_decompress_d5_float_(unsigned char *bytes, size_t *byteLength, float *data, size_t *r1, size_t *r2, size_t *r3, size_t *r4, size_t *r5); +void sz_decompress_d1_double_(unsigned char *bytes, size_t *byteLength, double *data, size_t *r1); +void sz_decompress_d2_double_(unsigned char *bytes, size_t *byteLength, double *data, size_t *r1, size_t *r2); +void sz_decompress_d3_double_(unsigned char *bytes, size_t *byteLength, double *data, size_t *r1, size_t *r2, size_t *r3); +void sz_decompress_d4_double_(unsigned char *bytes, size_t *byteLength, double *data, size_t *r1, size_t *r2, size_t *r3, size_t *r4); +void
sz_decompress_d5_double_(unsigned char *bytes, size_t *byteLength, double *data, size_t *r1, size_t *r2, size_t *r3, size_t *r4, size_t *r5); + +void sz_batchaddVar_d1_float_(int var_id, char* varName, int *len, float* data, int *errBoundMode, float *absErrBound, float *relBoundRatio, size_t *r1); +void sz_batchaddvar_d2_float_(int var_id, char* varName, int *len, float* data, int *errBoundMode, float *absErrBound, float *relBoundRatio, size_t *r1, size_t *r2); +void sz_batchaddvar_d3_float_(int var_id, char* varName, int *len, float* data, int *errBoundMode, float *absErrBound, float *relBoundRatio, size_t *r1, size_t *r2, size_t *r3); +void sz_batchaddvar_d4_float_(int var_id, char* varName, int *len, float* data, int *errBoundMode, float *absErrBound, float *relBoundRatio, size_t *r1, size_t *r2, size_t *r3, size_t *r4); +void sz_batchaddvar_d5_float_(int var_id, char* varName, int *len, float* data, int *errBoundMode, float *absErrBound, float *relBoundRatio, size_t *r1, size_t *r2, size_t *r3, size_t *r4, size_t *r5); +void sz_batchaddvar_d1_double_(int var_id, char* varName, int *len, double* data, int *errBoundMode, double *absErrBound, double *relBoundRatio, size_t *r1); +void sz_batchaddvar_d2_double_(int var_id, char* varName, int *len, double* data, int *errBoundMode, double *absErrBound, double *relBoundRatio, size_t *r1, size_t *r2); +void sz_batchaddvar_d3_double_(int var_id, char* varName, int *len, double* data, int *errBoundMode, double *absErrBound, double *relBoundRatio, size_t *r1, size_t *r2, size_t *r3); +void sz_batchaddvar_d4_double_(int var_id, char* varName, int *len, double* data, int *errBoundMode, double *absErrBound, double *relBoundRatio, size_t *r1, size_t *r2, size_t *r3, size_t *r4); +void sz_batchaddvar_d5_double_(int var_id, char* varName, int *len, double* data, int *errBoundMode, double *absErrBound, double *relBoundRatio, size_t *r1, size_t *r2, size_t *r3, size_t *r4, size_t *r5); +void sz_batchdelvar_c_(char* varName, int 
*len, int *errState); +void sz_batch_compress_c_(unsigned char* bytes, size_t *outSize); +void sz_batch_decompress_c_(unsigned char* bytes, size_t *byteLength, int *ierr); +void sz_getvardim_c_(char* varName, int *len, int *dim, size_t *r1, size_t *r2, size_t *r3, size_t *r4, size_t *r5); +void compute_total_batch_size_c_(size_t *totalSize); +void sz_getvardata_float_(char* varName, int *len, float* data); +void sz_getvardata_double_(char* varName, int *len, double* data); +void sz_freevarset_c_(int *mode); + +#ifdef __cplusplus +} +#endif + +#endif /* ----- #ifndef _SZF_H ----- */ + diff --git a/deps/SZ/sz/include/utility.h b/deps/SZ/sz/include/utility.h new file mode 100644 index 0000000000000000000000000000000000000000..f4ae6415890bc71139bc6300ca4c8a981c757833 --- /dev/null +++ b/deps/SZ/sz/include/utility.h @@ -0,0 +1,45 @@ +/** + * @file utility.h + * @author Sheng Di, Sihuan Li + * @date July, 2018 + * @brief Header file for the utility.c. + * (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. + */ + +#ifndef _UTILITY_H +#define _UTILITY_H + +#include "sz.h" + +#ifdef __cplusplus +extern "C" { +#endif + +//sihuan added: use a assistant struct to do sorting and swap that are easy to implement: should +//consider optimizing the performance later. 
+typedef struct sort_ast_particle{ + int64_t id; + float var[6]; +} sort_ast_particle; + +int compare_struct(const void* obj1, const void* obj2);//sihuan added: the compare function in the qsort parameter for 2 structures +void reorder_vars(SZ_VarSet* vset);//sihuan added: reorder the variables increasingly by their index +size_t intersectAndsort(int64_t* preIndex, size_t preLen, SZ_VarSet* curVar, size_t dataLen, unsigned char* bitarray); +//sihuan added: find intersection and keep new var sorted by id +void write_reordered_tofile(SZ_VarSet* curVar, size_t dataLen); +//sihuan added: write the reordered input to files for further decompression validation +float calculate_delta_t(size_t size);//sihuan added + +int is_lossless_compressed_data(unsigned char* compressedBytes, size_t cmpSize); +unsigned long sz_lossless_compress(int losslessCompressor, int level, unsigned char* data, unsigned long dataLength, unsigned char** compressBytes); +unsigned long sz_lossless_decompress(int losslessCompressor, unsigned char* compressBytes, unsigned long cmpSize, unsigned char** oriData, unsigned long targetOriSize); +unsigned long sz_lossless_decompress65536bytes(int losslessCompressor, unsigned char* compressBytes, unsigned long cmpSize, unsigned char** oriData); +void* detransposeData(void* data, int dataType, size_t r5, size_t r4, size_t r3, size_t r2, size_t r1); +void* transposeData(void* data, int dataType, size_t r5, size_t r4, size_t r3, size_t r2, size_t r1); + +#ifdef __cplusplus +} +#endif + +#endif /* ----- #ifndef _UTILITY_H ----- */ diff --git a/deps/SZ/sz/src/ArithmeticCoding.c b/deps/SZ/sz/src/ArithmeticCoding.c new file mode 100644 index 0000000000000000000000000000000000000000..278a2e917542dffaf2ecce457b17df618c17eca8 --- /dev/null +++ b/deps/SZ/sz/src/ArithmeticCoding.c @@ -0,0 +1,692 @@ +/** + * @file ArithmeticCoding.c + * @author Sheng Di, Mark Thomas Nelson + * @date April, 2016 + * @brief Byte Toolkit + * (C) 2016 by Mathematics and Computer Science 
(MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. + * (C) The MIT License (MIT), this code was modified from Mark's arithmetic coding code: http://www.drdobbs.com/cpp/data-compression-with-arithmetic-encodin/240169251?pgno=1 + */ +#include +#include + +inline void output_bit_1(unsigned int* buf) +{ + (*buf) = (*buf) << 1; + (*buf) |= 1; +} + +inline void output_bit_0(unsigned int* buf) +{ + (*buf) = (*buf) << 1; + //(*byte) |= 0; //actually doesn't have to set the bit to 0 +} + +//TODO: problematic +inline unsigned int output_bit_1_plus_pending(int pending_bits) +{ + unsigned int buf = 0, pbits = pending_bits; + output_bit_1(&buf); + while(pbits--) + output_bit_0(&buf); + buf = buf << (32-(pending_bits+1)); //alignment to the left leading bit, which would be easier for the final output + return buf; +} + +inline unsigned int output_bit_0_plus_pending(int pending_bits) +{ + unsigned int buf = 0, pbits = pending_bits; + //output_bit_0(&buf); + while(pbits--) + output_bit_1(&buf); + buf = buf << (32-(pending_bits+1)); //alignment to the left leading bit + return buf; +} + +/** + * Create AriCoder for the following arithmetic encoding operation. + * In this function, it will compute the real frequency of the integer codes. 
+ * @param int numOfStates (input): numOfStates is the real # states calculated to the optimization_num_of_interval code + * @param int *s (input): the integer code array (i.e., type_array generated by prediction+quantization) + * @param size_t length: the number of integer codes in the type_array + * + * */ +AriCoder *createAriCoder(int numOfStates, int *s, size_t length) +{ + AriCoder *ariCoder = (AriCoder*)malloc(sizeof(AriCoder)); + memset(ariCoder, 0, sizeof(AriCoder)); + ariCoder->numOfRealStates = numOfStates; + ari_init(ariCoder, s, length); + return ariCoder; +} + +void freeAriCoder(AriCoder *ariCoder) +{ + free(ariCoder->cumulative_frequency); + free(ariCoder); +} + +void ari_init(AriCoder *ariCoder, int *s, size_t length) +{ + size_t i; //# states is in the range of integer. + int index = 0; + size_t *freq = (size_t *)malloc(ariCoder->numOfRealStates*sizeof(size_t)); + memset(freq, 0, ariCoder->numOfRealStates*sizeof(size_t)); + for(i = 0;i < length;i++) + { + index = s[i]; + freq[index]++; + } + + int counter = 0; + size_t _sum = 0, sum = 0, freqDiv = 0; + ariCoder->cumulative_frequency = (Prob *)malloc(ariCoder->numOfRealStates*sizeof(Prob)); + + memset(ariCoder->cumulative_frequency, 0, ariCoder->numOfRealStates*sizeof(Prob)); + + if(length <= MAX_INTERVALS) + { + for (index = 0; index < ariCoder->numOfRealStates; index++) + { + if (freq[index]) + { + sum += freq[index]; + (ariCoder->cumulative_frequency[index]).low = _sum; + (ariCoder->cumulative_frequency[index]).high = sum; + (ariCoder->cumulative_frequency[index]).state = index; + _sum = sum; + counter++; + } + } + ariCoder->numOfValidStates = counter; + ariCoder->total_frequency = sum; + } + else + { + int intvSize = length%MAX_INTERVALS==0?length/MAX_INTERVALS:length/MAX_INTERVALS+1; + for (index = 0; index < ariCoder->numOfRealStates; index++) + { + if (freq[index]) + { + freqDiv = freq[index]/intvSize; //control the sum of frequency to be no greater than MAX_INTERVALS + if(freqDiv==0) + 
freqDiv = 1; + sum += freqDiv; + (ariCoder->cumulative_frequency[index]).low = _sum; + (ariCoder->cumulative_frequency[index]).high = sum; + (ariCoder->cumulative_frequency[index]).state = index; + _sum = sum; + counter++; + } + } + ariCoder->numOfValidStates = counter; + ariCoder->total_frequency = sum; + } + + free(freq); +} + +/** + * Convert AriCoder to bytes for storage + * @param AriCoder* ariCoder (input) + * @param unsigned char** out (output) + * + * @return outSize + * */ +unsigned int pad_ariCoder(AriCoder* ariCoder, unsigned char** out) +{ + int numOfRealStates = ariCoder->numOfRealStates; + int numOfValidStates = ariCoder->numOfValidStates; + uint64_t total_frequency = ariCoder->total_frequency; + Prob* cumulative_frequency = ariCoder->cumulative_frequency; + + unsigned int outSize = 0; + *out = (unsigned char*)malloc(2*sizeof(int)+sizeof(uint64_t)+sizeof(Prob)*numOfRealStates); + + unsigned char* p = *out; + intToBytes_bigEndian(p, numOfRealStates); + p+=sizeof(int); + intToBytes_bigEndian(p, numOfValidStates); + p+=sizeof(int); + int64ToBytes_bigEndian(p, total_frequency); + p+=sizeof(uint64_t); + size_t i = 0; + if(total_frequency <= 65536) + { + uint16_t low, high; + if(numOfRealStates<=256) + { + for(i=0;inumOfValidStates*5; //2*sizeof(uint16_t)+1 + } + else if(numOfRealStates<=65536) + { + for(i=0;inumOfValidStates*6; + } + else + { + for(i=0;inumOfValidStates*8; + } + } + else if(total_frequency <=4294967296) + { + uint32_t low, high; + if(numOfRealStates<=256) + { + for(i=0;inumOfValidStates*9; + } + else if(numOfRealStates<=65536) + { + for(i=0;inumOfValidStates*10; + } + else + { + for(i=0;inumOfValidStates*12; + } + } + else + { + uint64_t low, high; + if(numOfRealStates<=256) + { + for(i=0;inumOfValidStates*17; + } + else if(numOfRealStates<=65536) + { + for(i=0;inumOfValidStates*18; + } + else + { + for(i=0;inumOfValidStates*20; + } + } + return outSize; +} + +/** + * Reconstruct AriCoder based on the bytes loaded from compressed data + * 
@param AriCoder** ariCoder (ourput) + * @param unsigned char* bytes (input) + * + * @return offset + * */ +int unpad_ariCoder(AriCoder** ariCoder, unsigned char* bytes) +{ + int offset = 0; + + *ariCoder = (AriCoder*)malloc(sizeof(AriCoder)); + memset(*ariCoder, 0, sizeof(AriCoder)); + + unsigned char *p = bytes; + int numOfRealStates = (*ariCoder)->numOfRealStates = bytesToInt_bigEndian(p); + p += sizeof(int); + int numOfValidStates = (*ariCoder)->numOfValidStates = bytesToInt_bigEndian(p); + p += sizeof(int); + size_t total_frequency = (*ariCoder)->total_frequency = bytesToInt64_bigEndian(p); + p += sizeof(uint64_t); + + (*ariCoder)->cumulative_frequency = (Prob*)malloc((*ariCoder)->numOfRealStates*sizeof(Prob)); + memset((*ariCoder)->cumulative_frequency, 0, (*ariCoder)->numOfRealStates*sizeof(Prob)); + size_t i = 0; + unsigned char *low_p = NULL, *high_p = NULL, *state_p = NULL; + int state = 0; + if(total_frequency <= 65536) + { + if(numOfRealStates<=256) + { + for(i=0;icumulative_frequency[state].low = bytesToUInt16_bigEndian(low_p); + (*ariCoder)->cumulative_frequency[state].high = bytesToUInt16_bigEndian(high_p); + (*ariCoder)->cumulative_frequency[state].state = state; + + p = state_p + 1; + } + offset = 2*sizeof(int)+sizeof(uint64_t)+(*ariCoder)->numOfValidStates*5; //2*sizeof(uint16_t)+1 + } + else if(numOfRealStates<=65536) + { + for(i=0;icumulative_frequency[state].low = bytesToUInt16_bigEndian(low_p); + (*ariCoder)->cumulative_frequency[state].high = bytesToUInt16_bigEndian(high_p); + (*ariCoder)->cumulative_frequency[state].state = state; + + p = state_p + sizeof(uint16_t); + } + offset = 2*sizeof(int)+sizeof(uint64_t)+(*ariCoder)->numOfValidStates*6; + } + else + { + for(i=0;icumulative_frequency[state].low = bytesToUInt16_bigEndian(low_p); + (*ariCoder)->cumulative_frequency[state].high = bytesToUInt16_bigEndian(high_p); + (*ariCoder)->cumulative_frequency[state].state = state; + + p = state_p + sizeof(uint32_t); + } + offset = 
2*sizeof(int)+sizeof(uint64_t)+(*ariCoder)->numOfValidStates*8; + } + } + else if(total_frequency <=4294967296) + { + if(numOfRealStates<=256) + { + for(i=0;icumulative_frequency[state].low = bytesToUInt32_bigEndian(low_p); + (*ariCoder)->cumulative_frequency[state].high = bytesToUInt32_bigEndian(high_p); + (*ariCoder)->cumulative_frequency[state].state = state; + + p = state_p + 1; + } + offset = 2*sizeof(int)+sizeof(uint64_t)+(*ariCoder)->numOfValidStates*9; + } + else if(numOfRealStates<=65536) + { + for(i=0;icumulative_frequency[state].low = bytesToUInt32_bigEndian(low_p); + (*ariCoder)->cumulative_frequency[state].high = bytesToUInt32_bigEndian(high_p); + (*ariCoder)->cumulative_frequency[state].state = state; + + p = state_p + sizeof(uint16_t); + } + offset = 2*sizeof(int)+sizeof(uint64_t)+(*ariCoder)->numOfValidStates*10; + } + else + { + for(i=0;icumulative_frequency[state].low = bytesToUInt32_bigEndian(low_p); + (*ariCoder)->cumulative_frequency[state].high = bytesToUInt32_bigEndian(high_p); + (*ariCoder)->cumulative_frequency[state].state = state; + + p = state_p + sizeof(uint32_t); + } + offset = 2*sizeof(int)+sizeof(uint64_t)+(*ariCoder)->numOfValidStates*12; + } + } + else + { + if(numOfRealStates<=256) + { + for(i=0;icumulative_frequency[state].low = bytesToUInt64_bigEndian(low_p); + (*ariCoder)->cumulative_frequency[state].high = bytesToUInt64_bigEndian(high_p); + (*ariCoder)->cumulative_frequency[state].state = state; + + p = state_p + 1; + } + offset = 2*sizeof(int)+sizeof(uint64_t)+(*ariCoder)->numOfValidStates*17; + } + else if(numOfRealStates<=65536) + { + for(i=0;icumulative_frequency[state].low = bytesToUInt64_bigEndian(low_p); + (*ariCoder)->cumulative_frequency[state].high = bytesToUInt64_bigEndian(high_p); + (*ariCoder)->cumulative_frequency[state].state = state; + + p = state_p + sizeof(uint16_t); + } + offset = 2*sizeof(int)+sizeof(uint64_t)+(*ariCoder)->numOfValidStates*18; + } + else + { + for(i=0;icumulative_frequency[state].low = 
bytesToUInt64_bigEndian(low_p); + (*ariCoder)->cumulative_frequency[state].high = bytesToUInt64_bigEndian(high_p); + (*ariCoder)->cumulative_frequency[state].state = state; + + p = state_p + sizeof(uint32_t); + } + offset = 2*sizeof(int)+sizeof(uint64_t)+(*ariCoder)->numOfValidStates*20; + } + } + return offset; +} + +/** + * Arithmetic Encoding + * @param AriCoder *ariCoder (input) + * @param int *s (input) + * @param size_t length (input) + * @param unsigned char *out (output) + * @param size_t *outSize (output) + * + * */ +void ari_encode(AriCoder *ariCoder, int *s, size_t length, unsigned char *out, size_t *outSize) +{ + int pending_bits = 0; + size_t low = 0; + size_t high = MAX_CODE; + size_t i = 0, range = 0; + size_t count = ariCoder->total_frequency; + int c = 0, lackBits = 0; + *outSize = 0; + + unsigned char *outp = out; + + Prob *cumulative_frequency = ariCoder->cumulative_frequency; + unsigned int buf = 0; + + for (i=0;i= ONE_HALF ) + { + buf = output_bit_1_plus_pending(pending_bits); + put_codes_to_output(buf, pending_bits+1, &outp, &lackBits, outSize); + pending_bits = 0; + } + else if ( low >= ONE_FOURTH && high < THREE_FOURTHS ) + { + pending_bits++; + low -= ONE_FOURTH; + high -= ONE_FOURTH; + } else + break; + high <<= 1; + high++; + low <<= 1; + high &= MAX_CODE; + low &= MAX_CODE; + } + } + pending_bits++; + if(low < ONE_FOURTH) + { + buf = output_bit_0_plus_pending(pending_bits); + put_codes_to_output(buf, pending_bits+1, &outp, &lackBits, outSize); + } + else + { + buf = output_bit_1_plus_pending(pending_bits); + put_codes_to_output(buf, pending_bits+1, &outp, &lackBits, outSize); + } +} + +/** + * Get the integer code based on Arithmetic Coding Value + * @param AriCoder *ariCoder (input) + * @param size_t scaled_value (input) + * + * @return Prob* (output) + * + * */ +Prob* getCode(AriCoder *ariCoder, size_t scaled_value) +{ + int numOfRealStates = ariCoder->numOfRealStates; + int i = 0; + Prob *p = ariCoder->cumulative_frequency; + 
for(i=0;ihigh) + break; + } + return p; +} + +/** + * Get one bit from the input stream of bytes + * @param unsigned char* p (input): the current location to be read (byte) of the byte stream + * @param int offset (input): the offset of the specified byte in the byte stream + * + * @return unsigned char (output) : 1 or 0 + * */ +inline unsigned char get_bit(unsigned char* p, int offset) +{ + return ((*p) >> (7-offset)) & 0x01; +} + +/** + * Arithmetic Decoding algorithm + * @param AriCoder *ariCoder (input): the encoder with the constructed frequency information + * @param unsigned char *s (input): the compressed stream of bytes + * @param size_t s_len (input): the number of bytes in the 'unsigned char *s' + * @param size_t targetLength (input): the target number of elements in the type array + * @param int *out (output) : the result (type array decompressed from the stream 's') + * + * */ +void ari_decode(AriCoder *ariCoder, unsigned char *s, size_t s_len, size_t targetLength, int *out) +{ + size_t high = MAX_CODE; + size_t low = 0, i = 0; + size_t range = 0, scaled_value = 0; + size_t total_frequency = ariCoder->total_frequency; + unsigned char *sp = s+5; + unsigned int offset = 4; + size_t value = (bytesToUInt64_bigEndian(s) >> 20); //alignment with the MAX_CODE + size_t s_counter = sizeof(int); + + for(i=0;itotal_frequency - 1 ) / range; + Prob *p = getCode(ariCoder, scaled_value); + out[i] = p->state; //output the state to the 'out' array + high = low + (range*p->high)/total_frequency -1; + low = low + (range*p->low)/total_frequency; + + for( ; ; ) + { + if (high < ONE_HALF) { + //do nothing, bit is a zero + } else if ( low >= ONE_HALF ) + { + value -= ONE_HALF; //subtract one half from all three code values + low -= ONE_HALF; + high -= ONE_HALF; + } else if ( low >= ONE_FOURTH && high < THREE_FOURTHS ) + { + value -= ONE_FOURTH; + low -= ONE_FOURTH; + high -= ONE_FOURTH; + } else + break; + low <<= 1; + high <<= 1; + high++; + value <<= 1; + //load one bit 
from the input byte stream + if(s_counter < s_len) + { + value += get_bit(sp, offset++); + if(offset==8) + { + sp++; + s_counter++; + offset = 0; + } + } + } + } +} diff --git a/deps/SZ/sz/src/ByteToolkit.c b/deps/SZ/sz/src/ByteToolkit.c new file mode 100644 index 0000000000000000000000000000000000000000..c3d7ad1eb5ccfcabc6861a0a7e04d48d0098108e --- /dev/null +++ b/deps/SZ/sz/src/ByteToolkit.c @@ -0,0 +1,1063 @@ +/** + * @file ByteToolkit.c + * @author Sheng Di + * @date April, 2016 + * @brief Byte Toolkit + * (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. + */ + +#include +#include "sz.h" +#include "zlib.h" + +inline unsigned short bytesToUInt16_bigEndian(unsigned char* bytes) +{ + int temp = 0; + unsigned short res = 0; + + temp = bytes[0] & 0xff; + res |= temp; + + res <<= 8; + temp = bytes[1] & 0xff; + res |= temp; + + return res; +} + +inline unsigned int bytesToUInt32_bigEndian(unsigned char* bytes) +{ + unsigned int temp = 0; + unsigned int res = 0; + + res <<= 8; + temp = bytes[0] & 0xff; + res |= temp; + + res <<= 8; + temp = bytes[1] & 0xff; + res |= temp; + + res <<= 8; + temp = bytes[2] & 0xff; + res |= temp; + + res <<= 8; + temp = bytes[3] & 0xff; + res |= temp; + + return res; +} + +inline unsigned long bytesToUInt64_bigEndian(unsigned char* b) { + unsigned long temp = 0; + unsigned long res = 0; + + res <<= 8; + temp = b[0] & 0xff; + res |= temp; + + res <<= 8; + temp = b[1] & 0xff; + res |= temp; + + res <<= 8; + temp = b[2] & 0xff; + res |= temp; + + res <<= 8; + temp = b[3] & 0xff; + res |= temp; + + res <<= 8; + temp = b[4] & 0xff; + res |= temp; + + res <<= 8; + temp = b[5] & 0xff; + res |= temp; + + res <<= 8; + temp = b[6] & 0xff; + res |= temp; + + res <<= 8; + temp = b[7] & 0xff; + res |= temp; + + return res; +} + +inline short bytesToInt16_bigEndian(unsigned char* bytes) +{ + int temp = 0; + short res = 0; + + temp = bytes[0] & 0xff; + res |= temp; + + res <<= 8; 
+ temp = bytes[1] & 0xff; + res |= temp; + + return res; +} + +inline int bytesToInt32_bigEndian(unsigned char* bytes) +{ + int temp = 0; + int res = 0; + + res <<= 8; + temp = bytes[0] & 0xff; + res |= temp; + + res <<= 8; + temp = bytes[1] & 0xff; + res |= temp; + + res <<= 8; + temp = bytes[2] & 0xff; + res |= temp; + + res <<= 8; + temp = bytes[3] & 0xff; + res |= temp; + + return res; +} + +inline long bytesToInt64_bigEndian(unsigned char* b) { + long temp = 0; + long res = 0; + + res <<= 8; + temp = b[0] & 0xff; + res |= temp; + + res <<= 8; + temp = b[1] & 0xff; + res |= temp; + + res <<= 8; + temp = b[2] & 0xff; + res |= temp; + + res <<= 8; + temp = b[3] & 0xff; + res |= temp; + + res <<= 8; + temp = b[4] & 0xff; + res |= temp; + + res <<= 8; + temp = b[5] & 0xff; + res |= temp; + + res <<= 8; + temp = b[6] & 0xff; + res |= temp; + + res <<= 8; + temp = b[7] & 0xff; + res |= temp; + + return res; +} + +inline int bytesToInt_bigEndian(unsigned char* bytes) +{ + int temp = 0; + int res = 0; + + res <<= 8; + temp = bytes[0] & 0xff; + res |= temp; + + res <<= 8; + temp = bytes[1] & 0xff; + res |= temp; + + res <<= 8; + temp = bytes[2] & 0xff; + res |= temp; + + res <<= 8; + temp = bytes[3] & 0xff; + res |= temp; + + return res; +} + +/** + * @unsigned char *b the variable to store the converted bytes (length=4) + * @unsigned int num + * */ +inline void intToBytes_bigEndian(unsigned char *b, unsigned int num) +{ + b[0] = (unsigned char)(num >> 24); + b[1] = (unsigned char)(num >> 16); + b[2] = (unsigned char)(num >> 8); + b[3] = (unsigned char)(num); + + //note: num >> xxx already considered endian_type... 
+//if(dataEndianType==LITTLE_ENDIAN_DATA) +// symTransform_4bytes(*b); //change to BIG_ENDIAN_DATA +} + +inline void int64ToBytes_bigEndian(unsigned char *b, uint64_t num) +{ + b[0] = (unsigned char)(num>>56); + b[1] = (unsigned char)(num>>48); + b[2] = (unsigned char)(num>>40); + b[3] = (unsigned char)(num>>32); + b[4] = (unsigned char)(num>>24); + b[5] = (unsigned char)(num>>16); + b[6] = (unsigned char)(num>>8); + b[7] = (unsigned char)(num); +} + +inline void int32ToBytes_bigEndian(unsigned char *b, uint32_t num) +{ + b[0] = (unsigned char)(num >> 24); + b[1] = (unsigned char)(num >> 16); + b[2] = (unsigned char)(num >> 8); + b[3] = (unsigned char)(num); +} + +inline void int16ToBytes_bigEndian(unsigned char *b, uint16_t num) +{ + b[0] = (unsigned char)(num >> 8); + b[1] = (unsigned char)(num); +} + +/** + * @endianType: refers to the endian_type of unsigned char* b. + * */ +inline long bytesToLong_bigEndian(unsigned char* b) { + long temp = 0; + long res = 0; + + res <<= 8; + temp = b[0] & 0xff; + res |= temp; + + res <<= 8; + temp = b[1] & 0xff; + res |= temp; + + res <<= 8; + temp = b[2] & 0xff; + res |= temp; + + res <<= 8; + temp = b[3] & 0xff; + res |= temp; + + res <<= 8; + temp = b[4] & 0xff; + res |= temp; + + res <<= 8; + temp = b[5] & 0xff; + res |= temp; + + res <<= 8; + temp = b[6] & 0xff; + res |= temp; + + res <<= 8; + temp = b[7] & 0xff; + res |= temp; + + return res; +} + +inline void longToBytes_bigEndian(unsigned char *b, unsigned long num) +{ + b[0] = (unsigned char)(num>>56); + b[1] = (unsigned char)(num>>48); + b[2] = (unsigned char)(num>>40); + b[3] = (unsigned char)(num>>32); + b[4] = (unsigned char)(num>>24); + b[5] = (unsigned char)(num>>16); + b[6] = (unsigned char)(num>>8); + b[7] = (unsigned char)(num); +// if(dataEndianType==LITTLE_ENDIAN_DATA) +// symTransform_8bytes(*b); +} + + +inline long doubleToOSEndianLong(double value) +{ + ldouble buf; + buf.value = value; + return buf.lvalue; +} + +inline int floatToOSEndianInt(float 
value) +{ + lfloat buf; + buf.value = value; + return buf.ivalue; +} + +//TODO: debug: lfBuf.lvalue could be actually little_endian.... +inline short getExponent_float(float value) +{ + //int ivalue = floatToBigEndianInt(value); + + lfloat lbuf; + lbuf.value = value; + int ivalue = lbuf.ivalue; + + int expValue = (ivalue & 0x7F800000) >> 23; + expValue -= 127; + return (short)expValue; +} + +inline short getPrecisionReqLength_float(float precision) +{ + lfloat lbuf; + lbuf.value = precision; + int ivalue = lbuf.ivalue; + + int expValue = (ivalue & 0x7F800000) >> 23; + expValue -= 127; +// unsigned char the1stManBit = (unsigned char)((ivalue & 0x00400000) >> 22); +// if(the1stManBit==1) +// expValue--; + return (short)expValue; +} + +inline short getExponent_double(double value) +{ + //long lvalue = doubleToBigEndianLong(value); + + ldouble lbuf; + lbuf.value = value; + long lvalue = lbuf.lvalue; + + int expValue = (int)((lvalue & 0x7FF0000000000000) >> 52); + expValue -= 1023; + return (short)expValue; +} + +inline short getPrecisionReqLength_double(double precision) +{ + ldouble lbuf; + lbuf.value = precision; + long lvalue = lbuf.lvalue; + + int expValue = (int)((lvalue & 0x7FF0000000000000) >> 52); + expValue -= 1023; +// unsigned char the1stManBit = (unsigned char)((lvalue & 0x0008000000000000) >> 51); +// if(the1stManBit==1) +// expValue--; + return (short)expValue; +} + +unsigned char numberOfLeadingZeros_Int(int i) { + if (i == 0) + return 32; + unsigned char n = 1; + if (((unsigned int)i) >> 16 == 0) { n += 16; i <<= 16; } + if (((unsigned int)i) >> 24 == 0) { n += 8; i <<= 8; } + if (((unsigned int)i) >> 28 == 0) { n += 4; i <<= 4; } + if (((unsigned int)i) >> 30 == 0) { n += 2; i <<= 2; } + n -= ((unsigned int)i) >> 31; + return n; +} + +unsigned char numberOfLeadingZeros_Long(long i) { + if (i == 0) + return 64; + unsigned char n = 1; + int x = (int)(((unsigned long)i) >> 32); + if (x == 0) { n += 32; x = (int)i; } + if (((unsigned int)x) >> 16 == 0) { n 
+= 16; x <<= 16; } + if (((unsigned int)x) >> 24 == 0) { n += 8; x <<= 8; } + if (((unsigned int)x) >> 28 == 0) { n += 4; x <<= 4; } + if (((unsigned int)x) >> 30 == 0) { n += 2; x <<= 2; } + n -= ((unsigned int)x) >> 31; + return n; +} + +unsigned char getLeadingNumbers_Int(int v1, int v2) +{ + int v = v1 ^ v2; + return (unsigned char)numberOfLeadingZeros_Int(v); +} + +unsigned char getLeadingNumbers_Long(long v1, long v2) +{ + long v = v1 ^ v2; + return (unsigned char)numberOfLeadingZeros_Long(v); +} + +/** + * By default, the endian type is OS endian type. + * */ +short bytesToShort(unsigned char* bytes) +{ + lint16 buf; + memcpy(buf.byte, bytes, 2); + + return buf.svalue; +} + +void shortToBytes(unsigned char* b, short value) +{ + lint16 buf; + buf.svalue = value; + memcpy(b, buf.byte, 2); +} + +int bytesToInt(unsigned char* bytes) +{ + lfloat buf; + memcpy(buf.byte, bytes, 4); + return buf.ivalue; +} + +long bytesToLong(unsigned char* bytes) +{ + ldouble buf; + memcpy(buf.byte, bytes, 8); + return buf.lvalue; +} + +//the byte to input is in the big-endian format +inline float bytesToFloat(unsigned char* bytes) +{ + lfloat buf; + memcpy(buf.byte, bytes, 4); + if(sysEndianType==LITTLE_ENDIAN_SYSTEM) + symTransform_4bytes(buf.byte); + return buf.value; +} + +inline void floatToBytes(unsigned char *b, float num) +{ + lfloat buf; + buf.value = num; + memcpy(b, buf.byte, 4); + if(sysEndianType==LITTLE_ENDIAN_SYSTEM) + symTransform_4bytes(b); +} + +//the byte to input is in the big-endian format +inline double bytesToDouble(unsigned char* bytes) +{ + ldouble buf; + memcpy(buf.byte, bytes, 8); + if(sysEndianType==LITTLE_ENDIAN_SYSTEM) + symTransform_8bytes(buf.byte); + return buf.value; +} + +inline void doubleToBytes(unsigned char *b, double num) +{ + ldouble buf; + buf.value = num; + memcpy(b, buf.byte, 8); + if(sysEndianType==LITTLE_ENDIAN_SYSTEM) + symTransform_8bytes(b); +} + +int extractBytes(unsigned char* byteArray, size_t k, int validLength) +{ + size_t 
outIndex = k/8; + int innerIndex = k%8; + unsigned char intBytes[4]; + int length = innerIndex + validLength; + int byteNum = 0; + if(length%8==0) + byteNum = length/8; + else + byteNum = length/8+1; + + int i; + for(i = 0;iSZ_SIZE_TYPE-byteNum+i] = byteArray[outIndex+i]; + int result = bytesToInt_bigEndian(intBytes); + int rightMovSteps = innerIndex +(8 - (innerIndex+validLength)%8)%8; + result = result << innerIndex; + switch(byteNum) + { + case 1: + result = result & 0xff; + break; + case 2: + result = result & 0xffff; + break; + case 3: + result = result & 0xffffff; + break; + case 4: + break; + default: + printf("Error: other cases are impossible...\n"); + exit(0); + } + result = result >> rightMovSteps; + + return result; +} + +inline int getMaskRightCode(int m) { + switch (m) { + case 1: + return 0x01; + case 2: + return 0x03; + case 3: + return 0x07; + case 4: + return 0x0F; + case 5: + return 0x1F; + case 6: + return 0x3F; + case 7: + return 0X7F; + case 8: + return 0XFF; + default: + return 0; + } +} + +inline int getLeftMovingCode(int kMod8) +{ + return getMaskRightCode(8 - kMod8); +} + +inline int getRightMovingSteps(int kMod8, int resiBitLength) { + return 8 - kMod8 - resiBitLength; +} + +inline int getRightMovingCode(int kMod8, int resiBitLength) +{ + int rightMovingSteps = 8 - kMod8 - resiBitLength; + if(rightMovingSteps < 0) + { + switch(-rightMovingSteps) + { + case 1: + return 0x80; + case 2: + return 0xC0; + case 3: + return 0xE0; + case 4: + return 0xF0; + case 5: + return 0xF8; + case 6: + return 0xFC; + case 7: + return 0XFE; + default: + return 0; + } + } + else //if(rightMovingSteps >= 0) + { + int a = getMaskRightCode(8 - kMod8); + int b = getMaskRightCode(8 - kMod8 - resiBitLength); + int c = a - b; + return c; + } +} + +short* convertByteDataToShortArray(unsigned char* bytes, size_t byteLength) +{ + lint16 ls; + size_t i, stateLength = byteLength/2; + short* states = (short*)malloc(stateLength*sizeof(short)); + 
if(sysEndianType==dataEndianType) + { + for(i=0;iSZ_SIZE_TYPE==4) + result = bytesToInt_bigEndian(bytes);//4 + else + result = bytesToLong_bigEndian(bytes);//8 + return result; +} + +inline void sizeToBytes(unsigned char* outBytes, size_t size) +{ + if(exe_params->SZ_SIZE_TYPE==4) + intToBytes_bigEndian(outBytes, size);//4 + else + longToBytes_bigEndian(outBytes, size);//8 +} + +/** + * put 'buf_nbBits' bits represented by buf into a long byte stream (the current output byte pointer is p, where offset is the number of bits already filled out for this byte so far) + * */ +void put_codes_to_output(unsigned int buf, int bitSize, unsigned char** p, int* lackBits, size_t *outSize) +{ + int byteSize, byteSizep; + if(*lackBits == 0) + { + byteSize = bitSize%8==0 ? bitSize/8 : bitSize/8+1; //it's equal to the number of bytes involved (for *outSize) + byteSizep = bitSize >> 3; //it's used to move the pointer p for next data + intToBytes_bigEndian(*p, buf); + (*p) += byteSizep; + *outSize += byteSize; + (*lackBits) = bitSize%8==0 ? 0 : 8 - bitSize%8; + } + else + { + **p = (**p) | (unsigned char)(buf >> (32 - *lackBits)); + if((*lackBits) < bitSize) + { + (*p)++; + int newCode = buf << (*lackBits); + intToBytes_bigEndian(*p, newCode); + bitSize -= *lackBits; + byteSizep = bitSize >> 3; // =bitSize/8 + byteSize = bitSize%8==0 ? byteSizep : byteSizep+1; + *p += byteSizep; + (*outSize)+=byteSize; + (*lackBits) = bitSize%8==0 ? 
0 : 8 - bitSize%8; + } + else + { + (*lackBits) -= bitSize; + if(*lackBits==0) + (*p)++; + } + } +} + +void convertSZParamsToBytes(sz_params* params, unsigned char* result) +{ + //unsigned char* result = (unsigned char*)malloc(16); + unsigned char buf = 0; + //flag1: exe_params->optQuantMode(1bit), dataEndianType(1bit), sysEndianType(1bit), conf_params->szMode (1bit), conf_params->gzipMode (2bits), pwrType (2bits) + buf = exe_params->optQuantMode; + buf = (buf << 1) | dataEndianType; + buf = (buf << 1) | sysEndianType; + buf = (buf << 2) | params->szMode; + + int tmp = 0; + switch(params->gzipMode) + { + case Z_BEST_SPEED: + tmp = 0; + break; + case Z_DEFAULT_STRATEGY: + tmp = 1; + break; + case Z_BEST_COMPRESSION: + tmp = 2; + break; + } + buf = (buf << 2) | tmp; + //buf = (buf << 2) | params->pwr_type; //deprecated + result[0] = buf; + + //sampleDistance; //2 bytes + int16ToBytes_bigEndian(&result[1], params->sampleDistance); + + //conf_params->predThreshold; // 2 bytes + short tmp2 = params->predThreshold * 10000; + int16ToBytes_bigEndian(&result[3], tmp2); + + //errorBoundMode; //4bits(0.5 byte) + result[5] = params->errorBoundMode; + + //data type (float, double, int8, int16, ....) 
//10 choices, so 4 bits + result[5] = (result[5] << 4) | (params->dataType & 0x17); + + //result[5]: abs_err_bound or psnr //4 bytes + //result[9]: rel_bound_ratio or pwr_err_bound//4 bytes + switch(params->errorBoundMode) + { + case ABS: + floatToBytes(&result[6], (float)(params->absErrBound)); //big_endian + memset(&result[10], 0, 4); + break; + case REL: + memset(&result[6], 0, 4); + floatToBytes(&result[10], (float)(params->relBoundRatio)); //big_endian + break; + case ABS_AND_REL: + case ABS_OR_REL: + floatToBytes(&result[6], (float)(params->absErrBound)); + floatToBytes(&result[10], (float)(params->relBoundRatio)); //big_endian + break; + case PSNR: + floatToBytes(&result[6], (float)(params->psnr)); + memset(&result[9], 0, 4); + break; + case ABS_AND_PW_REL: + case ABS_OR_PW_REL: + floatToBytes(&result[6], (float)(params->absErrBound)); + floatToBytes(&result[10], (float)(params->pw_relBoundRatio)); //big_endian + break; + case REL_AND_PW_REL: + case REL_OR_PW_REL: + floatToBytes(&result[6], (float)(params->relBoundRatio)); + floatToBytes(&result[10], (float)(params->pw_relBoundRatio)); //big_endian + break; + case PW_REL: + memset(&result[6], 0, 4); + floatToBytes(&result[10], (float)(params->pw_relBoundRatio)); //big_endian + break; + } + + //compressor + result[14] = (unsigned char)params->sol_ID; + + //int16ToBytes_bigEndian(&result[14], (short)(params->segment_size)); + + if(exe_params->optQuantMode==1) + int32ToBytes_bigEndian(&result[16], params->max_quant_intervals); + else + int32ToBytes_bigEndian(&result[16], params->quantization_intervals); + + if(params->dataType==SZ_FLOAT) + { + floatToBytes(&result[20], params->fmin); + floatToBytes(&result[24], params->fmax); + } + else + { + doubleToBytes(&result[20], params->dmin); + doubleToBytes(&result[28], params->dmax); + } + +} + +void convertBytesToSZParams(unsigned char* bytes, sz_params* params) +{ + unsigned char flag1 = bytes[0]; + exe_params->optQuantMode = (flag1 & 0x40) >> 6; + dataEndianType = 
(flag1 & 0x20) >> 5; + //sysEndianType = (flag1 & 0x10) >> 4; + + params->szMode = (flag1 & 0x0c) >> 2; + + int tmp = (flag1 & 0x03); + switch(tmp) + { + case 0: + params->gzipMode = Z_BEST_SPEED; + break; + case 1: + params->gzipMode = Z_DEFAULT_STRATEGY; + break; + case 2: + params->gzipMode = Z_BEST_COMPRESSION; + break; + } + + //params->pwr_type = (flag1 & 0x03) >> 0; + + params->sampleDistance = bytesToInt16_bigEndian(&bytes[1]); + + params->predThreshold = 1.0*bytesToInt16_bigEndian(&bytes[3])/10000.0; + + params->dataType = bytes[5] & 0x07; + + params->errorBoundMode = (bytes[5] & 0xf0) >> 4; + + switch(params->errorBoundMode) + { + case ABS: + params->absErrBound = bytesToFloat(&bytes[6]); + break; + case REL: + params->relBoundRatio = bytesToFloat(&bytes[10]); + break; + case ABS_AND_REL: + case ABS_OR_REL: + params->absErrBound = bytesToFloat(&bytes[6]); + params->relBoundRatio = bytesToFloat(&bytes[10]); + break; + case PSNR: + params->psnr = bytesToFloat(&bytes[6]); + break; + case ABS_AND_PW_REL: + case ABS_OR_PW_REL: + params->absErrBound = bytesToFloat(&bytes[6]); + params->pw_relBoundRatio = bytesToFloat(&bytes[10]); + break; + case REL_AND_PW_REL: + case REL_OR_PW_REL: + params->relBoundRatio = bytesToFloat(&bytes[6]); + params->pw_relBoundRatio = bytesToFloat(&bytes[10]); + break; + case PW_REL: + params->pw_relBoundRatio = bytesToFloat(&bytes[10]); + } + + //segment_size // 2 bytes + //params->segment_size = bytesToInt16_bigEndian(&bytes[14]); + params->sol_ID = (int)(bytes[14]); + + if(exe_params->optQuantMode==1) + { + params->max_quant_intervals = bytesToInt32_bigEndian(&bytes[16]); + params->quantization_intervals = 0; + } + else + { + params->max_quant_intervals = 0; + params->quantization_intervals = bytesToInt32_bigEndian(&bytes[16]); + } + + if(params->dataType==SZ_FLOAT) + { + params->fmin = bytesToFloat(&bytes[20]); + params->fmax = bytesToFloat(&bytes[24]); + } + else if(params->dataType==SZ_DOUBLE) + { + params->dmin = 
bytesToDouble(&bytes[20]); + params->dmax = bytesToDouble(&bytes[28]); + } + +} diff --git a/deps/SZ/sz/src/CacheTable.c b/deps/SZ/sz/src/CacheTable.c new file mode 100644 index 0000000000000000000000000000000000000000..296be0f8face9767fd1848051a96da688669d6fa --- /dev/null +++ b/deps/SZ/sz/src/CacheTable.c @@ -0,0 +1,100 @@ +/** + * @file CacheTable.c + * @author Xiangyu Zou, Tao Lu, Wen Xia, Xuan Wang, Weizhe Zhang, Sheng Di, Dingwen Tao + * @date Jan, 2019 + * @brief Cache Table + * (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. + */ + +#include +#include "CacheTable.h" + +double* g_CacheTable; +uint32_t * g_InverseTable; +uint32_t baseIndex; +uint32_t topIndex; +int bits; + +inline int doubleGetExpo(double d){ + long* ptr = (long*)&d; + *ptr = ((*ptr) >> 52) - 1023; + return *ptr; +} + +int CacheTableGetRequiredBits(double precision, int quantization_intervals){ + double min_distance = pow((1+precision), -(quantization_intervals>>1)) * precision; + return -(doubleGetExpo(min_distance)); +} + +inline uint32_t CacheTableGetIndex(float value, int bits){ + uint32_t* ptr = (uint32_t*)&value; + int shift = 32 - 9 - bits; + if(shift>0){ + return (*ptr) >> shift; + }else{ + return 0; + } +} + +inline uint64_t CacheTableGetIndexDouble(double value, int bits){ + uint64_t* ptr = (uint64_t*)&value; + int shift = 64 - 12 - bits; + if(shift>0){ + return (*ptr) >> shift; + }else{ + return 0; + } +} + +inline int CacheTableIsInBoundary(uint32_t index){ + if(index <= topIndex && index > baseIndex){ + return 1; + }else{ + return 0; + } +} + +void CacheTableBuild(double * table, int count, double smallest, double largest, double precision, int quantization_intervals){ + bits = CacheTableGetRequiredBits(precision, quantization_intervals); + baseIndex = CacheTableGetIndex((float)smallest, bits)+1; + topIndex = CacheTableGetIndex((float)largest, bits); + uint32_t range = topIndex - baseIndex + 1; + 
g_InverseTable = (uint32_t *)malloc(sizeof(uint32_t) * range); + + /* + uint32_t fillInPos = 0; + for(int i=0; i fillInPos){ + for(int j=fillInPos; j0; i--){ + uint32_t upperIndex = CacheTableGetIndex((float)table[i]*(1+precision), bits); + uint32_t lowerIndex = CacheTableGetIndex((float)table[i]/(1+precision), bits); + for(uint32_t j = lowerIndex; j<=upperIndex; j++){ + if(jtopIndex){ + continue; + } + g_InverseTable[j-baseIndex] = i; + } + } + +} + +inline uint32_t CacheTableFind(uint32_t index){ + return g_InverseTable[index-baseIndex]; +} + +void CacheTableFree(){ + free(g_InverseTable); +} diff --git a/deps/SZ/sz/src/CompressElement.c b/deps/SZ/sz/src/CompressElement.c new file mode 100644 index 0000000000000000000000000000000000000000..0937300c1d54a8215aea3b5585ea102fbf6f3bfc --- /dev/null +++ b/deps/SZ/sz/src/CompressElement.c @@ -0,0 +1,255 @@ +/** + * @file CompressElement.c + * @author Sheng Di + * @date May, 2016 + * @brief Functions of CompressElement + * (C) 2015 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. 
+ */ + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wchar-subscripts" + +#include +#include +#include +#include +#include + +char* decompressGroupIDArray(unsigned char* bytes, size_t dataLength) +{ + HuffmanTree* huffmanTree = SZ_Reset(); //create a default huffman tree + int* standGroupID = (int*)malloc(dataLength*sizeof(int)); + decode_withTree(huffmanTree, bytes, dataLength, standGroupID); + SZ_ReleaseHuffman(huffmanTree); + + char* groupID = (char*)malloc(dataLength*sizeof(char)); + size_t i = 0; + int lastGroupIDValue = 0, curStandIDValue = 0, curGroupIDValue = 0; + int offset = 2*(GROUP_COUNT + 2); + + curGroupIDValue = groupID[0] = standGroupID[0] - GROUP_COUNT; + lastGroupIDValue = curGroupIDValue; + for(i=1;i=0) + { + if(flags[groupNum]==0) + flags[groupNum] = 1; + groups[groupNum] = decValue; + } + else + { + groups[0] = decValue; + flags[0] = 1; + } + + if(oriValue>=0) + *curGroupID = groupNum+2; //+[-1,0,1,2,3,....,16] is mapped to [1,2,....,18] + else + *curGroupID = -(groupNum+2); //-[-1,0,1,2,3,....,16] is mapped to [-1,-2,....,-18] +} + +inline void listAdd_double_group(double *groups, int *flags, char groupNum, double oriValue, double decValue, char* curGroupID) +{ + if(groupNum>=0) + { + if(flags[groupNum]==0) + flags[groupNum] = 1; + groups[groupNum] = decValue; + } + else + { + groups[0] = decValue; + flags[0] = 1; + } + + if(oriValue>=0) + *curGroupID = groupNum+2; //+[-1,0,1,2,3,....,16] is mapped to [1,2,....,18] + else + *curGroupID = -(groupNum+2); //-[-1,0,1,2,3,....,16] is mapped to [-1,-2,....,-18] +} + +/** + * Determine whether the prediction value minErr is valid. 
+ * + * */ +inline int validPrediction_double(double minErr, double precision) +{ + if(minErr<=precision) + return 1; + else + return 0; +} + +inline int validPrediction_float(float minErr, float precision) +{ + if(minErr<=precision) + return 1; + else + return 0; +} + +double* generateGroupErrBounds(int errorBoundMode, double realPrecision, double pwrErrBound) +{ + double pwrError; + double* result = (double*)malloc(GROUP_COUNT*sizeof(double)); + int i = 0; + for(i=0;ileadingZeroBytes = leadingNum; //0,1,2,or 3 + memcpy(lce->integerMidBytes,intMidBytes,intMidBytes_Length); + lce->integerMidBytes_Length = intMidBytes_Length; //they are mid_bits actually + lce->resMidBitsLength = resiMidBitsLength; + lce->residualMidBits = resiBits; +} + +void updateLossyCompElement_Double(unsigned char* curBytes, unsigned char* preBytes, + int reqBytesLength, int resiBitsLength, LossyCompressionElement *lce) +{ + int resiIndex, intMidBytes_Length = 0; + int leadingNum = compIdenticalLeadingBytesCount_double(preBytes, curBytes); //in fact, float is enough for both single-precision and double-precisiond ata. + int fromByteIndex = leadingNum; + int toByteIndex = reqBytesLength; //later on: should use "< toByteIndex" to tarverse.... 
+ if(fromByteIndex < toByteIndex) + { + intMidBytes_Length = reqBytesLength - leadingNum; + memcpy(lce->integerMidBytes, &(curBytes[fromByteIndex]), intMidBytes_Length); + } + int resiBits = 0; + if(resiBitsLength!=0) + { + resiIndex = reqBytesLength; + if(resiIndex < 8) + resiBits = (curBytes[resiIndex] & 0xFF) >> (8-resiBitsLength); + } + lce->leadingZeroBytes = leadingNum; + lce->integerMidBytes_Length = intMidBytes_Length; + lce->resMidBitsLength = resiBitsLength; + lce->residualMidBits = resiBits; +} + +inline void updateLossyCompElement_Float(unsigned char* curBytes, unsigned char* preBytes, + int reqBytesLength, int resiBitsLength, LossyCompressionElement *lce) +{ + int resiIndex, intMidBytes_Length = 0; + int leadingNum = compIdenticalLeadingBytesCount_float(preBytes, curBytes); //in fact, float is enough for both single-precision and double-precisiond ata. + int fromByteIndex = leadingNum; + int toByteIndex = reqBytesLength; //later on: should use "< toByteIndex" to tarverse.... + if(fromByteIndex < toByteIndex) + { + intMidBytes_Length = reqBytesLength - leadingNum; + memcpy(lce->integerMidBytes, &(curBytes[fromByteIndex]), intMidBytes_Length); + } + int resiBits = 0; + if(resiBitsLength!=0) + { + resiIndex = reqBytesLength; + if(resiIndex < 8) + resiBits = (curBytes[resiIndex] & 0xFF) >> (8-resiBitsLength); + } + lce->leadingZeroBytes = leadingNum; + lce->integerMidBytes_Length = intMidBytes_Length; + lce->resMidBitsLength = resiBitsLength; + lce->residualMidBits = resiBits; +} + +#pragma GCC diagnostic pop diff --git a/deps/SZ/sz/src/DynamicByteArray.c b/deps/SZ/sz/src/DynamicByteArray.c new file mode 100644 index 0000000000000000000000000000000000000000..64b7d5c7b4a59bd7682b912ce0eca0bdc5c50241 --- /dev/null +++ b/deps/SZ/sz/src/DynamicByteArray.c @@ -0,0 +1,68 @@ +/** + * @file DynamicByteArray.c + * @author Sheng Di + * @date May, 2016 + * @brief Dynamic Byte Array + * (C) 2015 by Mathematics and Computer Science (MCS), Argonne National Laboratory. 
+ * See COPYRIGHT in top-level directory. + */ + +#include +#include +#include +#include "DynamicByteArray.h" + +void new_DBA(DynamicByteArray **dba, size_t cap) { + *dba = (DynamicByteArray *)malloc(sizeof(DynamicByteArray)); + (*dba)->size = 0; + (*dba)->capacity = cap; + (*dba)->array = (unsigned char*)malloc(sizeof(unsigned char)*cap); + } + +void convertDBAtoBytes(DynamicByteArray *dba, unsigned char** bytes) +{ + size_t size = dba->size; + if(size>0) + *bytes = (unsigned char*)malloc(size * sizeof(unsigned char)); + else + *bytes = NULL; + memcpy(*bytes, dba->array, size*sizeof(unsigned char)); +} + +void free_DBA(DynamicByteArray *dba) +{ + free(dba->array); + free(dba); +} + +inline unsigned char getDBA_Data(DynamicByteArray *dba, size_t pos) +{ + if(pos>=dba->size) + { + printf("Error: wrong position of DBA (impossible case unless bugs elsewhere in the code?).\n"); + exit(0); + } + return dba->array[pos]; +} + +inline void addDBA_Data(DynamicByteArray *dba, unsigned char value) +{ + if(dba->size==dba->capacity) + { + dba->capacity = dba->capacity << 1; + dba->array = (unsigned char *)realloc(dba->array, dba->capacity*sizeof(unsigned char)); + } + dba->array[dba->size] = value; + dba->size ++; +} + +inline void memcpyDBA_Data(DynamicByteArray *dba, unsigned char* data, size_t length) +{ + if(dba->size + length > dba->capacity) + { + dba->capacity = dba->size + length; + dba->array = (unsigned char *)realloc(dba->array, dba->capacity*sizeof(unsigned char)); + } + memcpy(&(dba->array[dba->size]), data, length); + dba->size += length; +} diff --git a/deps/SZ/sz/src/DynamicDoubleArray.c b/deps/SZ/sz/src/DynamicDoubleArray.c new file mode 100644 index 0000000000000000000000000000000000000000..54bbb109aaa500e6412357f5504e1616e76ed03f --- /dev/null +++ b/deps/SZ/sz/src/DynamicDoubleArray.c @@ -0,0 +1,57 @@ +/** + * @file DynamicFloatArray.c + * @author Sheng Di + * @date May, 2016 + * @brief Dynamic Float Array + * (C) 2015 by Mathematics and Computer Science 
(MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. + */ + +#include +#include +#include +#include "DynamicDoubleArray.h" + +void new_DDA(DynamicDoubleArray **dda, size_t cap) { + *dda = (DynamicDoubleArray *)malloc(sizeof(DynamicDoubleArray)); + (*dda)->size = 0; + (*dda)->capacity = cap; + (*dda)->array = (double*)malloc(sizeof(double)*cap); + } + +void convertDDAtoDoubles(DynamicDoubleArray *dba, double **data) +{ + size_t size = dba->size; + if(size>0) + *data = (double*)malloc(size * sizeof(double)); + else + *data = NULL; + memcpy(*data, dba->array, size*sizeof(double)); +} + +void free_DDA(DynamicDoubleArray *dda) +{ + free(dda->array); + free(dda); +} + +double getDDA_Data(DynamicDoubleArray *dda, size_t pos) +{ + if(pos>=dda->size) + { + printf("Error: wrong position of DIA.\n"); + exit(0); + } + return dda->array[pos]; +} + +void addDDA_Data(DynamicDoubleArray *dda, double value) +{ + if(dda->size==dda->capacity) + { + dda->capacity *= 2; + dda->array = (double *)realloc(dda->array, dda->capacity*sizeof(double)); + } + dda->array[dda->size] = value; + dda->size ++; +} diff --git a/deps/SZ/sz/src/DynamicFloatArray.c b/deps/SZ/sz/src/DynamicFloatArray.c new file mode 100644 index 0000000000000000000000000000000000000000..1a80a4888f79998b706d318fadd79485a3f19ca4 --- /dev/null +++ b/deps/SZ/sz/src/DynamicFloatArray.c @@ -0,0 +1,57 @@ +/** + * @file DynamicFloatArray.c + * @author Sheng Di + * @date May, 2016 + * @brief Dynamic Float Array + * (C) 2015 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. 
+ */ + +#include +#include +#include +#include "DynamicFloatArray.h" + +void new_DFA(DynamicFloatArray **dfa, size_t cap) { + *dfa = (DynamicFloatArray *)malloc(sizeof(DynamicFloatArray)); + (*dfa)->size = 0; + (*dfa)->capacity = cap; + (*dfa)->array = (float*)malloc(sizeof(float)*cap); + } + +void convertDFAtoFloats(DynamicFloatArray *dfa, float **data) +{ + size_t size = dfa->size; + if(size>0) + *data = (float*)malloc(size * sizeof(float)); + else + *data = NULL; + memcpy(*data, dfa->array, size*sizeof(float)); +} + +void free_DFA(DynamicFloatArray *dfa) +{ + free(dfa->array); + free(dfa); +} + +float getDFA_Data(DynamicFloatArray *dfa, size_t pos) +{ + if(pos>=dfa->size) + { + printf("Error: wrong position of DIA.\n"); + exit(0); + } + return dfa->array[pos]; +} + +void addDFA_Data(DynamicFloatArray *dfa, float value) +{ + if(dfa->size==dfa->capacity) + { + dfa->capacity *= 2; + dfa->array = (float *)realloc(dfa->array, dfa->capacity*sizeof(float)); + } + dfa->array[dfa->size] = value; + dfa->size++; +} diff --git a/deps/SZ/sz/src/DynamicIntArray.c b/deps/SZ/sz/src/DynamicIntArray.c new file mode 100644 index 0000000000000000000000000000000000000000..347e3a18080b53b0ce10890728f61262ddeee1b2 --- /dev/null +++ b/deps/SZ/sz/src/DynamicIntArray.c @@ -0,0 +1,57 @@ +/** + * @file DynamicIntArray.c + * @author Sheng Di + * @date May, 2016 + * @brief Dynamic Int Array + * (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. 
+ */ + +#include +#include +#include +#include "DynamicIntArray.h" + +void new_DIA(DynamicIntArray **dia, size_t cap) { + *dia = (DynamicIntArray *)malloc(sizeof(DynamicIntArray)); + (*dia)->size = 0; + (*dia)->capacity = cap; + (*dia)->array = (unsigned char*)malloc(sizeof(unsigned char)*cap); + } + +void convertDIAtoInts(DynamicIntArray *dia, unsigned char **data) +{ + size_t size = dia->size; + if(size>0) + *data = (unsigned char*)malloc(size * sizeof(char)); + else + *data = NULL; + memcpy(*data, dia->array, size*sizeof(unsigned char)); +} + +void free_DIA(DynamicIntArray *dia) +{ + free(dia->array); + free(dia); +} + +int getDIA_Data(DynamicIntArray *dia, size_t pos) +{ + if(pos>=dia->size) + { + printf("Error: wrong position of DIA.\n"); + exit(0); + } + return dia->array[pos]; +} + +inline void addDIA_Data(DynamicIntArray *dia, int value) +{ + if(dia->size==dia->capacity) + { + dia->capacity = dia->capacity << 1; + dia->array = (unsigned char *)realloc(dia->array, dia->capacity*sizeof(unsigned char)); + } + dia->array[dia->size] = (unsigned char)value; + dia->size ++; +} diff --git a/deps/SZ/sz/src/Huffman.c b/deps/SZ/sz/src/Huffman.c new file mode 100644 index 0000000000000000000000000000000000000000..f0f95134a1b0c2428a60860016e4767327562035 --- /dev/null +++ b/deps/SZ/sz/src/Huffman.c @@ -0,0 +1,932 @@ +/** + * @file Huffman.c + * @author Sheng Di + * @date Aug., 2016 + * @brief Customized Huffman Encoding, Compression and Decompression functions + * (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. 
+ */ + +#include +#include +#include +#include "Huffman.h" +#include "sz.h" + + +HuffmanTree* createHuffmanTree(int stateNum) +{ + HuffmanTree *huffmanTree = (HuffmanTree*)malloc(sizeof(HuffmanTree)); + memset(huffmanTree, 0, sizeof(HuffmanTree)); + huffmanTree->stateNum = stateNum; + huffmanTree->allNodes = 2*stateNum; + + huffmanTree->pool = (struct node_t*)malloc(huffmanTree->allNodes*2*sizeof(struct node_t)); + huffmanTree->qqq = (node*)malloc(huffmanTree->allNodes*2*sizeof(node)); + huffmanTree->code = (unsigned long**)malloc(huffmanTree->stateNum*sizeof(unsigned long*)); + huffmanTree->cout = (unsigned char *)malloc(huffmanTree->stateNum*sizeof(unsigned char)); + + memset(huffmanTree->pool, 0, huffmanTree->allNodes*2*sizeof(struct node_t)); + memset(huffmanTree->qqq, 0, huffmanTree->allNodes*2*sizeof(node)); + memset(huffmanTree->code, 0, huffmanTree->stateNum*sizeof(unsigned long*)); + memset(huffmanTree->cout, 0, huffmanTree->stateNum*sizeof(unsigned char)); + huffmanTree->qq = huffmanTree->qqq - 1; + huffmanTree->n_nodes = 0; + huffmanTree->n_inode = 0; + huffmanTree->qend = 1; + + return huffmanTree; +} + +HuffmanTree* createDefaultHuffmanTree() +{ + int maxRangeRadius = 32768; + int stateNum = maxRangeRadius << 1; //*2 + + return createHuffmanTree(stateNum); +} + +node new_node(HuffmanTree* huffmanTree, size_t freq, unsigned int c, node a, node b) +{ + node n = huffmanTree->pool + huffmanTree->n_nodes++; + if (freq) + { + n->c = c; + n->freq = freq; + n->t = 1; + } + else { + n->left = a; + n->right = b; + n->freq = a->freq + b->freq; + n->t = 0; + //n->c = 0; + } + return n; +} + +node new_node2(HuffmanTree *huffmanTree, unsigned int c, unsigned char t) +{ + huffmanTree->pool[huffmanTree->n_nodes].c = c; + huffmanTree->pool[huffmanTree->n_nodes].t = t; + return huffmanTree->pool + huffmanTree->n_nodes++; +} + +/* priority queue */ +void qinsert(HuffmanTree *huffmanTree, node n) +{ + int j, i = huffmanTree->qend++; + while ((j = (i>>1))) //j=i/2 + { + if 
(huffmanTree->qq[j]->freq <= n->freq) break; + huffmanTree->qq[i] = huffmanTree->qq[j], i = j; + } + huffmanTree->qq[i] = n; +} + +node qremove(HuffmanTree* huffmanTree) +{ + int i, l; + node n = huffmanTree->qq[i = 1]; + node p; + if (huffmanTree->qend < 2) return 0; + huffmanTree->qend --; + huffmanTree->qq[i] = huffmanTree->qq[huffmanTree->qend]; + + while ((l = (i<<1)) < huffmanTree->qend) //l=(i*2) + { + if (l + 1 < huffmanTree->qend && huffmanTree->qq[l + 1]->freq < huffmanTree->qq[l]->freq) l++; + if(huffmanTree->qq[i]->freq > huffmanTree->qq[l]->freq) + { + p = huffmanTree->qq[i]; + huffmanTree->qq[i] = huffmanTree->qq[l]; + huffmanTree->qq[l] = p; + i = l; + } + else + { + break; + } + + } + + return n; +} + +/* walk the tree and put 0s and 1s */ +/** + * @out1 should be set to 0. + * @out2 should be 0 as well. + * @index: the index of the byte + * */ +void build_code(HuffmanTree *huffmanTree, node n, int len, unsigned long out1, unsigned long out2) +{ + if (n->t) { + huffmanTree->code[n->c] = (unsigned long*)malloc(2*sizeof(unsigned long)); + if(len<=64) + { + (huffmanTree->code[n->c])[0] = out1 << (64 - len); + (huffmanTree->code[n->c])[1] = out2; + } + else + { + (huffmanTree->code[n->c])[0] = out1; + (huffmanTree->code[n->c])[1] = out2 << (128 - len); + } + huffmanTree->cout[n->c] = (unsigned char)len; + return; + } + int index = len >> 6; //=len/64 + if(index == 0) + { + out1 = out1 << 1; + out1 = out1 | 0; + build_code(huffmanTree, n->left, len + 1, out1, 0); + out1 = out1 | 1; + build_code(huffmanTree, n->right, len + 1, out1, 0); + } + else + { + if(len%64!=0) + out2 = out2 << 1; + out2 = out2 | 0; + build_code(huffmanTree, n->left, len + 1, out1, out2); + out2 = out2 | 1; + build_code(huffmanTree, n->right, len + 1, out1, out2); + } +} + +/** + * Compute the frequency of the data and build the Huffman tree + * @param HuffmanTree* huffmanTree (output) + * @param int *s (input) + * @param size_t length (input) + * */ +void init(HuffmanTree* 
huffmanTree, int *s, size_t length) +{ + size_t i, index; + size_t *freq = (size_t *)malloc(huffmanTree->allNodes*sizeof(size_t)); + memset(freq, 0, huffmanTree->allNodes*sizeof(size_t)); + for(i = 0;i < length;i++) + { + index = s[i]; + freq[index]++; + } + + for (i = 0; i < huffmanTree->allNodes; i++) + if (freq[i]) + qinsert(huffmanTree, new_node(huffmanTree, freq[i], i, 0, 0)); + + while (huffmanTree->qend > 2) + qinsert(huffmanTree, new_node(huffmanTree, 0, 0, qremove(huffmanTree), qremove(huffmanTree))); + + build_code(huffmanTree, huffmanTree->qq[1], 0, 0, 0); + free(freq); +} + +void init_static(HuffmanTree* huffmanTree, int *s, size_t length) +{ + size_t i; + size_t *freq = (size_t *)malloc(huffmanTree->allNodes*sizeof(size_t)); + memset(freq, 0, huffmanTree->allNodes*sizeof(size_t)); + + + for (i = 0; i < huffmanTree->allNodes; i++) + if (freq[i]) + qinsert(huffmanTree, new_node(huffmanTree, freq[i], i, 0, 0)); + + while (huffmanTree->qend > 2) + qinsert(huffmanTree, new_node(huffmanTree, 0, 0, qremove(huffmanTree), qremove(huffmanTree))); + + build_code(huffmanTree, huffmanTree->qq[1], 0, 0, 0); + free(freq); +} + +void encode(HuffmanTree *huffmanTree, int *s, size_t length, unsigned char *out, size_t *outSize) +{ + size_t i = 0; + unsigned char bitSize = 0, byteSize, byteSizep; + int state; + unsigned char *p = out; + int lackBits = 0; + //long totalBitSize = 0, maxBitSize = 0, bitSize21 = 0, bitSize32 = 0; + for (i = 0;icout[state]; + + //printf("%d %d : %d %u\n",i, state, bitSize, (code[state])[0] >> (64-cout[state])); + //debug: compute the average bitSize and the count that is over 32... + /*if(bitSize>=21) + bitSize21++; + if(bitSize>=32) + bitSize32++; + if(maxBitSizecode[state])[0]); + p += byteSizep; + } + else //byteSize>8 + { + longToBytes_bigEndian(p, (huffmanTree->code[state])[0]); + p += 8; + longToBytes_bigEndian(p, (huffmanTree->code[state])[1]); + p += (byteSizep - 8); + } + *outSize += byteSize; + lackBits = bitSize%8==0 ? 
0 : 8 - bitSize%8; + } + else + { + *p = (*p) | (unsigned char)((huffmanTree->code[state])[0] >> (64 - lackBits)); + if(lackBits < bitSize) + { + p++; + //(*outSize)++; + long newCode = (huffmanTree->code[state])[0] << lackBits; + longToBytes_bigEndian(p, newCode); + + if(bitSize<=64) + { + bitSize -= lackBits; + byteSize = bitSize%8==0 ? bitSize/8 : bitSize/8+1; + byteSizep = bitSize/8; + p += byteSizep; + (*outSize)+=byteSize; + lackBits = bitSize%8==0 ? 0 : 8 - bitSize%8; + } + else //bitSize > 64 + { + byteSizep = 7; //must be 7 bytes, because lackBits!=0 + p+=byteSizep; + (*outSize)+=byteSize; + + bitSize -= 64; + if(lackBits < bitSize) + { + *p = (*p) | (unsigned char)((huffmanTree->code[state])[0] >> (64 - lackBits)); + p++; + //(*outSize)++; + newCode = (huffmanTree->code[state])[1] << lackBits; + longToBytes_bigEndian(p, newCode); + bitSize -= lackBits; + byteSize = bitSize%8==0 ? bitSize/8 : bitSize/8+1; + byteSizep = bitSize/8; + p += byteSizep; + (*outSize)+=byteSize; + lackBits = bitSize%8==0 ? 
0 : 8 - bitSize%8; + } + else //lackBits >= bitSize + { + *p = (*p) | (unsigned char)((huffmanTree->code[state])[0] >> (64 - bitSize)); + lackBits -= bitSize; + } + } + } + else //lackBits >= bitSize + { + lackBits -= bitSize; + if(lackBits==0) + p++; + } + } + } +// for(i=0;it) //root->t==1 means that all state values are the same (constant) + { + for(count=0;countc; + return; + } + + for(i=0;count>3; //i/8 + r = i%8; + if(((s[byteIndex] >> (7-r)) & 0x01) == 0) + n = n->left; + else + n = n->right; + + if (n->t) { + //putchar(n->c); + out[count] = n->c; + n = t; + count++; + } + } +// putchar('\n'); + if (t != n) printf("garbage input\n"); + return; +} + +void decode_MSST19(unsigned char *s, size_t targetLength, node t, int *out, int maxBits) +{ + size_t count = 0; + node n = t; + + if(n->t) //root->t==1 means that all state values are the same (constant) + { + for(count=0;countc; + return; + } + + if(maxBits > 16){ + maxBits = 16; + } + + int tableSize = 1 << maxBits; + int* valueTable = (int*)malloc(tableSize * sizeof(int)); + uint8_t* lengthTable = (uint8_t*)malloc(tableSize * sizeof(int)); + node* nodeTable = (node*)malloc(tableSize * sizeof(node)); + uint32_t maskTable[maxBits+8]; + int j; + for(uint32_t i=0; it && j < maxBits){ + uint32_t res = i >> (maxBits - j - 1); + if((res & 0x00000001) == 0){ + n = n->left; + }else{ + n = n->right; + } + j++; + } + if(!n->t){ + nodeTable[i] = n; + valueTable[i] = -1; + lengthTable[i] = maxBits; + }else{ + valueTable[i] = n->c; + lengthTable[i] = j; + } + } + for(int i=0; i> (leftBits - maxBits); + int value = valueTable[index]; + if(value != -1){ + out[count] = value; + int bitLength = lengthTable[index]; + leftBits -= bitLength; + uint32_t avoidHeadMask = maskTable[maxBits + 8 - leftBits - 1]; + currentValue = (currentValue & avoidHeadMask); + count++; + }else{ + int bitLength = lengthTable[index]; + leftBits -= bitLength; + n = nodeTable[index]; + while(!n->t){ + if(!leftBits){ + currentValue = currentValue << 8; + 
currentValue += s[i]; + leftBits += 8; + i++; + } + if(((currentValue >> (leftBits - 1)) & 0x01) == 0) + n = n->left; + else + n = n->right; + leftBits--; + } + currentValue &= maskTable[maxBits + 8 - leftBits - 1]; + out[count] = n->c; + count++; + } + + } + free(valueTable); + free(lengthTable); + free(nodeTable); + return; +} +void pad_tree_uchar(HuffmanTree* huffmanTree, unsigned char* L, unsigned char* R, unsigned int* C, unsigned char* t, unsigned int i, node root) +{ + C[i] = root->c; + t[i] = root->t; + node lroot = root->left; + if(lroot!=0) + { + huffmanTree->n_inode++; + L[i] = huffmanTree->n_inode; + pad_tree_uchar(huffmanTree, L,R,C,t, huffmanTree->n_inode, lroot); + } + node rroot = root->right; + if(rroot!=0) + { + huffmanTree->n_inode++; + R[i] = huffmanTree->n_inode; + pad_tree_uchar(huffmanTree, L,R,C,t, huffmanTree->n_inode, rroot); + } +} + +void pad_tree_ushort(HuffmanTree* huffmanTree, unsigned short* L, unsigned short* R, unsigned int* C, unsigned char* t, unsigned int i, node root) +{ + C[i] = root->c; + t[i] = root->t; + node lroot = root->left; + if(lroot!=0) + { + huffmanTree->n_inode++; + L[i] = huffmanTree->n_inode; + pad_tree_ushort(huffmanTree,L,R,C,t,huffmanTree->n_inode, lroot); + } + node rroot = root->right; + if(rroot!=0) + { + huffmanTree->n_inode++; + R[i] = huffmanTree->n_inode; + pad_tree_ushort(huffmanTree,L,R,C,t,huffmanTree->n_inode, rroot); + } +} + +void pad_tree_uint(HuffmanTree* huffmanTree, unsigned int* L, unsigned int* R, unsigned int* C, unsigned char* t, unsigned int i, node root) +{ + C[i] = root->c; + t[i] = root->t; + node lroot = root->left; + if(lroot!=0) + { + huffmanTree->n_inode++; + L[i] = huffmanTree->n_inode; + pad_tree_uint(huffmanTree,L,R,C,t,huffmanTree->n_inode, lroot); + } + node rroot = root->right; + if(rroot!=0) + { + huffmanTree->n_inode++; + R[i] = huffmanTree->n_inode; + pad_tree_uint(huffmanTree,L,R,C,t,huffmanTree->n_inode, rroot); + } +} + +unsigned int 
convert_HuffTree_to_bytes_anyStates(HuffmanTree* huffmanTree, int nodeCount, unsigned char** out) +{ + if(nodeCount<=256) + { + unsigned char* L = (unsigned char*)malloc(nodeCount*sizeof(unsigned char)); + memset(L, 0, nodeCount*sizeof(unsigned char)); + unsigned char* R = (unsigned char*)malloc(nodeCount*sizeof(unsigned char)); + memset(R, 0, nodeCount*sizeof(unsigned char)); + unsigned int* C = (unsigned int*)malloc(nodeCount*sizeof(unsigned int)); + memset(C, 0, nodeCount*sizeof(unsigned int)); + unsigned char* t = (unsigned char*)malloc(nodeCount*sizeof(unsigned char)); + memset(t, 0, nodeCount*sizeof(unsigned char)); + + pad_tree_uchar(huffmanTree,L,R,C,t,0,huffmanTree->qq[1]); + + unsigned int totalSize = 1+3*nodeCount*sizeof(unsigned char)+nodeCount*sizeof(unsigned int); + *out = (unsigned char*)malloc(totalSize*sizeof(unsigned char)); + (*out)[0] = (unsigned char)sysEndianType; + memcpy(*out+1, L, nodeCount*sizeof(unsigned char)); + memcpy((*out)+1+nodeCount*sizeof(unsigned char),R,nodeCount*sizeof(unsigned char)); + memcpy((*out)+1+2*nodeCount*sizeof(unsigned char),C,nodeCount*sizeof(unsigned int)); + memcpy((*out)+1+2*nodeCount*sizeof(unsigned char)+nodeCount*sizeof(unsigned int), t, nodeCount*sizeof(unsigned char)); + free(L); + free(R); + free(C); + free(t); + return totalSize; + + } + else if(nodeCount<=65536) + { + unsigned short* L = (unsigned short*)malloc(nodeCount*sizeof(unsigned short)); + memset(L, 0, nodeCount*sizeof(unsigned short)); + unsigned short* R = (unsigned short*)malloc(nodeCount*sizeof(unsigned short)); + memset(R, 0, nodeCount*sizeof(unsigned short)); + unsigned int* C = (unsigned int*)malloc(nodeCount*sizeof(unsigned int)); + memset(C, 0, nodeCount*sizeof(unsigned int)); + unsigned char* t = (unsigned char*)malloc(nodeCount*sizeof(unsigned char)); + memset(t, 0, nodeCount*sizeof(unsigned char)); + pad_tree_ushort(huffmanTree,L,R,C,t,0,huffmanTree->qq[1]); + unsigned int totalSize = 1+2*nodeCount*sizeof(unsigned 
short)+nodeCount*sizeof(unsigned char) + nodeCount*sizeof(unsigned int); + *out = (unsigned char*)malloc(totalSize); + (*out)[0] = (unsigned char)sysEndianType; + memcpy(*out+1, L, nodeCount*sizeof(unsigned short)); + memcpy((*out)+1+nodeCount*sizeof(unsigned short),R,nodeCount*sizeof(unsigned short)); + memcpy((*out)+1+2*nodeCount*sizeof(unsigned short),C,nodeCount*sizeof(unsigned int)); + memcpy((*out)+1+2*nodeCount*sizeof(unsigned short)+nodeCount*sizeof(unsigned int),t,nodeCount*sizeof(unsigned char)); + free(L); + free(R); + free(C); + free(t); + return totalSize; + } + else //nodeCount>65536 + { + unsigned int* L = (unsigned int*)malloc(nodeCount*sizeof(unsigned int)); + memset(L, 0, nodeCount*sizeof(unsigned int)); + unsigned int* R = (unsigned int*)malloc(nodeCount*sizeof(unsigned int)); + memset(R, 0, nodeCount*sizeof(unsigned int)); + unsigned int* C = (unsigned int*)malloc(nodeCount*sizeof(unsigned int)); + memset(C, 0, nodeCount*sizeof(unsigned int)); + unsigned char* t = (unsigned char*)malloc(nodeCount*sizeof(unsigned char)); + memset(t, 0, nodeCount*sizeof(unsigned char)); + pad_tree_uint(huffmanTree, L,R,C,t,0,huffmanTree->qq[1]); + + //debug + //node root = new_node2(0,0); + //unpad_tree_uint(L,R,C,t,0,root); + + unsigned int totalSize = 1+3*nodeCount*sizeof(unsigned int)+nodeCount*sizeof(unsigned char); + *out = (unsigned char*)malloc(totalSize); + (*out)[0] = (unsigned char)sysEndianType; + memcpy(*out+1, L, nodeCount*sizeof(unsigned int)); + memcpy((*out)+1+nodeCount*sizeof(unsigned int),R,nodeCount*sizeof(unsigned int)); + memcpy((*out)+1+2*nodeCount*sizeof(unsigned int),C,nodeCount*sizeof(unsigned int)); + memcpy((*out)+1+3*nodeCount*sizeof(unsigned int),t,nodeCount*sizeof(unsigned char)); + free(L); + free(R); + free(C); + free(t); + return totalSize; + } +} + +void unpad_tree_uchar(HuffmanTree* huffmanTree, unsigned char* L, unsigned char* R, unsigned int* C, unsigned char *t, unsigned int i, node root) +{ + //root->c = C[i]; + 
if(root->t==0) + { + unsigned char l, r; + l = L[i]; + if(l!=0) + { + node lroot = new_node2(huffmanTree,C[l],t[l]); + root->left = lroot; + unpad_tree_uchar(huffmanTree,L,R,C,t,l,lroot); + } + r = R[i]; + if(r!=0) + { + node rroot = new_node2(huffmanTree,C[r],t[r]); + root->right = rroot; + unpad_tree_uchar(huffmanTree,L,R,C,t,r,rroot); + } + } +} + +void unpad_tree_ushort(HuffmanTree* huffmanTree, unsigned short* L, unsigned short* R, unsigned int* C, unsigned char* t, unsigned int i, node root) +{ + //root->c = C[i]; + if(root->t==0) + { + unsigned short l, r; + l = L[i]; + if(l!=0) + { + node lroot = new_node2(huffmanTree,C[l],t[l]); + root->left = lroot; + unpad_tree_ushort(huffmanTree,L,R,C,t,l,lroot); + } + r = R[i]; + if(r!=0) + { + node rroot = new_node2(huffmanTree,C[r],t[r]); + root->right = rroot; + unpad_tree_ushort(huffmanTree,L,R,C,t,r,rroot); + } + } +} + +void unpad_tree_uint(HuffmanTree* huffmanTree, unsigned int* L, unsigned int* R, unsigned int* C, unsigned char* t, unsigned int i, node root) +{ + //root->c = C[i]; + if(root->t==0) + { + unsigned int l, r; + l = L[i]; + if(l!=0) + { + node lroot = new_node2(huffmanTree,C[l],t[l]); + root->left = lroot; + unpad_tree_uint(huffmanTree,L,R,C,t,l,lroot); + } + r = R[i]; + if(r!=0) + { + node rroot = new_node2(huffmanTree,C[r],t[r]); + root->right = rroot; + unpad_tree_uint(huffmanTree,L,R,C,t,r,rroot); + } + } +} + +node reconstruct_HuffTree_from_bytes_anyStates(HuffmanTree *huffmanTree, unsigned char* bytes, int nodeCount) +{ + if(nodeCount<=256) + { + unsigned char* L = (unsigned char*)malloc(nodeCount*sizeof(unsigned char)); + memset(L, 0, nodeCount*sizeof(unsigned char)); + unsigned char* R = (unsigned char*)malloc(nodeCount*sizeof(unsigned char)); + memset(R, 0, nodeCount*sizeof(unsigned char)); + unsigned int* C = (unsigned int*)malloc(nodeCount*sizeof(unsigned int)); + memset(C, 0, nodeCount*sizeof(unsigned int)); + unsigned char* t = (unsigned char*)malloc(nodeCount*sizeof(unsigned char)); + 
memset(t, 0, nodeCount*sizeof(unsigned char)); + unsigned char cmpSysEndianType = bytes[0]; + if(cmpSysEndianType!=(unsigned char)sysEndianType) + { + unsigned char* p = (unsigned char*)(bytes+1+2*nodeCount*sizeof(unsigned char)); + size_t i = 0, size = nodeCount*sizeof(unsigned int); + while(1) + { + symTransform_4bytes(p); + i+=sizeof(unsigned int); + if(i65536 + { + unsigned int* L = (unsigned int*)malloc(nodeCount*sizeof(unsigned int)); + memset(L, 0, nodeCount*sizeof(unsigned int)); + unsigned int* R = (unsigned int*)malloc(nodeCount*sizeof(unsigned int)); + memset(R, 0, nodeCount*sizeof(unsigned int)); + unsigned int* C = (unsigned int*)malloc(nodeCount*sizeof(unsigned int)); + memset(C, 0, nodeCount*sizeof(unsigned int)); + unsigned char* t = (unsigned char*)malloc(nodeCount*sizeof(unsigned char)); + memset(t, 0, nodeCount*sizeof(unsigned char)); + unsigned char cmpSysEndianType = bytes[0]; + if(cmpSysEndianType!=(unsigned char)sysEndianType) + { + unsigned char* p = (unsigned char*)(bytes+1); + size_t i = 0, size = 3*nodeCount*sizeof(unsigned int); + while(1) + { + symTransform_4bytes(p); + i+=sizeof(unsigned int); + if(istateNum; i++) + if (huffmanTree->code[i]) nodeCount++; + nodeCount = nodeCount*2-1; + unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree,nodeCount, &treeBytes); + //printf("treeByteSize = %d\n", treeByteSize); + + *out = (unsigned char*)malloc(length*sizeof(int)+treeByteSize); + intToBytes_bigEndian(buffer, nodeCount); + memcpy(*out, buffer, 4); + intToBytes_bigEndian(buffer, huffmanTree->stateNum/2); //real number of intervals + memcpy(*out+4, buffer, 4); + memcpy(*out+8, treeBytes, treeByteSize); + free(treeBytes); + size_t enCodeSize = 0; + encode(huffmanTree, s, length, *out+8+treeByteSize, &enCodeSize); + *outSize = 8+treeByteSize+enCodeSize; +} + +int encode_withTree_MSST19(HuffmanTree* huffmanTree, int *s, size_t length, unsigned char **out, size_t *outSize) +{ + //struct ClockPoint clockPointInit; + 
//TimeDurationStart("init", &clockPointInit); + size_t i; + int nodeCount = 0; + unsigned char *treeBytes, buffer[4]; + + init(huffmanTree, s, length); + + int maxBits = 0; + for (i = 0; i < huffmanTree->stateNum; i++) + if (huffmanTree->code[i]){ + nodeCount++; + if(huffmanTree->cout[i] > maxBits) maxBits = huffmanTree->cout[i]; + } + nodeCount = nodeCount*2-1; + //TimeDurationEnd(&clockPointInit); + //struct ClockPoint clockPointST; + //TimeDurationStart("save tree", &clockPointST); + unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree,nodeCount, &treeBytes); + //printf("treeByteSize = %d\n", treeByteSize); + + *out = (unsigned char*)malloc(length*sizeof(int)+treeByteSize); + intToBytes_bigEndian(buffer, nodeCount); + memcpy(*out, buffer, 4); + intToBytes_bigEndian(buffer, huffmanTree->stateNum/2); //real number of intervals + memcpy(*out+4, buffer, 4); + memcpy(*out+8, treeBytes, treeByteSize); + free(treeBytes); + size_t enCodeSize = 0; + //TimeDurationEnd(&clockPointST); + //struct ClockPoint clockPointEncode; + //TimeDurationStart("encode", &clockPointEncode); + encode(huffmanTree, s, length, *out+8+treeByteSize, &enCodeSize); + *outSize = 8+treeByteSize+enCodeSize; + //TimeDurationEnd(&clockPointEncode); + //unsigned short state[length]; + //decode(*out+4+treeByteSize, enCodeSize, qqq[0], state); + //printf("dataSeriesLength=%d",length ); + return maxBits; +} + +/** + * @par *out rememmber to allocate targetLength short_type data for it beforehand. 
+ * + * */ +void decode_withTree(HuffmanTree* huffmanTree, unsigned char *s, size_t targetLength, int *out) +{ + size_t encodeStartIndex; + size_t nodeCount = bytesToInt_bigEndian(s); + node root = reconstruct_HuffTree_from_bytes_anyStates(huffmanTree,s+8, nodeCount); + + //sdi: Debug +/* build_code(root, 0, 0, 0); + int i; + unsigned long code_1, code_2; + for (i = 0; i < stateNum; i++) + if (code[i]) + { + printf("%d: %lu,%lu ; %u\n", i, (code[i])[0],(code[i])[1], cout[i]); + //code_1 = (code[i])[0]; + }*/ + + if(nodeCount<=256) + encodeStartIndex = 1+3*nodeCount*sizeof(unsigned char)+nodeCount*sizeof(unsigned int); + else if(nodeCount<=65536) + encodeStartIndex = 1+2*nodeCount*sizeof(unsigned short)+nodeCount*sizeof(unsigned char)+nodeCount*sizeof(unsigned int); + else + encodeStartIndex = 1+3*nodeCount*sizeof(unsigned int)+nodeCount*sizeof(unsigned char); + decode(s+8+encodeStartIndex, targetLength, root, out); +} + +void decode_withTree_MSST19(HuffmanTree* huffmanTree, unsigned char *s, size_t targetLength, int *out, int maxBits) +{ + size_t encodeStartIndex; + size_t nodeCount = bytesToInt_bigEndian(s); + node root = reconstruct_HuffTree_from_bytes_anyStates(huffmanTree,s+8, nodeCount); + + //sdi: Debug +/* build_code(root, 0, 0, 0); + int i; + unsigned long code_1, code_2; + for (i = 0; i < stateNum; i++) + if (code[i]) + { + printf("%d: %lu,%lu ; %u\n", i, (code[i])[0],(code[i])[1], cout[i]); + //code_1 = (code[i])[0]; + }*/ + + if(nodeCount<=256) + encodeStartIndex = 1+3*nodeCount*sizeof(unsigned char)+nodeCount*sizeof(unsigned int); + else if(nodeCount<=65536) + encodeStartIndex = 1+2*nodeCount*sizeof(unsigned short)+nodeCount*sizeof(unsigned char)+nodeCount*sizeof(unsigned int); + else + encodeStartIndex = 1+3*nodeCount*sizeof(unsigned int)+nodeCount*sizeof(unsigned char); + + decode_MSST19(s+8+encodeStartIndex, targetLength, root, out, maxBits); +} + +void SZ_ReleaseHuffman(HuffmanTree* huffmanTree) +{ + size_t i; + free(huffmanTree->pool); + 
/**
 * Return bits 23..30 of the float's bit pattern (the biased exponent byte).
 * The sign bit is discarded by the narrowing to uint8_t.
 */
uint8_t MLCT_GetExpoIndex(float value){
    uint32_t raw;
    /* memcpy instead of a pointer cast: reading a float through a uint32_t*
     * violates the effective-type (strict aliasing) rule. */
    memcpy(&raw, &value, sizeof raw);
    return (uint8_t)(raw >> 23);
}

/**
 * Return 127 minus the exponent field of `precision`, truncated to 8 bits.
 * For a power of two 2^-k this yields k.
 */
uint8_t MLCT_GetRequiredBits(float precision){
    uint32_t raw;
    memcpy(&raw, &precision, sizeof raw);
    /* Unsigned arithmetic gives the same low 8 bits as the signed form while
     * avoiding a right shift of a negative value. */
    return (uint8_t)(127u - (raw >> 23));
}

/**
 * Return the top `bits` bits of the 23-bit mantissa of `value`.
 * When `bits` is 23 or more, the whole mantissa is returned unshifted.
 */
uint32_t MLCT_GetMantiIndex(float value, int bits){
    uint32_t raw;
    memcpy(&raw, &value, sizeof raw);
    const uint32_t mantissa = raw & 0x007FFFFFu;   /* drop sign and exponent */
    const int shift = 23 - bits;
    return (shift > 0) ? (mantissa >> shift) : mantissa;
}

/**
 * Inverse of MLCT_GetExpoIndex()/MLCT_GetMantiIndex(): build a float whose
 * exponent field is `expo` and whose mantissa holds `manti` in its top
 * `bits` bits.
 */
float MLTC_RebuildFloat(uint8_t expo, uint32_t manti, int bits){
    uint32_t raw = (uint32_t)expo << 23;
    /* Mirror MLCT_GetMantiIndex(): with bits >= 23 the index is the raw
     * mantissa, and shifting by a negative count would be undefined. */
    const int shift = 23 - bits;
    raw |= (shift > 0) ? (manti << shift) : manti;

    float result;
    memcpy(&result, &raw, sizeof result);
    return result;
}
topTable->baseIndex = MLCT_GetExpoIndex(topTable->bottomBoundary); + topTable->topIndex = MLCT_GetExpoIndex(topTable->topBoundary); + int subTableCount = topTable->topIndex - topTable->baseIndex + 1; + topTable->subTables = (struct SubLevelTable*)malloc(sizeof(struct SubLevelTable) * subTableCount); + memset(topTable->subTables, 0, sizeof(struct SubLevelTable) * subTableCount); + + //uint32_t expoBoundary[subTableCount]; + uint8_t lastExpo = 0xff; + uint8_t lastIndex = 0; + for(int i=0; itopIndex-topTable->baseIndex; i>=0; i--){ + struct SubLevelTable* processingSubTable = &topTable->subTables[i]; + if(i == topTable->topIndex - topTable->baseIndex && + MLCT_GetExpoIndex(topTable->topBoundary) == MLCT_GetExpoIndex(precisionTable[count-1])){ + processingSubTable->topIndex = MLCT_GetMantiIndex(topTable->topBoundary, bits) - 1; + }else{ + uint32_t maxIndex = 0; + for(int j=0; jtopIndex = maxIndex; + } + if(i == 0 && MLCT_GetExpoIndex(topTable->bottomBoundary) == MLCT_GetExpoIndex(precisionTable[0])){ + processingSubTable->baseIndex = MLCT_GetMantiIndex(topTable->bottomBoundary, bits)+1; + }else{ + processingSubTable->baseIndex = 0; + } + + int subTableLength = processingSubTable->topIndex - processingSubTable-> baseIndex+ 1; + processingSubTable->table = (uint32_t*)malloc(sizeof(uint32_t) * subTableLength); + memset(processingSubTable->table, 0, sizeof(uint32_t) * subTableLength); + processingSubTable->expoIndex = topTable->baseIndex + i; + } + + uint32_t index = 1; + for(uint8_t i = 0; i<=topTable->topIndex-topTable->baseIndex; i++){ + struct SubLevelTable* processingSubTable = &topTable->subTables[i]; + uint8_t expoIndex = i+topTable->baseIndex; + for(uint32_t j = 0; j<=processingSubTable->topIndex - processingSubTable->baseIndex; j++){ + uint32_t mantiIndex = j+processingSubTable->baseIndex; + float sample = MLTC_RebuildFloat(expoIndex, mantiIndex, topTable->bits); + float bottomBoundary = precisionTable[index] / (1+precision); + float topBoundary = 
precisionTable[index] / (1-precision); + if(sample < topBoundary && sample > bottomBoundary){ + processingSubTable->table[j] = index; + }else{ + //float newPrecision = precisionTable[index]; + index++; + processingSubTable->table[j] = index; + if(j) + processingSubTable->table[j-1] = index; + else{ + struct SubLevelTable* pastSubTable = &topTable->subTables[i-1]; + pastSubTable->table[pastSubTable->topIndex - pastSubTable->baseIndex] = index; + } + } + } + if(i == topTable->topIndex - topTable->baseIndex){ + uint32_t j = processingSubTable->topIndex - processingSubTable->baseIndex + 1; + uint32_t mantiIndex = j + processingSubTable->baseIndex; + float sample = MLTC_RebuildFloat(expoIndex, mantiIndex, topTable->bits); + float bottomBoundary = precisionTable[index] / (1+precision); + float topBoundary = precisionTable[index] / (1-precision); + if(sample > topBoundary || sample < bottomBoundary){ + index++; + processingSubTable->table[j-1] = index; + } + } + } + + /* + long lastIndexInExpoRange = count-1; + bool trigger = false; + float preRange = 0.0; + uint32_t preIndex = 0; + for(int i=topTable->topIndex-topTable->baseIndex; i>=0; i--){ + struct SubLevelTable* processingSubTable = &topTable->subTables[i]; + if(trigger){ + uint32_t bound = MLCT_GetMantiIndex(preRange, bits); + for(int j = processingSubTable->topIndex; j>=processingSubTable->baseIndex; j--){ + if(j >= bound){ + processingSubTable->table[j-processingSubTable->baseIndex] = preIndex; + }else{ + break; + } + } + trigger = false; + } + long firstIndexInExpoRange = expoBoundary[i]; + uint8_t expoInRange = MLCT_GetExpoIndex(precisionTable[firstIndexInExpoRange]); + for(int j=lastIndexInExpoRange; j>=firstIndexInExpoRange; j--){ + float test = precisionTable[j]; + uint32_t rangeTop = MLCT_GetMantiIndex(precisionTable[j]*(1+precision), bits) - 1; + uint32_t rangeBottom; + if(j == firstIndexInExpoRange){ + preRange = precisionTable[j]/(1+precision); + if(expoInRange != MLCT_GetExpoIndex(preRange)){ + trigger = 
true; + preIndex = firstIndexInExpoRange; + rangeBottom = 0; + }else{ + rangeBottom= MLCT_GetMantiIndex(precisionTable[j]/(1+precision), bits) + 1; + } + }else{ + rangeBottom= MLCT_GetMantiIndex(precisionTable[j]/(1+precision), bits) + 1; + } + for(int k = rangeBottom; k<=rangeTop; k++){ + if( k <= processingSubTable->topIndex && k >= processingSubTable->baseIndex) + processingSubTable->table[k - processingSubTable->baseIndex] = j; + } + } + lastIndexInExpoRange = firstIndexInExpoRange-1; + } + */ +} + +uint32_t MultiLevelCacheTableGetIndex(float value, struct TopLevelTable* topLevelTable){ + uint8_t expoIndex = MLCT_GetExpoIndex(value); + if(expoIndex <= topLevelTable->topIndex && expoIndex >= topLevelTable->baseIndex){ + struct SubLevelTable* subLevelTable = &topLevelTable->subTables[expoIndex-topLevelTable->baseIndex]; + uint32_t mantiIndex = MLCT_GetMantiIndex(value, topLevelTable->bits); + MLTC_RebuildFloat(expoIndex, mantiIndex, topLevelTable->bits); + if(mantiIndex >= subLevelTable->baseIndex && mantiIndex <= subLevelTable->topIndex) + return subLevelTable->table[mantiIndex - subLevelTable->baseIndex]; + } + return 0; +} + +void MultiLevelCacheTableFree(struct TopLevelTable* table){ + for(int i=0; itopIndex - table->baseIndex + 1; i++){ + free(table->subTables[i].table); + } + free(table->subTables); +} diff --git a/deps/SZ/sz/src/MultiLevelCacheTableWideInterval.c b/deps/SZ/sz/src/MultiLevelCacheTableWideInterval.c new file mode 100644 index 0000000000000000000000000000000000000000..d137115f9097d8803fbae46e41af43f31cfb6484 --- /dev/null +++ b/deps/SZ/sz/src/MultiLevelCacheTableWideInterval.c @@ -0,0 +1,125 @@ +/** + * @file MultiLevelCacheTableWideInterval.h + * @author Xiangyu Zou, Tao Lu, Wen Xia, Xuan Wang, Weizhe Zhang, Sheng Di, Dingwen Tao + * @date Jan, 2019 + * @brief Header file. + * (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. 
/**
 * Return the top 12 bits of the double's bit pattern (sign bit plus the
 * 11-bit biased exponent).
 */
uint16_t MLCTWI_GetExpoIndex(double value){
    uint64_t raw;
    /* memcpy instead of a pointer cast: reading a double through a uint64_t*
     * violates the effective-type (strict aliasing) rule. */
    memcpy(&raw, &value, sizeof raw);
    return (uint16_t)(raw >> 52);
}

/**
 * Return 1023 minus the top 12 bits of `precision`, truncated to 16 bits.
 * For a positive power of two 2^-k this yields k.
 */
uint16_t MLCTWI_GetRequiredBits(double precision){
    uint64_t raw;
    memcpy(&raw, &precision, sizeof raw);
    return (uint16_t)((uint64_t)1023 - (raw >> 52));
}

/**
 * Return the top `bits` bits of the 52-bit mantissa of `value`.
 * When `bits` is 52 or more, the whole mantissa is returned unshifted.
 */
uint64_t MLCTWI_GetMantiIndex(double value, int bits){
    uint64_t raw;
    memcpy(&raw, &value, sizeof raw);
    const uint64_t mantissa = raw & 0x000FFFFFFFFFFFFFULL;   /* drop sign and exponent */
    const int shift = 52 - bits;
    return (shift > 0) ? (mantissa >> shift) : mantissa;
}

/**
 * Build a double from an exponent field and a mantissa index of `bits` bits.
 *
 * The mantissa is ADDED to the shifted exponent (not OR-ed), so an index one
 * past the last slot carries into the exponent and yields the start of the
 * next binade.
 */
double MLTCWI_RebuildDouble(uint16_t expo, uint64_t manti, int bits){
    uint64_t raw = (uint64_t)expo << 52;
    /* Mirror MLCTWI_GetMantiIndex(): with bits >= 52 the index is the raw
     * mantissa, and shifting by a negative count would be undefined. */
    const int shift = 52 - bits;
    raw += (shift > 0) ? (manti << shift) : manti;

    double result;
    memcpy(&result, &raw, sizeof result);
    return result;
}
&topTable->subTables[i]; + + uint32_t maxIndex = 0; + for(int j=0; jtopIndex = maxIndex; + processingSubTable->baseIndex = 0; + + uint64_t subTableLength = processingSubTable->topIndex - processingSubTable-> baseIndex+ 1; + processingSubTable->table = (uint16_t*)malloc(sizeof(uint16_t) * subTableLength); + memset(processingSubTable->table, 0, sizeof(uint16_t) * subTableLength); + processingSubTable->expoIndex = topTable->baseIndex + i; + } + + + uint32_t index = 0; + bool flag = false; + for(uint16_t i = 0; i<=topTable->topIndex-topTable->baseIndex; i++){ + struct SubLevelTableWideInterval* processingSubTable = &topTable->subTables[i]; + uint16_t expoIndex = i+topTable->baseIndex; + for(uint32_t j = 0; j<=processingSubTable->topIndex - processingSubTable->baseIndex; j++){ + uint64_t mantiIndex = j + processingSubTable->baseIndex; + double sampleBottom = MLTCWI_RebuildDouble(expoIndex, mantiIndex, topTable->bits); + double sampleTop = MLTCWI_RebuildDouble(expoIndex, mantiIndex+1, topTable->bits); + double bottomBoundary = precisionTable[index] / (1+precision); + double topBoundary = precisionTable[index] / (1-precision); + if(sampleTop < topBoundary && sampleBottom > bottomBoundary){ + processingSubTable->table[j] = index; + flag = true; + }else{ + if(flag && index < count-1){ + index++; + processingSubTable->table[j] = index; + }else{ + processingSubTable->table[j] = 0; + } + } + } + } + +} + +uint32_t MultiLevelCacheTableWideIntervalGetIndex(double value, struct TopLevelTableWideInterval* topLevelTable){ + uint16_t expoIndex = MLCTWI_GetExpoIndex(value); + if(expoIndex <= topLevelTable->topIndex && expoIndex >= topLevelTable->baseIndex){ + struct SubLevelTableWideInterval* subLevelTable = &topLevelTable->subTables[expoIndex-topLevelTable->baseIndex]; + uint64_t mantiIndex = MLCTWI_GetMantiIndex(value, topLevelTable->bits); + return subLevelTable->table[mantiIndex - subLevelTable->baseIndex]; + + } + return 0; +} + +void MultiLevelCacheTableWideIntervalFree(struct 
TopLevelTableWideInterval* table){ + for(int i=0; itopIndex - table->baseIndex + 1; i++){ + free(table->subTables[i].table); + } + free(table->subTables); +} + diff --git a/deps/SZ/sz/src/TightDataPointStorageD.c b/deps/SZ/sz/src/TightDataPointStorageD.c new file mode 100644 index 0000000000000000000000000000000000000000..f30d8cdd62f20a0bc1c43cdefdac1ce74b5b9c74 --- /dev/null +++ b/deps/SZ/sz/src/TightDataPointStorageD.c @@ -0,0 +1,751 @@ +/** + * @file TightPointDataStorageD.c + * @author Sheng Di and Dingwen Tao + * @date Aug, 2016 + * @brief The functions used to construct the tightPointDataStorage element for storing compressed bytes. + * (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. + */ + +#include +#include +#include +#include "TightDataPointStorageD.h" +#include "sz.h" +#include "Huffman.h" +//#include "rw.h" + +void new_TightDataPointStorageD_Empty(TightDataPointStorageD **this) +{ + *this = (TightDataPointStorageD*)malloc(sizeof(TightDataPointStorageD)); + (*this)->dataSeriesLength = 0; + (*this)->allSameData = 0; + (*this)->exactDataNum = 0; + (*this)->reservedValue = 0; + (*this)->reqLength = 0; + (*this)->radExpo = 0; + + (*this)->rtypeArray = NULL; + (*this)->rtypeArray_size = 0; + + (*this)->typeArray = NULL; //its size is dataSeriesLength/4 (or xxx/4+1) + (*this)->typeArray_size = 0; + + (*this)->leadNumArray = NULL; //its size is exactDataNum/4 (or exactDataNum/4+1) + (*this)->leadNumArray_size = 0; + + (*this)->exactMidBytes = NULL; + (*this)->exactMidBytes_size = 0; + + (*this)->residualMidBits = NULL; + (*this)->residualMidBits_size = 0; + + (*this)->intervals = 0; + (*this)->isLossless = 0; + + (*this)->segment_size = 0; + (*this)->pwrErrBoundBytes = NULL; + (*this)->pwrErrBoundBytes_size = 0; + + (*this)->raBytes = NULL; + (*this)->raBytes_size = 0; + +} + +int new_TightDataPointStorageD_fromFlatBytes(TightDataPointStorageD **this, unsigned char* flatBytes, size_t 
flatBytesLength) +{ + new_TightDataPointStorageD_Empty(this); + size_t i, index = 0; + size_t pwrErrBoundBytes_size = 0, segmentL = 0, radExpoL = 0, pwrErrBoundBytesL = 0; + char version[3]; + for (i = 0; i < 3; i++) + version[i] = flatBytes[index++]; //3 + unsigned char sameRByte = flatBytes[index++]; //1 + if(checkVersion2(version)!=1) + { + //wrong version + printf("Wrong version: \nCompressed-data version (%d.%d.%d)\n",version[0], version[1], version[2]); + printf("Current sz version: (%d.%d.%d)\n", versionNumber[0], versionNumber[1], versionNumber[2]); + printf("Please double-check if the compressed data (or file) is correct.\n"); + exit(0); + } + + int same = sameRByte & 0x01; + //confparams_dec->szMode = (sameRByte & 0x06)>>1; + (*this)->isLossless = (sameRByte & 0x10)>>4; + int isPW_REL = (sameRByte & 0x20)>>5; + exe_params->SZ_SIZE_TYPE = ((sameRByte & 0x40)>>6)==1?8:4; + //confparams_dec->randomAccess = (sameRByte & 0x02) >> 1; + //confparams_dec->szMode = (sameRByte & 0x06) >> 1; //this 0000,0110 are not used for szMode any more + confparams_dec->protectValueRange = (sameRByte & 0x04)>>2; + confparams_dec->accelerate_pw_rel_compression = (sameRByte & 0x08) >> 3; + int errorBoundMode = ABS; + if(isPW_REL) + { + errorBoundMode = PW_REL; + segmentL = exe_params->SZ_SIZE_TYPE; + pwrErrBoundBytesL = 4; + } + + if(confparams_dec==NULL) + { + confparams_dec = (sz_params*)malloc(sizeof(sz_params)); + memset(confparams_dec, 0, sizeof(sz_params)); + } + convertBytesToSZParams(&(flatBytes[index]), confparams_dec); + + index += MetaDataByteLength_double; + + int isRegression = (sameRByte >> 7) & 0x01; + + unsigned char dsLengthBytes[8]; + for (i = 0; i < exe_params->SZ_SIZE_TYPE; i++) + dsLengthBytes[i] = flatBytes[index++]; + (*this)->dataSeriesLength = bytesToSize(dsLengthBytes); + + //printf("confparams_dec->szMode=%d\n",confparams_dec->szMode); + + if((*this)->isLossless==1) + { + //(*this)->exactMidBytes = flatBytes+8; + return errorBoundMode; + } + else 
if(same==1) + { + (*this)->allSameData = 1; + //size_t exactMidBytesLength = sizeof(double);//flatBytesLength - 3 - 1 - MetaDataByteLength_double -exe_params->SZ_SIZE_TYPE; + (*this)->exactMidBytes = &(flatBytes[index]); + return errorBoundMode; + } + else + (*this)->allSameData = 0; + + if(isRegression == 1) + { + (*this)->raBytes_size = flatBytesLength - 3 - 1 - MetaDataByteLength_double - exe_params->SZ_SIZE_TYPE; + (*this)->raBytes = &(flatBytes[index]); + return errorBoundMode; + } + + int rtype_ = 0;//sameRByte & 0x08; //1000 + + unsigned char byteBuf[8]; + + for (i = 0; i < 4; i++) + byteBuf[i] = flatBytes[index++]; + int max_quant_intervals = bytesToInt_bigEndian(byteBuf);// 4 + + confparams_dec->maxRangeRadius = max_quant_intervals/2; + + if(errorBoundMode>=PW_REL) + { + (*this)->radExpo = flatBytes[index++];//1 + radExpoL = 1; + for (i = 0; i < exe_params->SZ_SIZE_TYPE; i++) + byteBuf[i] = flatBytes[index++]; + confparams_dec->segment_size = (*this)->segment_size = bytesToSize(byteBuf);// exe_params->SZ_SIZE_TYPE + + for (i = 0; i < 4; i++) + byteBuf[i] = flatBytes[index++]; + pwrErrBoundBytes_size = (*this)->pwrErrBoundBytes_size = bytesToInt_bigEndian(byteBuf);// 4 + } + else + { + pwrErrBoundBytes_size = 0; + (*this)->pwrErrBoundBytes = NULL; + } + + for (i = 0; i < 4; i++) + byteBuf[i] = flatBytes[index++]; + (*this)->intervals = bytesToInt_bigEndian(byteBuf);// 4 + + for (i = 0; i < 8; i++) + byteBuf[i] = flatBytes[index++]; + (*this)->medianValue = bytesToDouble(byteBuf);//8 + + (*this)->reqLength = flatBytes[index++]; //1 + + if(isPW_REL && confparams_dec->accelerate_pw_rel_compression) + { + (*this)->plus_bits = flatBytes[index++]; + (*this)->max_bits = flatBytes[index++]; + } + + for (i = 0; i < 8; i++) + byteBuf[i] = flatBytes[index++]; + (*this)->realPrecision = bytesToDouble(byteBuf);//8 + + for (i = 0; i < exe_params->SZ_SIZE_TYPE; i++) + byteBuf[i] = flatBytes[index++]; + (*this)->typeArray_size = bytesToSize(byteBuf);// 
exe_params->SZ_SIZE_TYPE + + if(rtype_!=0) + { + for(i = 0;iSZ_SIZE_TYPE;i++) + byteBuf[i] = flatBytes[index++]; + (*this)->rtypeArray_size = bytesToSize(byteBuf);//ST + } + else + (*this)->rtypeArray_size = 0; + + for (i = 0; i < exe_params->SZ_SIZE_TYPE; i++) + byteBuf[i] = flatBytes[index++]; + (*this)->exactDataNum = bytesToSize(byteBuf);// ST + + for (i = 0; i < exe_params->SZ_SIZE_TYPE; i++) + byteBuf[i] = flatBytes[index++]; + (*this)->exactMidBytes_size = bytesToSize(byteBuf);// ST + + if (rtype_ != 0) { + if((*this)->rtypeArray_size>0) + (*this)->rtypeArray = (unsigned char*)malloc(sizeof(unsigned char)*(*this)->rtypeArray_size); + else + (*this)->rtypeArray = NULL; + + for (i = 0; i < 8; i++) + byteBuf[i] = flatBytes[index++]; + (*this)->reservedValue = bytesToDouble(byteBuf);//8 + } + + size_t logicLeadNumBitsNum = (*this)->exactDataNum * 2; + if (logicLeadNumBitsNum % 8 == 0) + { + (*this)->leadNumArray_size = logicLeadNumBitsNum >> 3; + } + else + { + (*this)->leadNumArray_size = (logicLeadNumBitsNum >> 3) + 1; + } + + int minLogValueSize = 0; + if(errorBoundMode>=PW_REL) + minLogValueSize = 8; + + if ((*this)->rtypeArray != NULL) + { + (*this)->residualMidBits_size = flatBytesLength - 3 - 1 - MetaDataByteLength_double - exe_params->SZ_SIZE_TYPE - 4 - radExpoL - segmentL - pwrErrBoundBytesL - 4 - 8 - 1 - 8 + - exe_params->SZ_SIZE_TYPE - exe_params->SZ_SIZE_TYPE - exe_params->SZ_SIZE_TYPE - minLogValueSize - exe_params->SZ_SIZE_TYPE - 8 - (*this)->rtypeArray_size + - minLogValueSize - (*this)->typeArray_size - (*this)->leadNumArray_size + - (*this)->exactMidBytes_size - pwrErrBoundBytes_size - 1 - 1; + for (i = 0; i < (*this)->rtypeArray_size; i++) + (*this)->rtypeArray[i] = flatBytes[index++]; + } + else + { + (*this)->residualMidBits_size = flatBytesLength - 3 - 1 - MetaDataByteLength_double - exe_params->SZ_SIZE_TYPE - 4 - radExpoL - segmentL - pwrErrBoundBytesL - 4 - 8 - 1 - 8 + - exe_params->SZ_SIZE_TYPE - exe_params->SZ_SIZE_TYPE - 
exe_params->SZ_SIZE_TYPE - minLogValueSize - (*this)->typeArray_size + - (*this)->leadNumArray_size - (*this)->exactMidBytes_size - pwrErrBoundBytes_size - 1 - 1; + } + + if(errorBoundMode >= PW_REL){ + (*this)->minLogValue = bytesToDouble(&flatBytes[index]); + index+=8; + } + + (*this)->typeArray = &flatBytes[index]; + //retrieve the number of states (i.e., stateNum) + (*this)->allNodes = bytesToInt_bigEndian((*this)->typeArray); //the first 4 bytes store the stateNum + (*this)->stateNum = ((*this)->allNodes+1)/2; + + index+=(*this)->typeArray_size; + + (*this)->pwrErrBoundBytes = &flatBytes[index]; + + index+=pwrErrBoundBytes_size; + + (*this)->leadNumArray = &flatBytes[index]; + + index+=(*this)->leadNumArray_size; + + (*this)->exactMidBytes = &flatBytes[index]; + + index+=(*this)->exactMidBytes_size; + + (*this)->residualMidBits = &flatBytes[index]; + + //index+=(*this)->residualMidBits_size; + + return errorBoundMode; +} + +/** + * + * type's length == dataSeriesLength + * exactMidBytes's length == exactMidBytes_size + * leadNumIntArray's length == exactDataNum + * escBytes's length == escBytes_size + * resiBitLength's length == resiBitLengthSize + * */ +void new_TightDataPointStorageD(TightDataPointStorageD **this, + size_t dataSeriesLength, size_t exactDataNum, + int* type, unsigned char* exactMidBytes, size_t exactMidBytes_size, + unsigned char* leadNumIntArray, //leadNumIntArray contains readable numbers.... 
+ unsigned char* resiMidBits, size_t resiMidBits_size, + unsigned char resiBitLength, + double realPrecision, double medianValue, char reqLength, unsigned int intervals, + unsigned char* pwrErrBoundBytes, size_t pwrErrBoundBytes_size, unsigned char radExpo) { + //int i = 0; + *this = (TightDataPointStorageD *)malloc(sizeof(TightDataPointStorageD)); + (*this)->allSameData = 0; + (*this)->realPrecision = realPrecision; + (*this)->medianValue = medianValue; + (*this)->reqLength = reqLength; + + (*this)->dataSeriesLength = dataSeriesLength; + (*this)->exactDataNum = exactDataNum; + + (*this)->rtypeArray = NULL; + (*this)->rtypeArray_size = 0; + + int stateNum = 2*intervals; + HuffmanTree* huffmanTree = createHuffmanTree(stateNum); + if(confparams_cpr->errorBoundMode == PW_REL && confparams_cpr->accelerate_pw_rel_compression) + (*this)->max_bits = encode_withTree_MSST19(huffmanTree, type, dataSeriesLength, &(*this)->typeArray, &(*this)->typeArray_size); + else + encode_withTree(huffmanTree, type, dataSeriesLength, &(*this)->typeArray, &(*this)->typeArray_size); + SZ_ReleaseHuffman(huffmanTree); + + (*this)->exactMidBytes = exactMidBytes; + (*this)->exactMidBytes_size = exactMidBytes_size; + + (*this)->leadNumArray_size = convertIntArray2ByteArray_fast_2b(leadNumIntArray, exactDataNum, &((*this)->leadNumArray)); + + (*this)->residualMidBits_size = convertIntArray2ByteArray_fast_dynamic(resiMidBits, resiBitLength, exactDataNum, &((*this)->residualMidBits)); + + (*this)->intervals = intervals; + + (*this)->isLossless = 0; + + if(confparams_cpr->errorBoundMode>=PW_REL) + (*this)->pwrErrBoundBytes = pwrErrBoundBytes; + else + (*this)->pwrErrBoundBytes = NULL; + + (*this)->radExpo = radExpo; + + (*this)->pwrErrBoundBytes_size = pwrErrBoundBytes_size; +} + +void new_TightDataPointStorageD2(TightDataPointStorageD **this, + size_t dataSeriesLength, size_t exactDataNum, + int* type, unsigned char* exactMidBytes, size_t exactMidBytes_size, + unsigned char* leadNumIntArray, 
//leadNumIntArray contains readable numbers.... + unsigned char* resiMidBits, size_t resiMidBits_size, + unsigned char* resiBitLength, size_t resiBitLengthSize, + double realPrecision, double medianValue, char reqLength, unsigned int intervals, + unsigned char* pwrErrBoundBytes, size_t pwrErrBoundBytes_size, unsigned char radExpo) { + //int i = 0; + *this = (TightDataPointStorageD *)malloc(sizeof(TightDataPointStorageD)); + (*this)->allSameData = 0; + (*this)->realPrecision = realPrecision; + (*this)->medianValue = medianValue; + (*this)->reqLength = reqLength; + + (*this)->dataSeriesLength = dataSeriesLength; + (*this)->exactDataNum = exactDataNum; + + (*this)->rtypeArray = NULL; + (*this)->rtypeArray_size = 0; + + int stateNum = 2*intervals; + HuffmanTree* huffmanTree = createHuffmanTree(stateNum); + encode_withTree(huffmanTree, type, dataSeriesLength, &(*this)->typeArray, &(*this)->typeArray_size); + SZ_ReleaseHuffman(huffmanTree); + + (*this)->exactMidBytes = exactMidBytes; + (*this)->exactMidBytes_size = exactMidBytes_size; + + (*this)->leadNumArray_size = convertIntArray2ByteArray_fast_2b(leadNumIntArray, exactDataNum, &((*this)->leadNumArray)); + + //(*this)->residualMidBits = resiMidBits; + //(*this)->residualMidBits_size = resiMidBits_size; + + (*this)->residualMidBits_size = convertIntArray2ByteArray_fast_dynamic2(resiMidBits, resiBitLength, resiBitLengthSize, &((*this)->residualMidBits)); + + (*this)->intervals = intervals; + + (*this)->isLossless = 0; + + if(confparams_cpr->errorBoundMode>=PW_REL) + (*this)->pwrErrBoundBytes = pwrErrBoundBytes; + else + (*this)->pwrErrBoundBytes = NULL; + + (*this)->radExpo = radExpo; + + (*this)->pwrErrBoundBytes_size = pwrErrBoundBytes_size; +} + +void convertTDPStoBytes_double(TightDataPointStorageD* tdps, unsigned char* bytes, unsigned char* dsLengthBytes, unsigned char sameByte) +{ + size_t i, k = 0; + unsigned char intervalsBytes[4]; + unsigned char typeArrayLengthBytes[8]; + unsigned char exactLengthBytes[8]; + 
unsigned char exactMidBytesLength[8]; + unsigned char realPrecisionBytes[8]; + + unsigned char medianValueBytes[8]; + + unsigned char segment_sizeBytes[8]; + unsigned char pwrErrBoundBytes_sizeBytes[4]; + unsigned char max_quant_intervals_Bytes[4]; + + for(i = 0;i<3;i++)//3 bytes + bytes[k++] = versionNumber[i]; + bytes[k++] = sameByte; //1 byte + + convertSZParamsToBytes(confparams_cpr, &(bytes[k])); + k = k + MetaDataByteLength_double; + + for(i = 0;i<exe_params->SZ_SIZE_TYPE;i++)//ST: 4 or 8 bytes + bytes[k++] = dsLengthBytes[i]; + intToBytes_bigEndian(max_quant_intervals_Bytes, confparams_cpr->max_quant_intervals); + for(i = 0;i<4;i++)//4 + bytes[k++] = max_quant_intervals_Bytes[i]; + + if(confparams_cpr->errorBoundMode>=PW_REL) + { + bytes[k++] = tdps->radExpo; //1 byte + + sizeToBytes(segment_sizeBytes, confparams_cpr->segment_size); + for(i = 0;i<exe_params->SZ_SIZE_TYPE;i++)//ST + bytes[k++] = segment_sizeBytes[i]; + + intToBytes_bigEndian(pwrErrBoundBytes_sizeBytes, tdps->pwrErrBoundBytes_size); + for(i = 0;i<4;i++)//4 + bytes[k++] = pwrErrBoundBytes_sizeBytes[i]; + } + + intToBytes_bigEndian(intervalsBytes, tdps->intervals); + for(i = 0;i<4;i++)//4 + bytes[k++] = intervalsBytes[i]; + + doubleToBytes(medianValueBytes, tdps->medianValue); + for (i = 0; i < 8; i++)// 8 + bytes[k++] = medianValueBytes[i]; + + bytes[k++] = tdps->reqLength; //1 byte + + if(confparams_cpr->errorBoundMode == PW_REL && confparams_cpr->accelerate_pw_rel_compression==1) + { + bytes[k++] = tdps->plus_bits; + bytes[k++] = tdps->max_bits; + } + + doubleToBytes(realPrecisionBytes, tdps->realPrecision); + for (i = 0; i < 8; i++)// 8 + bytes[k++] = realPrecisionBytes[i]; + + sizeToBytes(typeArrayLengthBytes, tdps->typeArray_size); + for(i = 0;i<exe_params->SZ_SIZE_TYPE;i++)//ST + bytes[k++] = typeArrayLengthBytes[i]; + + sizeToBytes(exactLengthBytes, tdps->exactDataNum); + for(i = 0;i<exe_params->SZ_SIZE_TYPE;i++)//ST + bytes[k++] = exactLengthBytes[i]; + + sizeToBytes(exactMidBytesLength, tdps->exactMidBytes_size); + for(i =
0;iSZ_SIZE_TYPE;i++)//ST + bytes[k++] = exactMidBytesLength[i]; + + if(confparams_cpr->errorBoundMode>=PW_REL) + { + doubleToBytes(exactMidBytesLength, tdps->minLogValue); + for(i = 0;i < 8; i++) + bytes[k++] = exactMidBytesLength[i]; + } + + memcpy(&(bytes[k]), tdps->typeArray, tdps->typeArray_size); + k += tdps->typeArray_size; + if(confparams_cpr->errorBoundMode>=PW_REL) + { + memcpy(&(bytes[k]), tdps->pwrErrBoundBytes, tdps->pwrErrBoundBytes_size); + k += tdps->pwrErrBoundBytes_size; + } + + memcpy(&(bytes[k]), tdps->leadNumArray, tdps->leadNumArray_size); + k += tdps->leadNumArray_size; + memcpy(&(bytes[k]), tdps->exactMidBytes, tdps->exactMidBytes_size); + k += tdps->exactMidBytes_size; + + if(tdps->residualMidBits!=NULL) + { + memcpy(&(bytes[k]), tdps->residualMidBits, tdps->residualMidBits_size); + k += tdps->residualMidBits_size; + } +} + +void convertTDPStoBytes_double_reserve(TightDataPointStorageD* tdps, unsigned char* bytes, unsigned char* dsLengthBytes, unsigned char sameByte) +{ + size_t i, k = 0; + unsigned char intervalsBytes[4]; + unsigned char typeArrayLengthBytes[8]; + unsigned char rTypeLengthBytes[8]; + unsigned char exactLengthBytes[8]; + unsigned char exactMidBytesLength[8]; + unsigned char reservedValueBytes[8]; + unsigned char realPrecisionBytes[8]; + + unsigned char medianValueBytes[8]; + + unsigned char segment_sizeBytes[8]; + unsigned char pwrErrBoundBytes_sizeBytes[4]; + unsigned char max_quant_intervals_Bytes[4]; + + for(i = 0;i<3;i++)//3 + bytes[k++] = versionNumber[i]; + bytes[k++] = sameByte; //1 + + convertSZParamsToBytes(confparams_cpr, &(bytes[k])); + k = k + MetaDataByteLength_double; + + for(i = 0;iSZ_SIZE_TYPE;i++)//ST + bytes[k++] = dsLengthBytes[i]; + + intToBytes_bigEndian(max_quant_intervals_Bytes, confparams_cpr->max_quant_intervals); + for(i = 0;i<4;i++)//4 + bytes[k++] = max_quant_intervals_Bytes[i]; + + if(confparams_cpr->errorBoundMode>=PW_REL) + { + bytes[k++] = tdps->radExpo; //1 byte + + 
sizeToBytes(segment_sizeBytes, confparams_cpr->segment_size); + for(i = 0;iSZ_SIZE_TYPE;i++)//4 + bytes[k++] = segment_sizeBytes[i]; + + intToBytes_bigEndian(pwrErrBoundBytes_sizeBytes, tdps->pwrErrBoundBytes_size); + for(i = 0;i<4;i++)//4 + bytes[k++] = pwrErrBoundBytes_sizeBytes[i]; + } + intToBytes_bigEndian(intervalsBytes, tdps->intervals); + for(i = 0;i<4;i++)//4 + bytes[k++] = intervalsBytes[i]; + + doubleToBytes(medianValueBytes, tdps->medianValue); + for (i = 0; i < 8; i++)// 8 + bytes[k++] = medianValueBytes[i]; + + bytes[k++] = tdps->reqLength; //1 byte + + doubleToBytes(realPrecisionBytes, tdps->realPrecision); + for (i = 0; i < 8; i++)// 8 + bytes[k++] = realPrecisionBytes[i]; + + sizeToBytes(typeArrayLengthBytes, tdps->typeArray_size); + for(i = 0;iSZ_SIZE_TYPE;i++)//ST + bytes[k++] = typeArrayLengthBytes[i]; + + sizeToBytes(rTypeLengthBytes, tdps->rtypeArray_size); + for(i = 0;iSZ_SIZE_TYPE;i++)//ST + bytes[k++] = rTypeLengthBytes[i]; + + sizeToBytes(exactLengthBytes, tdps->exactDataNum); + for(i = 0;iSZ_SIZE_TYPE;i++)//ST + bytes[k++] = exactLengthBytes[i]; + + sizeToBytes(exactMidBytesLength, tdps->exactMidBytes_size); + for(i = 0;iSZ_SIZE_TYPE;i++)//ST + bytes[k++] = exactMidBytesLength[i]; + + doubleToBytes(reservedValueBytes, tdps->reservedValue); + for (i = 0; i < 8; i++)// 8 + bytes[k++] = reservedValueBytes[i]; + + memcpy(&(bytes[k]), tdps->rtypeArray, tdps->rtypeArray_size); + k += tdps->rtypeArray_size; + + if(confparams_cpr->errorBoundMode>=PW_REL) + { + doubleToBytes(exactMidBytesLength, tdps->minLogValue); + for(i = 0;i < 8; i++) + bytes[k++] = exactMidBytesLength[i]; + } + + memcpy(&(bytes[k]), tdps->typeArray, tdps->typeArray_size); + k += tdps->typeArray_size; + if(confparams_cpr->errorBoundMode>=PW_REL) + { + memcpy(&(bytes[k]), tdps->pwrErrBoundBytes, tdps->pwrErrBoundBytes_size); + k += tdps->pwrErrBoundBytes_size; + } + memcpy(&(bytes[k]), tdps->leadNumArray, tdps->leadNumArray_size); + k += tdps->leadNumArray_size; + 
memcpy(&(bytes[k]), tdps->exactMidBytes, tdps->exactMidBytes_size); + k += tdps->exactMidBytes_size; + if(tdps->residualMidBits!=NULL) + { + memcpy(&(bytes[k]), tdps->residualMidBits, tdps->residualMidBits_size); + k += tdps->residualMidBits_size; + } +} + +//Convert TightDataPointStorageD to bytes... +void convertTDPStoFlatBytes_double(TightDataPointStorageD *tdps, unsigned char** bytes, size_t *size) +{ + size_t i, k = 0; + unsigned char dsLengthBytes[8]; + + if(exe_params->SZ_SIZE_TYPE==4) + intToBytes_bigEndian(dsLengthBytes, tdps->dataSeriesLength);//4 + else + longToBytes_bigEndian(dsLengthBytes, tdps->dataSeriesLength);//8 + + unsigned char sameByte = tdps->allSameData==1?(unsigned char)1:(unsigned char)0; + //sameByte = sameByte | (confparams_cpr->szMode << 1); + if(tdps->isLossless) + sameByte = (unsigned char) (sameByte | 0x10); + if(confparams_cpr->errorBoundMode>=PW_REL) + sameByte = (unsigned char) (sameByte | 0x20); // 00100000, the 5th bit + if(exe_params->SZ_SIZE_TYPE==8) + sameByte = (unsigned char) (sameByte | 0x40); // 01000000, the 6th bit + if(confparams_cpr->errorBoundMode == PW_REL && confparams_cpr->accelerate_pw_rel_compression) + sameByte = (unsigned char) (sameByte | 0x08); + + if(tdps->allSameData==1) + { + size_t totalByteLength = 3 + 1 + MetaDataByteLength_double + exe_params->SZ_SIZE_TYPE + tdps->exactMidBytes_size; + *bytes = (unsigned char *)malloc(sizeof(unsigned char)*totalByteLength); + + for (i = 0; i < 3; i++)//3 + (*bytes)[k++] = versionNumber[i]; + (*bytes)[k++] = sameByte; + + convertSZParamsToBytes(confparams_cpr, &((*bytes)[k])); + k = k + MetaDataByteLength_double; + + for (i = 0; i < exe_params->SZ_SIZE_TYPE; i++) + (*bytes)[k++] = dsLengthBytes[i]; + + for (i = 0; i < tdps->exactMidBytes_size; i++) + (*bytes)[k++] = tdps->exactMidBytes[i]; + + *size = totalByteLength; + } + else if (tdps->rtypeArray == NULL) + { + size_t residualMidBitsLength = tdps->residualMidBits == NULL ? 
0 : tdps->residualMidBits_size; + size_t segmentL = 0, radExpoL = 0, pwrBoundArrayL = 0; + + int minLogValueSize = 0; + if(confparams_cpr->errorBoundMode>=PW_REL) + { + segmentL = exe_params->SZ_SIZE_TYPE; + radExpoL = 1; + pwrBoundArrayL = 4; + minLogValueSize = 8; + } + + size_t totalByteLength = 3 + 1 + MetaDataByteLength_double + exe_params->SZ_SIZE_TYPE + 4 + radExpoL + segmentL + pwrBoundArrayL + 4 + 8 + 1 + 8 + + exe_params->SZ_SIZE_TYPE + exe_params->SZ_SIZE_TYPE + exe_params->SZ_SIZE_TYPE + + minLogValueSize /*max absolute log value*/ + + tdps->typeArray_size + tdps->leadNumArray_size + + tdps->exactMidBytes_size + residualMidBitsLength + tdps->pwrErrBoundBytes_size; + if(confparams_cpr->errorBoundMode == PW_REL && confparams_cpr->accelerate_pw_rel_compression) + totalByteLength += (1+1); // for MSST19 + + *bytes = (unsigned char *)malloc(sizeof(unsigned char)*totalByteLength); + + convertTDPStoBytes_double(tdps, *bytes, dsLengthBytes, sameByte); + + *size = totalByteLength; + } + else //the case with reserved value + { + //TODO + } +} + +void convertTDPStoFlatBytes_double_args(TightDataPointStorageD *tdps, unsigned char* bytes, size_t *size) +{ + size_t i, k = 0; + unsigned char dsLengthBytes[8]; + + if(exe_params->SZ_SIZE_TYPE==4) + intToBytes_bigEndian(dsLengthBytes, tdps->dataSeriesLength);//4 + else + longToBytes_bigEndian(dsLengthBytes, tdps->dataSeriesLength);//8 + + unsigned char sameByte = tdps->allSameData==1?(unsigned char)1:(unsigned char)0; //0000,0001 + sameByte = sameByte | (confparams_cpr->szMode << 1); //0000,0110 + if(tdps->isLossless) + sameByte = (unsigned char) (sameByte | 0x10); // 0001,0000 + if(confparams_cpr->errorBoundMode>=PW_REL) + sameByte = (unsigned char) (sameByte | 0x20); // 0010,0000, the 5th bit + if(exe_params->SZ_SIZE_TYPE==8) + sameByte = (unsigned char) (sameByte | 0x40); //0100,0000, the 6th bit + if(confparams_cpr->errorBoundMode == PW_REL && confparams_cpr->accelerate_pw_rel_compression) + sameByte = (unsigned 
char) (sameByte | 0x08); //0000,1000, the 3rd bit + if(confparams_cpr->protectValueRange) + sameByte = (unsigned char) (sameByte | 0x04); //0000,0100 + if(tdps->allSameData==1) + { + size_t totalByteLength = 3 + 1 + MetaDataByteLength_double + exe_params->SZ_SIZE_TYPE + tdps->exactMidBytes_size; + + for (i = 0; i < 3; i++)//3 + bytes[k++] = versionNumber[i]; + bytes[k++] = sameByte; + + convertSZParamsToBytes(confparams_cpr, &(bytes[k])); + k = k + MetaDataByteLength_double; + + for (i = 0; i < exe_params->SZ_SIZE_TYPE; i++) + bytes[k++] = dsLengthBytes[i]; + for (i = 0; i < tdps->exactMidBytes_size; i++) + bytes[k++] = tdps->exactMidBytes[i]; + + *size = totalByteLength; + } + else if (tdps->rtypeArray == NULL) + { + size_t residualMidBitsLength = tdps->residualMidBits == NULL ? 0 : tdps->residualMidBits_size; + size_t segmentL = 0, radExpoL = 0, pwrBoundArrayL = 0, minLogValueSize = 0; + if(confparams_cpr->errorBoundMode>=PW_REL) + { + segmentL = exe_params->SZ_SIZE_TYPE; + radExpoL = 1; + pwrBoundArrayL = 4; minLogValueSize = 8; /* convertTDPStoBytes_double also emits minLogValue as 8 bytes in these modes */ + } + + size_t totalByteLength = 3 + 1 + MetaDataByteLength_double + exe_params->SZ_SIZE_TYPE+ 4 + radExpoL + segmentL + pwrBoundArrayL + 4 + 8 + 1 + 8 + + exe_params->SZ_SIZE_TYPE + exe_params->SZ_SIZE_TYPE + exe_params->SZ_SIZE_TYPE + + tdps->typeArray_size + tdps->leadNumArray_size + + tdps->exactMidBytes_size + residualMidBitsLength + tdps->pwrErrBoundBytes_size+minLogValueSize; + if(confparams_cpr->errorBoundMode == PW_REL && confparams_cpr->accelerate_pw_rel_compression) + totalByteLength += (1+1); // for MSST19 + convertTDPStoBytes_double(tdps, bytes, dsLengthBytes, sameByte); + + *size = totalByteLength; + } + else //the case with reserved value + { + //TODO + } +} + + +void free_TightDataPointStorageD(TightDataPointStorageD *tdps) +{ + if(tdps->rtypeArray!=NULL) + free(tdps->rtypeArray); + if(tdps->typeArray!=NULL) + free(tdps->typeArray); + if(tdps->leadNumArray!=NULL) + free(tdps->leadNumArray); + if(tdps->exactMidBytes!=NULL) + free(tdps->exactMidBytes); +
if(tdps->residualMidBits!=NULL) + free(tdps->residualMidBits); + if(tdps->pwrErrBoundBytes!=NULL) + free(tdps->pwrErrBoundBytes); + free(tdps); +} + +/** + * to free the memory used in the decompression + * */ +void free_TightDataPointStorageD2(TightDataPointStorageD *tdps) +{ + free(tdps); +} diff --git a/deps/SZ/sz/src/TightDataPointStorageF.c b/deps/SZ/sz/src/TightDataPointStorageF.c new file mode 100644 index 0000000000000000000000000000000000000000..aa0e0c1ce3b4d9f796e915f5f3cdc62cd6780e82 --- /dev/null +++ b/deps/SZ/sz/src/TightDataPointStorageF.c @@ -0,0 +1,754 @@ +/** + * @file TightDataPointStorageF.c + * @author Sheng Di and Dingwen Tao + * @date Aug, 2016 + * @brief The functions used to construct the tightPointDataStorage element for storing compressed bytes. + * (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "TightDataPointStorageF.h" +#include "sz.h" +#include "Huffman.h" +//#include "rw.h" + +void new_TightDataPointStorageF_Empty(TightDataPointStorageF **this) +{ + *this = (TightDataPointStorageF*)malloc(sizeof(TightDataPointStorageF)); + (*this)->dataSeriesLength = 0; + (*this)->allSameData = 0; + (*this)->exactDataNum = 0; + (*this)->reservedValue = 0; + (*this)->reqLength = 0; + (*this)->radExpo = 0; + + (*this)->rtypeArray = NULL; + (*this)->rtypeArray_size = 0; + + (*this)->typeArray = NULL; //its size is dataSeriesLength/4 (or xxx/4+1) + (*this)->typeArray_size = 0; + + (*this)->leadNumArray = NULL; //its size is exactDataNum/4 (or exactDataNum/4+1) + (*this)->leadNumArray_size = 0; + + (*this)->exactMidBytes = NULL; + (*this)->exactMidBytes_size = 0; + + (*this)->residualMidBits = NULL; + (*this)->residualMidBits_size = 0; + + (*this)->intervals = 0; + (*this)->isLossless = 0; + + (*this)->segment_size = 0; + (*this)->pwrErrBoundBytes = NULL; + (*this)->pwrErrBoundBytes_size = 0; + + (*this)->raBytes = NULL; +
(*this)->raBytes_size = 0; +} + +int new_TightDataPointStorageF_fromFlatBytes(TightDataPointStorageF **this, unsigned char* flatBytes, size_t flatBytesLength) +{ + new_TightDataPointStorageF_Empty(this); + size_t i, index = 0; + size_t pwrErrBoundBytes_size = 0, segmentL = 0, radExpoL = 0, pwrErrBoundBytesL = 0; + char version[3]; + for (i = 0; i < 3; i++) + version[i] = flatBytes[index++]; //3 + unsigned char sameRByte = flatBytes[index++]; //1 + if(checkVersion2(version)!=1) + { + //wrong version + printf("Wrong version: \nCompressed-data version (%d.%d.%d)\n",version[0], version[1], version[2]); + printf("Current sz version: (%d.%d.%d)\n", versionNumber[0], versionNumber[1], versionNumber[2]); + printf("Please double-check if the compressed data (or file) is correct.\n"); + exit(0); + } + //note that 1000,0000 is reserved for regression tag. + int same = sameRByte & 0x01; //0000,0001 + (*this)->isLossless = (sameRByte & 0x10)>>4; //0001,0000 + int isPW_REL = (sameRByte & 0x20)>>5; //0010,0000 + exe_params->SZ_SIZE_TYPE = ((sameRByte & 0x40)>>6)==1?8:4; //0100,0000 + //confparams_dec->randomAccess = (sameRByte & 0x02) >> 1; + //confparams_dec->szMode = (sameRByte & 0x06) >> 1; //0000,0110 (in fact, this szMode could be removed because convertSZParamsToBytes will overwrite it) + + if(confparams_dec==NULL) /* allocate before the first confparams_dec-> write below */ + { + confparams_dec = (sz_params*)malloc(sizeof(sz_params)); + memset(confparams_dec, 0, sizeof(sz_params)); + } + confparams_dec->protectValueRange = (sameRByte & 0x04)>>2; + + confparams_dec->accelerate_pw_rel_compression = (sameRByte & 0x08) >> 3;//0000,1000 + + int errorBoundMode = ABS; + if(isPW_REL) + { + errorBoundMode = PW_REL; + segmentL = exe_params->SZ_SIZE_TYPE; + pwrErrBoundBytesL = 4; + } + + convertBytesToSZParams(&(flatBytes[index]), confparams_dec); + + index += MetaDataByteLength; + + int isRegression = (sameRByte >> 7) & 0x01; + + unsigned char dsLengthBytes[8]; + for (i = 0; i < exe_params->SZ_SIZE_TYPE; i++) + dsLengthBytes[i] =
flatBytes[index++]; + (*this)->dataSeriesLength = bytesToSize(dsLengthBytes);// 4 or 8 + + if((*this)->isLossless==1) + { + //(*this)->exactMidBytes = flatBytes+8; + return errorBoundMode; + } + else if(same==1) + { + (*this)->allSameData = 1; + //size_t exactMidBytesLength = sizeof(double);//flatBytesLength - 3 - 1 - MetaDataByteLength -exe_params->SZ_SIZE_TYPE; + (*this)->exactMidBytes = &(flatBytes[index]); + return errorBoundMode; + } + else + (*this)->allSameData = 0; + if(isRegression == 1) + { + (*this)->raBytes_size = flatBytesLength - 3 - 1 - MetaDataByteLength - exe_params->SZ_SIZE_TYPE; + (*this)->raBytes = &(flatBytes[index]); + return errorBoundMode; + } + + int rtype_ = 0;//sameRByte & 0x08; //=00001000 + unsigned char byteBuf[8]; + + for (i = 0; i < 4; i++) + byteBuf[i] = flatBytes[index++]; + int max_quant_intervals = bytesToInt_bigEndian(byteBuf);// 4 + + confparams_dec->maxRangeRadius = max_quant_intervals/2; + + if(errorBoundMode>=PW_REL) + { + (*this)->radExpo = flatBytes[index++];//1 + radExpoL = 1; + for (i = 0; i < exe_params->SZ_SIZE_TYPE; i++) + byteBuf[i] = flatBytes[index++]; + confparams_dec->segment_size = (*this)->segment_size = bytesToSize(byteBuf);// exe_params->SZ_SIZE_TYPE + + for (i = 0; i < 4; i++) + byteBuf[i] = flatBytes[index++]; + pwrErrBoundBytes_size = (*this)->pwrErrBoundBytes_size = bytesToInt_bigEndian(byteBuf);// 4 + } + else + { + pwrErrBoundBytes_size = 0; + (*this)->pwrErrBoundBytes = NULL; + } + for (i = 0; i < 4; i++) + byteBuf[i] = flatBytes[index++]; + (*this)->intervals = bytesToInt_bigEndian(byteBuf);// 4 + + for (i = 0; i < 4; i++) + byteBuf[i] = flatBytes[index++]; + (*this)->medianValue = bytesToFloat(byteBuf); //4 + + (*this)->reqLength = flatBytes[index++]; //1 + + if(isPW_REL && confparams_dec->accelerate_pw_rel_compression) + { + (*this)->plus_bits = flatBytes[index++]; + (*this)->max_bits = flatBytes[index++]; + } + + for (i = 0; i < 8; i++) + byteBuf[i] = flatBytes[index++]; + (*this)->realPrecision = 
bytesToDouble(byteBuf);//8 + + for (i = 0; i < exe_params->SZ_SIZE_TYPE; i++) + byteBuf[i] = flatBytes[index++]; + (*this)->typeArray_size = bytesToSize(byteBuf);// ST + if(rtype_!=0) + { + for(i = 0;i<exe_params->SZ_SIZE_TYPE;i++) + byteBuf[i] = flatBytes[index++]; + (*this)->rtypeArray_size = bytesToSize(byteBuf);//(ST) + } + else + (*this)->rtypeArray_size = 0; + + for (i = 0; i < exe_params->SZ_SIZE_TYPE; i++) + byteBuf[i] = flatBytes[index++]; + (*this)->exactDataNum = bytesToSize(byteBuf);// ST + + for (i = 0; i < exe_params->SZ_SIZE_TYPE; i++) + byteBuf[i] = flatBytes[index++]; + (*this)->exactMidBytes_size = bytesToSize(byteBuf);// ST + + if (rtype_ != 0) { + if((*this)->rtypeArray_size>0) + (*this)->rtypeArray = (unsigned char*)malloc(sizeof(unsigned char)*(*this)->rtypeArray_size); + else + (*this)->rtypeArray = NULL; + + for (i = 0; i < 4; i++) + byteBuf[i] = flatBytes[index++]; + (*this)->reservedValue = bytesToFloat(byteBuf);//4 + } + + size_t logicLeadNumBitsNum = (*this)->exactDataNum * 2; + if (logicLeadNumBitsNum % 8 == 0) + { + (*this)->leadNumArray_size = logicLeadNumBitsNum >> 3; + } + else + { + (*this)->leadNumArray_size = (logicLeadNumBitsNum >> 3) + 1; + } + + int minLogValueSize = 0; + if(errorBoundMode>=PW_REL) + minLogValueSize = 4; + + if ((*this)->rtypeArray != NULL) + { + (*this)->residualMidBits_size = flatBytesLength - 3 - 1 - MetaDataByteLength - exe_params->SZ_SIZE_TYPE - 4 - radExpoL - segmentL - pwrErrBoundBytesL - 4 - 4 - 1 - 8 + - exe_params->SZ_SIZE_TYPE - exe_params->SZ_SIZE_TYPE - exe_params->SZ_SIZE_TYPE - minLogValueSize - exe_params->SZ_SIZE_TYPE - 4 - (*this)->rtypeArray_size + - minLogValueSize - (*this)->typeArray_size - (*this)->leadNumArray_size + - (*this)->exactMidBytes_size - pwrErrBoundBytes_size - 1 - 1; + for (i = 0; i < (*this)->rtypeArray_size; i++) + (*this)->rtypeArray[i] = flatBytes[index++]; + } + else + { + (*this)->residualMidBits_size = flatBytesLength - 3 - 1 - MetaDataByteLength - exe_params->SZ_SIZE_TYPE - 4 -
radExpoL - segmentL - pwrErrBoundBytesL - 4 - 4 - 1 - 8 + - exe_params->SZ_SIZE_TYPE - exe_params->SZ_SIZE_TYPE - exe_params->SZ_SIZE_TYPE - minLogValueSize - (*this)->typeArray_size + - (*this)->leadNumArray_size - (*this)->exactMidBytes_size - pwrErrBoundBytes_size - 1 - 1; + } + + if(errorBoundMode>=PW_REL) + { + (*this)->minLogValue = bytesToFloat(&flatBytes[index]); + index+=4; + } + + (*this)->typeArray = &flatBytes[index]; + //retrieve the number of states (i.e., stateNum) + (*this)->allNodes = bytesToInt_bigEndian((*this)->typeArray); //the first 4 bytes store the stateNum + (*this)->stateNum = ((*this)->allNodes+1)/2; + + index+=(*this)->typeArray_size; + + (*this)->pwrErrBoundBytes = &flatBytes[index]; + + index+=pwrErrBoundBytes_size; + + (*this)->leadNumArray = &flatBytes[index]; + + index+=(*this)->leadNumArray_size; + + (*this)->exactMidBytes = &flatBytes[index]; + + index+=(*this)->exactMidBytes_size; + + (*this)->residualMidBits = &flatBytes[index]; + + //index+=(*this)->residualMidBits_size; + + return errorBoundMode; +} + +/** + * + * type's length == dataSeriesLength + * exactMidBytes's length == exactMidBytes_size + * leadNumIntArray's length == exactDataNum + * escBytes's length == escBytes_size + * resiBitLength's length == resiBitLengthSize + * */ +void new_TightDataPointStorageF(TightDataPointStorageF **this, + size_t dataSeriesLength, size_t exactDataNum, + int* type, unsigned char* exactMidBytes, size_t exactMidBytes_size, + unsigned char* leadNumIntArray, //leadNumIntArray contains readable numbers.... 
+ unsigned char* resiMidBits, size_t resiMidBits_size, + unsigned char resiBitLength, + double realPrecision, float medianValue, char reqLength, unsigned int intervals, + unsigned char* pwrErrBoundBytes, size_t pwrErrBoundBytes_size, unsigned char radExpo) { + + *this = (TightDataPointStorageF *)malloc(sizeof(TightDataPointStorageF)); + (*this)->allSameData = 0; + (*this)->realPrecision = realPrecision; + (*this)->medianValue = medianValue; + (*this)->reqLength = reqLength; + + (*this)->dataSeriesLength = dataSeriesLength; + (*this)->exactDataNum = exactDataNum; + + (*this)->rtypeArray = NULL; + (*this)->rtypeArray_size = 0; + + int stateNum = 2*intervals; + HuffmanTree* huffmanTree = createHuffmanTree(stateNum); + if(confparams_cpr->errorBoundMode == PW_REL && confparams_cpr->accelerate_pw_rel_compression) + (*this)->max_bits = encode_withTree_MSST19(huffmanTree, type, dataSeriesLength, &(*this)->typeArray, &(*this)->typeArray_size); + else + encode_withTree(huffmanTree, type, dataSeriesLength, &(*this)->typeArray, &(*this)->typeArray_size); + SZ_ReleaseHuffman(huffmanTree); + + (*this)->exactMidBytes = exactMidBytes; + (*this)->exactMidBytes_size = exactMidBytes_size; + + (*this)->leadNumArray_size = convertIntArray2ByteArray_fast_2b(leadNumIntArray, exactDataNum, &((*this)->leadNumArray)); + + (*this)->residualMidBits_size = convertIntArray2ByteArray_fast_dynamic(resiMidBits, resiBitLength, exactDataNum, &((*this)->residualMidBits)); + + (*this)->intervals = intervals; + + (*this)->isLossless = 0; + + if(confparams_cpr->errorBoundMode>=PW_REL) + (*this)->pwrErrBoundBytes = pwrErrBoundBytes; + else + (*this)->pwrErrBoundBytes = NULL; + + (*this)->radExpo = radExpo; + + (*this)->pwrErrBoundBytes_size = pwrErrBoundBytes_size; +} + +void new_TightDataPointStorageF2(TightDataPointStorageF **this, + size_t dataSeriesLength, size_t exactDataNum, + int* type, unsigned char* exactMidBytes, size_t exactMidBytes_size, + unsigned char* leadNumIntArray, //leadNumIntArray 
contains readable numbers.... + unsigned char* resiMidBits, size_t resiMidBits_size, + unsigned char* resiBitLength, size_t resiBitLengthSize, + double realPrecision, float medianValue, char reqLength, unsigned int intervals, + unsigned char* pwrErrBoundBytes, size_t pwrErrBoundBytes_size, unsigned char radExpo) { + //int i = 0; + *this = (TightDataPointStorageF *)malloc(sizeof(TightDataPointStorageF)); + (*this)->allSameData = 0; + (*this)->realPrecision = realPrecision; + (*this)->medianValue = medianValue; + (*this)->reqLength = reqLength; + + (*this)->dataSeriesLength = dataSeriesLength; + (*this)->exactDataNum = exactDataNum; + + (*this)->rtypeArray = NULL; + (*this)->rtypeArray_size = 0; + + int stateNum = 2*intervals; + HuffmanTree* huffmanTree = createHuffmanTree(stateNum); + encode_withTree(huffmanTree, type, dataSeriesLength, &(*this)->typeArray, &(*this)->typeArray_size); + SZ_ReleaseHuffman(huffmanTree); + + (*this)->exactMidBytes = exactMidBytes; + (*this)->exactMidBytes_size = exactMidBytes_size; + + (*this)->leadNumArray_size = convertIntArray2ByteArray_fast_2b(leadNumIntArray, exactDataNum, &((*this)->leadNumArray)); + + //(*this)->residualMidBits = resiMidBits; + //(*this)->residualMidBits_size = resiMidBits_size; + + (*this)->residualMidBits_size = convertIntArray2ByteArray_fast_dynamic2(resiMidBits, resiBitLength, resiBitLengthSize, &((*this)->residualMidBits)); + + (*this)->intervals = intervals; + + (*this)->isLossless = 0; + + if(confparams_cpr->errorBoundMode>=PW_REL) + (*this)->pwrErrBoundBytes = pwrErrBoundBytes; + else + (*this)->pwrErrBoundBytes = NULL; + + (*this)->radExpo = radExpo; + + (*this)->pwrErrBoundBytes_size = pwrErrBoundBytes_size; +} + +void convertTDPStoBytes_float(TightDataPointStorageF* tdps, unsigned char* bytes, unsigned char* dsLengthBytes, unsigned char sameByte) +{ + size_t i, k = 0; + unsigned char intervalsBytes[4]; + unsigned char typeArrayLengthBytes[8]; + unsigned char exactLengthBytes[8]; + unsigned char 
exactMidBytesLength[8]; + unsigned char realPrecisionBytes[8]; + + unsigned char medianValueBytes[4]; + + unsigned char segment_sizeBytes[8]; + unsigned char pwrErrBoundBytes_sizeBytes[4]; + unsigned char max_quant_intervals_Bytes[4]; + + + for(i = 0;i<3;i++)//3 bytes + bytes[k++] = versionNumber[i]; + bytes[k++] = sameByte; //1 byte + + convertSZParamsToBytes(confparams_cpr, &(bytes[k])); + k = k + MetaDataByteLength; + + for(i = 0;i<exe_params->SZ_SIZE_TYPE;i++)//ST: 4 or 8 bytes + bytes[k++] = dsLengthBytes[i]; + intToBytes_bigEndian(max_quant_intervals_Bytes, confparams_cpr->max_quant_intervals); + for(i = 0;i<4;i++)//4 + bytes[k++] = max_quant_intervals_Bytes[i]; + + if(confparams_cpr->errorBoundMode>=PW_REL) + { + bytes[k++] = tdps->radExpo; //1 byte + + sizeToBytes(segment_sizeBytes, confparams_cpr->segment_size); + for(i = 0;i<exe_params->SZ_SIZE_TYPE;i++)//ST + bytes[k++] = segment_sizeBytes[i]; + + intToBytes_bigEndian(pwrErrBoundBytes_sizeBytes, tdps->pwrErrBoundBytes_size); + for(i = 0;i<4;i++)//4 + bytes[k++] = pwrErrBoundBytes_sizeBytes[i]; + } + + intToBytes_bigEndian(intervalsBytes, tdps->intervals); + for(i = 0;i<4;i++)//4 + bytes[k++] = intervalsBytes[i]; + + floatToBytes(medianValueBytes, tdps->medianValue); + for (i = 0; i < 4; i++)// 4 + bytes[k++] = medianValueBytes[i]; + + bytes[k++] = tdps->reqLength; //1 byte + + if(confparams_cpr->errorBoundMode == PW_REL && confparams_cpr->accelerate_pw_rel_compression) + { + bytes[k++] = tdps->plus_bits; + bytes[k++] = tdps->max_bits; + } + + doubleToBytes(realPrecisionBytes, tdps->realPrecision); + + for (i = 0; i < 8; i++)// 8 + bytes[k++] = realPrecisionBytes[i]; + + sizeToBytes(typeArrayLengthBytes, tdps->typeArray_size); + for(i = 0;i<exe_params->SZ_SIZE_TYPE;i++)//ST + bytes[k++] = typeArrayLengthBytes[i]; + + sizeToBytes(exactLengthBytes, tdps->exactDataNum); + for(i = 0;i<exe_params->SZ_SIZE_TYPE;i++)//ST + bytes[k++] = exactLengthBytes[i]; + + sizeToBytes(exactMidBytesLength, tdps->exactMidBytes_size); + for(i = 0;i<exe_params->SZ_SIZE_TYPE;i++)//ST +
bytes[k++] = exactMidBytesLength[i]; + + if(confparams_cpr->errorBoundMode>=PW_REL) + { + floatToBytes(exactMidBytesLength, tdps->minLogValue); + for(i=0;i<4;i++) + bytes[k++] = exactMidBytesLength[i]; + } + + memcpy(&(bytes[k]), tdps->typeArray, tdps->typeArray_size); + k += tdps->typeArray_size; + if(confparams_cpr->errorBoundMode>=PW_REL) + { + memcpy(&(bytes[k]), tdps->pwrErrBoundBytes, tdps->pwrErrBoundBytes_size); + k += tdps->pwrErrBoundBytes_size; + } + + memcpy(&(bytes[k]), tdps->leadNumArray, tdps->leadNumArray_size); + k += tdps->leadNumArray_size; + memcpy(&(bytes[k]), tdps->exactMidBytes, tdps->exactMidBytes_size); + k += tdps->exactMidBytes_size; + + if(tdps->residualMidBits!=NULL) + { + memcpy(&(bytes[k]), tdps->residualMidBits, tdps->residualMidBits_size); + k += tdps->residualMidBits_size; + } +} + +/*deprecated*/ +void convertTDPStoBytes_float_reserve(TightDataPointStorageF* tdps, unsigned char* bytes, unsigned char* dsLengthBytes, unsigned char sameByte) +{ + size_t i, k = 0; + unsigned char intervalsBytes[4]; + unsigned char typeArrayLengthBytes[8]; + unsigned char rTypeLengthBytes[8]; + unsigned char exactLengthBytes[8]; + unsigned char exactMidBytesLength[8]; + unsigned char realPrecisionBytes[8]; + unsigned char reservedValueBytes[4]; + + unsigned char medianValueBytes[4]; + + unsigned char segment_sizeBytes[8]; + unsigned char pwrErrBoundBytes_sizeBytes[4]; + unsigned char max_quant_intervals_Bytes[4]; + + for(i = 0;i<3;i++)//3 + bytes[k++] = versionNumber[i]; + bytes[k++] = sameByte; //1 + + convertSZParamsToBytes(confparams_cpr, &(bytes[k])); + k = k + MetaDataByteLength; + + for(i = 0;i<exe_params->SZ_SIZE_TYPE;i++)//ST + bytes[k++] = dsLengthBytes[i]; + + + intToBytes_bigEndian(max_quant_intervals_Bytes, confparams_cpr->max_quant_intervals); + for(i = 0;i<4;i++)//4 + bytes[k++] = max_quant_intervals_Bytes[i]; + + if(confparams_cpr->errorBoundMode>=PW_REL) + { + bytes[k++] = tdps->radExpo; //1 byte + + sizeToBytes(segment_sizeBytes,
confparams_cpr->segment_size); + for(i = 0;iSZ_SIZE_TYPE;i++)//ST + bytes[k++] = segment_sizeBytes[i]; + + intToBytes_bigEndian(pwrErrBoundBytes_sizeBytes, tdps->pwrErrBoundBytes_size); + for(i = 0;i<4;i++)//4 + bytes[k++] = pwrErrBoundBytes_sizeBytes[i]; + } + + intToBytes_bigEndian(intervalsBytes, tdps->intervals); + for(i = 0;i<4;i++)//4 + bytes[k++] = intervalsBytes[i]; + + floatToBytes(medianValueBytes, tdps->medianValue); + for (i = 0; i < 4; i++)// 4 + bytes[k++] = medianValueBytes[i]; + + bytes[k++] = tdps->reqLength; //1 byte + + floatToBytes(realPrecisionBytes, tdps->realPrecision); + for (i = 0; i < 8; i++)// 8 + bytes[k++] = realPrecisionBytes[i]; + + sizeToBytes(typeArrayLengthBytes, tdps->typeArray_size); + for(i = 0;iSZ_SIZE_TYPE;i++)//ST + bytes[k++] = typeArrayLengthBytes[i]; + + sizeToBytes(rTypeLengthBytes, tdps->rtypeArray_size); + for(i = 0;iSZ_SIZE_TYPE;i++)//ST + bytes[k++] = rTypeLengthBytes[i]; + + sizeToBytes(exactLengthBytes, tdps->exactDataNum); + for(i = 0;iSZ_SIZE_TYPE;i++)//ST + bytes[k++] = exactLengthBytes[i]; + + sizeToBytes(exactMidBytesLength, tdps->exactMidBytes_size); + for(i = 0;iSZ_SIZE_TYPE;i++)//ST + bytes[k++] = exactMidBytesLength[i]; + + floatToBytes(reservedValueBytes, tdps->reservedValue); + for (i = 0; i < 4; i++)// 4 + bytes[k++] = reservedValueBytes[i]; + + memcpy(&(bytes[k]), tdps->rtypeArray, tdps->rtypeArray_size); + k += tdps->rtypeArray_size; + + if(confparams_cpr->errorBoundMode>=PW_REL) + { + floatToBytes(exactMidBytesLength, tdps->minLogValue); + for(i=0;i<4;i++) + bytes[k++] = exactMidBytesLength[i]; + } + + memcpy(&(bytes[k]), tdps->typeArray, tdps->typeArray_size); + k += tdps->typeArray_size; + if(confparams_cpr->errorBoundMode>=PW_REL) + { + memcpy(&(bytes[k]), tdps->pwrErrBoundBytes, tdps->pwrErrBoundBytes_size); + k += tdps->pwrErrBoundBytes_size; + } + memcpy(&(bytes[k]), tdps->leadNumArray, tdps->leadNumArray_size); + k += tdps->leadNumArray_size; + memcpy(&(bytes[k]), tdps->exactMidBytes, 
tdps->exactMidBytes_size); + k += tdps->exactMidBytes_size; + if(tdps->residualMidBits!=NULL) + { + memcpy(&(bytes[k]), tdps->residualMidBits, tdps->residualMidBits_size); + k += tdps->residualMidBits_size; + } +} + +//convert TightDataPointStorageD to bytes... +void convertTDPStoFlatBytes_float(TightDataPointStorageF *tdps, unsigned char** bytes, size_t *size) +{ + size_t i, k = 0; + unsigned char dsLengthBytes[8]; + + if(exe_params->SZ_SIZE_TYPE==4) + intToBytes_bigEndian(dsLengthBytes, tdps->dataSeriesLength);//4 + else + longToBytes_bigEndian(dsLengthBytes, tdps->dataSeriesLength);//8 + + unsigned char sameByte = tdps->allSameData==1?(unsigned char)1:(unsigned char)0; //0000,0001 + //sameByte = sameByte | (confparams_cpr->szMode << 1); //0000,0110 (no need because of convertSZParamsToBytes + if(tdps->isLossless) + sameByte = (unsigned char) (sameByte | 0x10); // 0001,0000 + if(confparams_cpr->errorBoundMode>=PW_REL) + sameByte = (unsigned char) (sameByte | 0x20); // 0010,0000, the 5th bit + if(exe_params->SZ_SIZE_TYPE==8) + sameByte = (unsigned char) (sameByte | 0x40); // 0100,0000, the 6th bit + if(confparams_cpr->errorBoundMode == PW_REL && confparams_cpr->accelerate_pw_rel_compression) + sameByte = (unsigned char) (sameByte | 0x08); //0000,1000 + if(confparams_cpr->protectValueRange) + sameByte = (unsigned char) (sameByte | 0x04); //0000,0100 + + if(tdps->allSameData==1) + { + size_t totalByteLength = 3 + 1 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + tdps->exactMidBytes_size; + *bytes = (unsigned char *)malloc(sizeof(unsigned char)*totalByteLength); + + for (i = 0; i < 3; i++)//3 + (*bytes)[k++] = versionNumber[i]; + (*bytes)[k++] = sameByte; + + convertSZParamsToBytes(confparams_cpr, &((*bytes)[k])); + k = k + MetaDataByteLength; + + for (i = 0; i < exe_params->SZ_SIZE_TYPE; i++) + (*bytes)[k++] = dsLengthBytes[i]; + + for (i = 0; i < tdps->exactMidBytes_size; i++) + (*bytes)[k++] = tdps->exactMidBytes[i]; + + *size = totalByteLength; + } + else if 
(tdps->rtypeArray == NULL) + { + size_t residualMidBitsLength = tdps->residualMidBits == NULL ? 0 : tdps->residualMidBits_size; + size_t segmentL = 0, radExpoL = 0, pwrBoundArrayL = 0; + int minLogValueSize = 0; + if(confparams_cpr->errorBoundMode>=PW_REL) + { + segmentL = exe_params->SZ_SIZE_TYPE; + radExpoL = 1; + pwrBoundArrayL = 4; + minLogValueSize = 4; + } + + size_t totalByteLength = 3 + 1 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 4 + radExpoL + segmentL + pwrBoundArrayL + 4 + 4 + 1 + 8 + + exe_params->SZ_SIZE_TYPE + exe_params->SZ_SIZE_TYPE + exe_params->SZ_SIZE_TYPE + minLogValueSize + + tdps->typeArray_size + tdps->leadNumArray_size + + tdps->exactMidBytes_size + residualMidBitsLength + tdps->pwrErrBoundBytes_size; + if(confparams_cpr->errorBoundMode == PW_REL && confparams_cpr->accelerate_pw_rel_compression) + totalByteLength += (1+1); // for MSST19 + + *bytes = (unsigned char *)malloc(sizeof(unsigned char)*totalByteLength); + + convertTDPStoBytes_float(tdps, *bytes, dsLengthBytes, sameByte); + + *size = totalByteLength; + } + else //the case with reserved value + { + //TODO + } +} + +void convertTDPStoFlatBytes_float_args(TightDataPointStorageF *tdps, unsigned char* bytes, size_t *size) +{ + size_t i, k = 0; + unsigned char dsLengthBytes[8]; + + if(exe_params->SZ_SIZE_TYPE==4) + intToBytes_bigEndian(dsLengthBytes, tdps->dataSeriesLength);//4 + else + longToBytes_bigEndian(dsLengthBytes, tdps->dataSeriesLength);//8 + + unsigned char sameByte = tdps->allSameData==1?(unsigned char)1:(unsigned char)0; + sameByte = sameByte | (confparams_cpr->szMode << 1); + if(tdps->isLossless) + sameByte = (unsigned char) (sameByte | 0x10); + if(confparams_cpr->errorBoundMode>=PW_REL) + sameByte = (unsigned char) (sameByte | 0x20); // 00100000, the 5th bit + if(exe_params->SZ_SIZE_TYPE==8) + sameByte = (unsigned char) (sameByte | 0x40); // 01000000, the 6th bit + if(confparams_cpr->errorBoundMode == PW_REL && confparams_cpr->accelerate_pw_rel_compression) + 
sameByte = (unsigned char) (sameByte | 0x08); + + if(tdps->allSameData==1) + { + size_t totalByteLength = 3 + 1 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + tdps->exactMidBytes_size; + //*bytes = (unsigned char *)malloc(sizeof(unsigned char)*totalByteLength); + + for (i = 0; i < 3; i++)//3 + bytes[k++] = versionNumber[i]; + bytes[k++] = sameByte; + + convertSZParamsToBytes(confparams_cpr, &(bytes[k])); + k = k + MetaDataByteLength; + + for (i = 0; i < exe_params->SZ_SIZE_TYPE; i++) + bytes[k++] = dsLengthBytes[i]; + for (i = 0; i < tdps->exactMidBytes_size; i++) + bytes[k++] = tdps->exactMidBytes[i]; + + *size = totalByteLength; + } + else if (tdps->rtypeArray == NULL) + { + size_t residualMidBitsLength = tdps->residualMidBits == NULL ? 0 : tdps->residualMidBits_size; + size_t segmentL = 0, radExpoL = 0, pwrBoundArrayL = 0; + if(confparams_cpr->errorBoundMode>=PW_REL) + { + segmentL = exe_params->SZ_SIZE_TYPE; + radExpoL = 1; + pwrBoundArrayL = 4; + } + + size_t totalByteLength = 3 + 1 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 4 + radExpoL + segmentL + pwrBoundArrayL + 4 + 4 + 1 + 8 + + exe_params->SZ_SIZE_TYPE + exe_params->SZ_SIZE_TYPE + exe_params->SZ_SIZE_TYPE + + tdps->typeArray_size + tdps->leadNumArray_size + + tdps->exactMidBytes_size + residualMidBitsLength + tdps->pwrErrBoundBytes_size; + if(confparams_cpr->errorBoundMode == PW_REL && confparams_cpr->accelerate_pw_rel_compression) + totalByteLength += (1+1); // for MSST19 + convertTDPStoBytes_float(tdps, bytes, dsLengthBytes, sameByte); + + *size = totalByteLength; + } + else //the case with reserved value + { + //TODO + } +} + +/** + * to free the memory used in the compression + * */ +void free_TightDataPointStorageF(TightDataPointStorageF *tdps) +{ + if(tdps->rtypeArray!=NULL) + free(tdps->rtypeArray); + if(tdps->typeArray!=NULL) + free(tdps->typeArray); + if(tdps->leadNumArray!=NULL) + free(tdps->leadNumArray); + if(tdps->exactMidBytes!=NULL) + free(tdps->exactMidBytes); + 
/**
 * Maps a 2-bit size code to an element width in bytes:
 * 0 -> 1, 1 -> 2, 2 -> 4, 3 -> 8. Any other code yields 0.
 */
int convertDataTypeSizeCode(int dataTypeSizeCode)
{
	static const int widthInBytes[4] = {1, 2, 4, 8};

	if(dataTypeSizeCode < 0 || dataTypeSizeCode > 3)
		return 0;
	return widthInBytes[dataTypeSizeCode];
}
result = 12; //1100 + break; + } + return result; +} + +void new_TightDataPointStorageI_Empty(TightDataPointStorageI **this) +{ + *this = (TightDataPointStorageI*)malloc(sizeof(TightDataPointStorageI)); + + (*this)->dataSeriesLength = 0; + (*this)->allSameData = 0; + (*this)->exactDataNum = 0; + (*this)->realPrecision = 0; + (*this)->minValue = 0; + (*this)->exactByteSize = 0; + + (*this)->typeArray = NULL; //its size is dataSeriesLength/4 (or xxx/4+1) + (*this)->typeArray_size = 0; + + (*this)->exactDataBytes = NULL; + (*this)->exactDataBytes_size = 0; + + (*this)->intervals = 0; + (*this)->isLossless = 0; +} + +int new_TightDataPointStorageI_fromFlatBytes(TightDataPointStorageI **this, unsigned char* flatBytes, size_t flatBytesLength) +{ + new_TightDataPointStorageI_Empty(this); + size_t i, index = 0; + char version[3]; + for (i = 0; i < 3; i++) + version[i] = flatBytes[index++]; //3 + unsigned char sameRByte = flatBytes[index++]; //1 + if(checkVersion2(version)!=1) + { + //wrong version + printf("Wrong version: \nCompressed-data version (%d.%d.%d)\n",version[0], version[1], version[2]); + printf("Current sz version: (%d.%d.%d)\n", versionNumber[0], versionNumber[1], versionNumber[2]); + printf("Please double-check if the compressed data (or file) is correct.\n"); + exit(0); + } + int same = sameRByte & 0x01; + //conf_params->szMode = (sameRByte & 0x06)>>1; + int dataByteSizeCode = (sameRByte & 0x0C)>>2; + convertDataTypeSizeCode(dataByteSizeCode); //in bytes + (*this)->isLossless = (sameRByte & 0x10)>>4; + + exe_params->SZ_SIZE_TYPE = ((sameRByte & 0x40)>>6)==1?8:4; + int errorBoundMode = ABS; + + if(confparams_dec==NULL) + { + confparams_dec = (sz_params*)malloc(sizeof(sz_params)); + memset(confparams_dec, 0, sizeof(sz_params)); + } + convertBytesToSZParams(&(flatBytes[index]), confparams_dec); + /*sz_params* params = convertBytesToSZParams(&(flatBytes[index])); + int mode = confparams_dec->szMode; + int losslessCompressor = confparams_dec->losslessCompressor; 
+ if(confparams_dec!=NULL) + free(confparams_dec); + confparams_dec = params; + confparams_dec->szMode = mode; + confparams_dec->losslessCompressor = losslessCompressor;*/ + + index += MetaDataByteLength; //20 + + if(same==0) + (*this)->exactByteSize = flatBytes[index++]; //1 + + unsigned char dsLengthBytes[8]; + for (i = 0; i < exe_params->SZ_SIZE_TYPE; i++) + dsLengthBytes[i] = flatBytes[index++]; + (*this)->dataSeriesLength = bytesToSize(dsLengthBytes);// ST + if((*this)->isLossless==1) + { + //(*this)->exactMidBytes = flatBytes+8; + return errorBoundMode; + } + else if(same==1) + { + (*this)->allSameData = 1; + (*this)->exactDataBytes = &(flatBytes[index]); + return errorBoundMode; + } + else + (*this)->allSameData = 0; + + unsigned char byteBuf[8]; + + for (i = 0; i < 4; i++) + byteBuf[i] = flatBytes[index++]; + int max_quant_intervals = bytesToInt_bigEndian(byteBuf);// 4 + + confparams_dec->maxRangeRadius = max_quant_intervals/2; + + if(errorBoundMode>=PW_REL) + { + printf("Error: errorBoundMode>=PW_REL in new_TightDataPointStorageI_fromFlatBytes!! 
Wrong...\n"); + exit(0); + } + + for (i = 0; i < 4; i++) + byteBuf[i] = flatBytes[index++]; + (*this)->intervals = bytesToInt_bigEndian(byteBuf);// 4 + + for (i = 0; i < 8; i++) + byteBuf[i] = flatBytes[index++]; + (*this)->minValue = bytesToLong_bigEndian(byteBuf); //8 + + for (i = 0; i < 8; i++) + byteBuf[i] = flatBytes[index++]; + (*this)->realPrecision = bytesToDouble(byteBuf);//8 + + for (i = 0; i < exe_params->SZ_SIZE_TYPE; i++) + byteBuf[i] = flatBytes[index++]; + (*this)->typeArray_size = bytesToSize(byteBuf);// ST + + for (i = 0; i < exe_params->SZ_SIZE_TYPE; i++) + byteBuf[i] = flatBytes[index++]; + (*this)->exactDataNum = bytesToSize(byteBuf);// ST + + for (i = 0; i < exe_params->SZ_SIZE_TYPE; i++) + byteBuf[i] = flatBytes[index++]; + (*this)->exactDataBytes_size = bytesToSize(byteBuf);// ST + + + (*this)->typeArray = &flatBytes[index]; + //retrieve the number of states (i.e., stateNum) + (*this)->allNodes = bytesToInt_bigEndian((*this)->typeArray); //the first 4 bytes store the stateNum + (*this)->stateNum = ((*this)->allNodes+1)/2; + + index+=(*this)->typeArray_size; + + if((*this)->exactDataBytes_size > 0) + { + (*this)->exactDataBytes = &flatBytes[index]; + index+=(*this)->exactDataBytes_size*sizeof(char); + } + else + (*this)->exactDataBytes = NULL; + return errorBoundMode; +} + +/** + * + * type's length == dataSeriesLength + * exactDataBytes's length == exactDataBytes_size + * */ +void new_TightDataPointStorageI(TightDataPointStorageI **this, + size_t dataSeriesLength, size_t exactDataNum, int byteSize, + int* type, unsigned char* exactDataBytes, size_t exactDataBytes_size, + double realPrecision, long minValue, int intervals, int dataType) +{ + //int i = 0; + *this = (TightDataPointStorageI *)malloc(sizeof(TightDataPointStorageI)); + (*this)->allSameData = 0; + (*this)->realPrecision = realPrecision; + (*this)->minValue = minValue; + switch(dataType) + { + case SZ_INT8: + case SZ_UINT8: + (*this)->dataTypeSize = 1; + break; + case SZ_INT16: + 
case SZ_UINT16: + (*this)->dataTypeSize = 2; + break; + case SZ_INT32: + case SZ_UINT32: + (*this)->dataTypeSize = 4; + break; + case SZ_INT64: + case SZ_UINT64: + (*this)->dataTypeSize = 8; + break; + } + + (*this)->dataSeriesLength = dataSeriesLength; + (*this)->exactDataNum = exactDataNum; + (*this)->exactByteSize = byteSize; + + + int stateNum = 2*intervals; + HuffmanTree* huffmanTree = createHuffmanTree(stateNum); + encode_withTree(huffmanTree, type, dataSeriesLength, &(*this)->typeArray, &(*this)->typeArray_size); + SZ_ReleaseHuffman(huffmanTree); + + (*this)->exactDataBytes = exactDataBytes; + (*this)->exactDataBytes_size = exactDataBytes_size; + + (*this)->intervals = intervals; + + (*this)->isLossless = 0; +} + +void convertTDPStoBytes_int(TightDataPointStorageI* tdps, unsigned char* bytes, unsigned char sameByte) +{ + size_t i, k = 0; + + unsigned char byteBuffer[8] = {0,0,0,0,0,0,0,0}; + + for(i = 0;i<3;i++)//3 bytes + bytes[k++] = versionNumber[i]; + bytes[k++] = sameByte; //1 byte + + convertSZParamsToBytes(confparams_cpr, &(bytes[k])); + k = k + MetaDataByteLength; + + bytes[k++] = tdps->exactByteSize; //1 byte + + sizeToBytes(byteBuffer, tdps->dataSeriesLength); + for(i = 0;iSZ_SIZE_TYPE;i++)//ST: 4 or 8 bytes + bytes[k++] = byteBuffer[i]; + + intToBytes_bigEndian(byteBuffer, confparams_cpr->max_quant_intervals); + for(i = 0;i<4;i++)//4 + bytes[k++] = byteBuffer[i]; + + intToBytes_bigEndian(byteBuffer, tdps->intervals); + for(i = 0;i<4;i++)//4 + bytes[k++] = byteBuffer[i]; + + longToBytes_bigEndian(byteBuffer, tdps->minValue); + for (i = 0; i < 8; i++)// 8 + bytes[k++] = byteBuffer[i]; + + doubleToBytes(byteBuffer, tdps->realPrecision); + for (i = 0; i < 8; i++)// 8 + bytes[k++] = byteBuffer[i]; + + sizeToBytes(byteBuffer, tdps->typeArray_size); + for(i = 0;iSZ_SIZE_TYPE;i++)//ST + bytes[k++] = byteBuffer[i]; + + sizeToBytes(byteBuffer, tdps->exactDataNum); + for(i = 0;iSZ_SIZE_TYPE;i++)//ST + bytes[k++] = byteBuffer[i]; + + sizeToBytes(byteBuffer, 
tdps->exactDataBytes_size); + for(i = 0;iSZ_SIZE_TYPE;i++)//ST + bytes[k++] = byteBuffer[i]; + + memcpy(&(bytes[k]), tdps->typeArray, tdps->typeArray_size); + k += tdps->typeArray_size; + + memcpy(&(bytes[k]), tdps->exactDataBytes, tdps->exactDataBytes_size); + k += tdps->exactDataBytes_size; +} + +//convert TightDataPointStorageI to bytes... +void convertTDPStoFlatBytes_int(TightDataPointStorageI *tdps, unsigned char** bytes, size_t *size) +{ + size_t i, k = 0; + unsigned char dsLengthBytes[8]; + + if(exe_params->SZ_SIZE_TYPE==4) + intToBytes_bigEndian(dsLengthBytes, tdps->dataSeriesLength);//4 + else + longToBytes_bigEndian(dsLengthBytes, tdps->dataSeriesLength);//8 + + unsigned char sameByte = tdps->allSameData==1?(unsigned char)1:(unsigned char)0; + sameByte = sameByte | (confparams_cpr->szMode << 1); + if(tdps->isLossless) + sameByte = (unsigned char) (sameByte | 0x10); + + int dataTypeSizeCode = convertDataTypeSize(tdps->dataTypeSize); + sameByte = (unsigned char) (sameByte | dataTypeSizeCode); + + if(exe_params->SZ_SIZE_TYPE==8) + sameByte = (unsigned char) (sameByte | 0x40); // 01000000, the 6th bit + + if(tdps->allSameData==1) + { + size_t totalByteLength = 3 + 1 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + tdps->exactDataBytes_size; + *bytes = (unsigned char *)malloc(sizeof(unsigned char)*totalByteLength); + + for (i = 0; i < 3; i++)//3 + (*bytes)[k++] = versionNumber[i]; + (*bytes)[k++] = sameByte;//1 + + convertSZParamsToBytes(confparams_cpr, &((*bytes)[k])); + k = k + MetaDataByteLength; + + for (i = 0; i < exe_params->SZ_SIZE_TYPE; i++) + (*bytes)[k++] = dsLengthBytes[i]; + + for (i = 0; i < tdps->exactDataBytes_size; i++) + (*bytes)[k++] = tdps->exactDataBytes[i]; + + *size = totalByteLength; + } + else + { + if(confparams_cpr->errorBoundMode>=PW_REL) + { + printf("Error: errorBoundMode >= PW_REL!! 
can't be...\n"); + exit(0); + } + + size_t totalByteLength = 3 + 1 + MetaDataByteLength + 1 + exe_params->SZ_SIZE_TYPE + 4 + 4 + 8 + 8 + + exe_params->SZ_SIZE_TYPE + exe_params->SZ_SIZE_TYPE + exe_params->SZ_SIZE_TYPE + + tdps->typeArray_size + tdps->exactDataBytes_size; + + *bytes = (unsigned char *)malloc(sizeof(unsigned char)*totalByteLength); + + convertTDPStoBytes_int(tdps, *bytes, sameByte); + + *size = totalByteLength; + } +} + +void convertTDPStoFlatBytes_int_args(TightDataPointStorageI *tdps, unsigned char* bytes, size_t *size) +{ + size_t i, k = 0; + unsigned char dsLengthBytes[8]; + + if(exe_params->SZ_SIZE_TYPE==4) + intToBytes_bigEndian(dsLengthBytes, tdps->dataSeriesLength);//4 + else + longToBytes_bigEndian(dsLengthBytes, tdps->dataSeriesLength);//8 + + unsigned char sameByte = tdps->allSameData==1?(unsigned char)1:(unsigned char)0; + sameByte = sameByte | (confparams_cpr->szMode << 1); + if(tdps->isLossless) + sameByte = (unsigned char) (sameByte | 0x10); + if(exe_params->SZ_SIZE_TYPE==8) + sameByte = (unsigned char) (sameByte | 0x40); // 01000000, the 6th bit + + if(tdps->allSameData==1) + { + size_t totalByteLength = 3 + 1 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + tdps->exactDataBytes_size; + //*bytes = (unsigned char *)malloc(sizeof(unsigned char)*totalByteLength); + + for (i = 0; i < 3; i++)//3 + bytes[k++] = versionNumber[i]; + bytes[k++] = sameByte;//1 + + convertSZParamsToBytes(confparams_cpr, &(bytes[k])); + k = k + MetaDataByteLength; + + for (i = 0; i < exe_params->SZ_SIZE_TYPE; i++)//ST + bytes[k++] = dsLengthBytes[i]; + for (i = 0; i < tdps->exactDataBytes_size; i++) + bytes[k++] = tdps->exactDataBytes[i]; + + *size = totalByteLength; + } + else + { + if(confparams_cpr->errorBoundMode>=PW_REL) + { + printf("Error: errorBoundMode>=PW_REL!! 
can't be....\n"); + exit(0); + } + + size_t totalByteLength = 3 + 1 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 1 + 4 + 4 + 8 + 8 + + exe_params->SZ_SIZE_TYPE + exe_params->SZ_SIZE_TYPE + exe_params->SZ_SIZE_TYPE + + tdps->typeArray_size + tdps->exactDataBytes_size; + + convertTDPStoBytes_int(tdps, bytes, sameByte); + + *size = totalByteLength; + } +} + +void free_TightDataPointStorageI(TightDataPointStorageI *tdps) +{ + if(tdps->typeArray!=NULL) + free(tdps->typeArray); + if(tdps->exactDataBytes!=NULL) + free(tdps->exactDataBytes); + free(tdps); +} + +void free_TightDataPointStorageI2(TightDataPointStorageI *tdps) +{ + free(tdps); +} + + diff --git a/deps/SZ/sz/src/TypeManager.c b/deps/SZ/sz/src/TypeManager.c new file mode 100644 index 0000000000000000000000000000000000000000..cf99a170c354906512972c890e1d07435cdbc0d3 --- /dev/null +++ b/deps/SZ/sz/src/TypeManager.c @@ -0,0 +1,503 @@ +/** + * @file TypeManager.c + * @author Sheng Di + * @date May, 2016 + * @brief TypeManager is used to manage the type array: parsing of the bytes and other types in between. + * (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. 
+ */ + +#include +#include +#include "DynamicByteArray.h" +#include "sz.h" + +//int convertIntArray2ByteArray_fast_8b() + +size_t convertIntArray2ByteArray_fast_1b(unsigned char* intArray, size_t intArrayLength, unsigned char **result) +{ + size_t byteLength = 0; + size_t i, j; + if(intArrayLength%8==0) + byteLength = intArrayLength/8; + else + byteLength = intArrayLength/8+1; + + if(byteLength>0) + *result = (unsigned char*)malloc(byteLength*sizeof(unsigned char)); + else + *result = NULL; + size_t n = 0; + int tmp, type; + for(i = 0;i byteArrayLength*8) + { + printf("Error: intArrayLength > byteArrayLength*8\n"); + printf("intArrayLength=%zu, byteArrayLength = %zu", intArrayLength, byteArrayLength); + exit(0); + } + if(intArrayLength>0) + *intArray = (unsigned char*)malloc(intArrayLength*sizeof(unsigned char)); + else + *intArray = NULL; + + size_t n = 0, i; + int tmp; + for (i = 0; i < byteArrayLength-1; i++) + { + tmp = byteArray[i]; + (*intArray)[n++] = (tmp & 0x80) >> 7; + (*intArray)[n++] = (tmp & 0x40) >> 6; + (*intArray)[n++] = (tmp & 0x20) >> 5; + (*intArray)[n++] = (tmp & 0x10) >> 4; + (*intArray)[n++] = (tmp & 0x08) >> 3; + (*intArray)[n++] = (tmp & 0x04) >> 2; + (*intArray)[n++] = (tmp & 0x02) >> 1; + (*intArray)[n++] = (tmp & 0x01) >> 0; + } + + tmp = byteArray[i]; + if(n == intArrayLength) + return; + (*intArray)[n++] = (tmp & 0x80) >> 7; + if(n == intArrayLength) + return; + (*intArray)[n++] = (tmp & 0x40) >> 6; + if(n == intArrayLength) + return; + (*intArray)[n++] = (tmp & 0x20) >> 5; + if(n == intArrayLength) + return; + (*intArray)[n++] = (tmp & 0x10) >> 4; + if(n == intArrayLength) + return; + (*intArray)[n++] = (tmp & 0x08) >> 3; + if(n == intArrayLength) + return; + (*intArray)[n++] = (tmp & 0x04) >> 2; + if(n == intArrayLength) + return; + (*intArray)[n++] = (tmp & 0x02) >> 1; + if(n == intArrayLength) + return; + (*intArray)[n++] = (tmp & 0x01) >> 0; +} + +/** + * little endian + * [01|10|11|00|....]-->[01|10|11|00][....] 
+ * @param timeStepType + * @return + */ +size_t convertIntArray2ByteArray_fast_2b(unsigned char* timeStepType, size_t timeStepTypeLength, unsigned char **result) +{ + size_t i, j, byteLength = 0; + if(timeStepTypeLength%4==0) + byteLength = timeStepTypeLength*2/8; + else + byteLength = timeStepTypeLength*2/8+1; + if(byteLength>0) + *result = (unsigned char*)malloc(byteLength*sizeof(unsigned char)); + else + *result = NULL; + size_t n = 0; + for(i = 0;i byteArrayLength*4) + { + printf("Error: stepLength > byteArray.length*4\n"); + printf("stepLength=%zu, byteArray.length=%zu\n", stepLength, byteArrayLength); + exit(0); + } + if(stepLength>0) + *intArray = (unsigned char*)malloc(stepLength*sizeof(unsigned char)); + else + *intArray = NULL; + size_t i, n = 0; + + for (i = 0; i < byteArrayLength; i++) { + unsigned char tmp = byteArray[i]; + (*intArray)[n++] = (tmp & 0xC0) >> 6; + if(n==stepLength) + break; + (*intArray)[n++] = (tmp & 0x30) >> 4; + if(n==stepLength) + break; + (*intArray)[n++] = (tmp & 0x0C) >> 2; + if(n==stepLength) + break; + (*intArray)[n++] = tmp & 0x03; + if(n==stepLength) + break; + } +} + +size_t convertIntArray2ByteArray_fast_3b(unsigned char* timeStepType, size_t timeStepTypeLength, unsigned char **result) +{ + size_t i = 0, k = 0, byteLength = 0, n = 0; + if(timeStepTypeLength%8==0) + byteLength = timeStepTypeLength*3/8; + else + byteLength = timeStepTypeLength*3/8+1; + + if(byteLength>0) + *result = (unsigned char*)malloc(byteLength*sizeof(unsigned char)); + else + *result = NULL; + int tmp = 0; + for(n = 0;n> 1); + (*result)[i++] = (unsigned char)tmp; + tmp = 0 | (timeStepType[n] << 7); + break; + case 3: + tmp = tmp | (timeStepType[n] << 4); + break; + case 4: + tmp = tmp | (timeStepType[n] << 1); + break; + case 5: + tmp = tmp | (timeStepType[n] >> 2); + (*result)[i++] = (unsigned char)tmp; + tmp = 0 | (timeStepType[n] << 6); + break; + case 6: + tmp = tmp | (timeStepType[n] << 3); + break; + case 7: + tmp = tmp | (timeStepType[n] << 0); 
/*
 * Non-inline file-scope declaration. Under C99 inline rules (the build uses -std=gnu99)
 * a definition whose every declaration says plain `inline` emits no external symbol, so
 * any call the compiler chooses not to inline fails at link time. Having one declaration
 * without `inline` in this translation unit turns the definition below into a regular
 * external definition.
 */
int getLeftMovingSteps(size_t k, unsigned char resiBitLength);

/**
 * Computes 8 - (k mod 8) - resiBitLength.
 *
 * @param k             running bit offset
 * @param resiBitLength number of bits about to be written
 * @return the shift amount; it is negative when resiBitLength exceeds the bits left in
 *         the current byte
 */
inline int getLeftMovingSteps(size_t k, unsigned char resiBitLength)
{
	return 8 - k%8 - resiBitLength;
}
| (value << (8+leftMovSteps)); + } + else if(leftMovSteps > 0) + { + tmp = tmp | (value << leftMovSteps); + } + else //==0 + { + tmp = tmp | value; + addDBA_Data(dba, (unsigned char)tmp); + tmp = 0; + } + i++; + k += resiBitLength; + } + if(leftMovSteps != 0) + addDBA_Data(dba, (unsigned char)tmp); + convertDBAtoBytes(dba, bytes); + size_t size = dba->size; + free_DBA(dba); + return size; +} + +/** + * + * @param timeStepType is the resiMidBits + * @param resiBitLength is the length of resiMidBits for each element, (the number of resiBitLength == the # of unpredictable elements + * @return + */ +size_t convertIntArray2ByteArray_fast_dynamic2(unsigned char* timeStepType, unsigned char* resiBitLength, size_t resiBitLengthLength, unsigned char **bytes) +{ + size_t i = 0, j = 0, k = 0; + int value; + DynamicByteArray* dba; + new_DBA(&dba, 1024); + int tmp = 0, leftMovSteps = 0; + for(j = 0;j> (-leftMovSteps)); + addDBA_Data(dba, (unsigned char)tmp); + tmp = 0 | (value << (8+leftMovSteps)); + } + else if(leftMovSteps > 0) + { + tmp = tmp | (value << leftMovSteps); + } + else //==0 + { + tmp = tmp | value; + addDBA_Data(dba, (unsigned char)tmp); + tmp = 0; + } + i++; + k += rbl; + } + if(leftMovSteps != 0) + addDBA_Data(dba, (unsigned char)tmp); + convertDBAtoBytes(dba, bytes); + size_t size = dba->size; + free_DBA(dba); + return size; +} + +int computeBitNumRequired(size_t dataLength) +{ + if(exe_params->SZ_SIZE_TYPE==4) + return 32 - numberOfLeadingZeros_Int(dataLength); + else + return 64 - numberOfLeadingZeros_Long(dataLength); + +} + +void decompressBitArraybySimpleLZ77(int** result, unsigned char* bytes, size_t bytesLength, size_t totalLength, int validLength) +{ + size_t pairLength = (bytesLength*8)/(validLength+1); + size_t tmpLength = pairLength*2; + int tmpResult[tmpLength]; + size_t i, j, k = 0; + for(i = 0;i> (8-1-innerIndex)) & 0x01; + k++; + + int numResult = extractBytes(bytes, k, validLength); + + tmpResult[i+1] = numResult; + k = k + validLength; + } + + 
*result = (int*)malloc(sizeof(int)*totalLength); + k = 0; + for(i = 0;i +#include +#include +#include "VarSet.h" +#include "sz.h" + +void free_Variable_keepOriginalData(SZ_Variable* v) +{ + if(v->varName!=NULL) + free(v->varName); + if(v->compressedBytes!=NULL) + free(v->compressedBytes); + if(v->multisteps!=NULL) + free_multisteps(v->multisteps); + free(v); +} + +/** + * + * @deprecated + * */ +void free_Variable_keepCompressedBytes(SZ_Variable* v) +{ + if(v->varName!=NULL) + free(v->varName); + if(v->data!=NULL) + free(v->data); + if(v->multisteps!=NULL) + free_multisteps(v->multisteps); + free(v); +} + +void free_Variable_all(SZ_Variable* v) +{ + if(v->varName!=NULL) + free(v->varName); + if(v->data!=NULL) + free(v->data); + if(v->compressedBytes!=NULL) + free(v->compressedBytes); + if(v->multisteps!=NULL) + free_multisteps(v->multisteps); + free(v); +} + +void SZ_batchAddVar(int var_id, char* varName, int dataType, void* data, + int errBoundMode, double absErrBound, double relBoundRatio, double pwRelBoundRatio, + size_t r5, size_t r4, size_t r3, size_t r2, size_t r1) +{ + if(sz_varset==NULL) + { + sz_varset = (SZ_VarSet*)malloc(sizeof(SZ_VarSet)); + sz_varset->header = (SZ_Variable*)malloc(sizeof(SZ_Variable)); + sz_varset->header->next = NULL; + sz_varset->lastVar = sz_varset->header; + sz_varset->count = 0; + } + + SZ_Variable* var = (SZ_Variable*)malloc(sizeof(SZ_Variable)); + memset(var, 0, sizeof(SZ_Variable)); + var->var_id = var_id; + var->varName = (char*)malloc(strlen(varName)+1); + memcpy(var->varName, varName, strlen(varName)+1); + //var->varName = varName; + var->dataType = dataType; + var->r5 = r5; + var->r4 = r4; + var->r3 = r3; + var->r2 = r2; + var->r1 = r1; + var->errBoundMode = errBoundMode; + var->absErrBound = absErrBound; + var->relBoundRatio = relBoundRatio; + var->pwRelBoundRatio = pwRelBoundRatio; + var->data = data; + + var->multisteps = (sz_multisteps*)malloc(sizeof(sz_multisteps)); + memset(var->multisteps, 0, sizeof(sz_multisteps)); 
+ + size_t dataLen = computeDataLength(r5, r4, r3, r2, r1); + if(dataType==SZ_FLOAT) + { + var->multisteps->hist_data = (float*)malloc(sizeof(float)*dataLen); + memset(var->multisteps->hist_data, 0, sizeof(float)*dataLen); + } + else if(dataType==SZ_DOUBLE) + { + var->multisteps->hist_data = (double*)malloc(sizeof(double)*dataLen); + memset(var->multisteps->hist_data, 0, sizeof(double)*dataLen); + } + var->compressedBytes = NULL; + var->next = NULL; + + sz_varset->count ++; + sz_varset->lastVar->next = var; + sz_varset->lastVar = var; +} + +int SZ_batchDelVar_ID(int var_id) +{ + int state = SZ_batchDelVar_ID_vset(sz_varset, var_id); + return state; +} + +int SZ_batchDelVar(char* varName) +{ + int state = SZ_batchDelVar_vset(sz_varset, varName); + return state; +} + +int SZ_batchDelVar_ID_vset(SZ_VarSet* vset, int var_id) +{ + int delSuccess = SZ_NSCS; + SZ_Variable* p = vset->header; + SZ_Variable* q = p->next; + while(q != NULL) + { + if(q->var_id == var_id) + { + p->next = q->next; + //free_Variable_all(q); + free_Variable_keepOriginalData(q); + vset->count --; + delSuccess = SZ_SCES; + if(q->next==NULL) //means that q is the last variable + vset->lastVar = p; + break; + } + + p = p->next; + q = q->next; + } + + return delSuccess; +} + +int SZ_batchDelVar_vset(SZ_VarSet* vset, char* varName) +{ + int delSuccess = SZ_NSCS; + SZ_Variable* p = vset->header; + SZ_Variable* q = p->next; + while(q != NULL) + { + int cmpResult = strcmp(q->varName, varName); + if(cmpResult==0) + { + p->next = q->next; + //free_Variable_all(q); + free_Variable_keepOriginalData(q); + vset->count --; + delSuccess = SZ_SCES; + break; + } + p = p->next; + q = q->next; + } + + return delSuccess; +} + +SZ_Variable* SZ_searchVar(char* varName) +{ + SZ_Variable* p = sz_varset->header->next; + while(p!=NULL) + { + int checkName = strcmp(p->varName, varName); + if(checkName==0) + return p; + p = p->next; + } + return NULL; +} + +void* SZ_getVarData(char* varName, size_t *r5, size_t *r4, size_t *r3, 
size_t *r2, size_t *r1) +{ + SZ_Variable* v = SZ_searchVar(varName); + *r5 = v->r5; + *r4 = v->r4; + *r3 = v->r3; + *r2 = v->r2; + *r1 = v->r1; + return (void*)v->data; +} + +/** + * + * int mode: SZ_MAINTAIN_VAR_DATA, Z_DESTROY_WHOLE_VARSET + * */ +void SZ_freeVarSet(int mode) +{ + free_VarSet_vset(sz_varset, mode); +} + +//free_VarSet will completely destroy the SZ_VarSet, so don't do it until you really don't need it any more! +/** + * + * int mode: SZ_MAINTAIN_VAR_DATA, Z_DESTROY_WHOLE_VARSET + * */ +void free_VarSet_vset(SZ_VarSet *vset, int mode) +{ + if(vset==NULL) + return; + SZ_Variable *p = vset->header; + while(p->next!=NULL) + { + SZ_Variable *q = p->next; + p->next = q->next; + if(mode==SZ_MAINTAIN_VAR_DATA) + free_Variable_keepOriginalData(q); + else if(mode==SZ_DESTROY_WHOLE_VARSET) + free_Variable_all(q); + } + free(sz_varset->header); + free(vset); +} + +void free_multisteps(sz_multisteps* multisteps) +{ + if(multisteps->hist_data!=NULL) + free(multisteps->hist_data); + free(multisteps); +} + +inline int checkVarID(unsigned char cur_var_id, unsigned char* var_ids, int var_count) +{ + int j = 0; + for(j=0;jheader->next; + while(p!=NULL) + { + if(var_id == p->var_id) + return p; + p = p->next; + } + return NULL; +} diff --git a/deps/SZ/sz/src/callZlib.c b/deps/SZ/sz/src/callZlib.c new file mode 100644 index 0000000000000000000000000000000000000000..4e4bb6f2729e401d7779bd62c5ee5601775992fc --- /dev/null +++ b/deps/SZ/sz/src/callZlib.c @@ -0,0 +1,527 @@ +/** + * @file callZlib.c + * @author Sheng Di + * @date June, 2016 + * @brief gzip compressor code: the interface to call zlib + * (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. 
/**
 * Tells whether two leading bytes form one of the zlib header pairs this library
 * recognizes.
 *
 * @param magic1 first byte of the stream: 104 (0x68) or 120 (0x78)
 * @param magic2 second byte of the stream
 * @return 1 for a recognized pair, 0 otherwise
 */
int isZlibFormat(unsigned char magic1, unsigned char magic2)
{
	switch(magic1)
	{
	case 104:
		return magic2==5 || magic2==129 || magic2==222;
	case 120:
		return magic2==1 || magic2==94 || magic2==156 || magic2==218;
	default:
		return 0;
	}
}
Sol: inflace_init, ....\n", err); + exit(0); + } + return outSize; +} + +unsigned long zlib_compress2(unsigned char* data, unsigned long dataLength, unsigned char** compressBytes, int level) +{ + unsigned long outSize; + + z_stream stream = {0}; + int err; + + stream.next_in = data; + stream.avail_in = dataLength; +#ifdef MAXSEG_64K + /* Check for source > 64K on 16-bit machine: */ + if ((uLong)stream.avail_in != dataLength) return Z_BUF_ERROR; +#endif + + uLong estCmpLen = deflateBound(&stream, dataLength); + *compressBytes = (unsigned char*)malloc(sizeof(unsigned char)*estCmpLen); + + stream.next_out = *compressBytes; + stream.avail_out = estCmpLen; + //stream.avail_out = dataLength*10; + //if ((uLong)stream.avail_out != dataLength*10) return Z_BUF_ERROR; + + stream.zalloc = (alloc_func)0; + stream.zfree = (free_func)0; + stream.opaque = (voidpf)0; +// stream.data_type = Z_TEXT; + + //err = deflateInit(&stream, level); //default windowBits == 15. + int windowBits = 14; //8-15 + if(confparams_cpr->szMode==SZ_BEST_COMPRESSION) + windowBits = 15; + + err = deflateInit2(&stream, level, Z_DEFLATED, windowBits, DEF_MEM_LEVEL, + Z_DEFAULT_STRATEGY);//Z_FIXED); //Z_DEFAULT_STRATEGY + if (err != Z_OK) return err; + + err = deflate(&stream, Z_FINISH); + if (err != Z_STREAM_END) { + deflateEnd(&stream); + return err == Z_OK ? 
Z_BUF_ERROR : err; + } + + err = deflateEnd(&stream); + + outSize = stream.total_out; + return outSize; +} + +unsigned long zlib_compress3(unsigned char* data, unsigned long dataLength, unsigned char* compressBytes, int level) +{ + unsigned long outSize = 0; + + z_stream stream = {0}; + int err; + + stream.next_in = data; + stream.avail_in = dataLength; +#ifdef MAXSEG_64K + /* Check for source > 64K on 16-bit machine: */ + if ((uLong)stream.avail_in != dataLength) return Z_BUF_ERROR; +#endif + + stream.next_out = compressBytes; + stream.avail_out = dataLength; + stream.zalloc = (alloc_func)0; + stream.zfree = (free_func)0; + stream.opaque = (voidpf)0; + + //err = deflateInit(&stream, level); //default windowBits == 15. + int windowBits = 14; //8-15 + if(confparams_cpr->szMode==SZ_BEST_COMPRESSION) + windowBits = 15; + + err = deflateInit2(&stream, level, Z_DEFLATED, windowBits, DEF_MEM_LEVEL, + Z_DEFAULT_STRATEGY);//Z_FIXED); //Z_DEFAULT_STRATEGY + if (err != Z_OK) return err; + + err = deflate(&stream, Z_FINISH); + if (err != Z_STREAM_END) { + deflateEnd(&stream); + return err == Z_OK ? 
Z_BUF_ERROR : err; + } + + err = deflateEnd(&stream); + + outSize = stream.total_out; + return outSize; +} + +unsigned long zlib_compress4(unsigned char* data, unsigned long dataLength, unsigned char** compressBytes, int level) +{ + z_stream c_stream = {0}; /* compression stream */ + int err = 0; + + c_stream.zalloc = (alloc_func)0; + c_stream.zfree = (free_func)0; + c_stream.opaque = (voidpf)0; + + int windowBits = 14; //8-15 + if(confparams_cpr->szMode==SZ_BEST_COMPRESSION) + windowBits = 15; + + err = deflateInit2(&c_stream, level, Z_DEFLATED, windowBits, DEF_MEM_LEVEL, + Z_DEFAULT_STRATEGY);//Z_FIXED); //Z_DEFAULT_STRATEGY + CHECK_ERR(err, "deflateInit"); + + uLong estCmpLen = deflateBound(&c_stream, dataLength); + *compressBytes = (unsigned char*)malloc(sizeof(unsigned char)*estCmpLen); + + c_stream.next_in = data; + c_stream.next_out = *compressBytes; + + while (c_stream.total_in < dataLength && c_stream.total_out < estCmpLen) { + c_stream.avail_in = c_stream.avail_out = SZ_ZLIB_BUFFER_SIZE; /* force small buffers */ + err = deflate(&c_stream, Z_NO_FLUSH); + CHECK_ERR(err, "deflate"); + } + /* Finish the stream, still forcing small buffers: */ + for (;;) { + c_stream.avail_out = 1; + err = deflate(&c_stream, Z_FINISH); + if (err == Z_STREAM_END) break; + CHECK_ERR(err, "deflate"); + } + + err = deflateEnd(&c_stream); + CHECK_ERR(err, "deflateEnd"); + + return c_stream.total_out; +} + +unsigned long zlib_compress5(unsigned char* data, unsigned long dataLength, unsigned char** compressBytes, int level) +{ + int ret, flush; + unsigned have; + z_stream strm; + unsigned char* in = data; + + /* allocate deflate state */ + strm.zalloc = Z_NULL; + strm.zfree = Z_NULL; + strm.opaque = Z_NULL; + ret = deflateInit(&strm, level); + //int windowBits = 15; + //ret = deflateInit2(&strm, level, Z_DEFLATED, windowBits, DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY);//Z_FIXED); //Z_DEFAULT_STRATEGY + + if (ret != Z_OK) + return ret; + + size_t p_size = 0, av_in = 0; + uLong estCmpLen = 
deflateBound(&strm, dataLength); + *compressBytes = (unsigned char*)malloc(sizeof(unsigned char)*estCmpLen); + unsigned char* out = *compressBytes; + + /* compress until end of file */ + do { + p_size += SZ_ZLIB_BUFFER_SIZE; + if(p_size>=dataLength) + { + av_in = dataLength - (p_size - SZ_ZLIB_BUFFER_SIZE); + flush = Z_FINISH; + } + else + { + av_in = SZ_ZLIB_BUFFER_SIZE; + flush = Z_NO_FLUSH; + } + strm.avail_in = av_in; + strm.next_in = in; + + /* run deflate() on input until output buffer not full, finish + compression if all of source has been read in */ + do { + strm.avail_out = SZ_ZLIB_BUFFER_SIZE; + strm.next_out = out; + ret = deflate(&strm, flush); /* no bad return value */ + + have = SZ_ZLIB_BUFFER_SIZE - strm.avail_out; + out += have; + } while (strm.avail_out == 0); + + in+=av_in; + + /* done when last data in file processed */ + } while (flush != Z_FINISH); + + /* clean up and return */ + (void)deflateEnd(&strm); + + return strm.total_out; +} + +unsigned long zlib_uncompress(unsigned char* compressBytes, unsigned long cmpSize, unsigned char** oriData, unsigned long targetOriSize) +{ + unsigned long outSize = targetOriSize; + *oriData = (unsigned char*)malloc(sizeof(unsigned char)*targetOriSize); + int status = uncompress(*oriData, &outSize, compressBytes, cmpSize); + if(status!=Z_OK) + { + printf("Error: Zlib decompression error; status=%d\n", status); + exit(0); + } + + return outSize; +} + +unsigned long zlib_uncompress2 (unsigned char* compressBytes, unsigned long cmpSize, unsigned char** oriData, unsigned long targetOriSize) +{ + z_stream stream = {0}; + + unsigned long outSize; + *oriData = (unsigned char*)malloc(sizeof(unsigned char)*targetOriSize); + + stream.zalloc = Z_NULL; + stream.zfree = Z_NULL; + stream.opaque = Z_NULL; +// stream.data_type = Z_TEXT; + + stream.next_in = compressBytes; + stream.avail_in = cmpSize; + /* Check for source > 64K on 16-bit machine: */ + if ((unsigned long)stream.avail_in != cmpSize) + { + printf("Error: 
zlib_uncompress2: stream.avail_in != cmpSize"); + //exit(1); + return SZ_NSCS; //-1 + } + + stream.next_out = *oriData; + stream.avail_out = targetOriSize; + //if ((uLong)stream.avail_out != *destLen) return Z_BUF_ERROR; + + int err = inflateInit(&stream); + //int windowBits = 15; + //int err = inflateInit2(&stream, windowBits); + if (err != Z_OK) + { + printf("Error: zlib_uncompress2: err != Z_OK\n"); + return SZ_NSCS; + } + + err = inflate(&stream, Z_FINISH); + if (err != Z_STREAM_END) { + inflateEnd(&stream); + if (err == Z_NEED_DICT || (err == Z_BUF_ERROR && stream.avail_in == 0)) + return Z_DATA_ERROR; + return err; + } + outSize = stream.total_out; + inflateEnd(&stream); + return outSize; +} + +unsigned long zlib_uncompress3(unsigned char* compressBytes, unsigned long cmpSize, unsigned char** oriData, unsigned long targetOriSize) +{ + int status; + z_stream z_strm; /* decompression stream */ + + size_t nalloc = 65536*4; + + *oriData = (unsigned char*)malloc(sizeof(unsigned char)*targetOriSize); + memset(&z_strm, 0, sizeof(z_strm)); + + + /*d_stream.zalloc = (alloc_func)0; + d_stream.zfree = (free_func)0; + d_stream.opaque = (voidpf)0;*/ + + z_strm.next_in = compressBytes; + z_strm.avail_in = 0; + z_strm.next_out = *oriData; + z_strm.avail_out = targetOriSize; + + status = inflateInit(&z_strm); + CHECK_ERR(status, "inflateInit"); + + do{ + z_strm.avail_in = z_strm.avail_out = SZ_ZLIB_BUFFER_SIZE; /* force small buffers */ + /* Uncompress some data */ + status = inflate(&z_strm, Z_SYNC_FLUSH); + + /* Check if we are done uncompressing data */ + if (Z_STREAM_END==status) + break; /*done*/ + + if (Z_OK!=status) { + (void)inflateEnd(&z_strm); + printf("Error: inflate() failed\n"); + exit(0); + } + else + { + /* If we're not done and just ran out of buffer space, get more */ + if(0 == z_strm.avail_out) { + void *new_outbuf; /* Pointer to new output buffer */ + + /* Allocate a buffer twice as big */ + nalloc *= 2; + if(NULL == (new_outbuf = realloc(*oriData, 
nalloc))) { + (void)inflateEnd(&z_strm); + printf("Error: memory allocation failed for deflate uncompression\n"); + exit(0); + } /* end if */ + *oriData = new_outbuf; + + /* Update pointers to buffer for next set of uncompressed data */ + z_strm.next_out = (*oriData) + z_strm.total_out; + z_strm.avail_out = (uInt)(nalloc - z_strm.total_out); + } /* end if */ + } /* end else*/ + }while(status==Z_OK); + + status = inflateEnd(&z_strm); + CHECK_ERR(status, "inflateEnd"); + + return z_strm.total_out; +} + +unsigned long zlib_uncompress4(unsigned char* compressBytes, unsigned long cmpSize, unsigned char** oriData, unsigned long targetOriSize) +{ + int ret; + unsigned int have; + z_stream strm; + unsigned char *in = compressBytes; + unsigned char *out; + + *oriData = (unsigned char*)malloc(sizeof(unsigned char)*targetOriSize); + out = *oriData; + + /* allocate inflate state */ + strm.zalloc = Z_NULL; + strm.zfree = Z_NULL; + strm.opaque = Z_NULL; + strm.avail_in = 0; + strm.next_in = Z_NULL; + ret = inflateInit(&strm); + if (ret != Z_OK) + { + return ret; + } + + size_t p_size = 0, av_in = 0; + /* decompress until deflate stream ends or end of file */ + do { + p_size += SZ_ZLIB_BUFFER_SIZE; + if(p_size>cmpSize) + av_in = cmpSize - (p_size - SZ_ZLIB_BUFFER_SIZE); + else + av_in = SZ_ZLIB_BUFFER_SIZE; + strm.avail_in = av_in; + + if (strm.avail_in == 0) + break; + strm.next_in = in; + + /* run inflate() on input until output buffer not full */ + do { + strm.avail_out = SZ_ZLIB_BUFFER_SIZE; + strm.next_out = out; + ret = inflate(&strm, Z_NO_FLUSH); + //assert(ret != Z_STREAM_ERROR); /* state not clobbered */ + switch (ret) { + case Z_NEED_DICT: + ret = Z_DATA_ERROR; /* and fall through */ + case Z_DATA_ERROR: + case Z_MEM_ERROR: + (void)inflateEnd(&strm); + return ret; + } + have = SZ_ZLIB_BUFFER_SIZE - strm.avail_out; + + out += have; + + } while (strm.avail_out == 0); + + in+=av_in; + /* done when inflate() says it's done */ + } while (ret != Z_STREAM_END); + + /* clean up 
and return */ + (void)inflateEnd(&strm); + + return strm.total_out; +} + +unsigned long zlib_uncompress65536bytes(unsigned char* compressBytes, unsigned long cmpSize, unsigned char** oriData) +{ + int err; + unsigned long targetOriSize = 65536; + z_stream d_stream = {0}; /* decompression stream */ + + *oriData = (unsigned char*)malloc(sizeof(unsigned char)*targetOriSize); + + d_stream.zalloc = (alloc_func)0; + d_stream.zfree = (free_func)0; + d_stream.opaque = (voidpf)0; + + d_stream.next_in = compressBytes; + d_stream.avail_in = 0; + d_stream.next_out = *oriData; + + err = inflateInit(&d_stream); + CHECK_ERR(err, "inflateInit"); + + while (d_stream.total_out < targetOriSize && d_stream.total_in < cmpSize) { + d_stream.avail_in = d_stream.avail_out = SZ_ZLIB_BUFFER_SIZE; /* force small buffers */ + //err = inflate(&d_stream, Z_NO_FLUSH); + err = inflate(&d_stream, Z_SYNC_FLUSH); + if (err == Z_STREAM_END) break; + if(err<0) + break; + } + + if(err<0) + return d_stream.total_out; + err = inflateEnd(&d_stream); + + CHECK_ERR(err, "inflateEnd"); + + return d_stream.total_out; +} + +unsigned long zlib_uncompress5(unsigned char* compressBytes, unsigned long cmpSize, unsigned char** oriData, unsigned long targetOriSize) +{ + int err; + z_stream d_stream = {0}; /* decompression stream */ + + *oriData = (unsigned char*)malloc(sizeof(unsigned char)*targetOriSize); + + d_stream.zalloc = (alloc_func)0; + d_stream.zfree = (free_func)0; + d_stream.opaque = (voidpf)0; + + d_stream.next_in = compressBytes; + d_stream.avail_in = 0; + d_stream.next_out = *oriData; + + err = inflateInit(&d_stream); + CHECK_ERR(err, "inflateInit"); + + while (d_stream.total_out < targetOriSize && d_stream.total_in < cmpSize) { + d_stream.avail_in = d_stream.avail_out = SZ_ZLIB_BUFFER_SIZE; /* force small buffers */ + //err = inflate(&d_stream, Z_NO_FLUSH); + err = inflate(&d_stream, Z_SYNC_FLUSH); + if (err == Z_STREAM_END) break; + CHECK_ERR(err, "inflate"); + } + + err = inflateEnd(&d_stream); + + 
CHECK_ERR(err, "inflateEnd"); + + return d_stream.total_out; +} diff --git a/deps/SZ/sz/src/conf.c b/deps/SZ/sz/src/conf.c new file mode 100644 index 0000000000000000000000000000000000000000..02198dbd6ce573fbb8759b7f200772cf7ca2a050 --- /dev/null +++ b/deps/SZ/sz/src/conf.c @@ -0,0 +1,459 @@ +/** + * @file conf.c + * @author Sheng Di (sdi1@anl.gov or disheng222@gmail.com) + * @date 2015. + * @brief Configuration loading functions for the SZ library. + * (C) 2015 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. + */ + +#include +#include "string.h" +#include "sz.h" +#include "iniparser.h" +#include "Huffman.h" +#include "pastri.h" + +/*-------------------------------------------------------------------------*/ +/** + @brief It reads the configuration given in the configuration file. + @return integer 1 if successfull. + + This function reads the configuration given in the SZ configuration + file and sets other required parameters. 
+ + **/ + +/*struct node_t *pool; +node *qqq; +node *qq; +int n_nodes = 0, qend; +unsigned long **code; +unsigned char *cout; +int n_inode;*/ + +unsigned int roundUpToPowerOf2(unsigned int base) +{ + base -= 1; + + base = base | (base >> 1); + base = base | (base >> 2); + base = base | (base >> 4); + base = base | (base >> 8); + base = base | (base >> 16); + + return base + 1; +} + +void updateQuantizationInfo(int quant_intervals) +{ + exe_params->intvCapacity = quant_intervals; + exe_params->intvRadius = quant_intervals/2; +} + +double computeABSErrBoundFromPSNR(double psnr, double threshold, double value_range) +{ + double v1 = psnr + 10 * log10(1-2.0/3.0*threshold); + double v2 = v1/(-20); + double v3 = pow(10, v2); + return value_range * v3; +} + +double computeABSErrBoundFromNORM_ERR(double normErr, size_t nbEle) +{ + return sqrt(3.0/nbEle)*normErr; +} + + +/*-------------------------------------------------------------------------*/ +/** + * + * + * @return the status of loading conf. 
file: 1 (success) or 0 (error code); + * */ +int SZ_ReadConf(const char* sz_cfgFile) { + // Check access to SZ configuration file and load dictionary + //record the setting in confparams_cpr + confparams_cpr = (sz_params*)malloc(sizeof(sz_params)); + exe_params = (sz_exedata*)malloc(sizeof(sz_exedata)); + + int x = 1; + char sol_name[256]; + char *modeBuf; + char *errBoundMode; + char *endianTypeString; + dictionary *ini; + char *par; + + char *y = (char*)&x; + + if(*y==1) + sysEndianType = LITTLE_ENDIAN_SYSTEM; + else //=0 + sysEndianType = BIG_ENDIAN_SYSTEM; + + confparams_cpr->plus_bits = 3; + + if(sz_cfgFile == NULL) + { + dataEndianType = LITTLE_ENDIAN_DATA; + confparams_cpr->sol_ID = SZ; + confparams_cpr->max_quant_intervals = 65536; + confparams_cpr->maxRangeRadius = confparams_cpr->max_quant_intervals/2; + + exe_params->intvCapacity = confparams_cpr->maxRangeRadius*2; + exe_params->intvRadius = confparams_cpr->maxRangeRadius; + + confparams_cpr->quantization_intervals = 0; + exe_params->optQuantMode = 1; + confparams_cpr->predThreshold = 0.99; + confparams_cpr->sampleDistance = 100; + + confparams_cpr->szMode = SZ_BEST_COMPRESSION; + confparams_cpr->losslessCompressor = ZSTD_COMPRESSOR; //other option: GZIP_COMPRESSOR; + if(confparams_cpr->losslessCompressor==ZSTD_COMPRESSOR) + confparams_cpr->gzipMode = 3; //fast mode + else + confparams_cpr->gzipMode = 1; //high speed mode + + confparams_cpr->errorBoundMode = PSNR; + confparams_cpr->psnr = 90; + confparams_cpr->absErrBound = 1E-4; + confparams_cpr->relBoundRatio = 1E-4; + confparams_cpr->accelerate_pw_rel_compression = 1; + + confparams_cpr->pw_relBoundRatio = 1E-3; + confparams_cpr->segment_size = 36; + + confparams_cpr->pwr_type = SZ_PWR_MIN_TYPE; + + confparams_cpr->snapshotCmprStep = 5; + + confparams_cpr->withRegression = SZ_WITH_LINEAR_REGRESSION; + + confparams_cpr->randomAccess = 0; //0: no random access , 1: support random access + + confparams_cpr->protectValueRange = 0; + + return SZ_SCES; + } 
+ + if (access(sz_cfgFile, F_OK) != 0) + { + printf("[SZ] Configuration file NOT accessible.\n"); + return SZ_NSCS; + } + + //printf("[SZ] Reading SZ configuration file (%s) ...\n", sz_cfgFile); + ini = iniparser_load(sz_cfgFile); + if (ini == NULL) + { + printf("[SZ] Iniparser failed to parse the conf. file.\n"); + return SZ_NSCS; + } + + endianTypeString = iniparser_getstring(ini, "ENV:dataEndianType", "LITTLE_ENDIAN_DATA"); + if(strcmp(endianTypeString, "LITTLE_ENDIAN_DATA")==0) + dataEndianType = LITTLE_ENDIAN_DATA; + else if(strcmp(endianTypeString, "BIG_ENDIAN_DATA")==0) + dataEndianType = BIG_ENDIAN_DATA; + else + { + printf("Error: Wrong dataEndianType: please set it correctly in sz.config.\n"); + iniparser_freedict(ini); + return SZ_NSCS; + } + + // Reading/setting detection parameters + + par = iniparser_getstring(ini, "ENV:sol_name", NULL); + snprintf(sol_name, 256, "%s", par); + + if(strcmp(sol_name, "SZ")==0) + confparams_cpr->sol_ID = SZ; + else if(strcmp(sol_name, "PASTRI")==0) + confparams_cpr->sol_ID = PASTRI; + else if(strcmp(sol_name, "SZ_Transpose")==0) + confparams_cpr->sol_ID = SZ_Transpose; + else{ + printf("[SZ] Error: wrong solution name (please check sz.config file), sol=%s\n", sol_name); + iniparser_freedict(ini); + return SZ_NSCS; + } + + if(confparams_cpr->sol_ID==SZ || confparams_cpr->sol_ID==SZ_Transpose) + { + int max_quant_intervals = iniparser_getint(ini, "PARAMETER:max_quant_intervals", 65536); + confparams_cpr->max_quant_intervals = max_quant_intervals; + + int quantization_intervals = (int)iniparser_getint(ini, "PARAMETER:quantization_intervals", 0); + confparams_cpr->quantization_intervals = quantization_intervals; + if(quantization_intervals>0) + { + updateQuantizationInfo(quantization_intervals); + confparams_cpr->max_quant_intervals = max_quant_intervals = quantization_intervals; + exe_params->optQuantMode = 0; + } + else //==0 + { + confparams_cpr->maxRangeRadius = max_quant_intervals/2; + + exe_params->intvCapacity = 
confparams_cpr->maxRangeRadius*2; + exe_params->intvRadius = confparams_cpr->maxRangeRadius; + + exe_params->optQuantMode = 1; + } + + if(quantization_intervals%2!=0) + { + printf("Error: quantization_intervals must be an even number!\n"); + iniparser_freedict(ini); + return SZ_NSCS; + } + + confparams_cpr->predThreshold = (float)iniparser_getdouble(ini, "PARAMETER:predThreshold", 0); + confparams_cpr->sampleDistance = (int)iniparser_getint(ini, "PARAMETER:sampleDistance", 0); + + modeBuf = iniparser_getstring(ini, "PARAMETER:szMode", NULL); + if(modeBuf==NULL) + { + printf("[SZ] Error: Null szMode setting (please check sz.config file)\n"); + iniparser_freedict(ini); + return SZ_NSCS; + } + else if(strcmp(modeBuf, "SZ_BEST_SPEED")==0) + confparams_cpr->szMode = SZ_BEST_SPEED; + else if(strcmp(modeBuf, "SZ_DEFAULT_COMPRESSION")==0) + confparams_cpr->szMode = SZ_DEFAULT_COMPRESSION; + else if(strcmp(modeBuf, "SZ_BEST_COMPRESSION")==0) + confparams_cpr->szMode = SZ_BEST_COMPRESSION; + else + { + printf("[SZ] Error: Wrong szMode setting (please check sz.config file)\n"); + iniparser_freedict(ini); + return SZ_NSCS; + } + + modeBuf = iniparser_getstring(ini, "PARAMETER:losslessCompressor", "ZSTD_COMPRESSOR"); + if(strcmp(modeBuf, "GZIP_COMPRESSOR")==0) + confparams_cpr->losslessCompressor = GZIP_COMPRESSOR; + else if(strcmp(modeBuf, "ZSTD_COMPRESSOR")==0) + confparams_cpr->losslessCompressor = ZSTD_COMPRESSOR; + else + { + printf("[SZ] Error: Wrong losslessCompressor setting (please check sz.config file)\n");\ + printf("No Such a lossless compressor: %s\n", modeBuf); + iniparser_freedict(ini); + return SZ_NSCS; + } + + modeBuf = iniparser_getstring(ini, "PARAMETER:withLinearRegression", "YES"); + if(strcmp(modeBuf, "YES")==0 || strcmp(modeBuf, "yes")==0) + confparams_cpr->withRegression = SZ_WITH_LINEAR_REGRESSION; + else + confparams_cpr->withRegression = SZ_NO_REGRESSION; + + modeBuf = iniparser_getstring(ini, "PARAMETER:gzipMode", "Gzip_BEST_SPEED"); + 
if(modeBuf==NULL) + { + printf("[SZ] Error: Null Gzip mode setting (please check sz.config file)\n"); + iniparser_freedict(ini); + return SZ_NSCS; + } + else if(strcmp(modeBuf, "Gzip_NO_COMPRESSION")==0) + confparams_cpr->gzipMode = 0; + else if(strcmp(modeBuf, "Gzip_BEST_SPEED")==0) + confparams_cpr->gzipMode = 1; + else if(strcmp(modeBuf, "Gzip_BEST_COMPRESSION")==0) + confparams_cpr->gzipMode = 9; + else if(strcmp(modeBuf, "Gzip_DEFAULT_COMPRESSION")==0) + confparams_cpr->gzipMode = -1; + else + { + printf("[SZ] Error: Wrong gzip Mode (please check sz.config file)\n"); + return SZ_NSCS; + } + + modeBuf = iniparser_getstring(ini, "PARAMETER:zstdMode", "Zstd_HIGH_SPEED"); + if(modeBuf==NULL) + { + printf("[SZ] Error: Null Zstd mode setting (please check sz.config file)\n"); + iniparser_freedict(ini); + return SZ_NSCS; + } + else if(strcmp(modeBuf, "Zstd_BEST_SPEED")==0) + confparams_cpr->gzipMode = 1; + else if(strcmp(modeBuf, "Zstd_HIGH_SPEED")==0) + confparams_cpr->gzipMode = 3; + else if(strcmp(modeBuf, "Zstd_HIGH_COMPRESSION")==0) + confparams_cpr->gzipMode = 19; + else if(strcmp(modeBuf, "Zstd_BEST_COMPRESSION")==0) + confparams_cpr->gzipMode = 22; + else if(strcmp(modeBuf, "Zstd_DEFAULT_COMPRESSION")==0) + confparams_cpr->gzipMode = 3; + else + { + printf("[SZ] Error: Wrong zstd Mode (please check sz.config file)\n"); + return SZ_NSCS; + } + + modeBuf = iniparser_getstring(ini, "PARAMETER:protectValueRange", "YES"); + if(strcmp(modeBuf, "YES")==0) + confparams_cpr->protectValueRange = 1; + else + confparams_cpr->protectValueRange = 0; + + confparams_cpr->randomAccess = (int)iniparser_getint(ini, "PARAMETER:randomAccess", 0); + + //TODO + confparams_cpr->snapshotCmprStep = (int)iniparser_getint(ini, "PARAMETER:snapshotCmprStep", 5); + + errBoundMode = iniparser_getstring(ini, "PARAMETER:errorBoundMode", NULL); + if(errBoundMode==NULL) + { + printf("[SZ] Error: Null error bound setting (please check sz.config file)\n"); + iniparser_freedict(ini); + return 
SZ_NSCS; + } + else if(strcmp(errBoundMode,"ABS")==0||strcmp(errBoundMode,"abs")==0) + confparams_cpr->errorBoundMode=ABS; + else if(strcmp(errBoundMode, "REL")==0||strcmp(errBoundMode,"rel")==0) + confparams_cpr->errorBoundMode=REL; + else if(strcmp(errBoundMode, "VR_REL")==0||strcmp(errBoundMode, "vr_rel")==0) + confparams_cpr->errorBoundMode=REL; + else if(strcmp(errBoundMode, "ABS_AND_REL")==0||strcmp(errBoundMode, "abs_and_rel")==0) + confparams_cpr->errorBoundMode=ABS_AND_REL; + else if(strcmp(errBoundMode, "ABS_OR_REL")==0||strcmp(errBoundMode, "abs_or_rel")==0) + confparams_cpr->errorBoundMode=ABS_OR_REL; + else if(strcmp(errBoundMode, "PW_REL")==0||strcmp(errBoundMode, "pw_rel")==0) + confparams_cpr->errorBoundMode=PW_REL; + else if(strcmp(errBoundMode, "PSNR")==0||strcmp(errBoundMode, "psnr")==0) + confparams_cpr->errorBoundMode=PSNR; + else if(strcmp(errBoundMode, "ABS_AND_PW_REL")==0||strcmp(errBoundMode, "abs_and_pw_rel")==0) + confparams_cpr->errorBoundMode=ABS_AND_PW_REL; + else if(strcmp(errBoundMode, "ABS_OR_PW_REL")==0||strcmp(errBoundMode, "abs_or_pw_rel")==0) + confparams_cpr->errorBoundMode=ABS_OR_PW_REL; + else if(strcmp(errBoundMode, "REL_AND_PW_REL")==0||strcmp(errBoundMode, "rel_and_pw_rel")==0) + confparams_cpr->errorBoundMode=REL_AND_PW_REL; + else if(strcmp(errBoundMode, "REL_OR_PW_REL")==0||strcmp(errBoundMode, "rel_or_pw_rel")==0) + confparams_cpr->errorBoundMode=REL_OR_PW_REL; + else if(strcmp(errBoundMode, "NORM")==0||strcmp(errBoundMode, "norm")==0) + confparams_cpr->errorBoundMode=NORM; + else + { + printf("[SZ] Error: Wrong error bound mode (please check sz.config file)\n"); + iniparser_freedict(ini); + return SZ_NSCS; + } + + confparams_cpr->absErrBound = (double)iniparser_getdouble(ini, "PARAMETER:absErrBound", 0); + confparams_cpr->relBoundRatio = (double)iniparser_getdouble(ini, "PARAMETER:relBoundRatio", 0); + confparams_cpr->psnr = (double)iniparser_getdouble(ini, "PARAMETER:psnr", 0); + confparams_cpr->normErr = 
(double)iniparser_getdouble(ini, "PARAMETER:normErr", 0); + confparams_cpr->pw_relBoundRatio = (double)iniparser_getdouble(ini, "PARAMETER:pw_relBoundRatio", 0); + confparams_cpr->segment_size = (int)iniparser_getint(ini, "PARAMETER:segment_size", 0); + confparams_cpr->accelerate_pw_rel_compression = (int)iniparser_getint(ini, "PARAMETER:accelerate_pw_rel_compression", 1); + + modeBuf = iniparser_getstring(ini, "PARAMETER:pwr_type", "MIN"); + + if(strcmp(modeBuf, "MIN")==0) + confparams_cpr->pwr_type = SZ_PWR_MIN_TYPE; + else if(strcmp(modeBuf, "AVG")==0) + confparams_cpr->pwr_type = SZ_PWR_AVG_TYPE; + else if(strcmp(modeBuf, "MAX")==0) + confparams_cpr->pwr_type = SZ_PWR_MAX_TYPE; + else if(modeBuf!=NULL) + { + printf("[SZ] Error: Wrong pwr_type setting (please check sz.config file).\n"); + iniparser_freedict(ini); + return SZ_NSCS; + } + else //by default + confparams_cpr->pwr_type = SZ_PWR_AVG_TYPE; + + //initialization for Huffman encoding + //SZ_Reset(); + } + else if(confparams_cpr->sol_ID == PASTRI) + {//load parameters for PSTRI + pastri_par.bf[0] = (int)iniparser_getint(ini, "PARAMETER:basisFunction_0", 0); + pastri_par.bf[1] = (int)iniparser_getint(ini, "PARAMETER:basisFunction_1", 0); + pastri_par.bf[2] = (int)iniparser_getint(ini, "PARAMETER:basisFunction_2", 0); + pastri_par.bf[3] = (int)iniparser_getint(ini, "PARAMETER:basisFunction_3", 0); + pastri_par.numBlocks = (int)iniparser_getint(ini, "PARAMETER:numBlocks", 0); + confparams_cpr->absErrBound = pastri_par.originalEb = (double)iniparser_getdouble(ini, "PARAMETER:absErrBound", 1E-3); + } + + iniparser_freedict(ini); + return SZ_SCES; +} + +/*-------------------------------------------------------------------------*/ +/** + @brief It reads and tests the configuration given. + @return integer 1 if successfull. + + This function reads the configuration file. Then test that the + configuration parameters are correct (including directories). 
+ + **/ +/*-------------------------------------------------------------------------*/ +int SZ_LoadConf(const char* sz_cfgFile) { + int res = SZ_ReadConf(sz_cfgFile); + if (res != SZ_SCES) + { + printf("[SZ] ERROR: Impossible to read configuration.\n"); + return SZ_NSCS; + } + return SZ_SCES; +} + +int checkVersion(char* version) +{ + int i = 0; + for(;i<3;i++) + if(version[i]!=versionNumber[i]) + return 0; + return 1; +} + +inline int computeVersion(int major, int minor, int revision) +{ + return major*10000+minor*100+revision; +} + +int checkVersion2(char* version) +{ + int major = version[0]; + int minor = version[1]; + int revision = version[2]; + + int preVersion = 20108; + int givenVersion = computeVersion(major, minor, revision); + //int currentVersion = computeVersion(SZ_VER_MAJOR, SZ_VER_MINOR, SZ_VER_REVISION); + if(givenVersion < preVersion) //only for old version (older than 2.1.8), we will check whether version is consistent exactly. + return checkVersion(version); + return 1; +} + +void initSZ_TSC() +{ + sz_tsc = (sz_tsc_metadata*)malloc(sizeof(sz_tsc_metadata)); + memset(sz_tsc, 0, sizeof(sz_tsc_metadata)); + /*sprintf(sz_tsc->metadata_filename, "sz_tsc_metainfo.txt"); + sz_tsc->metadata_file = fopen(sz_tsc->metadata_filename, "wb"); + if (sz_tsc->metadata_file == NULL) + { + printf("Failed to open sz_tsc_metainfo.txt file for writing metainfo.\n"); + exit(1); + } + fputs("#metadata of the time-step based compression\n", sz_tsc->metadata_file); */ +} + +/*double fabs(double value) +{ + if(value<0) + return -value; + else + return value; +}*/ diff --git a/deps/SZ/sz/src/dataCompression.c b/deps/SZ/sz/src/dataCompression.c new file mode 100644 index 0000000000000000000000000000000000000000..0051c542158010f1cfd9896b664a39a0181eced1 --- /dev/null +++ b/deps/SZ/sz/src/dataCompression.c @@ -0,0 +1,980 @@ +/** + * @file double_compression.c + * @author Sheng Di, Dingwen Tao, Xin Liang, Xiangyu Zou, Tao Lu, Wen Xia, Xuan Wang, Weizhe Zhang + * @date April, 
2016 + * @brief Compression Technique for double array + * (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. + */ + +#include +#include +#include +#include +#include "sz.h" +#include "DynamicByteArray.h" +#include "DynamicIntArray.h" +#include "TightDataPointStorageD.h" +#include "CompressElement.h" +#include "dataCompression.h" + +int computeByteSizePerIntValue(long valueRangeSize) +{ + if(valueRangeSize<=256) + return 1; + else if(valueRangeSize<=65536) + return 2; + else if(valueRangeSize<=4294967296) //2^32 + return 4; + else + return 8; +} + +long computeRangeSize_int(void* oriData, int dataType, size_t size, int64_t* valueRangeSize) +{ + size_t i = 0; + long max = 0, min = 0; + + if(dataType==SZ_UINT8) + { + unsigned char* data = (unsigned char*)oriData; + unsigned char data_; + min = data[0], max = min; + computeMinMax(data); + } + else if(dataType == SZ_INT8) + { + char* data = (char*)oriData; + char data_; + min = data[0], max = min; + computeMinMax(data); + } + else if(dataType == SZ_UINT16) + { + unsigned short* data = (unsigned short*)oriData; + unsigned short data_; + min = data[0], max = min; + computeMinMax(data); + } + else if(dataType == SZ_INT16) + { + short* data = (short*)oriData; + short data_; + min = data[0], max = min; + computeMinMax(data); + } + else if(dataType == SZ_UINT32) + { + unsigned int* data = (unsigned int*)oriData; + unsigned int data_; + min = data[0], max = min; + computeMinMax(data); + } + else if(dataType == SZ_INT32) + { + int* data = (int*)oriData; + int data_; + min = data[0], max = min; + computeMinMax(data); + } + else if(dataType == SZ_UINT64) + { + unsigned long* data = (unsigned long*)oriData; + unsigned long data_; + min = data[0], max = min; + computeMinMax(data); + } + else if(dataType == SZ_INT64) + { + long* data = (long *)oriData; + long data_; + min = data[0], max = min; + computeMinMax(data); + } + + *valueRangeSize = max - min; + return 
min; +} + +float computeRangeSize_float(float* oriData, size_t size, float* valueRangeSize, float* medianValue) +{ + size_t i = 0; + float min = oriData[0]; + float max = min; + for(i=1;idata) + min = data; + else if(maxdata) + min = data; + else if(maxdata) + min = data; + else if(maxdata) + min = data; + else if(maxdata) + min = data; + else if(maxdata) + min = data; + else if(maxb) + return a; + else + return b; +} + +float min_f(float a, float b) +{ + if(ab) + return a; + else + return b; +} + +double getRealPrecision_double(double valueRangeSize, int errBoundMode, double absErrBound, double relBoundRatio, int *status) +{ + int state = SZ_SCES; + double precision = 0; + if(errBoundMode==ABS||errBoundMode==ABS_OR_PW_REL||errBoundMode==ABS_AND_PW_REL) + precision = absErrBound; + else if(errBoundMode==REL||errBoundMode==REL_OR_PW_REL||errBoundMode==REL_AND_PW_REL) + precision = relBoundRatio*valueRangeSize; + else if(errBoundMode==ABS_AND_REL) + precision = min_d(absErrBound, relBoundRatio*valueRangeSize); + else if(errBoundMode==ABS_OR_REL) + precision = max_d(absErrBound, relBoundRatio*valueRangeSize); + else if(errBoundMode==PW_REL) + precision = 0; + else + { + printf("Error: error-bound-mode is incorrect!\n"); + state = SZ_BERR; + } + *status = state; + return precision; +} + +double getRealPrecision_float(float valueRangeSize, int errBoundMode, double absErrBound, double relBoundRatio, int *status) +{ + int state = SZ_SCES; + double precision = 0; + if(errBoundMode==ABS||errBoundMode==ABS_OR_PW_REL||errBoundMode==ABS_AND_PW_REL) + precision = absErrBound; + else if(errBoundMode==REL||errBoundMode==REL_OR_PW_REL||errBoundMode==REL_AND_PW_REL) + precision = relBoundRatio*valueRangeSize; + else if(errBoundMode==ABS_AND_REL) + precision = min_f(absErrBound, relBoundRatio*valueRangeSize); + else if(errBoundMode==ABS_OR_REL) + precision = max_f(absErrBound, relBoundRatio*valueRangeSize); + else if(errBoundMode==PW_REL) + precision = 0; + else + { + printf("Error: 
error-bound-mode is incorrect!\n"); + state = SZ_BERR; + } + *status = state; + return precision; +} + +double getRealPrecision_int(long valueRangeSize, int errBoundMode, double absErrBound, double relBoundRatio, int *status) +{ + int state = SZ_SCES; + double precision = 0; + if(errBoundMode==ABS||errBoundMode==ABS_OR_PW_REL||errBoundMode==ABS_AND_PW_REL) + precision = absErrBound; + else if(errBoundMode==REL||errBoundMode==REL_OR_PW_REL||errBoundMode==REL_AND_PW_REL) + precision = relBoundRatio*valueRangeSize; + else if(errBoundMode==ABS_AND_REL) + precision = min_f(absErrBound, relBoundRatio*valueRangeSize); + else if(errBoundMode==ABS_OR_REL) + precision = max_f(absErrBound, relBoundRatio*valueRangeSize); + else if(errBoundMode==PW_REL) + precision = -1; + else + { + printf("Error: error-bound-mode is incorrect!\n"); + state = SZ_BERR; + } + *status = state; + return precision; +} + +void symTransform_8bytes(unsigned char data[8]) +{ + unsigned char tmp = data[0]; + data[0] = data[7]; + data[7] = tmp; + + tmp = data[1]; + data[1] = data[6]; + data[6] = tmp; + + tmp = data[2]; + data[2] = data[5]; + data[5] = tmp; + + tmp = data[3]; + data[3] = data[4]; + data[4] = tmp; +} + +inline void symTransform_2bytes(unsigned char data[2]) +{ + unsigned char tmp = data[0]; + data[0] = data[1]; + data[1] = tmp; +} + +inline void symTransform_4bytes(unsigned char data[4]) +{ + unsigned char tmp = data[0]; + data[0] = data[3]; + data[3] = tmp; + + tmp = data[1]; + data[1] = data[2]; + data[2] = tmp; +} + +inline void compressInt8Value(int8_t tgtValue, int8_t minValue, int byteSize, unsigned char* bytes) +{ + uint8_t data = tgtValue - minValue; + memcpy(bytes, &data, byteSize); //byteSize==1 +} + +inline void compressInt16Value(int16_t tgtValue, int16_t minValue, int byteSize, unsigned char* bytes) +{ + uint16_t data = tgtValue - minValue; + unsigned char tmpBytes[2]; + int16ToBytes_bigEndian(tmpBytes, data); + memcpy(bytes, tmpBytes + 2 - byteSize, byteSize); +} + +inline 
void compressInt32Value(int32_t tgtValue, int32_t minValue, int byteSize, unsigned char* bytes) +{ + uint32_t data = tgtValue - minValue; + unsigned char tmpBytes[4]; + int32ToBytes_bigEndian(tmpBytes, data); + memcpy(bytes, tmpBytes + 4 - byteSize, byteSize); +} + +inline void compressInt64Value(int64_t tgtValue, int64_t minValue, int byteSize, unsigned char* bytes) +{ + uint64_t data = tgtValue - minValue; + unsigned char tmpBytes[8]; + int64ToBytes_bigEndian(tmpBytes, data); + memcpy(bytes, tmpBytes + 8 - byteSize, byteSize); +} + +inline void compressUInt8Value(uint8_t tgtValue, uint8_t minValue, int byteSize, unsigned char* bytes) +{ + uint8_t data = tgtValue - minValue; + memcpy(bytes, &data, byteSize); //byteSize==1 +} + +inline void compressUInt16Value(uint16_t tgtValue, uint16_t minValue, int byteSize, unsigned char* bytes) +{ + uint16_t data = tgtValue - minValue; + unsigned char tmpBytes[2]; + int16ToBytes_bigEndian(tmpBytes, data); + memcpy(bytes, tmpBytes + 2 - byteSize, byteSize); +} + +inline void compressUInt32Value(uint32_t tgtValue, uint32_t minValue, int byteSize, unsigned char* bytes) +{ + uint32_t data = tgtValue - minValue; + unsigned char tmpBytes[4]; + int32ToBytes_bigEndian(tmpBytes, data); + memcpy(bytes, tmpBytes + 4 - byteSize, byteSize); +} + +inline void compressUInt64Value(uint64_t tgtValue, uint64_t minValue, int byteSize, unsigned char* bytes) +{ + uint64_t data = tgtValue - minValue; + unsigned char tmpBytes[8]; + int64ToBytes_bigEndian(tmpBytes, data); + memcpy(bytes, tmpBytes + 8 - byteSize, byteSize); +} + +inline void compressSingleFloatValue(FloatValueCompressElement *vce, float tgtValue, float precision, float medianValue, + int reqLength, int reqBytesLength, int resiBitsLength) +{ + float normValue = tgtValue - medianValue; + + lfloat lfBuf; + lfBuf.value = normValue; + + int ignBytesLength = 32 - reqLength; + if(ignBytesLength<0) + ignBytesLength = 0; + + int tmp_int = lfBuf.ivalue; + intToBytes_bigEndian(vce->curBytes, 
tmp_int); + + lfBuf.ivalue = (lfBuf.ivalue >> ignBytesLength) << ignBytesLength; + + //float tmpValue = lfBuf.value; + + vce->data = lfBuf.value+medianValue; + vce->curValue = tmp_int; + vce->reqBytesLength = reqBytesLength; + vce->resiBitsLength = resiBitsLength; +} + +void compressSingleFloatValue_MSST19(FloatValueCompressElement *vce, float tgtValue, float precision, int reqLength, int reqBytesLength, int resiBitsLength) +{ + float normValue = tgtValue; + + lfloat lfBuf; + lfBuf.value = normValue; + + int ignBytesLength = 32 - reqLength; + if(ignBytesLength<0) + ignBytesLength = 0; + + int tmp_int = lfBuf.ivalue; + intToBytes_bigEndian(vce->curBytes, tmp_int); + + lfBuf.ivalue = (lfBuf.ivalue >> ignBytesLength) << ignBytesLength; + + //float tmpValue = lfBuf.value; + + vce->data = lfBuf.value; + vce->curValue = tmp_int; + vce->reqBytesLength = reqBytesLength; + vce->resiBitsLength = resiBitsLength; +} + +void compressSingleDoubleValue_MSST19(DoubleValueCompressElement *vce, double tgtValue, double precision, int reqLength, int reqBytesLength, int resiBitsLength) +{ + ldouble lfBuf; + lfBuf.value = tgtValue; + + int ignBytesLength = 64 - reqLength; + if(ignBytesLength<0) + ignBytesLength = 0; + + long tmp_long = lfBuf.lvalue; + longToBytes_bigEndian(vce->curBytes, tmp_long); + + lfBuf.lvalue = (lfBuf.lvalue >> ignBytesLength) << ignBytesLength; + + //float tmpValue = lfBuf.value; + + vce->data = lfBuf.value; + vce->curValue = tmp_long; + vce->reqBytesLength = reqBytesLength; + vce->resiBitsLength = resiBitsLength; +} + +void compressSingleDoubleValue(DoubleValueCompressElement *vce, double tgtValue, double precision, double medianValue, + int reqLength, int reqBytesLength, int resiBitsLength) +{ + double normValue = tgtValue - medianValue; + + ldouble lfBuf; + lfBuf.value = normValue; + + int ignBytesLength = 64 - reqLength; + if(ignBytesLength<0) + ignBytesLength = 0; + + long tmp_long = lfBuf.lvalue; + longToBytes_bigEndian(vce->curBytes, tmp_long); + + 
lfBuf.lvalue = (lfBuf.lvalue >> ignBytesLength)<data = lfBuf.value+medianValue; + vce->curValue = tmp_long; + vce->reqBytesLength = reqBytesLength; + vce->resiBitsLength = resiBitsLength; +} + +int compIdenticalLeadingBytesCount_double(unsigned char* preBytes, unsigned char* curBytes) +{ + int i, n = 0; + for(i=0;i<8;i++) + if(preBytes[i]==curBytes[i]) + n++; + else + break; + if(n>3) n = 3; + return n; +} + +inline int compIdenticalLeadingBytesCount_float(unsigned char* preBytes, unsigned char* curBytes) +{ + int i, n = 0; + for(i=0;i<4;i++) + if(preBytes[i]==curBytes[i]) + n++; + else + break; + if(n>3) n = 3; + return n; +} + +//TODO double-check the correctness... +inline void addExactData(DynamicByteArray *exactMidByteArray, DynamicIntArray *exactLeadNumArray, + DynamicIntArray *resiBitArray, LossyCompressionElement *lce) +{ + int i; + int leadByteLength = lce->leadingZeroBytes; + addDIA_Data(exactLeadNumArray, leadByteLength); + unsigned char* intMidBytes = lce->integerMidBytes; + int integerMidBytesLength = lce->integerMidBytes_Length; + int resMidBitsLength = lce->resMidBitsLength; + if(intMidBytes!=NULL||resMidBitsLength!=0) + { + if(intMidBytes!=NULL) + for(i = 0;iresidualMidBits); + } +} + +/** + * @deprecated + * @return: the length of the coefficient array. 
+ * */ +int getPredictionCoefficients(int layers, int dimension, int **coeff_array, int *status) +{ + size_t size = 0; + switch(dimension) + { + case 1: + switch(layers) + { + case 1: + *coeff_array = (int*)malloc(sizeof(int)); + (*coeff_array)[0] = 1; + size = 1; + break; + case 2: + *coeff_array = (int*)malloc(2*sizeof(int)); + (*coeff_array)[0] = 2; + (*coeff_array)[1] = -1; + size = 2; + break; + case 3: + *coeff_array = (int*)malloc(3*sizeof(int)); + (*coeff_array)[0] = 3; + (*coeff_array)[1] = -3; + (*coeff_array)[2] = 1; + break; + } + break; + case 2: + switch(layers) + { + case 1: + + break; + case 2: + + break; + case 3: + + break; + } + break; + case 3: + switch(layers) + { + case 1: + + break; + case 2: + + break; + case 3: + + break; + } + break; + default: + printf("Error: dimension must be no greater than 3 in the current version.\n"); + *status = SZ_DERR; + } + *status = SZ_SCES; + return size; +} + +int computeBlockEdgeSize_2D(int segmentSize) +{ + int i = 1; + for(i=1; isegmentSize) + break; + } + return i; + //return (int)(sqrt(segmentSize)+1); +} + +int computeBlockEdgeSize_3D(int segmentSize) +{ + int i = 1; + for(i=1; isegmentSize) + break; + } + return i; + //return (int)(pow(segmentSize, 1.0/3)+1); +} + +//convert random-access version based bytes to output bytes +int initRandomAccessBytes(unsigned char* raBytes) +{ + int k = 0, i = 0; + for (i = 0; i < 3; i++)//3 + raBytes[k++] = versionNumber[i]; + int sameByte = 0x80; //indicating this is regression-based compression mode + if(exe_params->SZ_SIZE_TYPE==8) + sameByte = (unsigned char) (sameByte | 0x40); // 01000000, the 6th bit + if(confparams_cpr->randomAccess) + sameByte = (unsigned char) (sameByte | 0x02); // 00000010, random access + //sameByte = sameByte | (confparams_cpr->szMode << 1); + if(confparams_cpr->protectValueRange) + sameByte = (unsigned char) (sameByte | 0x04); //00000100, protect value range + + raBytes[k++] = sameByte; + + convertSZParamsToBytes(confparams_cpr, 
&(raBytes[k])); + if(confparams_cpr->dataType==SZ_FLOAT) + k = k + MetaDataByteLength; + else if(confparams_cpr->dataType==SZ_DOUBLE) + k = k + MetaDataByteLength_double; + + return k; +} + +//The following functions are float-precision version of dealing with the unpredictable data points +int generateLossyCoefficients_float(float* oriData, double precision, size_t nbEle, int* reqBytesLength, int* resiBitsLength, float* medianValue, float* decData) +{ + float valueRangeSize; + + computeRangeSize_float(oriData, nbEle, &valueRangeSize, medianValue); + short radExpo = getExponent_float(valueRangeSize/2); + + int reqLength; + computeReqLength_float(precision, radExpo, &reqLength, medianValue); + + *reqBytesLength = reqLength/8; + *resiBitsLength = reqLength%8; + + size_t i = 0; + for(i = 0;i < nbEle;i++) + { + float normValue = oriData[i] - *medianValue; + + lfloat lfBuf; + lfBuf.value = normValue; + + int ignBytesLength = 32 - reqLength; + if(ignBytesLength<0) + ignBytesLength = 0; + + lfBuf.ivalue = (lfBuf.ivalue >> ignBytesLength) << ignBytesLength; + + //float tmpValue = lfBuf.value; + + decData[i] = lfBuf.value + *medianValue; + } + return reqLength; +} + +/** + * @param float* oriData: inplace argument (input / output) + * + * */ +int compressExactDataArray_float(float* oriData, double precision, size_t nbEle, unsigned char** leadArray, unsigned char** midArray, unsigned char** resiArray, +int reqLength, int reqBytesLength, int resiBitsLength, float medianValue) +{ + //allocate memory for coefficient compression arrays + DynamicIntArray *exactLeadNumArray; + new_DIA(&exactLeadNumArray, DynArrayInitLen); + DynamicByteArray *exactMidByteArray; + new_DBA(&exactMidByteArray, DynArrayInitLen); + DynamicIntArray *resiBitArray; + new_DIA(&resiBitArray, DynArrayInitLen); + unsigned char preDataBytes[4] = {0,0,0,0}; + + //allocate memory for vce and lce + FloatValueCompressElement *vce = (FloatValueCompressElement*)malloc(sizeof(FloatValueCompressElement)); + 
LossyCompressionElement *lce = (LossyCompressionElement*)malloc(sizeof(LossyCompressionElement)); + + size_t i = 0; + for(i = 0;i < nbEle;i++) + { + compressSingleFloatValue(vce, oriData[i], precision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + oriData[i] = vce->data; + } + convertDIAtoInts(exactLeadNumArray, leadArray); + convertDBAtoBytes(exactMidByteArray,midArray); + convertDIAtoInts(resiBitArray, resiArray); + + size_t midArraySize = exactMidByteArray->size; + + free(vce); + free(lce); + + free_DIA(exactLeadNumArray); + free_DBA(exactMidByteArray); + free_DIA(resiBitArray); + + return midArraySize; +} + +void decompressExactDataArray_float(unsigned char* leadNum, unsigned char* exactMidBytes, unsigned char* residualMidBits, size_t nbEle, int reqLength, float medianValue, float** decData) +{ + *decData = (float*)malloc(nbEle*sizeof(float)); + size_t i = 0, j = 0, k = 0, l = 0, p = 0, curByteIndex = 0; + float exactData = 0; + unsigned char preBytes[4] = {0,0,0,0}; + unsigned char curBytes[4]; + int resiBits; + unsigned char leadingNum; + + int reqBytesLength = reqLength/8; + int resiBitsLength = reqLength%8; + + for(i = 0; i 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (residualMidBits[p] & code); + p++; + } + k += 
resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 4); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToFloat(curBytes); + (*decData)[i] = exactData + medianValue; + memcpy(preBytes,curBytes,4); + } +} + +//double-precision version of dealing with unpredictable data points in sz 2.0 +int generateLossyCoefficients_double(double* oriData, double precision, size_t nbEle, int* reqBytesLength, int* resiBitsLength, double* medianValue, double* decData) +{ + double valueRangeSize; + + computeRangeSize_double(oriData, nbEle, &valueRangeSize, medianValue); + short radExpo = getExponent_double(valueRangeSize/2); + + int reqLength; + computeReqLength_double(precision, radExpo, &reqLength, medianValue); + + *reqBytesLength = reqLength/8; + *resiBitsLength = reqLength%8; + + size_t i = 0; + for(i = 0;i < nbEle;i++) + { + double normValue = oriData[i] - *medianValue; + + ldouble ldBuf; + ldBuf.value = normValue; + + int ignBytesLength = 64 - reqLength; + if(ignBytesLength<0) + ignBytesLength = 0; + + ldBuf.lvalue = (ldBuf.lvalue >> ignBytesLength) << ignBytesLength; + + decData[i] = ldBuf.value + *medianValue; + } + return reqLength; +} + +/** + * @param double* oriData: inplace argument (input / output) + * + * */ +int compressExactDataArray_double(double* oriData, double precision, size_t nbEle, unsigned char** leadArray, unsigned char** midArray, unsigned char** resiArray, +int reqLength, int reqBytesLength, int resiBitsLength, double medianValue) +{ + //allocate memory for coefficient compression arrays + DynamicIntArray *exactLeadNumArray; + new_DIA(&exactLeadNumArray, DynArrayInitLen); + DynamicByteArray *exactMidByteArray; + new_DBA(&exactMidByteArray, 
DynArrayInitLen); + DynamicIntArray *resiBitArray; + new_DIA(&resiBitArray, DynArrayInitLen); + unsigned char preDataBytes[8] = {0,0,0,0,0,0,0,0}; + + //allocate memory for vce and lce + DoubleValueCompressElement *vce = (DoubleValueCompressElement*)malloc(sizeof(DoubleValueCompressElement)); + LossyCompressionElement *lce = (LossyCompressionElement*)malloc(sizeof(LossyCompressionElement)); + + size_t i = 0; + for(i = 0;i < nbEle;i++) + { + compressSingleDoubleValue(vce, oriData[i], precision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + oriData[i] = vce->data; + } + convertDIAtoInts(exactLeadNumArray, leadArray); + convertDBAtoBytes(exactMidByteArray,midArray); + convertDIAtoInts(resiBitArray, resiArray); + + size_t midArraySize = exactMidByteArray->size; + + free(vce); + free(lce); + + free_DIA(exactLeadNumArray); + free_DBA(exactMidByteArray); + free_DIA(resiBitArray); + + return midArraySize; +} + +void decompressExactDataArray_double(unsigned char* leadNum, unsigned char* exactMidBytes, unsigned char* residualMidBits, size_t nbEle, int reqLength, double medianValue, double** decData) +{ + *decData = (double*)malloc(nbEle*sizeof(double)); + size_t i = 0, j = 0, k = 0, l = 0, p = 0, curByteIndex = 0; + double exactData = 0; + unsigned char preBytes[8] = {0,0,0,0,0,0,0,0}; + unsigned char curBytes[8]; + int resiBits; + unsigned char leadingNum; + + int reqBytesLength = reqLength/8; + int resiBitsLength = reqLength%8; + + for(i = 0; i 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - 
leftMovSteps; + resiBits = (residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 8); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToDouble(curBytes); + (*decData)[i] = exactData + medianValue; + memcpy(preBytes,curBytes,8); + } +} diff --git a/deps/SZ/sz/src/dictionary.c b/deps/SZ/sz/src/dictionary.c new file mode 100644 index 0000000000000000000000000000000000000000..3f0f5cfa63a862fa515e9e2d21674ad61b7c2f6f --- /dev/null +++ b/deps/SZ/sz/src/dictionary.c @@ -0,0 +1,398 @@ +/*-------------------------------------------------------------------------*/ +/** + @file dictionary.c + @author N. Devillard + @brief Implements a dictionary for string variables. + + This module implements a simple dictionary object, i.e. a list + of string/string associations. This object is useful to store e.g. + informations retrieved from a configuration file (ini files). +*/ +/*--------------------------------------------------------------------------*/ + +/*--------------------------------------------------------------------------- + Includes + ---------------------------------------------------------------------------*/ +#include "dictionary.h" + +#include +#include +#include +#include + +/** Maximum value size for integers and doubles. 
*/ +#define MAXVALSZ 1024 + +/** Minimal allocated number of entries in a dictionary */ +#define DICTMINSZ 128 + +/** Invalid key token */ +#define DICT_INVALID_KEY ((char*)-1) + +/*--------------------------------------------------------------------------- + Private functions + ---------------------------------------------------------------------------*/ + +/* Doubles the allocated size associated to a pointer */ +/* 'size' is the current allocated size. */ +static void * mem_double(void * ptr, int size) +{ + void * newptr ; + + newptr = calloc(2*size, 1); + if (newptr==NULL) { + return NULL ; + } + memcpy(newptr, ptr, size); + free(ptr); + return newptr ; +} + +/*-------------------------------------------------------------------------*/ +/** + @brief Duplicate a string + @param s String to duplicate + @return Pointer to a newly allocated string, to be freed with free() + + This is a replacement for strdup(). This implementation is provided + for systems that do not have it. + */ +/*--------------------------------------------------------------------------*/ +static char * xstrdup(const char * s) +{ + char * t ; + if (!s) + return NULL ; + t = (char*)malloc(strlen(s)+1) ; + if (t) { + strcpy(t,s); + } + return t ; +} + +/*--------------------------------------------------------------------------- + Function codes + ---------------------------------------------------------------------------*/ +/*-------------------------------------------------------------------------*/ +/** + @brief Compute the hash key for a string. + @param key Character string to use for key. + @return 1 unsigned int on at least 32 bits. + + This hash function has been taken from an Article in Dr Dobbs Journal. + This is normally a collision-free function, distributing keys evenly. + The key is stored anyway in the struct so that collision can be avoided + by comparing the key itself in last resort. 
+ */ +/*--------------------------------------------------------------------------*/ +unsigned dictionary_hash(const char * key) +{ + int len ; + unsigned hash ; + int i ; + + len = strlen(key); + for (hash=0, i=0 ; i>6) ; + } + hash += (hash <<3); + hash ^= (hash >>11); + hash += (hash <<15); + return hash ; +} + +/*-------------------------------------------------------------------------*/ +/** + @brief Create a new dictionary object. + @param size Optional initial size of the dictionary. + @return 1 newly allocated dictionary objet. + + This function allocates a new dictionary object of given size and returns + it. If you do not know in advance (roughly) the number of entries in the + dictionary, give size=0. + */ +/*--------------------------------------------------------------------------*/ +dictionary * dictionary_new(int size) +{ + dictionary * d ; + + /* If no size was specified, allocate space for DICTMINSZ */ + if (sizesize = size ; + d->val = (char **)calloc(size, sizeof(char*)); + d->key = (char **)calloc(size, sizeof(char*)); + d->hash = (unsigned int *)calloc(size, sizeof(unsigned)); + return d ; +} + +/*-------------------------------------------------------------------------*/ +/** + @brief Delete a dictionary object + @param d dictionary object to deallocate. + @return void + + Deallocate a dictionary object and all memory associated to it. + */ +/*--------------------------------------------------------------------------*/ +void dictionary_del(dictionary * d) +{ + int i ; + + if (d==NULL) return ; + for (i=0 ; isize ; i++) { + if (d->key[i]!=NULL) + free(d->key[i]); + if (d->val[i]!=NULL) + free(d->val[i]); + } + free(d->val); + free(d->key); + free(d->hash); + free(d); + return ; +} + +/*-------------------------------------------------------------------------*/ +/** + @brief Get a value from a dictionary. + @param d dictionary object to search. + @param key Key to look for in the dictionary. + @param def Default value to return if key not found. 
+ @return 1 pointer to internally allocated character string. + + This function locates a key in a dictionary and returns a pointer to its + value, or the passed 'def' pointer if no such key can be found in + dictionary. The returned character pointer points to data internal to the + dictionary object, you should not try to free it or modify it. + */ +/*--------------------------------------------------------------------------*/ +char * dictionary_get(dictionary * d, const char * key, char * def) +{ + unsigned hash ; + int i ; + + hash = dictionary_hash(key); + for (i=0 ; isize ; i++) { + if (d->key[i]==NULL) + continue ; + /* Compare hash */ + if (hash==d->hash[i]) { + /* Compare string, to avoid hash collisions */ + if (!strcmp(key, d->key[i])) { + return d->val[i] ; + } + } + } + return def ; +} + +/*-------------------------------------------------------------------------*/ +/** + @brief Set a value in a dictionary. + @param d dictionary object to modify. + @param key Key to modify or add. + @param val Value to add. + @return int 0 if Ok, anything else otherwise + + If the given key is found in the dictionary, the associated value is + replaced by the provided one. If the key cannot be found in the + dictionary, it is added to it. + + It is Ok to provide a NULL value for val, but NULL values for the dictionary + or the key are considered as errors: the function will return immediately + in such a case. + + Notice that if you dictionary_set a variable to NULL, a call to + dictionary_get will return a NULL value: the variable will be found, and + its value (NULL) is returned. In other words, setting the variable + content to NULL is equivalent to deleting the variable from the + dictionary. It is not possible (in this implementation) to have a key in + the dictionary without value. + + This function returns non-zero in case of failure. 
+ */ +/*--------------------------------------------------------------------------*/ +int dictionary_set(dictionary * d, const char * key, const char * val) +{ + int i ; + unsigned hash ; + + if (d==NULL || key==NULL) return -1 ; + + /* Compute hash for this key */ + hash = dictionary_hash(key) ; + /* Find if value is already in dictionary */ + if (d->n>0) { + for (i=0 ; isize ; i++) { + if (d->key[i]==NULL) + continue ; + if (hash==d->hash[i]) { /* Same hash value */ + if (!strcmp(key, d->key[i])) { /* Same key */ + /* Found a value: modify and return */ + if (d->val[i]!=NULL) + free(d->val[i]); + d->val[i] = val ? xstrdup(val) : NULL ; + /* Value has been modified: return */ + return 0 ; + } + } + } + } + /* Add a new value */ + /* See if dictionary needs to grow */ + if (d->n==d->size) { + + /* Reached maximum size: reallocate dictionary */ + d->val = (char **)mem_double(d->val, d->size * sizeof(char*)) ; + d->key = (char **)mem_double(d->key, d->size * sizeof(char*)) ; + d->hash = (unsigned int *)mem_double(d->hash, d->size * sizeof(unsigned)) ; + if ((d->val==NULL) || (d->key==NULL) || (d->hash==NULL)) { + /* Cannot grow dictionary */ + return -1 ; + } + /* Double size */ + d->size *= 2 ; + } + + /* Insert key in the first empty slot. Start at d->n and wrap at + d->size. Because d->n < d->size this will necessarily + terminate. */ + for (i=d->n ; d->key[i] ; ) { + if(++i == d->size) i = 0; + } + /* Copy key */ + d->key[i] = xstrdup(key); + d->val[i] = val ? xstrdup(val) : NULL ; + d->hash[i] = hash; + d->n ++ ; + return 0 ; +} + +/*-------------------------------------------------------------------------*/ +/** + @brief Delete a key in a dictionary + @param d dictionary object to modify. + @param key Key to remove. + @return void + + This function deletes a key in a dictionary. Nothing is done if the + key cannot be found. 
+ */ +/*--------------------------------------------------------------------------*/ +void dictionary_unset(dictionary * d, const char * key) +{ + unsigned hash ; + int i ; + + if (key == NULL) { + return; + } + + hash = dictionary_hash(key); + for (i=0 ; isize ; i++) { + if (d->key[i]==NULL) + continue ; + /* Compare hash */ + if (hash==d->hash[i]) { + /* Compare string, to avoid hash collisions */ + if (!strcmp(key, d->key[i])) { + /* Found key */ + break ; + } + } + } + if (i>=d->size) + /* Key not found */ + return ; + + free(d->key[i]); + d->key[i] = NULL ; + if (d->val[i]!=NULL) { + free(d->val[i]); + d->val[i] = NULL ; + } + d->hash[i] = 0 ; + d->n -- ; + return ; +} + +/*-------------------------------------------------------------------------*/ +/** + @brief Dump a dictionary to an opened file pointer. + @param d Dictionary to dump + @param f Opened file pointer. + @return void + + Dumps a dictionary onto an opened file pointer. Key pairs are printed out + as @c [Key]=[Value], one per line. It is Ok to provide stdout or stderr as + output file pointers. + */ +/*--------------------------------------------------------------------------*/ +void dictionary_dump(dictionary * d, FILE * out) +{ + int i ; + + if (d==NULL || out==NULL) return ; + if (d->n<1) { + fprintf(out, "empty dictionary\n"); + return ; + } + for (i=0 ; isize ; i++) { + if (d->key[i]) { + fprintf(out, "%20s\t[%s]\n", + d->key[i], + d->val[i] ? 
d->val[i] : "UNDEF"); + } + } + return ; +} + + +/* Test code */ +#ifdef TESTDIC +#define NVALS 20000 +int main(int argc, char *argv[]) +{ + dictionary * d ; + char * val ; + int i ; + char cval[90] ; + + /* Allocate dictionary */ + printf("allocating...\n"); + d = dictionary_new(0); + + /* Set values in dictionary */ + printf("setting %d values...\n", NVALS); + for (i=0 ; in != 0) { + printf("error deleting values\n"); + } + printf("deallocating...\n"); + dictionary_del(d); + return 0 ; +} +#endif +/* vim: set ts=4 et sw=4 tw=75 */ diff --git a/deps/SZ/sz/src/exafelSZ.c b/deps/SZ/sz/src/exafelSZ.c new file mode 100644 index 0000000000000000000000000000000000000000..cc4e52bf508fe1eddf06ea827f9fa59ae38e27e0 --- /dev/null +++ b/deps/SZ/sz/src/exafelSZ.c @@ -0,0 +1,597 @@ +#ifdef __cplusplus +extern "C" { +#endif + +#include "sz.h" + +void exafelSZ_params_process(exafelSZ_params*pr, size_t panels, size_t rows, size_t cols){ + pr->binnedRows=(rows+pr->binSize-1)/pr->binSize; + pr->binnedCols=(cols+pr->binSize-1)/pr->binSize; + + pr->peakRadius=(pr->peakSize-1)/2; +} + +void exafelSZ_params_checkDecomp(exafelSZ_params*pr, size_t panels, size_t rows, size_t cols){ + if(pr->calibPanel==NULL){ + printf("ERROR: calibPanel is NULL : calibPanel=%ld\n",(long)pr->calibPanel); + assert(0); + } + if(pr->binSize<1 || pr->tolerance<0 || pr->szDim<1 || pr->szDim>3){ + printf("ERROR: Something wrong with the following:\n"); + printf("binSize=%d\n",(int)pr->binSize); + printf("tolerance=%d\n",(int)pr->tolerance); + printf("szDim=%d\n",(int)pr->szDim); + assert(0); + } + if(!(pr->peakSize%2)){ + printf("ERROR: peakSize = %d cannot be even. 
It must be odd!\n",(int)pr->peakSize); + assert(0); + } + //if(nEvents<1 || panels<1 || rows<1 || cols<1){ + if(panels<1 || rows<1 || cols<1){ + printf("ERROR: Something wrong with the following:\n"); + printf("panels=%d\n",(int)panels); + printf("rows=%d\n",(int)rows); + printf("cols=%d\n",(int)cols); + assert(0); + } +} + +void exafelSZ_params_checkComp(exafelSZ_params*pr, size_t panels, size_t rows, size_t cols){ + if(pr->peaksSegs==NULL || pr->peaksRows==NULL || pr->peaksCols==NULL){ + printf("ERROR: One or more of the following are NULL : peaksSegs , peaksRows , peaksCols\n"); + assert(0); + } + exafelSZ_params_checkDecomp(pr, panels, rows, cols); +} + +void exafelSZ_params_print(exafelSZ_params*pr){ + printf("Configuration (exafelSZ_params) :\n"); + printf("binSize: %d\n",pr->binSize); + printf("tolerance:%e\n",pr->tolerance); + printf("szDim:%d\n",pr->szDim); + printf("peakSize:%d\n",pr->peakSize); + //printf("nEvents:%d\n",pr->nEvents); + //printf("panels:%d\n",pr->panels); + //printf("rows:%d\n",pr->rows); + //printf("cols:%d\n",pr->cols); + printf("\n"); + printf("CALCULATED VARIABLES\n"); + printf("binnedRows:%ld\n",pr->binnedRows); + printf("binnedCols:%ld\n",pr->binnedCols); + printf("peakRadius:%d\n",pr->peakRadius); + printf("\n"); + // outs<<"Configuration (exafelSZ_params) : "<binnedRows*pr->binnedCols*sizeof(float)) ; + //float *binnedData=(float*)malloc(nEvents*panels*rows*cols*sizeof(float)) ; + + size_t e,p,r,c,pk,ri,ci,br,bc,roii,bi; + /* + printf("AMG : exafelSZ_Compress : pr->numPeaks = %d\n",pr->numPeaks); + printf("S:\n"); + for(e=0;enumPeaks;e++) + printf("%d ",pr->peaksSegs[e]); + printf("\nR:\n"); + for(e=0;enumPeaks;e++) + printf("%d ",pr->peaksRows[e]); + printf("\nC:\n"); + for(e=0;enumPeaks;e++) + printf("%d ",pr->peaksCols[e]); + printf("\n"); + */ + + //Generate the ROI mask: NOTE: 0 means affirmative in ROI mask! This comes from the python scripts! 
+ //First, initialize with calibration panel: + for(e=0;ecalibPanel[calcIdx_2D(r,c,cols)]; //calibPanel is a single segment copied over all the event(image) + roiM[calcIdx_4D(e,p,r,c,panels,rows,cols)]=pr->calibPanel[calcIdx_3D(p,r,c,rows,cols)]; //calibPanel is as big as the event(image) itself + } + } + } + } + //uint64_t peaksBytePos=0; //Position in the peaks buffer + //Now process the peaks and generate the mask: + uint64_t nPeaksTotal=0; //Total number of peaks + for(e=0;epeaks[peaksBytePos]); + //peaksBytePos+=8; + + //peaksBytePos+=8;//Skip the second one! This is due to the problem in Python. + + nPeaksTotal+=pr->numPeaks; + for(pk=0;pknumPeaks;pk++){ + //uint16_t p_=*(uint16_t*)(&pr->peaks[peaksBytePos]); //Panel for the current peak + //peaksBytePos+=2; + //uint16_t r_=*(uint16_t*)(&pr->peaks[peaksBytePos]); //Row for the current peak + //peaksBytePos+=2; + //uint16_t c_=*(uint16_t*)(&pr->peaks[peaksBytePos]); //Col for the current peak + //peaksBytePos+=2; + + uint16_t p_=pr->peaksSegs[pk]; + uint16_t r_=pr->peaksRows[pk]; + uint16_t c_=pr->peaksCols[pk]; + + if(p_>=panels){ + printf("ERROR: Peak coordinate out of bounds: Panel=%d, Valid range: 0,%d\n",(int)p_,(int)panels-1); + assert(0); + printf("Skipping this peak...\n"); + continue; + } + if(r_>=rows){ + printf("ERROR: Peak coordinate out of bounds: Row=%d, Valid range: 0,%d\n",(int)r_,(int)rows-1); + assert(0); + printf("Skipping this peak...\n"); + continue; + } + if(c_>=cols){ + printf("ERROR: Peak coordinate out of bounds: Col=%d, Valid range: 0,%d\n",(int)c_,(int)cols-1); + assert(0); + printf("Skipping this peak...\n"); + continue; + } + + for(ri=r_-pr->peakRadius;ri<=r_+pr->peakRadius;ri++){ //ri: row index. Just a temporary variable. + for(ci=c_-pr->peakRadius;ci<=c_+pr->peakRadius;ci++){ //ci: column index. Just a temporary variable. 
+ if(ribinnedRows;r++){ //Row of the binnedData + for(c=0;cbinnedCols;c++){ //Column of the binnedData + float sum=0; + int nPts=0; + for(br=0;brbinSize;br++) //Bin Row (from origData) + for(bc=0;bcbinSize;bc++) //Bin Column (from origData) + if(r*pr->binSize+brbinSize+bcbinSize+br<<" "<binSize+bc<binSize+br,c*pr->binSize+bc,panels,rows,cols)]; + nPts++; + } + // cout<<"p:"<binnedRows,pr->binnedCols)]=sum/nPts; + } + } + } + } + + //Additional compression using SZ: + size_t szCompressedSize=0; + unsigned char* szComp; + + switch(pr->szDim){ + case 1: + // szComp=sz_compress_3D(binnedData, 0, 0, nEvents * panels * pr->binnedRows * pr->binnedCols, pr->tolerance, szCompressedSize); //1D + szComp=SZ_compress_args(SZ_FLOAT, binnedData, &szCompressedSize, ABS, pr->tolerance, 0, 0, 0, 0,0,0, nEvents * panels * pr->binnedRows * pr->binnedCols); + break; + case 2: + // szComp=sz_compress_3D(binnedData, 0, nEvents * panels * pr->binnedRows, pr->binnedCols, pr->tolerance, szCompressedSize); //2D + szComp=SZ_compress_args(SZ_FLOAT, binnedData, &szCompressedSize, ABS, pr->tolerance, 0, 0, 0, 0,0, nEvents * panels * pr->binnedRows, pr->binnedCols); + break; + case 3: + // szComp=sz_compress_3D(binnedData, nEvents * panels, pr->binnedRows, pr->binnedCols, pr->tolerance, szCompressedSize); //3D + szComp=SZ_compress_args(SZ_FLOAT, binnedData, &szCompressedSize, ABS, pr->tolerance, 0, 0, 0, 0, nEvents * panels, pr->binnedRows, pr->binnedCols); + break; + default: + printf("ERROR: Wrong szDim : %d It must be 1,2 or 3.\n",(int)pr->szDim); + assert(0); + } + + /* + Compressed buffer format: (Types are indicated in parenthesis) + WRITE: nPeaksTotal(uint64_t) (Total number of peaks in this batch) + for(e=0;epeaks[peaksBytePos]); + //peaksBytePos+=8; + ////peaksBytePos+=8;//Skip the second one. This is due to the error in Python! 
+ + //*(uint64_t*)(&compressedBuffer[bytePos])=nPeaks; + *(uint64_t*)(&compressedBuffer[bytePos])=pr->numPeaks; + bytePos+=8; + //for(pk=0;pknumPeaks;pk++){ + //*(uint16_t*)(&compressedBuffer[bytePos])=*(uint16_t*)(&pr->peaks[peaksBytePos]); //Panel for the current peak + //bytePos+=2; + //peaksBytePos+=2; + //*(uint16_t*)(&compressedBuffer[bytePos])=*(uint16_t*)(&pr->peaks[peaksBytePos]); //Row for the current peak + //bytePos+=2; + //peaksBytePos+=2; + //*(uint16_t*)(&compressedBuffer[bytePos])=*(uint16_t*)(&pr->peaks[peaksBytePos]); //Column for the current peak + //bytePos+=2; + //peaksBytePos+=2; + + *(uint16_t*)(&compressedBuffer[bytePos])=pr->peaksSegs[pk]; //Panel for the current peak + bytePos+=2; + *(uint16_t*)(&compressedBuffer[bytePos])=pr->peaksRows[pk]; //Row for the current peak + bytePos+=2; + *(uint16_t*)(&compressedBuffer[bytePos])=pr->peaksCols[pk]; //Column for the current peak + bytePos+=2; + } + } + // cout<<"peaks"<=rows*cols){ + printf("ERROR: calcIdx_2D(r,c,cols) = calcIdx_2D(%d,%d,%d) = %d",(int)r,(int)c,(int)cols,(int)calcIdx_2D(r,c,cols)); + printf(" is NOT in the correct range: [0,%ld]",(int)rows*cols-1); + assert(0); + } + if(calcIdx_4D(e,p,r,c,panels,rows,cols)<0 ||calcIdx_4D(e,p,r,c,panels,rows,cols)>=nEvents*panels*rows*cols){ + printf("ERROR: calcIdx_4D(e,p,r,c,panels,rows,cols) = calcIdx_4D(%d,%d,%d,%d,%d,%d,%d) = %d",(int)e,(int)p,(int)r,(int)c,(int)panels,(int)rows,(int)cols,(int)calcIdx_4D(e,p,r,c,panels,rows,cols)); + assert(0); + } + //roiM[calcIdx_4D(e,p,r,c,panels,rows,cols)]=pr->calibPanel[calcIdx_2D(r,c,cols)]; //calibPanel is a single segment copied over all the event(image) + roiM[calcIdx_4D(e,p,r,c,panels,rows,cols)]=pr->calibPanel[calcIdx_3D(p,r,c,rows,cols)]; //calibPanel is as big as the event(image) itself + } + } + } + } + uint64_t peaksBytePos=0; //Position in the peaks buffer + //Now process the peaks and generate the mask: + for(e=0;e=panels){ + printf("ERROR: Peak coordinate out of bounds: Panel=%d, Valid 
range: 0,%d\n",(int)p_,(int)panels-1); + assert(0); + printf("Skipping this peak...\n"); + continue; + } + if(r_>=rows){ + printf("ERROR: Peak coordinate out of bounds: Row=%d, Valid range: 0,%d\n",(int)r_,(int)rows-1); + assert(0); + printf("Skipping this peak...\n"); + continue; + } + if(c_>=cols){ + printf("ERROR: Peak coordinate out of bounds: Col=%d, Valid range: 0,%d\n",(int)c_,(int)cols-1); + assert(0); + printf("Skipping this peak...\n"); + continue; + } + + for(ri=r_-pr->peakRadius;ri<=r_+pr->peakRadius;ri++){ //ri: row index. Just a temporary variable. + for(ci=c_-pr->peakRadius;ci<=c_+pr->peakRadius;ci++){ //ci: column index. Just a temporary variable. + if(ri>=0 && ri=0 && ciszDim){ + case 1: + szDecomp=SZ_decompress(SZ_FLOAT,szComp,_szCompressedSize,0,0,0,0, nEvents * panels * pr->binnedRows * pr->binnedCols); + break; + case 2: + szDecomp=SZ_decompress(SZ_FLOAT,szComp,_szCompressedSize,0,0,0, nEvents * panels * pr->binnedRows, pr->binnedCols); + break; + case 3: + szDecomp=SZ_decompress(SZ_FLOAT,szComp,_szCompressedSize,0,0,nEvents * panels, pr->binnedRows, pr->binnedCols); + break; + default: + printf("ERROR: Wrong szDim : %d It must be 1,2 or 3.\n",(int)pr->szDim); + assert(0); + } + //szDecomp=(void*)malloc(nEvents*panels*rows*cols*sizeof(float)); + + // double max_err = 0; + // for(int i=0; ibinnedRows * pr->binnedCols; i++){ + // double err = fabs(szDecomp[i]-binnedData[i]); + // if(err > max_err) max_err = err; + // } + // cout << "Max err = \t\t\t" << max_err << endl; + + + //De-binning: + for(e=0;ebinnedRows;r++) //Row of the binnedData + for(c=0;cbinnedCols;c++) //Column of the binnedData + for(br=0;brbinSize;br++) //Bin Row (from origData) + for(bc=0;bcbinSize;bc++) //Bin Column (from origData) + if(r*pr->binSize+brbinSize+bcbinSize+br,c*pr->binSize+bc,panels,rows,cols)] = szDecomp[calcIdx_4D(e,p,r,c,panels,pr->binnedRows,pr->binnedCols)]; + } + //Restore ROI: + uint64_t current=0; + for(e=0;e +#include "iniparser.h" + 
+/*---------------------------- Defines -------------------------------------*/ +#define ASCIILINESZ (1024) +#define INI_INVALID_KEY ((char*)-1) + +/*--------------------------------------------------------------------------- + Private to this module + ---------------------------------------------------------------------------*/ +/** + * This enum stores the status for each parsed line (internal use only). + */ +typedef enum _line_status_ { + LINE_UNPROCESSED, + LINE_ERROR, + LINE_EMPTY, + LINE_COMMENT, + LINE_SECTION, + LINE_VALUE +} line_status ; + +/*-------------------------------------------------------------------------*/ +/** + @brief Convert a string to lowercase. + @param s String to convert. + @return ptr to statically allocated string. + + This function returns a pointer to a statically allocated string + containing a lowercased version of the input string. Do not free + or modify the returned string! Since the returned string is statically + allocated, it will be modified at each function call (not re-entrant). + */ +/*--------------------------------------------------------------------------*/ +static char * strlwc(const char * s) +{ + static char l[ASCIILINESZ+1]; + int i ; + + if (s==NULL) return NULL ; + memset(l, 0, ASCIILINESZ+1); + i=0 ; + while (s[i] && i l) { + if (!isspace((int)*(last-1))) + break ; + last -- ; + } + *last = (char)0; + return (char*)l ; +} + +/*-------------------------------------------------------------------------*/ +/** + @brief Get number of sections in a dictionary + @param d Dictionary to examine + @return int Number of sections found in dictionary + + This function returns the number of sections found in a dictionary. + The test to recognize sections is done on the string stored in the + dictionary: a section name is given as "section" whereas a key is + stored as "section:key", thus the test looks for entries that do not + contain a colon. 
+ + This clearly fails in the case a section name contains a colon, but + this should simply be avoided. + + This function returns -1 in case of error. + */ +/*--------------------------------------------------------------------------*/ +int iniparser_getnsec(dictionary * d) +{ + int i ; + int nsec ; + + if (d==NULL) return -1 ; + nsec=0 ; + for (i=0 ; isize ; i++) { + if (d->key[i]==NULL) + continue ; + if (strchr(d->key[i], ':')==NULL) { + nsec ++ ; + } + } + return nsec ; +} + +/*-------------------------------------------------------------------------*/ +/** + @brief Get name for section n in a dictionary. + @param d Dictionary to examine + @param n Section number (from 0 to nsec-1). + @return Pointer to char string + + This function locates the n-th section in a dictionary and returns + its name as a pointer to a string statically allocated inside the + dictionary. Do not free or modify the returned string! + + This function returns NULL in case of error. + */ +/*--------------------------------------------------------------------------*/ +char * iniparser_getsecname(dictionary * d, int n) +{ + int i ; + int foundsec ; + + if (d==NULL || n<0) return NULL ; + foundsec=0 ; + for (i=0 ; isize ; i++) { + if (d->key[i]==NULL) + continue ; + if (strchr(d->key[i], ':')==NULL) { + foundsec++ ; + if (foundsec>n) + break ; + } + } + if (foundsec<=n) { + return NULL ; + } + return d->key[i] ; +} + +/*-------------------------------------------------------------------------*/ +/** + @brief Dump a dictionary to an opened file pointer. + @param d Dictionary to dump. + @param f Opened file pointer to dump to. + @return void + + This function prints out the contents of a dictionary, one element by + line, onto the provided file pointer. It is OK to specify @c stderr + or @c stdout as output files. This function is meant for debugging + purposes mostly. 
+ */ +/*--------------------------------------------------------------------------*/ +void iniparser_dump(dictionary * d, FILE * f) +{ + int i ; + + if (d==NULL || f==NULL) return ; + for (i=0 ; isize ; i++) { + if (d->key[i]==NULL) + continue ; + if (d->val[i]!=NULL) { + fprintf(f, "[%s]=[%s]\n", d->key[i], d->val[i]); + } else { + fprintf(f, "[%s]=UNDEF\n", d->key[i]); + } + } + return ; +} + +/*-------------------------------------------------------------------------*/ +/** + @brief Save a dictionary to a loadable ini file + @param d Dictionary to dump + @param f Opened file pointer to dump to + @return void + + This function dumps a given dictionary into a loadable ini file. + It is Ok to specify @c stderr or @c stdout as output files. + */ +/*--------------------------------------------------------------------------*/ +void iniparser_dump_ini(dictionary * d, FILE * f) +{ + int i ; + int nsec ; + char * secname ; + + if (d==NULL || f==NULL) return ; + + nsec = iniparser_getnsec(d); + if (nsec<1) { + /* No section in file: dump all keys as they are */ + for (i=0 ; isize ; i++) { + if (d->key[i]==NULL) + continue ; + fprintf(f, "%s = %s\n", d->key[i], d->val[i]); + } + return ; + } + for (i=0 ; isize ; j++) { + if (d->key[j]==NULL) + continue ; + if (!strncmp(d->key[j], keym, seclen+1)) { + fprintf(f, + "%-30s = %s\n", + d->key[j]+seclen+1, + d->val[j] ? d->val[j] : ""); + } + } + fprintf(f, "\n"); + return ; +} + +/*-------------------------------------------------------------------------*/ +/** + @brief Get the number of keys in a section of a dictionary. + @param d Dictionary to examine + @param s Section name of dictionary to examine + @return Number of keys in section + */ +/*--------------------------------------------------------------------------*/ +int iniparser_getsecnkeys(dictionary * d, char * s) +{ + int seclen, nkeys ; + char keym[ASCIILINESZ+1]; + int j ; + + nkeys = 0; + + if (d==NULL) return nkeys; + if (! 
iniparser_find_entry(d, s)) return nkeys; + + seclen = (int)strlen(s); + sprintf(keym, "%s:", s); + + for (j=0 ; jsize ; j++) { + if (d->key[j]==NULL) + continue ; + if (!strncmp(d->key[j], keym, seclen+1)) + nkeys++; + } + + return nkeys; + +} + +/*-------------------------------------------------------------------------*/ +/** + @brief Get the number of keys in a section of a dictionary. + @param d Dictionary to examine + @param s Section name of dictionary to examine + @return pointer to statically allocated character strings + + This function queries a dictionary and finds all keys in a given section. + Each pointer in the returned char pointer-to-pointer is pointing to + a string allocated in the dictionary; do not free or modify them. + + This function returns NULL in case of error. + */ +/*--------------------------------------------------------------------------*/ +char ** iniparser_getseckeys(dictionary * d, char * s) +{ + + char **keys; + + int i, j ; + char keym[ASCIILINESZ+1]; + int seclen, nkeys ; + + keys = NULL; + + if (d==NULL) return keys; + if (! iniparser_find_entry(d, s)) return keys; + + nkeys = iniparser_getsecnkeys(d, s); + + keys = (char**) malloc(nkeys*sizeof(char*)); + + seclen = (int)strlen(s); + sprintf(keym, "%s:", s); + + i = 0; + + for (j=0 ; jsize ; j++) { + if (d->key[j]==NULL) + continue ; + if (!strncmp(d->key[j], keym, seclen+1)) { + keys[i] = d->key[j]; + i++; + } + } + + return keys; + +} + +/*-------------------------------------------------------------------------*/ +/** + @brief Get the string associated to a key + @param d Dictionary to search + @param key Key string to look for + @param def Default value to return if key not found. + @return pointer to statically allocated character string + + This function queries a dictionary for a key. A key as read from an + ini file is given as "section:key". If the key cannot be found, + the pointer passed as 'def' is returned. 
+ The returned char pointer is pointing to a string allocated in + the dictionary, do not free or modify it. + */ +/*--------------------------------------------------------------------------*/ +char * iniparser_getstring(dictionary * d, const char * key, char * def) +{ + char * lc_key ; + char * sval ; + + if (d==NULL || key==NULL) + return def ; + + lc_key = strlwc(key); + sval = dictionary_get(d, lc_key, def); + return sval ; +} + +/*-------------------------------------------------------------------------*/ +/** + @brief Get the string associated to a key, convert to an int + @param d Dictionary to search + @param key Key string to look for + @param notfound Value to return in case of error + @return integer + + This function queries a dictionary for a key. A key as read from an + ini file is given as "section:key". If the key cannot be found, + the notfound value is returned. + + Supported values for integers include the usual C notation + so decimal, octal (starting with 0) and hexadecimal (starting with 0x) + are supported. Examples: + + "42" -> 42 + "042" -> 34 (octal -> decimal) + "0x42" -> 66 (hexa -> decimal) + + Warning: the conversion may overflow in various ways. Conversion is + totally outsourced to strtol(), see the associated man page for overflow + handling. + + Credits: Thanks to A. 
Becker for suggesting strtol() + */ +/*--------------------------------------------------------------------------*/ +int iniparser_getint(dictionary * d, const char * key, int notfound) +{ + char * str ; + + str = iniparser_getstring(d, key, INI_INVALID_KEY); + if (str==INI_INVALID_KEY) return notfound ; + return (int)strtol(str, NULL, 0); +} + +/*-------------------------------------------------------------------------*/ +/** + @brief Get the string associated to a key, convert to a long + @param d Dictionary to search + @param key Key string to look for + @param notfound Value to return in case of error + @return long + + Credits: This function bases completely on int iniparser_getint and was + slightly modified to return long instead of int. + */ +/*--------------------------------------------------------------------------*/ +long iniparser_getlint(dictionary * d, const char * key, int notfound) +{ + char * str ; + + str = iniparser_getstring(d, key, INI_INVALID_KEY); + if (str==INI_INVALID_KEY) return notfound ; + return strtol(str, NULL, 0); +} + +/*-------------------------------------------------------------------------*/ +/** + @brief Get the string associated to a key, convert to a double + @param d Dictionary to search + @param key Key string to look for + @param notfound Value to return in case of error + @return double + + This function queries a dictionary for a key. A key as read from an + ini file is given as "section:key". If the key cannot be found, + the notfound value is returned. 
+ */ +/*--------------------------------------------------------------------------*/ +double iniparser_getdouble(dictionary * d, const char * key, double notfound) +{ + char * str ; + + str = iniparser_getstring(d, key, INI_INVALID_KEY); + if (str==INI_INVALID_KEY) return notfound ; + return atof(str); +} + +/*-------------------------------------------------------------------------*/ +/** + @brief Get the string associated to a key, convert to a boolean + @param d Dictionary to search + @param key Key string to look for + @param notfound Value to return in case of error + @return integer + + This function queries a dictionary for a key. A key as read from an + ini file is given as "section:key". If the key cannot be found, + the notfound value is returned. + + A true boolean is found if one of the following is matched: + + - A string starting with 'y' + - A string starting with 'Y' + - A string starting with 't' + - A string starting with 'T' + - A string starting with '1' + + A false boolean is found if one of the following is matched: + + - A string starting with 'n' + - A string starting with 'N' + - A string starting with 'f' + - A string starting with 'F' + - A string starting with '0' + + The notfound value returned if no boolean is identified, does not + necessarily have to be 0 or 1. 
+ */ +/*--------------------------------------------------------------------------*/ +int iniparser_getboolean(dictionary * d, const char * key, int notfound) +{ + char * c ; + int ret ; + + c = iniparser_getstring(d, key, INI_INVALID_KEY); + if (c==INI_INVALID_KEY) return notfound ; + if (c[0]=='y' || c[0]=='Y' || c[0]=='1' || c[0]=='t' || c[0]=='T') { + ret = 1 ; + } else if (c[0]=='n' || c[0]=='N' || c[0]=='0' || c[0]=='f' || c[0]=='F') { + ret = 0 ; + } else { + ret = notfound ; + } + return ret; +} + +/*-------------------------------------------------------------------------*/ +/** + @brief Finds out if a given entry exists in a dictionary + @param ini Dictionary to search + @param entry Name of the entry to look for + @return integer 1 if entry exists, 0 otherwise + + Finds out if a given entry exists in the dictionary. Since sections + are stored as keys with NULL associated values, this is the only way + of querying for the presence of sections in a dictionary. + */ +/*--------------------------------------------------------------------------*/ +int iniparser_find_entry( + dictionary * ini, + const char * entry +) +{ + int found=0 ; + if (iniparser_getstring(ini, entry, INI_INVALID_KEY)!=INI_INVALID_KEY) { + found = 1 ; + } + return found ; +} + +/*-------------------------------------------------------------------------*/ +/** + @brief Set an entry in a dictionary. + @param ini Dictionary to modify. + @param entry Entry to modify (entry name) + @param val New value to associate to the entry. + @return int 0 if Ok, -1 otherwise. + + If the given entry can be found in the dictionary, it is modified to + contain the provided value. If it cannot be found, -1 is returned. + It is Ok to set val to NULL. 
+ */ +/*--------------------------------------------------------------------------*/ +int iniparser_set(dictionary * ini, const char * entry, const char * val) +{ + return dictionary_set(ini, strlwc(entry), val) ; +} + +/*-------------------------------------------------------------------------*/ +/** + @brief Delete an entry in a dictionary + @param ini Dictionary to modify + @param entry Entry to delete (entry name) + @return void + + If the given entry can be found, it is deleted from the dictionary. + */ +/*--------------------------------------------------------------------------*/ +void iniparser_unset(dictionary * ini, const char * entry) +{ + dictionary_unset(ini, strlwc(entry)); +} + +/*-------------------------------------------------------------------------*/ +/** + @brief Load a single line from an INI file + @param input_line Input line, may be concatenated multi-line input + @param section Output space to store section + @param key Output space to store key + @param value Output space to store value + @return line_status value + */ +/*--------------------------------------------------------------------------*/ +static line_status iniparser_line( + const char * input_line, + char * section, + char * key, + char * value) +{ + line_status sta ; + char line[ASCIILINESZ+1]; + int len ; + + memset(line, 0, ASCIILINESZ + 1); + len = (int)strlen(strstrip(input_line)); + if (len > ASCIILINESZ) + len = ASCIILINESZ; + strncpy(line, strstrip(input_line), len); + len = (int)strlen(line); + + sta = LINE_UNPROCESSED ; + if (len<1) { + /* Empty line */ + sta = LINE_EMPTY ; + } else if (line[0]=='#' || line[0]==';') { + /* Comment line */ + sta = LINE_COMMENT ; + } else if (line[0]=='[' && line[len-1]==']') { + /* Section name */ + sscanf(line, "[%[^]]", section); + strcpy(section, strstrip(section)); + strcpy(section, strlwc(section)); + sta = LINE_SECTION ; + } else if (sscanf (line, "%[^=] = \"%[^\"]\"", key, value) == 2 + || sscanf (line, "%[^=] = '%[^\']'", key, 
value) == 2 + || sscanf (line, "%[^=] = %[^;#]", key, value) == 2) { + /* Usual key=value, with or without comments */ + strcpy(key, strstrip(key)); + strcpy(key, strlwc(key)); + strcpy(value, strstrip(value)); + /* + * sscanf cannot handle '' or "" as empty values + * this is done here + */ + if (!strcmp(value, "\"\"") || (!strcmp(value, "''"))) { + value[0]=0 ; + } + sta = LINE_VALUE ; + } else if (sscanf(line, "%[^=] = %[;#]", key, value)==2 + || sscanf(line, "%[^=] %[=]", key, value) == 2) { + /* + * Special cases: + * key= + * key=; + * key=# + */ + strcpy(key, strstrip(key)); + strcpy(key, strlwc(key)); + value[0]=0 ; + sta = LINE_VALUE ; + } else { + /* Generate syntax error */ + sta = LINE_ERROR ; + printf("===== > %s ===> %s\n", input_line, line); + } + return sta ; +} + +/*-------------------------------------------------------------------------*/ +/** + @brief Parse an ini file and return an allocated dictionary object + @param ininame Name of the ini file to read. + @return Pointer to newly allocated dictionary + + This is the parser for ini files. This function is called, providing + the name of the file to be read. It returns a dictionary object that + should not be accessed directly, but through accessor functions + instead. + + The returned dictionary must be freed using iniparser_freedict(). 
+ */ +/*--------------------------------------------------------------------------*/ +dictionary * iniparser_load(const char * ininame) +{ + FILE * in ; + + char line [ASCIILINESZ+1] ; + char section [ASCIILINESZ+1] ; + char key [ASCIILINESZ+1] ; + char tmp [ASCIILINESZ+1] ; + char val [ASCIILINESZ+1] ; + + int last=0 ; + int len ; + int lineno=0 ; + int errs=0; + + dictionary * dict ; + + if ((in=fopen(ininame, "r"))==NULL) { + fprintf(stderr, "iniparser: cannot open %s\n", ininame); + return NULL ; + } + + dict = dictionary_new(0) ; + if (!dict) { + fclose(in); + return NULL ; + } + + memset(line, 0, ASCIILINESZ); + memset(section, 0, ASCIILINESZ); + memset(key, 0, ASCIILINESZ); + memset(val, 0, ASCIILINESZ); + last=0 ; + + while (fgets(line+last, ASCIILINESZ-last, in)!=NULL) { + lineno++ ; + len = (int)strlen(line)-1; + if (len==0) + continue; + /* Safety check against buffer overflows */ + if (line[len]!='\n') { + fprintf(stderr, + "iniparser: input line too long in %s (%d)\n", + ininame, + lineno); + dictionary_del(dict); + fclose(in); + return NULL ; + } + /* Get rid of \n and spaces at end of line */ + while ((len>=0) && + ((line[len]=='\n') || (isspace(line[len])))) { + line[len]=0 ; + len-- ; + } + /* Detect multi-line */ + if (line[len]=='\\') { + /* Multi-line value */ + last=len ; + continue ; + } else { + last=0 ; + } + switch (iniparser_line(line, section, key, val)) { + case LINE_EMPTY: + case LINE_COMMENT: + break ; + + case LINE_SECTION: + errs = dictionary_set(dict, section, NULL); + break ; + + case LINE_VALUE: + sprintf(tmp, "%s:%s", section, key); + errs = dictionary_set(dict, tmp, val) ; + break ; + + case LINE_ERROR: + fprintf(stderr, "iniparser: syntax error in %s (%d):\n", + ininame, + lineno); + fprintf(stderr, "-> %s\n", line); + errs++ ; + break; + + default: + break ; + } + memset(line, 0, ASCIILINESZ); + last=0; + if (errs<0) { + fprintf(stderr, "iniparser: memory allocation failure\n"); + break ; + } + } + if (errs) { + 
dictionary_del(dict); + dict = NULL ; + } + fclose(in); + return dict ; +} + +/*-------------------------------------------------------------------------*/ +/** + @brief Free all memory associated to an ini dictionary + @param d Dictionary to free + @return void + + Free all memory associated to an ini dictionary. + It is mandatory to call this function before the dictionary object + gets out of the current context. + */ +/*--------------------------------------------------------------------------*/ +void iniparser_freedict(dictionary * d) +{ + dictionary_del(d); +} + +/* vim: set ts=4 et sw=4 tw=75 */ diff --git a/deps/SZ/sz/src/pastri.c b/deps/SZ/sz/src/pastri.c new file mode 100644 index 0000000000000000000000000000000000000000..7c6908b5f35360351c74bd994e4cf540cf54953c --- /dev/null +++ b/deps/SZ/sz/src/pastri.c @@ -0,0 +1,87 @@ +#include "pastri.h" +#include "pastriD.h" +#include "pastriF.h" + +void SZ_pastriReadParameters(char paramsFilename[512],pastri_params *paramsPtr){ + FILE *paramsF; + paramsF=fopen(paramsFilename,"r"); + + if(paramsF==NULL){ + printf("ERROR: Parameters file cannot be opened.\n"); + printf("Filename: %s\n",paramsFilename); + assert(0); + } + + fscanf(paramsF,"%d %d %d %d %lf %d %d",¶msPtr->bf[0],¶msPtr->bf[1],¶msPtr->bf[2],¶msPtr->bf[3],¶msPtr->originalEb,¶msPtr->dataSize,¶msPtr->numBlocks); + //printf("Params: %d %d %d %d %.3e %d\n",paramsPtr->bf[0],paramsPtr->bf[1],paramsPtr->bf[2],paramsPtr->bf[3],paramsPtr->originalEb,paramsPtr->numBlocks); + fclose(paramsF); +} + +void SZ_pastriPreprocessParameters(pastri_params *p){ + //Preprocess by calculating some pastri_params: + //Calculate sbSize, sbNum, etc.: + p->idxRange[0]=(p->bf[0]+1)*(p->bf[0]+2)/2; + p->idxRange[1]=(p->bf[1]+1)*(p->bf[1]+2)/2; + p->idxRange[2]=(p->bf[2]+1)*(p->bf[2]+2)/2; + p->idxRange[3]=(p->bf[3]+1)*(p->bf[3]+2)/2; + p->sbSize=p->idxRange[2]*p->idxRange[3]; + p->sbNum=p->idxRange[0]*p->idxRange[1]; + p->bSize=p->sbSize*p->sbNum; + p->usedEb=p->originalEb*0.999; 
//This is needed just to eliminate some rounding errors. It has almost no effect on compression rate/ratios. +} + +void SZ_pastriCompressBatch(pastri_params *p,unsigned char *originalBuf, unsigned char** compressedBufP,size_t *compressedBytes){ + (*compressedBufP) = (unsigned char*)calloc(p->numBlocks*p->bSize*p->dataSize,sizeof(char)); + int bytes; //bytes for this block + int i; + size_t bytePos=0; //Current byte pos in the outBuf + + memcpy(*compressedBufP, p, sizeof(pastri_params)); + bytePos+=sizeof(pastri_params); + + for(i=0;inumBlocks;i++){ + if(p->dataSize==8){ + pastri_double_Compress(originalBuf + (i*p->bSize*p->dataSize),p,(*compressedBufP) + bytePos,&bytes); + }else if(p->dataSize==4){ + pastri_float_Compress(originalBuf + (i*p->bSize*p->dataSize),p,(*compressedBufP) + bytePos,&bytes); + } + bytePos+=bytes; + //printf("bytes:%d\n",bytes); + } + *compressedBytes=bytePos; + //printf("totalBytesWritten:%d\n",*compressedBytes); +} + +void SZ_pastriDecompressBatch(unsigned char*compressedBuf, pastri_params *p, unsigned char** decompressedBufP ,size_t *decompressedBytes){ + int bytePos=0; //Current byte pos in the outBuf + memcpy(p, compressedBuf, sizeof(pastri_params)); + bytePos+=sizeof(pastri_params); + + (*decompressedBufP) = (unsigned char*)malloc(p->numBlocks*p->bSize*p->dataSize*sizeof(char)); + int bytes; //bytes for this block + int i; + + for(i=0;inumBlocks;i++){ + if(p->dataSize==8){ + pastri_double_Decompress(compressedBuf + bytePos,p->dataSize,p,(*decompressedBufP) + (i*p->bSize*p->dataSize),&bytes); + }else if(p->dataSize==4){ + pastri_float_Decompress(compressedBuf + bytePos,p->dataSize,p,(*decompressedBufP) + (i*p->bSize*p->dataSize),&bytes); + } + + bytePos += bytes; + //printf("bytes:%d\n",bytes); + } + //printf("totalBytesRead:%d\n",bytePos); + *decompressedBytes=p->numBlocks*p->bSize*p->dataSize; +} + +void SZ_pastriCheckBatch(pastri_params *p,unsigned char*originalBuf,unsigned char*decompressedBuf){ + int i; + for(i=0;inumBlocks;i++){ + 
if(p->dataSize==8){ + pastri_double_Check(originalBuf+(i*p->bSize*p->dataSize),p->dataSize,decompressedBuf+(i*p->bSize*p->dataSize),p); + }else if(p->dataSize==4){ + pastri_float_Check(originalBuf+(i*p->bSize*p->dataSize),p->dataSize,decompressedBuf+(i*p->bSize*p->dataSize),p); + } + } +} diff --git a/deps/SZ/sz/src/rw.c b/deps/SZ/sz/src/rw.c new file mode 100644 index 0000000000000000000000000000000000000000..c023645597dcbe2bbaede608e53d9eb7bbc529b0 --- /dev/null +++ b/deps/SZ/sz/src/rw.c @@ -0,0 +1,1070 @@ +/** + * @file rw.c + * @author Sheng Di + * @date April, 2015 + * @brief io interface for fortrance + * (C) 2015 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. + */ + +#include +#include +#include +#include +#include + +#include "rw.h" +#include "sz.h" + +int checkFileExistance(char* filePath) +{ + if( access( filePath, F_OK ) != -1 ) { + // file exists + return 1; + } else { + // file doesn't exist + return 0; + } +} + +float** create2DArray_float(size_t m, size_t n) +{ + size_t i=0; + float **data = (float**)malloc(sizeof(float*)*m); + for(i=0;i +#include +#include +#include "rw.h" + +void checkfilesizec_(char *srcFilePath, int *len, size_t *filesize) +{ + int i; + int status; + char s[*len+1]; + for(i=0;i<*len;i++) + s[i]=srcFilePath[i]; + s[*len]='\0'; + *filesize = checkFileSize(s, &status); +} + +void readbytefile_(char *srcFilePath, int *len, unsigned char *bytes, size_t *byteLength) +{ + size_t i; + int ierr; + char s[*len+1]; + for(i=0;i<*len;i++) + s[i]=srcFilePath[i]; + s[*len]='\0'; + unsigned char *tmp_bytes = readByteData(s, byteLength, &ierr); + memcpy(bytes, tmp_bytes, *byteLength); + free(tmp_bytes); +} + +void readdoublefile_(char *srcFilePath, int *len, double *data, size_t *nbEle) +{ + size_t i; + int ierr; + char s[*len+1]; + for(i=0;i<*len;i++) + s[i]=srcFilePath[i]; + s[*len]='\0'; + double *tmp_data = readDoubleData(s, nbEle, &ierr); + memcpy(data, tmp_data, *nbEle); + 
free(tmp_data); +} + +void readfloatfile_(char *srcFilePath, int *len, float *data, size_t *nbEle) +{ + size_t i; + int ierr; + char s[*len+1]; + for(i=0;i<*len;i++) + s[i]=srcFilePath[i]; + s[*len]='\0'; + float *tmp_data = readFloatData(s, nbEle, &ierr); + memcpy(data, tmp_data, *nbEle); + free(tmp_data); +} + +void writebytefile_(unsigned char *bytes, size_t *byteLength, char *tgtFilePath, int *len) +{ + size_t i; + int ierr; + char s[*len+1]; + for(i=0;i<*len;i++) + s[i]=tgtFilePath[i]; + s[*len]='\0'; + writeByteData(bytes, *byteLength, s, &ierr); +} + +void writedoublefile_(double *data, size_t *nbEle, char *tgtFilePath, int *len) +{ + size_t i; + int ierr; + char s[*len+1]; + for(i=0;i<*len;i++) + s[i]=tgtFilePath[i]; + s[*len]='\0'; + writeDoubleData(data, *nbEle, s, &ierr); +} + +void writefloatfile_(float *data, size_t *nbEle, char *tgtFilePath, int *len) +{ + size_t i; + int ierr; + char s[*len+1]; + for(i=0;i<*len;i++) + s[i]=tgtFilePath[i]; + s[*len]='\0'; + writeFloatData(data, *nbEle, s, &ierr); +} diff --git a/deps/SZ/sz/src/sz.c b/deps/SZ/sz/src/sz.c new file mode 100644 index 0000000000000000000000000000000000000000..0567a6fc4486bdced2180a8175e5fea292c5ce4e --- /dev/null +++ b/deps/SZ/sz/src/sz.c @@ -0,0 +1,1353 @@ +/** + * @file sz.c + * @author Sheng Di and Dingwen Tao + * @date Aug, 2016 + * @brief SZ_Init, Compression and Decompression functions + * (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. 
+ */ + + +#include +#include +#include +#include +#include "sz.h" +#include "CompressElement.h" +#include "DynamicByteArray.h" +#include "DynamicIntArray.h" +#include "TightDataPointStorageD.h" +#include "TightDataPointStorageF.h" +#include "zlib.h" +#include "rw.h" +#include "Huffman.h" +#include "conf.h" +#include "utility.h" +#include "exafelSZ.h" +//#include "CurveFillingCompressStorage.h" + +int versionNumber[4] = {SZ_VER_MAJOR,SZ_VER_MINOR,SZ_VER_BUILD,SZ_VER_REVISION}; +//int SZ_SIZE_TYPE = 8; + +int dataEndianType = LITTLE_ENDIAN_DATA; //*endian type of the data read from disk +int sysEndianType; //*sysEndianType is actually set automatically. + +//the confparams should be separate between compression and decopmression, in case of mutual-affection when calling compression/decompression alternatively +sz_params *confparams_cpr = NULL; //used for compression +sz_params *confparams_dec = NULL; //used for decompression + +sz_exedata *exe_params = NULL; + +/*following global variables are desgined for time-series based compression*/ +/*sz_varset is not used in the single-snapshot data compression*/ +SZ_VarSet* sz_varset = NULL; +sz_multisteps *multisteps = NULL; +sz_tsc_metadata *sz_tsc = NULL; + +//only for Pastri compressor +#ifdef PASTRI +pastri_params pastri_par; +#endif + +HuffmanTree* SZ_Reset() +{ + return createDefaultHuffmanTree(); +} + +int SZ_Init(const char *configFilePath) +{ + int loadFileResult = SZ_LoadConf(configFilePath); + if(loadFileResult==SZ_NSCS) + return SZ_NSCS; + + exe_params->SZ_SIZE_TYPE = sizeof(size_t); + + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + { + initSZ_TSC(); + } + return SZ_SCES; +} + +int SZ_Init_Params(sz_params *params) +{ + SZ_Init(NULL); + + if(params->losslessCompressor!=GZIP_COMPRESSOR && params->losslessCompressor!=ZSTD_COMPRESSOR) + params->losslessCompressor = ZSTD_COMPRESSOR; + + if(params->max_quant_intervals > 0) + params->maxRangeRadius = params->max_quant_intervals/2; + + memcpy(confparams_cpr, 
params, sizeof(sz_params)); + + if(params->quantization_intervals%2!=0) + { + printf("Error: quantization_intervals must be an even number!\n"); + return SZ_NSCS; + } + + return SZ_SCES; +} + +int computeDimension(size_t r5, size_t r4, size_t r3, size_t r2, size_t r1) +{ + int dimension; + if(r1==0) + { + dimension = 0; + } + else if(r2==0) + { + dimension = 1; + } + else if(r3==0) + { + dimension = 2; + } + else if(r4==0) + { + dimension = 3; + } + else if(r5==0) + { + dimension = 4; + } + else + { + dimension = 5; + } + return dimension; +} + +size_t computeDataLength(size_t r5, size_t r4, size_t r3, size_t r2, size_t r1) +{ + size_t dataLength; + if(r1==0) + { + dataLength = 0; + } + else if(r2==0) + { + dataLength = r1; + } + else if(r3==0) + { + dataLength = r1*r2; + } + else if(r4==0) + { + dataLength = r1*r2*r3; + } + else if(r5==0) + { + dataLength = r1*r2*r3*r4; + } + else + { + dataLength = r1*r2*r3*r4*r5; + } + return dataLength; +} + +/*-------------------------------------------------------------------------*/ +/** + @brief Perform Compression + @param data data to be compressed + @param outSize the size (in bytes) after compression + @param r5,r4,r3,r2,r1 the sizes of each dimension (supporting only 5 dimensions at most in this version. 
+ @return compressed data (in binary stream) or NULL(0) if any errors + + **/ +/*-------------------------------------------------------------------------*/ +unsigned char* SZ_compress_args(int dataType, void *data, size_t *outSize, int errBoundMode, double absErrBound, +double relBoundRatio, double pwrBoundRatio, size_t r5, size_t r4, size_t r3, size_t r2, size_t r1) +{ + if(confparams_cpr == NULL) + SZ_Init(NULL); + else if(exe_params == NULL) + { + exe_params = (sz_exedata*)malloc(sizeof(sz_exedata)); + memset(exe_params, 0, sizeof(sz_exedata)); + } + if(exe_params->intvCapacity == 0) + { + exe_params->intvCapacity = confparams_cpr->maxRangeRadius*2; + exe_params->intvRadius = confparams_cpr->maxRangeRadius; + exe_params->optQuantMode = 1; + } + + confparams_cpr->dataType = dataType; + if(dataType==SZ_FLOAT) + { + unsigned char *newByteData = NULL; + + SZ_compress_args_float(-1, confparams_cpr->withRegression, &newByteData, (float *)data, r5, r4, r3, r2, r1, + outSize, errBoundMode, absErrBound, relBoundRatio, pwrBoundRatio); + + return newByteData; + } + else if(dataType==SZ_DOUBLE) + { + unsigned char *newByteData; + SZ_compress_args_double(-1, confparams_cpr->withRegression, &newByteData, (double *)data, r5, r4, r3, r2, r1, + outSize, errBoundMode, absErrBound, relBoundRatio, pwrBoundRatio); + + return newByteData; + } + else if(dataType==SZ_INT64) + { + unsigned char *newByteData; + SZ_compress_args_int64(&newByteData, data, r5, r4, r3, r2, r1, outSize, errBoundMode, absErrBound, relBoundRatio); + return newByteData; + } + else if(dataType==SZ_INT32) //int type + { + unsigned char *newByteData; + SZ_compress_args_int32(&newByteData, data, r5, r4, r3, r2, r1, outSize, errBoundMode, absErrBound, relBoundRatio); + return newByteData; + } + else if(dataType==SZ_INT16) + { + unsigned char *newByteData; + SZ_compress_args_int16(&newByteData, data, r5, r4, r3, r2, r1, outSize, errBoundMode, absErrBound, relBoundRatio); + return newByteData; + } + else 
if(dataType==SZ_INT8) + { + unsigned char *newByteData; + SZ_compress_args_int8(&newByteData, data, r5, r4, r3, r2, r1, outSize, errBoundMode, absErrBound, relBoundRatio); + return newByteData; + } + else if(dataType==SZ_UINT64) + { + unsigned char *newByteData; + SZ_compress_args_uint64(&newByteData, data, r5, r4, r3, r2, r1, outSize, errBoundMode, absErrBound, relBoundRatio); + return newByteData; + } + else if(dataType==SZ_UINT32) //int type + { + unsigned char *newByteData; + SZ_compress_args_uint32(&newByteData, data, r5, r4, r3, r2, r1, outSize, errBoundMode, absErrBound, relBoundRatio); + return newByteData; + } + else if(dataType==SZ_UINT16) + { + unsigned char *newByteData; + SZ_compress_args_uint16(&newByteData, data, r5, r4, r3, r2, r1, outSize, errBoundMode, absErrBound, relBoundRatio); + return newByteData; + } + else if(dataType==SZ_UINT8) + { + unsigned char *newByteData; + SZ_compress_args_uint8(&newByteData, data, r5, r4, r3, r2, r1, outSize, errBoundMode, absErrBound, relBoundRatio); + return newByteData; + } + else + { + printf("Error: dataType can only be SZ_FLOAT, SZ_DOUBLE, SZ_INT8/16/32/64 or SZ_UINT8/16/32/64.\n"); + return NULL; + } +} + +int SZ_compress_args2(int dataType, void *data, unsigned char* compressed_bytes, size_t *outSize, +int errBoundMode, double absErrBound, double relBoundRatio, double pwrBoundRatio, +size_t r5, size_t r4, size_t r3, size_t r2, size_t r1) +{ + unsigned char* bytes = SZ_compress_args(dataType, data, outSize, errBoundMode, absErrBound, relBoundRatio, pwrBoundRatio, r5, r4, r3, r2, r1); + memcpy(compressed_bytes, bytes, *outSize); + free(bytes); + return SZ_SCES; +} + +int SZ_compress_args3(int dataType, void *data, unsigned char* compressed_bytes, size_t *outSize, int errBoundMode, double absErrBound, double relBoundRatio, +size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, +size_t s5, size_t s4, size_t s3, size_t s2, size_t s1, +size_t e5, size_t e4, size_t e3, size_t e2, size_t e1) +{ + 
confparams_cpr->dataType = dataType; + if(dataType==SZ_FLOAT) + { + SZ_compress_args_float_subblock(compressed_bytes, (float *)data, + r5, r4, r3, r2, r1, + s5, s4, s3, s2, s1, + e5, e4, e3, e2, e1, + outSize, errBoundMode, absErrBound, relBoundRatio); + + return SZ_SCES; + } + else if(dataType==SZ_DOUBLE) + { + SZ_compress_args_double_subblock(compressed_bytes, (double *)data, + r5, r4, r3, r2, r1, + s5, s4, s3, s2, s1, + e5, e4, e3, e2, e1, + outSize, errBoundMode, absErrBound, relBoundRatio); + + return SZ_SCES; + } + else + { + printf("Error (in SZ_compress_args3): dataType can only be SZ_FLOAT or SZ_DOUBLE.\n"); + return SZ_NSCS; + } +} + +unsigned char *SZ_compress(int dataType, void *data, size_t *outSize, size_t r5, size_t r4, size_t r3, size_t r2, size_t r1) +{ + unsigned char *newByteData = SZ_compress_args(dataType, data, outSize, confparams_cpr->errorBoundMode, confparams_cpr->absErrBound, confparams_cpr->relBoundRatio, + confparams_cpr->pw_relBoundRatio, r5, r4, r3, r2, r1); + return newByteData; +} + +////////////////// +/*-------------------------------------------------------------------------*/ +/** + @brief Perform Compression + @param data data to be compressed + @param reservedValue the reserved value + @param outSize the size (in bytes) after compression + @param r5,r4,r3,r2,r1 the sizes of each dimension (supporting only 5 dimensions at most in this version. 
+ @return compressed data (in binary stream) + + **/ +/*-------------------------------------------------------------------------*/ +unsigned char *SZ_compress_rev_args(int dataType, void *data, void *reservedValue, size_t *outSize, int errBoundMode, double absErrBound, double relBoundRatio, +size_t r5, size_t r4, size_t r3, size_t r2, size_t r1) +{ + unsigned char *newByteData; + //TODO + printf("SZ compression with reserved data is TO BE DONE LATER.\n"); + exit(0); + + return newByteData; +} + +int SZ_compress_rev_args2(int dataType, void *data, void *reservedValue, unsigned char* compressed_bytes, size_t *outSize, int errBoundMode, double absErrBound, double relBoundRatio, +size_t r5, size_t r4, size_t r3, size_t r2, size_t r1) +{ + confparams_cpr->dataType = dataType; + unsigned char* bytes = SZ_compress_rev_args(dataType, data, reservedValue, outSize, errBoundMode, absErrBound, relBoundRatio, r5, r4, r3, r2, r1); + memcpy(compressed_bytes, bytes, *outSize); + free(bytes); //free(bytes) is removed , because of dump error at MIRA system (PPC architecture), fixed? 
+ return 0; +} + +unsigned char *SZ_compress_rev(int dataType, void *data, void *reservedValue, size_t *outSize, size_t r5, size_t r4, size_t r3, size_t r2, size_t r1) +{ + unsigned char *newByteData; + //TODO + printf("SZ compression with reserved data is TO BE DONE LATER.\n"); + exit(0); + + return newByteData; +} + +void *SZ_decompress(int dataType, unsigned char *bytes, size_t byteLength, size_t r5, size_t r4, size_t r3, size_t r2, size_t r1) +{ + if(confparams_dec==NULL) + confparams_dec = (sz_params*)malloc(sizeof(sz_params)); + memset(confparams_dec, 0, sizeof(sz_params)); + if(exe_params==NULL) + exe_params = (sz_exedata*)malloc(sizeof(sz_exedata)); + memset(exe_params, 0, sizeof(sz_exedata)); + exe_params->SZ_SIZE_TYPE = 8; + + int x = 1; + char *y = (char*)&x; + if(*y==1) + sysEndianType = LITTLE_ENDIAN_SYSTEM; + else //=0 + sysEndianType = BIG_ENDIAN_SYSTEM; + + if(dataType == SZ_FLOAT) + { + float *newFloatData; + SZ_decompress_args_float(&newFloatData, r5, r4, r3, r2, r1, bytes, byteLength, 0, NULL); + return newFloatData; + } + else if(dataType == SZ_DOUBLE) + { + double *newDoubleData; + SZ_decompress_args_double(&newDoubleData, r5, r4, r3, r2, r1, bytes, byteLength, 0, NULL); + return newDoubleData; + } + else if(dataType == SZ_INT8) + { + int8_t *newInt8Data; + SZ_decompress_args_int8(&newInt8Data, r5, r4, r3, r2, r1, bytes, byteLength); + return newInt8Data; + } + else if(dataType == SZ_INT16) + { + int16_t *newInt16Data; + SZ_decompress_args_int16(&newInt16Data, r5, r4, r3, r2, r1, bytes, byteLength); + return newInt16Data; + } + else if(dataType == SZ_INT32) + { + int32_t *newInt32Data; + SZ_decompress_args_int32(&newInt32Data, r5, r4, r3, r2, r1, bytes, byteLength); + return newInt32Data; + } + else if(dataType == SZ_INT64) + { + int64_t *newInt64Data; + SZ_decompress_args_int64(&newInt64Data, r5, r4, r3, r2, r1, bytes, byteLength); + return newInt64Data; + } + else if(dataType == SZ_UINT8) + { + uint8_t *newUInt8Data; + 
SZ_decompress_args_uint8(&newUInt8Data, r5, r4, r3, r2, r1, bytes, byteLength); + return newUInt8Data; + } + else if(dataType == SZ_UINT16) + { + uint16_t *newUInt16Data; + SZ_decompress_args_uint16(&newUInt16Data, r5, r4, r3, r2, r1, bytes, byteLength); + return newUInt16Data; + } + else if(dataType == SZ_UINT32) + { + uint32_t *newUInt32Data; + SZ_decompress_args_uint32(&newUInt32Data, r5, r4, r3, r2, r1, bytes, byteLength); + return newUInt32Data; + } + else if(dataType == SZ_UINT64) + { + uint64_t *newUInt64Data; + SZ_decompress_args_uint64(&newUInt64Data, r5, r4, r3, r2, r1, bytes, byteLength); + return newUInt64Data; + } + else + { + printf("Error: data type cannot be the types other than SZ_FLOAT or SZ_DOUBLE\n"); + return NULL; + } +} + +/** + * + * + * return number of elements or -1 if any errors + * */ +size_t SZ_decompress_args(int dataType, unsigned char *bytes, size_t byteLength, void* decompressed_array, size_t r5, size_t r4, size_t r3, size_t r2, size_t r1) +{ + //size_t i; + size_t nbEle = computeDataLength(r5,r4,r3,r2,r1); + + if(dataType == SZ_FLOAT) + { + float* data = (float *)SZ_decompress(dataType, bytes, byteLength, r5, r4, r3, r2, r1); + float* data_array = (float *)decompressed_array; + memcpy(data_array, data, nbEle*sizeof(float)); + //for(i=0;iszMode = (sameRByte & 0x06)>>1; + isLossless = (sameRByte & 0x10)>>4; + + int isRegressionBased = (sameRByte >> 7) & 0x01; + + if(exe_params==NULL) + { + exe_params = (sz_exedata *)malloc(sizeof(struct sz_exedata)); + memset(exe_params, 0, sizeof(struct sz_exedata)); + } + exe_params->SZ_SIZE_TYPE = ((sameRByte & 0x40)>>6)==1?8:4; + + if(confparams_dec==NULL) + { + confparams_dec = (sz_params*)malloc(sizeof(sz_params)); + memset(confparams_dec, 0, sizeof(sz_params)); + } + + convertBytesToSZParams(&(bytes[index]), confparams_dec); + /*sz_params* params = convertBytesToSZParams(&(bytes[index])); + if(confparams_dec!=NULL) + free(confparams_dec); + confparams_dec = params;*/ + 
if(confparams_dec->dataType==SZ_FLOAT) + index += MetaDataByteLength; + else if(confparams_dec->dataType==SZ_DOUBLE) + index += MetaDataByteLength_double; + + if(confparams_dec->dataType!=SZ_FLOAT && confparams_dec->dataType!= SZ_DOUBLE) //if this type is an Int type + index++; //jump to the dataLength info byte address + dataSeriesLength = bytesToSize(&(bytes[index]));// 4 or 8 + index += exe_params->SZ_SIZE_TYPE; + //index += 4; //max_quant_intervals + + sz_metadata* metadata = (sz_metadata*)malloc(sizeof(struct sz_metadata)); + + metadata->versionNumber[0] = versions[0]; + metadata->versionNumber[1] = versions[1]; + metadata->versionNumber[2] = versions[2]; + metadata->isConstant = isConstant; + metadata->isLossless = isLossless; + metadata->sizeType = exe_params->SZ_SIZE_TYPE; + metadata->dataSeriesLength = dataSeriesLength; + + metadata->conf_params = confparams_dec; + + int defactoNBBins = 0; //real # bins + if(isConstant==0 && isLossless==0) + { + if(isRegressionBased==1) + { + unsigned char* raBytes = &(bytes[index]); + defactoNBBins = bytesToInt_bigEndian(raBytes + sizeof(int) + sizeof(double)); + } + else + { + int radExpoL = 0, segmentL = 0, pwrErrBoundBytesL = 0; + if(metadata->conf_params->errorBoundMode >= PW_REL) + { + radExpoL = 1; + segmentL = exe_params->SZ_SIZE_TYPE; + pwrErrBoundBytesL = 4; + } + + int mdbl = confparams_dec->dataType==SZ_FLOAT?MetaDataByteLength:MetaDataByteLength_double; + int offset_typearray = 3 + 1 + mdbl + exe_params->SZ_SIZE_TYPE + 4 + radExpoL + segmentL + pwrErrBoundBytesL + 4 + (4 + confparams_dec->dataType*4) + 1 + 8 + + exe_params->SZ_SIZE_TYPE + exe_params->SZ_SIZE_TYPE + exe_params->SZ_SIZE_TYPE + 4; + defactoNBBins = bytesToInt_bigEndian(bytes+offset_typearray); + } + + } + + metadata->defactoNBBins = defactoNBBins; + return metadata; +} + +void SZ_printMetadata(sz_metadata* metadata) +{ + printf("=================SZ Compression Meta Data=================\n"); + printf("Version: \t %d.%d.%d\n", 
metadata->versionNumber[0], metadata->versionNumber[1], metadata->versionNumber[2]); + printf("Constant data?: \t %s\n", metadata->isConstant==1?"YES":"NO"); + printf("Lossless?: \t %s\n", metadata->isLossless==1?"YES":"NO"); + printf("Size type (size of # elements): \t %d bytes\n", metadata->sizeType); + printf("Num of elements: \t %zu\n", metadata->dataSeriesLength); + + sz_params* params = metadata->conf_params; + + if(params->sol_ID == SZ) + printf("compressor Name: \t SZ\n"); + else if(params->sol_ID == SZ_Transpose) + printf("compressor Name: \t SZ_Transpose\n"); + else + printf("compressor Name: \t Other compressor\n"); + switch(params->dataType) + { + case SZ_FLOAT: + printf("Data type: \t FLOAT\n"); + printf("min value of raw data: \t %f\n", params->fmin); + printf("max value of raw data: \t %f\n", params->fmax); + break; + case SZ_DOUBLE: + printf("Data type: \t DOUBLE\n"); + printf("min value of raw data: \t %f\n", params->dmin); + printf("max value of raw data: \t %f\n", params->dmax); + break; + case SZ_INT8: + printf("Data type: \t INT8\n"); + break; + case SZ_INT16: + printf("Data type: \t INT16\n"); + break; + case SZ_INT32: + printf("Data type: \t INT32\n"); + break; + case SZ_INT64: + printf("Data type: \t INT64\n"); + break; + case SZ_UINT8: + printf("Data type: \t UINT8\n"); + break; + case SZ_UINT16: + printf("Data type: \t UINT16\n"); + break; + case SZ_UINT32: + printf("Data type: \t UINT32\n"); + break; + case SZ_UINT64: + printf("Data type: \t UINT64\n"); + break; + } + + if(exe_params->optQuantMode==1) + { + printf("quantization_intervals: \t 0\n"); + printf("max_quant_intervals: \t %d\n", params->max_quant_intervals); + printf("actual used # intervals: \t %d\n", metadata->defactoNBBins); + } + else + { + printf("quantization_intervals: \t %d\n", params->quantization_intervals); + printf("max_quant_intervals: \t - %d\n", params->max_quant_intervals); + } + + printf("dataEndianType (prior raw data):\t %s\n", 
dataEndianType==BIG_ENDIAN_DATA?"BIG_ENDIAN":"LITTLE_ENDIAN"); + printf("sysEndianType (at compression): \t %s\n", sysEndianType==1?"BIG_ENDIAN":"LITTLE_ENDIAN"); + printf("sampleDistance: \t %d\n", params->sampleDistance); + printf("predThreshold: \t %f\n", params->predThreshold); + switch(params->szMode) + { + case SZ_BEST_SPEED: + printf("szMode: \t SZ_BEST_SPEED (without Gzip)\n"); + break; + case SZ_BEST_COMPRESSION: + printf("szMode: \t SZ_BEST_COMPRESSION (with Zstd or Gzip)\n"); + break; + } + switch(params->gzipMode) + { + case Z_BEST_SPEED: + printf("gzipMode: \t Z_BEST_SPEED\n"); + break; + case Z_DEFAULT_COMPRESSION: + printf("gzipMode: \t Z_DEFAULT_COMPRESSION\n"); /* label now matches the case it reports */ + break; + case Z_BEST_COMPRESSION: + printf("gzipMode: \t Z_BEST_COMPRESSION\n"); + break; + } + + switch(params->errorBoundMode) + { + case ABS: + printf("errBoundMode: \t ABS\n"); + printf("absErrBound: \t %f\n", params->absErrBound); + break; + case REL: + printf("errBoundMode: \t REL (based on value_range extent)\n"); + printf("relBoundRatio: \t %f\n", params->relBoundRatio); + break; + case ABS_AND_REL: + printf("errBoundMode: \t ABS_AND_REL\n"); + printf("absErrBound: \t %f\n", params->absErrBound); + printf("relBoundRatio: \t %f\n", params->relBoundRatio); + break; + case ABS_OR_REL: + printf("errBoundMode: \t ABS_OR_REL\n"); + printf("absErrBound: \t %f\n", params->absErrBound); + printf("relBoundRatio: \t %f\n", params->relBoundRatio); + break; + case PSNR: + printf("errBoundMode: \t PSNR\n"); + printf("psnr: \t %f\n", params->psnr); + break; + case PW_REL: + printf("errBoundMode: \t PW_REL\n"); + break; + case ABS_AND_PW_REL: + printf("errBoundMode: \t ABS_AND_PW_REL\n"); + printf("absErrBound: \t %f\n", params->absErrBound); + break; + case ABS_OR_PW_REL: + printf("errBoundMode: \t ABS_OR_PW_REL\n"); + printf("absErrBound: \t %f\n", params->absErrBound); + break; + case REL_AND_PW_REL: + printf("errBoundMode: \t REL_AND_PW_REL\n"); + printf("range_relBoundRatio: \t %f\n", 
params->relBoundRatio); + break; + case REL_OR_PW_REL: + printf("errBoundMode: \t REL_OR_PW_REL\n"); + printf("range_relBoundRatio: \t %f\n", params->relBoundRatio); + break; + } + + if(params->errorBoundMode>=PW_REL && params->errorBoundMode<=REL_OR_PW_REL) + { + printf("pw_relBoundRatio: \t %f\n", params->pw_relBoundRatio); + //printf("segment_size: \t %d\n", params->segment_size); + switch(params->pwr_type) + { + case SZ_PWR_MIN_TYPE: + printf("pwrType: \t SZ_PWR_MIN_TYPE\n"); + break; + case SZ_PWR_AVG_TYPE: + printf("pwrType: \t SZ_PWR_AVG_TYPE\n"); + break; + case SZ_PWR_MAX_TYPE: + printf("pwrType: \t SZ_PWR_MAX_TYPE\n"); + break; + } + } +} + +/*-----------------------------------batch data compression--------------------------------------*/ + +void filloutDimArray(size_t* dim, size_t r5, size_t r4, size_t r3, size_t r2, size_t r1) +{ + if(r2==0) + dim[0] = r1; + else if(r3==0) + { + dim[0] = r2; + dim[1] = r1; + } + else if(r4==0) + { + dim[0] = r3; + dim[1] = r2; + dim[2] = r1; + } + else if(r5==0) + { + dim[0] = r4; + dim[1] = r3; + dim[2] = r2; + dim[3] = r1; + } + else + { + dim[0] = r5; + dim[1] = r4; + dim[2] = r3; + dim[3] = r2; + dim[4] = r1; + } +} + +size_t compute_total_batch_size() +{ + size_t eleNum = 0, totalSize = 0; + SZ_Variable* p = sz_varset->header; + while(p->next!=NULL) + { + eleNum = computeDataLength(p->next->r5, p->next->r4, p->next->r3, p->next->r2, p->next->r1); + if(p->next->dataType==SZ_FLOAT) + totalSize += (eleNum*4); + else + totalSize += (eleNum*8); + p=p->next; + } + return totalSize; +} + +void SZ_registerVar(int var_id, char* varName, int dataType, void* data, + int errBoundMode, double absErrBound, double relBoundRatio, double pwRelBoundRatio, + size_t r5, size_t r4, size_t r3, size_t r2, size_t r1) +{ + if(sz_tsc==NULL) + initSZ_TSC(); + + //char str[256]; + SZ_batchAddVar(var_id, varName, dataType, data, + errBoundMode, absErrBound, relBoundRatio, pwRelBoundRatio, r5, r4, r3, r2, r1); + //sprintf(str, "%d: %s : 
%zuX%zuX%zuX%zu%zu : %d : %f : %f : %f\n", sz_varset->count - 1, varName, r5, r4, r3, r2, r1, errBoundMode, absErrBound, relBoundRatio, pwRelBoundRatio); + //fputs(str, sz_tsc->metadata_file); +} + +int SZ_deregisterVar_ID(int var_id) +{ + int state = SZ_batchDelVar_ID(var_id); + return state; +} + +int SZ_deregisterVar(char* varName) +{ + int state = SZ_batchDelVar(varName); + return state; +} + +#ifdef HAVE_TIMECMPR +/** + * process multiple variables + * */ +int SZ_compress_ts_select_var(int cmprType, unsigned char* var_ids, unsigned char var_count, unsigned char** newByteData, size_t *outSize) +{ + confparams_cpr->szMode = SZ_TEMPORAL_COMPRESSION; + confparams_cpr->predictionMode = SZ_PREVIOUS_VALUE_ESTIMATE; + + SZ_VarSet* vset = sz_varset; + int i = 0, j = 0, totalSize = 0; + + SZ_Variable* vp[256]; + + SZ_Variable* v = vset->header->next; + for(i = 0;icount;i++) + { + int found = checkVarID(v->var_id, var_ids, var_count); + if (found) + { + multisteps = v->multisteps; + if(v->dataType==SZ_FLOAT) + { + SZ_compress_args_float(cmprType, confparams_cpr->withRegression, &(v->compressedBytes), (float*)v->data, v->r5, v->r4, v->r3, v->r2, v->r1, &(v->compressedSize), v->errBoundMode, v->absErrBound, v->relBoundRatio, v->pwRelBoundRatio); + } + else if(v->dataType==SZ_DOUBLE) + { + SZ_compress_args_double(cmprType, confparams_cpr->withRegression, &(v->compressedBytes), (double*)v->data, v->r5, v->r4, v->r3, v->r2, v->r1, &(v->compressedSize), v->errBoundMode, v->absErrBound, v->relBoundRatio, v->pwRelBoundRatio); + } + + totalSize += v->compressedSize; + v->compressType = multisteps->compressionType; + vp[j] = v; + j++; + } + v = v->next; + } + + *outSize = sizeof(int) + sizeof(unsigned short) + totalSize + var_count*(3*sizeof(unsigned char)+sizeof(size_t)); + *newByteData = (unsigned char*)malloc(*outSize); + unsigned char* p = *newByteData; + + intToBytes_bigEndian(p, sz_tsc->currentStep); + p+=4; + shortToBytes(p, var_count); + p+=2; + + for(i=0;ivar_id; //1 byte 
+ p++; + *p = (unsigned char)v->compressType; //1 byte + p++; + *p = (unsigned char)v->dataType; //1 byte + p++; + sizeToBytes(p, v->compressedSize); //size_t + p += sizeof(size_t); + memcpy(p, v->compressedBytes, v->compressedSize); //outSize_[i] + p += v->compressedSize; + } + + sz_tsc->currentStep ++; + + return SZ_SCES; +} + +/** + * process all variables + * */ +int SZ_compress_ts(int cmprType, unsigned char** newByteData, size_t *outSize) +{ + confparams_cpr->szMode = SZ_TEMPORAL_COMPRESSION; + confparams_cpr->predictionMode = SZ_PREVIOUS_VALUE_ESTIMATE; + + SZ_VarSet* vset = sz_varset; + + //char *metadata_str = (char*)malloc(vset->count*256); + //memset(metadata_str, 0, vset->count*256); + //sprintf(metadata_str, "step %d", sz_tsc->currentStep); + + int i = 0, totalSize = 0; + + SZ_Variable* v = vset->header->next; + for(i=0;icount;i++) + { + multisteps = v->multisteps; //assign the v's multisteps to the global variable 'multisteps', which will be used in the following compression. 
+ + if(v->dataType==SZ_FLOAT) + { + SZ_compress_args_float(cmprType, confparams_cpr->withRegression, &(v->compressedBytes), (float*)v->data, v->r5, v->r4, v->r3, v->r2, v->r1, &(v->compressedSize), v->errBoundMode, v->absErrBound, v->relBoundRatio, v->pwRelBoundRatio); + } + else if(v->dataType==SZ_DOUBLE) + { + SZ_compress_args_double(cmprType, confparams_cpr->withRegression, &(v->compressedBytes), (double*)v->data, v->r5, v->r4, v->r3, v->r2, v->r1, &(v->compressedSize), v->errBoundMode, v->absErrBound, v->relBoundRatio, v->pwRelBoundRatio); + } + //sprintf(metadata_str, "%s:%d,%d,%zu", metadata_str, i, multisteps->lastSnapshotStep, outSize_[i]); + + totalSize += v->compressedSize; + v->compressType = multisteps->compressionType; + v = v->next; + } + + //sprintf(metadata_str, "%s\n", metadata_str); + //fputs(metadata_str, sz_tsc->metadata_file); + //free(metadata_str); + + //sizeof(int)==current time step; 2*sizeof(char)+sizeof(size_t)=={compressionType + datatype + compression_data_size}; + //sizeof(char)==# variables + *outSize = sizeof(int) + sizeof(unsigned short) + totalSize + vset->count*(3*sizeof(unsigned char)+sizeof(size_t)); + *newByteData = (unsigned char*)malloc(*outSize); + unsigned char* p = *newByteData; + + intToBytes_bigEndian(p, sz_tsc->currentStep); + p+=4; + shortToBytes(p, vset->count); + p+=2; + + v = vset->header->next; + + for(i=0;icount;i++) + { + *p = v->var_id; //1 byte + p++; + *p = (unsigned char)v->compressType; //1 byte + p++; + *p = (unsigned char)v->dataType; //1 byte + p++; + sizeToBytes(p, v->compressedSize); //size_t + p += sizeof(size_t); + + memcpy(p, v->compressedBytes, v->compressedSize); //outSize_[i] + p += v->compressedSize; + v = v->next; + } + + sz_tsc->currentStep ++; + //free(outSize_); + + return SZ_SCES; +} + +void SZ_decompress_ts(unsigned char *bytes, size_t bytesLength) +{ + if(confparams_dec==NULL) + confparams_dec = (sz_params*)malloc(sizeof(sz_params)); + memset(confparams_dec, 0, sizeof(sz_params)); + 
confparams_dec->szMode = SZ_TEMPORAL_COMPRESSION; + confparams_dec->predictionMode = SZ_PREVIOUS_VALUE_ESTIMATE; + + if(exe_params==NULL) + exe_params = (sz_exedata*)malloc(sizeof(sz_exedata)); + memset(exe_params, 0, sizeof(sz_exedata)); + + int x = 1; + char *y = (char*)&x; + if(*y==1) + sysEndianType = LITTLE_ENDIAN_SYSTEM; + else //=0 + sysEndianType = BIG_ENDIAN_SYSTEM; + + int i = 0; + size_t r5 = 0, r4 = 0, r3 = 0, r2 = 0, r1 = 0; + unsigned char* q = bytes; + sz_tsc->currentStep = bytesToInt_bigEndian(q); + q += 4; + unsigned short nbVars = (unsigned short)bytesToShort(q); + q += 2; + + float *newFloatData = NULL; + double *newDoubleData = NULL; + + for(i=0;imultisteps; + multisteps->compressionType = *(q++); + unsigned char dataType = *(q++); + size_t cmpSize = bytesToSize(q); + q += sizeof(size_t); + + if(p==NULL) + q += cmpSize; + else + { + sz_multisteps* multisteps = p->multisteps; + r5 = p->r5; + r4 = p->r4; + r3 = p->r3; + r2 = p->r2; + r1 = p->r1; + size_t dataLen = computeDataLength(r5, r4, r3, r2, r1); + + unsigned char* cmpBytes = q; + switch(dataType) + { + case SZ_FLOAT: + SZ_decompress_args_float(&newFloatData, r5, r4, r3, r2, r1, cmpBytes, cmpSize, multisteps->compressionType, multisteps->hist_data); + memcpy(p->data, newFloatData, dataLen*sizeof(float)); + free(newFloatData); + break; + case SZ_DOUBLE: + SZ_decompress_args_double(&newDoubleData, r5, r4, r3, r2, r1, cmpBytes, cmpSize, multisteps->compressionType, multisteps->hist_data); + memcpy(p->data, newDoubleData, dataLen*sizeof(double)); + free(newDoubleData); + break; + default: + printf("Error: data type cannot be the types other than SZ_FLOAT or SZ_DOUBLE\n"); + return; + } + + q += cmpSize; + } + } +} + +void SZ_decompress_ts_select_var(unsigned char* var_ids, unsigned char var_count, unsigned char *bytes, size_t bytesLength) +{ + if(confparams_dec==NULL) + confparams_dec = (sz_params*)malloc(sizeof(sz_params)); + memset(confparams_dec, 0, sizeof(sz_params)); + 
confparams_dec->szMode = SZ_TEMPORAL_COMPRESSION; + confparams_dec->predictionMode = SZ_PREVIOUS_VALUE_ESTIMATE; + + if(exe_params==NULL) + exe_params = (sz_exedata*)malloc(sizeof(sz_exedata)); + memset(exe_params, 0, sizeof(sz_exedata)); + + int x = 1; + char *y = (char*)&x; + if(*y==1) + sysEndianType = LITTLE_ENDIAN_SYSTEM; + else //=0 + sysEndianType = BIG_ENDIAN_SYSTEM; + + int i = 0; + size_t r5 = 0, r4 = 0, r3 = 0, r2 = 0, r1 = 0; + unsigned char* q = bytes; + sz_tsc->currentStep = bytesToInt_bigEndian(q); + q += 4; + unsigned short nbVars = (unsigned short)bytesToShort(q); + q += 2; + + float *newFloatData = NULL; + double *newDoubleData = NULL; + + for(i=0;imultisteps; + multisteps->compressionType = *(q++); + unsigned char dataType = *(q++); + size_t cmpSize = bytesToSize(q); + q += sizeof(size_t); + + if(p==NULL || selected == 0) //p==NULL means the variable was not registered during compression ; selected==0 means that the variable is not selected + q += cmpSize; + else // p!=NULL && selected == 1 + { + sz_multisteps* multisteps = p->multisteps; + r5 = p->r5; + r4 = p->r4; + r3 = p->r3; + r2 = p->r2; + r1 = p->r1; + size_t dataLen = computeDataLength(r5, r4, r3, r2, r1); + + unsigned char* cmpBytes = q; + switch(dataType) + { + case SZ_FLOAT: + SZ_decompress_args_float(&newFloatData, r5, r4, r3, r2, r1, cmpBytes, cmpSize, multisteps->compressionType, multisteps->hist_data); + memcpy(p->data, newFloatData, dataLen*sizeof(float)); + free(newFloatData); + break; + case SZ_DOUBLE: + SZ_decompress_args_double(&newDoubleData, r5, r4, r3, r2, r1, cmpBytes, cmpSize, multisteps->compressionType, multisteps->hist_data); + memcpy(p->data, newDoubleData, dataLen*sizeof(double)); + free(newDoubleData); + break; + default: + printf("Error: data type cannot be the types other than SZ_FLOAT or SZ_DOUBLE\n"); + return; + } + + q += cmpSize; + } + } +} +#endif + + +void SZ_Finalize() +{ +#ifdef HAVE_TIMECMPR + if(sz_varset!=NULL) + SZ_freeVarSet(SZ_MAINTAIN_VAR_DATA); 
+#endif + + if(confparams_dec!=NULL) + { + free(confparams_dec); + confparams_dec = NULL; + } + if(confparams_cpr!=NULL) + { + free(confparams_cpr); + confparams_cpr = NULL; + } + if(exe_params!=NULL) + { + free(exe_params); + exe_params = NULL; + } + +//#ifdef HAVE_TIMECMPR +// if(sz_tsc!=NULL && sz_tsc->metadata_file!=NULL) +// fclose(sz_tsc->metadata_file); +//#endif +} + + +/** + * + * Inits the compressor for SZ_compress_customize + * + * with SZ_Init(NULL) if not previously initialized and no params passed + * with SZ_InitParam(userPara) otherwise if params are passed + * and doesn't not initialize otherwise + * + * @param sz_params* userPara : the user configuration or null + * @param sz_params* confparams : the current configuration + */ +static void sz_maybe_init_with_user_params(struct sz_params* userPara, struct sz_params* current_params) { + if(userPara==NULL && current_params == NULL) + SZ_Init(NULL); + else if(userPara != NULL) + SZ_Init_Params((sz_params*)userPara); +} + + +/** + * + * The interface for the user-customized compression method + * + * @param char* comprName : the name of the specific compression approach + * @param void* userPara : the pointer of the user-customized data stracture containing the cusotmized compressors' requried input parameters + * @param int dataType : data type (SZ_FLOAT, SZ_DOUBLE, SZ_INT8, SZ_UINT8, SZ_INT16, SZ_UINT16, ....) 
+ * @param void* data : input dataset + * @param size_t r5 : the size of dimension 5 + * @param size_t r4 : the size of dimension 4 + * @param size_t r3 : the size of dimension 3 + * @param size_t r2 : the size of dimension 2 + * @param size_t r1 : the size of dimension 1 + * @param size_t* outSize : (output) the number of bytes after compression + * @param int *status : the execution status of the compression operation (success: SZ_SCES or fail: SZ_NSCS) + * + * */ +unsigned char* SZ_compress_customize(const char* cmprName, void* userPara, int dataType, void* data, size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize, int *status) +{ + unsigned char* result = NULL; + if(strcmp(cmprName, "SZ2.0")==0 || strcmp(cmprName, "SZ2.1")==0 || strcmp(cmprName, "SZ")==0) + { + sz_maybe_init_with_user_params(userPara, confparams_cpr); + result = SZ_compress(dataType, data, outSize, r5, r4, r3, r2, r1); + *status = SZ_SCES; + } + else if(strcmp(cmprName, "SZ1.4")==0) + { + sz_maybe_init_with_user_params(userPara, confparams_cpr); + confparams_cpr->withRegression = SZ_NO_REGRESSION; + + result = SZ_compress(dataType, data, outSize, r5, r4, r3, r2, r1); + *status = SZ_SCES; + } + else if(strcmp(cmprName, "SZ_Transpose")==0) + { + void* transData = transposeData(data, dataType, r5, r4, r3, r2, r1); /* NOTE(review): transData is not released in this function - confirm who owns the buffer returned by transposeData */ + sz_maybe_init_with_user_params(userPara, confparams_cpr); + size_t n = computeDataLength(r5, r4, r3, r2, r1); + result = SZ_compress(dataType, transData, outSize, 0, 0, 0, 0, n); /* the transposed data is compressed as one flat array of n elements */ + *status = SZ_SCES; /* previously left unset in this branch, so callers read an indeterminate status */ + } + else if(strcmp(cmprName, "ExaFEL")==0){ + assert(dataType==SZ_FLOAT); + assert(r5==0); + result = exafelSZ_Compress(userPara,data, r4, r3, r2, r1,outSize); + *status = SZ_SCES; + } + else + { + *status = SZ_NSCS; /* unknown compressor name: result stays NULL */ + } + return result; +} + +unsigned char* SZ_compress_customize_threadsafe(const char* cmprName, void* userPara, int dataType, void* data, size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize, int *status) +{ + unsigned char* result = NULL; + if(strcmp(cmprName, 
"SZ2.0")==0 || strcmp(cmprName, "SZ2.1")==0 || strcmp(cmprName, "SZ")==0) + { + SZ_Init(NULL); + struct sz_params* para = (struct sz_params*)userPara; + + if(dataType==SZ_FLOAT) + { + SZ_compress_args_float(-1, SZ_WITH_LINEAR_REGRESSION, &result, (float *)data, r5, r4, r3, r2, r1, + outSize, para->errorBoundMode, para->absErrBound, para->relBoundRatio, para->pw_relBoundRatio); + } + else if(dataType==SZ_DOUBLE) + { + SZ_compress_args_double(-1, SZ_WITH_LINEAR_REGRESSION, &result, (double *)data, r5, r4, r3, r2, r1, + outSize, para->errorBoundMode, para->absErrBound, para->relBoundRatio, para->pw_relBoundRatio); + } + + *status = SZ_SCES; + return result; + } + else if(strcmp(cmprName, "SZ1.4")==0) + { + SZ_Init(NULL); + struct sz_params* para = (struct sz_params*)userPara; + + if(dataType==SZ_FLOAT) + { + SZ_compress_args_float(-1, SZ_NO_REGRESSION, &result, (float *)data, r5, r4, r3, r2, r1, + outSize, para->errorBoundMode, para->absErrBound, para->relBoundRatio, para->pw_relBoundRatio); + } + else if(dataType==SZ_DOUBLE) + { + SZ_compress_args_double(-1, SZ_NO_REGRESSION, &result, (double *)data, r5, r4, r3, r2, r1, + outSize, para->errorBoundMode, para->absErrBound, para->relBoundRatio, para->pw_relBoundRatio); + } + + *status = SZ_SCES; + return result; + } + else if(strcmp(cmprName, "SZ_Transpose")==0) + { + void* transData = transposeData(data, dataType, r5, r4, r3, r2, r1); + struct sz_params* para = (struct sz_params*)userPara; + + size_t n = computeDataLength(r5, r4, r3, r2, r1); + + result = SZ_compress_args(dataType, transData, outSize, para->errorBoundMode, para->absErrBound, para->relBoundRatio, para->pw_relBoundRatio, 0, 0, 0, 0, n); + + *status = SZ_SCES; + } + else if(strcmp(cmprName, "ExaFEL")==0){ //not sure if this part is thread safe! 
+ assert(dataType==SZ_FLOAT); + assert(r5==0); + result = exafelSZ_Compress(userPara,data, r4, r3, r2, r1,outSize); + *status = SZ_SCES; + } + else + { + *status = SZ_NSCS; + } + return result; +} + + +/** + * + * The interface for the user-customized decompression method + * + * @param char* comprName : the name of the specific compression approach + * @param void* userPara : the pointer of the user-customized data stracture containing the cusotmized compressors' requried input parameters + * @param int dataType : data type (SZ_FLOAT, SZ_DOUBLE, SZ_INT8, SZ_UINT8, SZ_INT16, SZ_UINT16, ....) + * @param unsigned char* bytes : input bytes (the compressed data) + * @param size_t r5 : the size of dimension 5 + * @param size_t r4 : the size of dimension 4 + * @param size_t r3 : the size of dimension 3 + * @param size_t r2 : the size of dimension 2 + * @param size_t r1 : the size of dimension 1 + * @param int *status : the execution status of the compression operation (success: SZ_SCES or fail: SZ_NSCS) + * + * */ +void* SZ_decompress_customize(const char* cmprName, void* userPara, int dataType, unsigned char* bytes, size_t byteLength, size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, int *status) +{ + void* result = NULL; + if(strcmp(cmprName, "SZ2.0")==0 || strcmp(cmprName, "SZ")==0 || strcmp(cmprName, "SZ1.4")==0) + { + result = SZ_decompress(dataType, bytes, byteLength, r5, r4, r3, r2, r1); + * status = SZ_SCES; + } + else if(strcmp(cmprName, "SZ_Transpose")==0) + { + size_t n = computeDataLength(r5, r4, r3, r2, r1); + void* tmpData = SZ_decompress(dataType, bytes, byteLength, 0, 0, 0, 0, n); + result = detransposeData(tmpData, dataType, r5, r4, r3, r2, r1); + } + else if(strcmp(cmprName, "ExaFEL")==0){ + assert(dataType==SZ_FLOAT); + assert(r5==0); + result = exafelSZ_Decompress(userPara,bytes, r4, r3, r2, r1,byteLength); + *status = SZ_SCES; + } + else + { + *status = SZ_NSCS; + } + return result; +} + + +void* SZ_decompress_customize_threadsafe(const char* 
cmprName, void* userPara, int dataType, unsigned char* bytes, size_t byteLength, size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, int *status) +{ + return SZ_decompress_customize(cmprName, userPara, dataType, bytes, byteLength, r5, r4, r3, r2, r1, status); +} diff --git a/deps/SZ/sz/src/sz_double.c b/deps/SZ/sz/src/sz_double.c new file mode 100644 index 0000000000000000000000000000000000000000..483f6fb7eceeb83371c18a0c5bec94820921638d --- /dev/null +++ b/deps/SZ/sz/src/sz_double.c @@ -0,0 +1,6836 @@ +/** + * @file sz_double.c + * @author Sheng Di, Dingwen Tao, Xin Liang, Xiangyu Zou, Tao Lu, Wen Xia, Xuan Wang, Weizhe Zhang + * @date Aug, 2016 + * @brief SZ_Init, Compression and Decompression functions + * (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. + */ + +#include +#include +#include +#include +#include +#include +#include "sz.h" +#include "CompressElement.h" +#include "DynamicByteArray.h" +#include "DynamicIntArray.h" +#include "TightDataPointStorageD.h" +#include "sz_double.h" +#include "sz_double_pwr.h" +#include "szd_double.h" +#include "szd_double_pwr.h" +#include "zlib.h" +#include "rw.h" +#include "sz_double_ts.h" +#include "utility.h" +#include "CacheTable.h" +#include "MultiLevelCacheTableWideInterval.h" +#include "sz_stats.h" + +unsigned char* SZ_skip_compress_double(double* data, size_t dataLength, size_t* outSize) +{ + *outSize = dataLength*sizeof(double); + unsigned char* out = (unsigned char*)malloc(dataLength*sizeof(double)); + memcpy(out, data, dataLength*sizeof(double)); + return out; +} + +inline void computeReqLength_double(double realPrecision, short radExpo, int* reqLength, double* medianValue) +{ + short reqExpo = getPrecisionReqLength_double(realPrecision); + *reqLength = 12+radExpo - reqExpo; //radExpo-reqExpo == reqMantiLength + if(*reqLength<12) + *reqLength = 12; + if(*reqLength>64) + { + *reqLength = 64; + *medianValue = 0; + } +} + +inline short 
computeReqLength_double_MSST19(double realPrecision) +{ + short reqExpo = getPrecisionReqLength_double(realPrecision); + return 12-reqExpo; +} + +unsigned int optimize_intervals_double_1D(double *oriData, size_t dataLength, double realPrecision) +{ + size_t i = 0, radiusIndex; + double pred_value = 0, pred_err; + size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t)); + memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t)); + size_t totalSampleSize = dataLength/confparams_cpr->sampleDistance; + for(i=2;isampleDistance==0) + { + //pred_value = 2*oriData[i-1] - oriData[i-2]; + pred_value = oriData[i-1]; + pred_err = fabs(pred_value - oriData[i]); + radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2); + if(radiusIndex>=confparams_cpr->maxRangeRadius) + radiusIndex = confparams_cpr->maxRangeRadius - 1; + intervals[radiusIndex]++; + } + } + //compute the appropriate number + size_t targetCount = totalSampleSize*confparams_cpr->predThreshold; + size_t sum = 0; + for(i=0;imaxRangeRadius;i++) + { + sum += intervals[i]; + if(sum>targetCount) + break; + } + + if(i>=confparams_cpr->maxRangeRadius) + i = confparams_cpr->maxRangeRadius-1; + unsigned int accIntervals = 2*(i+1); + unsigned int powerOf2 = roundUpToPowerOf2(accIntervals); + + if(powerOf2<32) + powerOf2 = 32; + + free(intervals); + //printf("accIntervals=%d, powerOf2=%d\n", accIntervals, powerOf2); + return powerOf2; +} + +unsigned int optimize_intervals_double_2D(double *oriData, size_t r1, size_t r2, double realPrecision) +{ + size_t i,j, index; + size_t radiusIndex; + double pred_value = 0, pred_err; + size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t)); + memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t)); + size_t totalSampleSize = (r1-1)*(r2-1)/confparams_cpr->sampleDistance; + for(i=1;isampleDistance==0) + { + index = i*r2+j; + pred_value = oriData[index-1] + oriData[index-r2] - oriData[index-r2-1]; + pred_err = 
fabs(pred_value - oriData[index]); + radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2); + if(radiusIndex>=confparams_cpr->maxRangeRadius) + radiusIndex = confparams_cpr->maxRangeRadius - 1; + intervals[radiusIndex]++; + } + } + } + //compute the appropriate number + size_t targetCount = totalSampleSize*confparams_cpr->predThreshold; + size_t sum = 0; + for(i=0;imaxRangeRadius;i++) + { + sum += intervals[i]; + if(sum>targetCount) + break; + } + if(i>=confparams_cpr->maxRangeRadius) + i = confparams_cpr->maxRangeRadius-1; + unsigned int accIntervals = 2*(i+1); + unsigned int powerOf2 = roundUpToPowerOf2(accIntervals); + //printf("confparams_cpr->maxRangeRadius = %d, accIntervals=%d, powerOf2=%d\n", confparams_cpr->maxRangeRadius, accIntervals, powerOf2); + + if(powerOf2<32) + powerOf2 = 32; + + free(intervals); + return powerOf2; +} + +unsigned int optimize_intervals_double_3D(double *oriData, size_t r1, size_t r2, size_t r3, double realPrecision) +{ + size_t i,j,k, index; + size_t radiusIndex; + size_t r23=r2*r3; + double pred_value = 0, pred_err; + size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t)); + memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t)); + size_t totalSampleSize = (r1-1)*(r2-1)*(r3-1)/confparams_cpr->sampleDistance; + for(i=1;isampleDistance==0) + { + index = i*r23+j*r3+k; + pred_value = oriData[index-1] + oriData[index-r3] + oriData[index-r23] + - oriData[index-1-r23] - oriData[index-r3-1] - oriData[index-r3-r23] + oriData[index-r3-r23-1]; + pred_err = fabs(pred_value - oriData[index]); + radiusIndex = (pred_err/realPrecision+1)/2; + if(radiusIndex>=confparams_cpr->maxRangeRadius) + radiusIndex = confparams_cpr->maxRangeRadius - 1; + intervals[radiusIndex]++; + } + } + + } + } + //compute the appropriate number + size_t targetCount = totalSampleSize*confparams_cpr->predThreshold; + size_t sum = 0; + for(i=0;imaxRangeRadius;i++) + { + sum += intervals[i]; + if(sum>targetCount) + break; + } + 
if(i>=confparams_cpr->maxRangeRadius) + i = confparams_cpr->maxRangeRadius-1; + + unsigned int accIntervals = 2*(i+1); + unsigned int powerOf2 = roundUpToPowerOf2(accIntervals); + + if(powerOf2<32) + powerOf2 = 32; + + free(intervals); + //printf("confparams_cpr->maxRangeRadius = %d, accIntervals=%d, powerOf2=%d\n", confparams_cpr->maxRangeRadius, accIntervals, powerOf2); + return powerOf2; +} + +unsigned int optimize_intervals_double_4D(double *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision) +{ + size_t i,j,k,l, index; + size_t radiusIndex; + size_t r234=r2*r3*r4; + size_t r34=r3*r4; + double pred_value = 0, pred_err; + size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t)); + memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t)); + size_t totalSampleSize = (r1-1)*(r2-1)*(r3-1)*(r4-1)/confparams_cpr->sampleDistance; + for(i=1;isampleDistance==0) + { + index = i*r234+j*r34+k*r4+l; + pred_value = oriData[index-1] + oriData[index-r3] + oriData[index-r34] + - oriData[index-1-r34] - oriData[index-r4-1] - oriData[index-r4-r34] + oriData[index-r4-r34-1]; + pred_err = fabs(pred_value - oriData[index]); + radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2); + if(radiusIndex>=confparams_cpr->maxRangeRadius) + radiusIndex = confparams_cpr->maxRangeRadius - 1; + intervals[radiusIndex]++; + } + } + } + } + } + //compute the appropriate number + size_t targetCount = totalSampleSize*confparams_cpr->predThreshold; + size_t sum = 0; + for(i=0;imaxRangeRadius;i++) + { + sum += intervals[i]; + if(sum>targetCount) + break; + } + if(i>=confparams_cpr->maxRangeRadius) + i = confparams_cpr->maxRangeRadius-1; + + unsigned int accIntervals = 2*(i+1); + unsigned int powerOf2 = roundUpToPowerOf2(accIntervals); + + if(powerOf2<32) + powerOf2 = 32; + + free(intervals); + return powerOf2; +} + +TightDataPointStorageD* SZ_compress_double_1D_MDQ(double *oriData, +size_t dataLength, double realPrecision, double 
valueRangeSize, double medianValue_d) +{ +#ifdef HAVE_TIMECMPR + double* decData = NULL; + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData = (double*)(multisteps->hist_data); +#endif + + unsigned int quantization_intervals; + if(exe_params->optQuantMode==1) + quantization_intervals = optimize_intervals_double_1D_opt(oriData, dataLength, realPrecision); + else + quantization_intervals = exe_params->intvCapacity; + //updateQuantizationInfo(quantization_intervals); + int intvRadius = quantization_intervals/2; + + size_t i; + int reqLength; + double medianValue = medianValue_d; + short radExpo = getExponent_double(valueRangeSize/2); + + computeReqLength_double(realPrecision, radExpo, &reqLength, &medianValue); + + int* type = (int*) malloc(dataLength*sizeof(int)); + + double* spaceFillingValue = oriData; // + + DynamicIntArray *exactLeadNumArray; + new_DIA(&exactLeadNumArray, DynArrayInitLen); + + DynamicByteArray *exactMidByteArray; + new_DBA(&exactMidByteArray, DynArrayInitLen); + + DynamicIntArray *resiBitArray; + new_DIA(&resiBitArray, DynArrayInitLen); + + unsigned char preDataBytes[8]; + longToBytes_bigEndian(preDataBytes, 0); + + int reqBytesLength = reqLength/8; + int resiBitsLength = reqLength%8; + double last3CmprsData[3] = {0}; + + DoubleValueCompressElement *vce = (DoubleValueCompressElement*)malloc(sizeof(DoubleValueCompressElement)); + LossyCompressionElement *lce = (LossyCompressionElement*)malloc(sizeof(LossyCompressionElement)); + + //add the first data + type[0] = 0; + compressSingleDoubleValue(vce, spaceFillingValue[0], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + listAdd_double(last3CmprsData, vce->data); +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[0] = 
vce->data; +#endif + + //add the second data + type[1] = 0; + compressSingleDoubleValue(vce, spaceFillingValue[1], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + listAdd_double(last3CmprsData, vce->data); +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[1] = vce->data; +#endif + int state; + double checkRadius; + double curData; + double pred = last3CmprsData[0]; + double predAbsErr; + checkRadius = (quantization_intervals-1)*realPrecision; + double interval = 2*realPrecision; + + double recip_realPrecision = 1/realPrecision; + for(i=2;i=pred) + { + type[i] = intvRadius+state; + pred = pred + state*interval; + } + else //curDataszMode == SZ_TEMPORAL_COMPRESSION) + decData[i] = pred; +#endif + continue; + } + + //unpredictable data processing + type[i] = 0; + compressSingleDoubleValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + + //listAdd_double(last3CmprsData, vce->data); + pred = vce->data; + +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[i] = vce->data; +#endif + + }//end of for + + size_t exactDataNum = exactLeadNumArray->size; + + TightDataPointStorageD* tdps; + + new_TightDataPointStorageD(&tdps, dataLength, exactDataNum, + type, exactMidByteArray->array, exactMidByteArray->size, + exactLeadNumArray->array, + resiBitArray->array, resiBitArray->size, + resiBitsLength, + realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0); + +// printf("exactDataNum=%d, 
expSegmentsInBytes_size=%d, exactMidByteArray->size=%d\n", +// exactDataNum, expSegmentsInBytes_size, exactMidByteArray->size); + + //free memory + free_DIA(exactLeadNumArray); + free_DIA(resiBitArray); + free(type); + free(vce); + free(lce); + free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps); + + return tdps; +} + +void SZ_compress_args_double_StoreOriData(double* oriData, size_t dataLength, unsigned char** newByteData, size_t *outSize) +{ + int doubleSize = sizeof(double); + size_t k = 0, i; + size_t totalByteLength = 3 + MetaDataByteLength_double + exe_params->SZ_SIZE_TYPE + 1 + doubleSize*dataLength; + /*No need to malloc because newByteData should always already be allocated with no less totalByteLength.*/ + //*newByteData = (unsigned char*)malloc(totalByteLength); + + unsigned char dsLengthBytes[8]; + for (i = 0; i < 3; i++)//3 + (*newByteData)[k++] = versionNumber[i]; + + if(exe_params->SZ_SIZE_TYPE==4)//1 + (*newByteData)[k++] = 16; //00010000 + else + (*newByteData)[k++] = 80; //01010000: 01000000 indicates the SZ_SIZE_TYPE=8 + + convertSZParamsToBytes(confparams_cpr, &((*newByteData)[k])); + k = k + MetaDataByteLength_double; + + sizeToBytes(dsLengthBytes,dataLength); + for (i = 0; i < exe_params->SZ_SIZE_TYPE; i++)//ST: 4 or 8 + (*newByteData)[k++] = dsLengthBytes[i]; + + if(sysEndianType==BIG_ENDIAN_SYSTEM) + memcpy((*newByteData)+4+MetaDataByteLength_double+exe_params->SZ_SIZE_TYPE, oriData, dataLength*doubleSize); + else + { + unsigned char* p = (*newByteData)+4+MetaDataByteLength_double+exe_params->SZ_SIZE_TYPE; + for(i=0;iszMode == SZ_TEMPORAL_COMPRESSION) + { + int timestep = sz_tsc->currentStep; + if(cmprType == SZ_PERIO_TEMPORAL_COMPRESSION) + { + if(timestep % confparams_cpr->snapshotCmprStep != 0) + { + tdps = SZ_compress_double_1D_MDQ_ts(oriData, dataLength, multisteps, realPrecision, valueRangeSize, medianValue_d); + compressionType = 1; //time-series based compression + } + else + { + 
tdps = SZ_compress_double_1D_MDQ(oriData, dataLength, realPrecision, valueRangeSize, medianValue_d); + compressionType = 0; //snapshot-based compression + multisteps->lastSnapshotStep = timestep; + } + } + else if(cmprType == SZ_FORCE_SNAPSHOT_COMPRESSION) + { + tdps = SZ_compress_double_1D_MDQ(oriData, dataLength, realPrecision, valueRangeSize, medianValue_d); + compressionType = 0; //snapshot-based compression + multisteps->lastSnapshotStep = timestep; + } + else if(cmprType == SZ_FORCE_TEMPORAL_COMPRESSION) + { + tdps = SZ_compress_double_1D_MDQ_ts(oriData, dataLength, multisteps, realPrecision, valueRangeSize, medianValue_d); + compressionType = 1; //time-series based compression + } + + } + else +#endif + tdps = SZ_compress_double_1D_MDQ(oriData, dataLength, realPrecision, valueRangeSize, medianValue_d); + + convertTDPStoFlatBytes_double(tdps, newByteData, outSize); + + if(*outSize>3 + MetaDataByteLength_double + exe_params->SZ_SIZE_TYPE + 1 + sizeof(double)*dataLength) + SZ_compress_args_double_StoreOriData(oriData, dataLength, newByteData, outSize); + + free_TightDataPointStorageD(tdps); + return compressionType; +} + +TightDataPointStorageD* SZ_compress_double_2D_MDQ(double *oriData, size_t r1, size_t r2, double realPrecision, double valueRangeSize, double medianValue_d) +{ +#ifdef HAVE_TIMECMPR + double* decData = NULL; + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData = (double*)(multisteps->hist_data); +#endif + + double recip_realPrecision = 1/realPrecision; + unsigned int quantization_intervals; + if(exe_params->optQuantMode==1) + { + quantization_intervals = optimize_intervals_double_2D_opt(oriData, r1, r2, realPrecision); + updateQuantizationInfo(quantization_intervals); + } + else + quantization_intervals = exe_params->intvCapacity; + int intvRadius = quantization_intervals/2; + + size_t i,j; + int reqLength; + double pred1D, pred2D; + double diff = 0.0; + double itvNum = 0; + double *P0, *P1; + + size_t dataLength = r1*r2; + + P0 = 
(double*)malloc(r2*sizeof(double)); + memset(P0, 0, r2*sizeof(double)); + P1 = (double*)malloc(r2*sizeof(double)); + memset(P1, 0, r2*sizeof(double)); + + double medianValue = medianValue_d; + short radExpo = getExponent_double(valueRangeSize/2); + computeReqLength_double(realPrecision, radExpo, &reqLength, &medianValue); + + int* type = (int*) malloc(dataLength*sizeof(int)); + //type[dataLength]=0; + + double* spaceFillingValue = oriData; // + + DynamicIntArray *exactLeadNumArray; + new_DIA(&exactLeadNumArray, DynArrayInitLen); + + DynamicByteArray *exactMidByteArray; + new_DBA(&exactMidByteArray, DynArrayInitLen); + + DynamicIntArray *resiBitArray; + new_DIA(&resiBitArray, DynArrayInitLen); + + type[0] = 0; + + unsigned char preDataBytes[8]; + longToBytes_bigEndian(preDataBytes, 0); + + int reqBytesLength = reqLength/8; + int resiBitsLength = reqLength%8; + + DoubleValueCompressElement *vce = (DoubleValueCompressElement*)malloc(sizeof(DoubleValueCompressElement)); + LossyCompressionElement *lce = (LossyCompressionElement*)malloc(sizeof(LossyCompressionElement)); + + /* Process Row-0 data 0*/ + type[0] = 0; + compressSingleDoubleValue(vce, spaceFillingValue[0], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[0] = vce->data; +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[0] = vce->data; +#endif + + /* Process Row-0 data 1*/ + pred1D = P1[0]; + diff = spaceFillingValue[1] - pred1D; + + itvNum = fabs(diff)*recip_realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + type[1] = (int) (itvNum/2) + intvRadius; + P1[1] = pred1D + 2 * (type[1] - intvRadius) * realPrecision; + } + else + { + type[1] = 0; + compressSingleDoubleValue(vce, 
spaceFillingValue[1], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[1] = vce->data; + } +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[1] = P1[1]; +#endif + + /* Process Row-0 data 2 --> data r2-1 */ + for (j = 2; j < r2; j++) + { + pred1D = 2*P1[j-1] - P1[j-2]; + diff = spaceFillingValue[j] - pred1D; + + itvNum = fabs(diff)*recip_realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + type[j] = (int) (itvNum/2) + intvRadius; + P1[j] = pred1D + 2 * (type[j] - intvRadius) * realPrecision; + } + else + { + type[j] = 0; + compressSingleDoubleValue(vce, spaceFillingValue[j], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[j] = vce->data; + } +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[j] = P1[j]; +#endif + } + + /* Process Row-1 --> Row-r1-1 */ + size_t index; + for (i = 1; i < r1; i++) + { + /* Process row-i data 0 */ + index = i*r2; + pred1D = P1[0]; + diff = spaceFillingValue[index] - pred1D; + + itvNum = fabs(diff)*recip_realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + P0[0] = pred1D + 2 * (type[index] - intvRadius) * realPrecision; + } + else + { + type[index] = 0; + compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, 
reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P0[0] = vce->data; + } +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[index] = P0[0]; +#endif + + /* Process row-i data 1 --> r2-1*/ + for (j = 1; j < r2; j++) + { + index = i*r2+j; + pred2D = P0[j-1] + P1[j] - P1[j-1]; + + diff = spaceFillingValue[index] - pred2D; + + itvNum = fabs(diff)*recip_realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + P0[j] = pred2D + 2 * (type[index] - intvRadius) * realPrecision; + } + else + { + type[index] = 0; + compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P0[j] = vce->data; + } +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[index] = P0[j]; +#endif + } + + double *Pt; + Pt = P1; + P1 = P0; + P0 = Pt; + } + + if(r2!=1) + free(P0); + free(P1); + size_t exactDataNum = exactLeadNumArray->size; + + TightDataPointStorageD* tdps; + + new_TightDataPointStorageD(&tdps, dataLength, exactDataNum, + type, exactMidByteArray->array, exactMidByteArray->size, + exactLeadNumArray->array, + resiBitArray->array, resiBitArray->size, + resiBitsLength, + realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0); + +/* int sum =0; + for(i=0;isize=%d\n", +// exactDataNum, expSegmentsInBytes_size, exactMidByteArray->size); + +// for(i = 3800;i<3844;i++) +// printf("exactLeadNumArray->array[%d]=%d\n",i,exactLeadNumArray->array[i]); + + //free memory + free_DIA(exactLeadNumArray); + free_DIA(resiBitArray); + free(type); 
+ free(vce); + free(lce); + free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps); + + return tdps; +} + +/** + * + * Note: @r1 is high dimension + * @r2 is low dimension + * */ +char SZ_compress_args_double_NoCkRngeNoGzip_2D(int cmprType, unsigned char** newByteData, double *oriData, size_t r1, size_t r2, double realPrecision, size_t *outSize, double valueRangeSize, double medianValue_d) +{ + size_t dataLength = r1*r2; + char compressionType = 0; + TightDataPointStorageD* tdps = NULL; +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + { + int timestep = sz_tsc->currentStep; + if(cmprType == SZ_PERIO_TEMPORAL_COMPRESSION) + { + if(timestep % confparams_cpr->snapshotCmprStep != 0) + { + tdps = SZ_compress_double_1D_MDQ_ts(oriData, dataLength, multisteps, realPrecision, valueRangeSize, medianValue_d); + compressionType = 1; //time-series based compression + } + else + { + tdps = SZ_compress_double_2D_MDQ(oriData, r1, r2, realPrecision, valueRangeSize, medianValue_d); + compressionType = 0; //snapshot-based compression + multisteps->lastSnapshotStep = timestep; + } + } + else if(cmprType == SZ_FORCE_SNAPSHOT_COMPRESSION) + { + tdps = SZ_compress_double_2D_MDQ(oriData, r1, r2, realPrecision, valueRangeSize, medianValue_d); + compressionType = 0; //snapshot-based compression + multisteps->lastSnapshotStep = timestep; + } + else if(cmprType == SZ_FORCE_TEMPORAL_COMPRESSION) + { + tdps = SZ_compress_double_1D_MDQ_ts(oriData, dataLength, multisteps, realPrecision, valueRangeSize, medianValue_d); + compressionType = 1; //time-series based compression + } + } + else +#endif + tdps = SZ_compress_double_2D_MDQ(oriData, r1, r2, realPrecision, valueRangeSize, medianValue_d); + + convertTDPStoFlatBytes_double(tdps, newByteData, outSize); + + if(*outSize>3 + MetaDataByteLength_double + exe_params->SZ_SIZE_TYPE + 1 + sizeof(double)*dataLength) + SZ_compress_args_double_StoreOriData(oriData, 
dataLength, newByteData, outSize); + + free_TightDataPointStorageD(tdps); + return compressionType; +} + +TightDataPointStorageD* SZ_compress_double_3D_MDQ(double *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, double valueRangeSize, double medianValue_d) +{ +#ifdef HAVE_TIMECMPR + double* decData = NULL; + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData = (double*)(multisteps->hist_data); +#endif + + double recip_realPrecision = 1/realPrecision; + unsigned int quantization_intervals; + if(exe_params->optQuantMode==1) + { + quantization_intervals = optimize_intervals_double_3D_opt(oriData, r1, r2, r3, realPrecision); + updateQuantizationInfo(quantization_intervals); + } + else + quantization_intervals = exe_params->intvCapacity; + int intvRadius = quantization_intervals/2; + + size_t i,j,k; + int reqLength; + double pred1D, pred2D, pred3D; + double diff = 0.0; + double itvNum = 0; + double *P0, *P1; + + size_t dataLength = r1*r2*r3; + + size_t r23 = r2*r3; + + P0 = (double*)malloc(r23*sizeof(double)); + P1 = (double*)malloc(r23*sizeof(double)); + + double medianValue = medianValue_d; + short radExpo = getExponent_double(valueRangeSize/2); + computeReqLength_double(realPrecision, radExpo, &reqLength, &medianValue); + + int* type = (int*) malloc(dataLength*sizeof(int)); + //type[dataLength]=0; + + double* spaceFillingValue = oriData; // + + DynamicIntArray *exactLeadNumArray; + new_DIA(&exactLeadNumArray, DynArrayInitLen); + + DynamicByteArray *exactMidByteArray; + new_DBA(&exactMidByteArray, DynArrayInitLen); + + DynamicIntArray *resiBitArray; + new_DIA(&resiBitArray, DynArrayInitLen); + + type[0] = 0; + + unsigned char preDataBytes[8]; + longToBytes_bigEndian(preDataBytes, 0); + + int reqBytesLength = reqLength/8; + int resiBitsLength = reqLength%8; + + DoubleValueCompressElement *vce = (DoubleValueCompressElement*)malloc(sizeof(DoubleValueCompressElement)); + LossyCompressionElement *lce = 
(LossyCompressionElement*)malloc(sizeof(LossyCompressionElement)); + + + /////////////////////////// Process layer-0 /////////////////////////// + /* Process Row-0 data 0*/ + type[0] = 0; + compressSingleDoubleValue(vce, spaceFillingValue[0], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[0] = vce->data; +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[0] = P1[0]; +#endif + + /* Process Row-0 data 1*/ + pred1D = P1[0]; + diff = spaceFillingValue[1] - pred1D; + + itvNum = fabs(diff)*recip_realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + type[1] = (int) (itvNum/2) + intvRadius; + P1[1] = pred1D + 2 * (type[1] - intvRadius) * realPrecision; + } + else + { + type[1] = 0; + compressSingleDoubleValue(vce, spaceFillingValue[1], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[1] = vce->data; + } +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[1] = P1[1]; +#endif + + /* Process Row-0 data 2 --> data r3-1 */ + for (j = 2; j < r3; j++) + { + pred1D = 2*P1[j-1] - P1[j-2]; + diff = spaceFillingValue[j] - pred1D; + + itvNum = fabs(diff)*recip_realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + type[j] = (int) (itvNum/2) + intvRadius; + P1[j] = pred1D + 2 * (type[j] - intvRadius) * realPrecision; + } + else + { + type[j] = 0; + compressSingleDoubleValue(vce, spaceFillingValue[j], realPrecision, medianValue, reqLength, 
reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[j] = vce->data; + } +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[j] = P1[j]; +#endif + } + + /* Process Row-1 --> Row-r2-1 */ + size_t index; + for (i = 1; i < r2; i++) + { + /* Process row-i data 0 */ + index = i*r3; + pred1D = P1[index-r3]; + diff = spaceFillingValue[index] - pred1D; + + itvNum = fabs(diff)*recip_realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + P1[index] = pred1D + 2 * (type[index] - intvRadius) * realPrecision; + } + else + { + type[index] = 0; + compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[index] = vce->data; + } +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[index] = P1[index]; +#endif + + /* Process row-i data 1 --> data r3-1*/ + for (j = 1; j < r3; j++) + { + index = i*r3+j; + pred2D = P1[index-1] + P1[index-r3] - P1[index-r3-1]; + + diff = spaceFillingValue[index] - pred2D; + + itvNum = fabs(diff)*recip_realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + P1[index] = pred2D + 2 * (type[index] - intvRadius) * realPrecision; + } + else + { + type[index] = 0; + compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + 
updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[index] = vce->data; + } +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[index] = P1[index]; +#endif + } + } + + + /////////////////////////// Process layer-1 --> layer-r1-1 /////////////////////////// + + for (k = 1; k < r1; k++) + { + /* Process Row-0 data 0*/ + index = k*r23; + pred1D = P1[0]; + diff = spaceFillingValue[index] - pred1D; + + itvNum = fabs(diff)*recip_realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + P0[0] = pred1D + 2 * (type[index] - intvRadius) * realPrecision; + } + else + { + type[index] = 0; + compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P0[0] = vce->data; + } +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[index] = P0[0]; +#endif + + /* Process Row-0 data 1 --> data r3-1 */ + for (j = 1; j < r3; j++) + { + //index = k*r2*r3+j; + index ++; + pred2D = P0[j-1] + P1[j] - P1[j-1]; + diff = spaceFillingValue[index] - pred2D; + + itvNum = fabs(diff)*recip_realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + P0[j] = pred2D + 2 * (type[index] - intvRadius) * realPrecision; + } + else + { + type[index] = 0; + compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, 
preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P0[j] = vce->data; + } +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[index] = P0[j]; +#endif + } + + /* Process Row-1 --> Row-r2-1 */ + size_t index2D; + for (i = 1; i < r2; i++) + { + /* Process Row-i data 0 */ + index = k*r23 + i*r3; + index2D = i*r3; + pred2D = P0[index2D-r3] + P1[index2D] - P1[index2D-r3]; + diff = spaceFillingValue[index] - pred2D; + + itvNum = fabs(diff)*recip_realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + P0[index2D] = pred2D + 2 * (type[index] - intvRadius) * realPrecision; + } + else + { + type[index] = 0; + compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P0[index2D] = vce->data; + } +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[index] = P0[index2D]; +#endif + + /* Process Row-i data 1 --> data r3-1 */ + for (j = 1; j < r3; j++) + { + //index = k*r2*r3 + i*r3 + j; + index ++; + index2D = i*r3 + j; + pred3D = P0[index2D-1] + P0[index2D-r3]+ P1[index2D] - P0[index2D-r3-1] - P1[index2D-r3] - P1[index2D-1] + P1[index2D-r3-1]; + diff = spaceFillingValue[index] - pred3D; + + itvNum = fabs(diff)*recip_realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + P0[index2D] = pred3D + 2 * (type[index] - intvRadius) * realPrecision; + } + else + { + type[index] = 0; + compressSingleDoubleValue(vce, spaceFillingValue[index], 
realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P0[index2D] = vce->data; + } +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[index] = P0[index2D]; +#endif + } + } + + double *Pt; + Pt = P1; + P1 = P0; + P0 = Pt; + } + if(r23!=1) + free(P0); + free(P1); + size_t exactDataNum = exactLeadNumArray->size; + + TightDataPointStorageD* tdps; + + new_TightDataPointStorageD(&tdps, dataLength, exactDataNum, + type, exactMidByteArray->array, exactMidByteArray->size, + exactLeadNumArray->array, + resiBitArray->array, resiBitArray->size, + resiBitsLength, + realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0); + +// printf("exactDataNum=%d, expSegmentsInBytes_size=%d, exactMidByteArray->size=%d\n", +// exactDataNum, expSegmentsInBytes_size, exactMidByteArray->size); + +// for(i = 3800;i<3844;i++) +// printf("exactLeadNumArray->array[%d]=%d\n",i,exactLeadNumArray->array[i]); + + //free memory + free_DIA(exactLeadNumArray); + free_DIA(resiBitArray); + free(type); + free(vce); + free(lce); + free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps); + + return tdps; +} + + +char SZ_compress_args_double_NoCkRngeNoGzip_3D(int cmprType, unsigned char** newByteData, double *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t *outSize, double valueRangeSize, double medianValue_d) +{ + size_t dataLength = r1*r2*r3; + char compressionType = 0; + TightDataPointStorageD* tdps = NULL; +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + { + int timestep = sz_tsc->currentStep; + if(cmprType == SZ_PERIO_TEMPORAL_COMPRESSION) + { + if(timestep % confparams_cpr->snapshotCmprStep != 0) + { + tdps = 
SZ_compress_double_1D_MDQ_ts(oriData, dataLength, multisteps, realPrecision, valueRangeSize, medianValue_d); + compressionType = 1; //time-series based compression + } + else + { + if(confparams_cpr->withRegression == SZ_NO_REGRESSION) + tdps = SZ_compress_double_3D_MDQ(oriData, r1, r2, r3, realPrecision, valueRangeSize, medianValue_d); + else + *newByteData = SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(oriData, r1, r2, r3, realPrecision, outSize); + compressionType = 0; //snapshot-based compression + multisteps->lastSnapshotStep = timestep; + } + } + else if(cmprType == SZ_FORCE_SNAPSHOT_COMPRESSION) + { + if(confparams_cpr->withRegression == SZ_NO_REGRESSION) + tdps = SZ_compress_double_3D_MDQ(oriData, r1, r2, r3, realPrecision, valueRangeSize, medianValue_d); + else + *newByteData = SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(oriData, r1, r2, r3, realPrecision, outSize); + compressionType = 0; //snapshot-based compression + multisteps->lastSnapshotStep = timestep; + } + else if(cmprType == SZ_FORCE_TEMPORAL_COMPRESSION) + { + tdps = SZ_compress_double_1D_MDQ_ts(oriData, dataLength, multisteps, realPrecision, valueRangeSize, medianValue_d); + compressionType = 1; //time-series based compression + } + } + else +#endif + tdps = SZ_compress_double_3D_MDQ(oriData, r1, r2, r3, realPrecision, valueRangeSize, medianValue_d); + + if(tdps!=NULL) + { + convertTDPStoFlatBytes_double(tdps, newByteData, outSize); + if(*outSize>3 + MetaDataByteLength_double + exe_params->SZ_SIZE_TYPE + 1 + sizeof(double)*dataLength) + SZ_compress_args_double_StoreOriData(oriData, dataLength, newByteData, outSize); + free_TightDataPointStorageD(tdps); + } + + return compressionType; +} + +TightDataPointStorageD* SZ_compress_double_4D_MDQ(double *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision, double valueRangeSize, double medianValue_d) +{ + double recip_realPrecision = 1/realPrecision; + unsigned int quantization_intervals; + 
if(exe_params->optQuantMode==1) + { + quantization_intervals = optimize_intervals_double_4D(oriData, r1, r2, r3, r4, realPrecision); + updateQuantizationInfo(quantization_intervals); + } + else + quantization_intervals = exe_params->intvCapacity; + int intvRadius = quantization_intervals/2; + + size_t i,j,k; + int reqLength; + double pred1D, pred2D, pred3D; + double diff = 0.0; + double itvNum = 0; + double *P0, *P1; + + size_t dataLength = r1*r2*r3*r4; + + size_t r234 = r2*r3*r4; + size_t r34 = r3*r4; + + P0 = (double*)malloc(r34*sizeof(double)); + P1 = (double*)malloc(r34*sizeof(double)); + + double medianValue = medianValue_d; + short radExpo = getExponent_double(valueRangeSize/2); + computeReqLength_double(realPrecision, radExpo, &reqLength, &medianValue); + + int* type = (int*) malloc(dataLength*sizeof(int)); + + double* spaceFillingValue = oriData; // + + DynamicIntArray *exactLeadNumArray; + new_DIA(&exactLeadNumArray, DynArrayInitLen); + + DynamicByteArray *exactMidByteArray; + new_DBA(&exactMidByteArray, DynArrayInitLen); + + DynamicIntArray *resiBitArray; + new_DIA(&resiBitArray, DynArrayInitLen); + + unsigned char preDataBytes[8]; + longToBytes_bigEndian(preDataBytes, 0); + + int reqBytesLength = reqLength/8; + int resiBitsLength = reqLength%8; + + DoubleValueCompressElement *vce = (DoubleValueCompressElement*)malloc(sizeof(DoubleValueCompressElement)); + LossyCompressionElement *lce = (LossyCompressionElement*)malloc(sizeof(LossyCompressionElement)); + + + size_t l; + for (l = 0; l < r1; l++) + { + + /////////////////////////// Process layer-0 /////////////////////////// + /* Process Row-0 data 0*/ + size_t index = l*r234; + size_t index2D = 0; + + type[index] = 0; + compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + 
addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[index2D] = vce->data; + + /* Process Row-0 data 1*/ + index = l*r234+1; + index2D = 1; + + pred1D = P1[index2D-1]; + diff = spaceFillingValue[index] - pred1D; + + itvNum = fabs(diff)/realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + P1[index2D] = pred1D + 2 * (type[index] - intvRadius) * realPrecision; + } + else + { + type[index] = 0; + compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[index2D] = vce->data; + } + + /* Process Row-0 data 2 --> data r4-1 */ + for (j = 2; j < r4; j++) + { + index = l*r234+j; + index2D = j; + + pred1D = 2*P1[index2D-1] - P1[index2D-2]; + diff = spaceFillingValue[index] - pred1D; + + itvNum = fabs(diff)*recip_realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + P1[index2D] = pred1D + 2 * (type[index] - intvRadius) * realPrecision; + } + else + { + type[index] = 0; + compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[index2D] = vce->data; + } + } + + /* Process Row-1 --> Row-r3-1 */ + for (i = 1; i < r3; i++) + { + /* Process row-i data 0 */ + index = l*r234+i*r4; + index2D = i*r4; + + pred1D = P1[index2D-r4]; + diff = spaceFillingValue[index] - pred1D; + + itvNum = 
fabs(diff)*recip_realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + P1[index2D] = pred1D + 2 * (type[index] - intvRadius) * realPrecision; + } + else + { + type[index] = 0; + compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[index2D] = vce->data; + } + + /* Process row-i data 1 --> data r4-1*/ + for (j = 1; j < r4; j++) + { + index = l*r234+i*r4+j; + index2D = i*r4+j; + + pred2D = P1[index2D-1] + P1[index2D-r4] - P1[index2D-r4-1]; + + diff = spaceFillingValue[index] - pred2D; + + itvNum = fabs(diff)*recip_realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + P1[index2D] = pred2D + 2 * (type[index] - intvRadius) * realPrecision; + } + else + { + type[index] = 0; + compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[index2D] = vce->data; + } + } + } + + + /////////////////////////// Process layer-1 --> layer-r2-1 /////////////////////////// + + for (k = 1; k < r2; k++) + { + /* Process Row-0 data 0*/ + index = l*r234+k*r34; + index2D = 0; + + pred1D = P1[index2D]; + diff = spaceFillingValue[index] - pred1D; + + itvNum = fabs(diff)*recip_realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + P0[index2D] = pred1D + 
2 * (type[index] - intvRadius) * realPrecision; + } + else + { + type[index] = 0; + compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P0[index2D] = vce->data; + } + + + /* Process Row-0 data 1 --> data r4-1 */ + for (j = 1; j < r4; j++) + { + index = l*r234+k*r34+j; + index2D = j; + + pred2D = P0[index2D-1] + P1[index2D] - P1[index2D-1]; + diff = spaceFillingValue[index] - pred2D; + + itvNum = fabs(diff)*recip_realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + P0[index2D] = pred2D + 2 * (type[index] - intvRadius) * realPrecision; + } + else + { + type[index] = 0; + compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P0[index2D] = vce->data; + } + } + + /* Process Row-1 --> Row-r3-1 */ + for (i = 1; i < r3; i++) + { + /* Process Row-i data 0 */ + index = l*r234+k*r34+i*r4; + index2D = i*r4; + + pred2D = P0[index2D-r4] + P1[index2D] - P1[index2D-r4]; + diff = spaceFillingValue[index] - pred2D; + + itvNum = fabs(diff)*recip_realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + P0[index2D] = pred2D + 2 * (type[index] - intvRadius) * realPrecision; + } + else + { + type[index] = 0; + compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + 
updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P0[index2D] = vce->data; + } + + /* Process Row-i data 1 --> data r4-1 */ + for (j = 1; j < r4; j++) + { + index = l*r234+k*r34+i*r4+j; + index2D = i*r4+j; + + pred3D = P0[index2D-1] + P0[index2D-r4]+ P1[index2D] - P0[index2D-r4-1] - P1[index2D-r4] - P1[index2D-1] + P1[index2D-r4-1]; + diff = spaceFillingValue[index] - pred3D; + + + itvNum = fabs(diff)*recip_realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + P0[index2D] = pred3D + 2 * (type[index] - intvRadius) * realPrecision; + } + else + { + type[index] = 0; + compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P0[index2D] = vce->data; + } + } + } + + double *Pt; + Pt = P1; + P1 = P0; + P0 = Pt; + } + } + + free(P0); + free(P1); + size_t exactDataNum = exactLeadNumArray->size; + + TightDataPointStorageD* tdps; + + new_TightDataPointStorageD(&tdps, dataLength, exactDataNum, + type, exactMidByteArray->array, exactMidByteArray->size, + exactLeadNumArray->array, + resiBitArray->array, resiBitArray->size, + resiBitsLength, + realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0); + + //free memory + free_DIA(exactLeadNumArray); + free_DIA(resiBitArray); + free(type); + free(vce); + free(lce); + free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps); + + return tdps; +} + + +char SZ_compress_args_double_NoCkRngeNoGzip_4D(unsigned char** newByteData, 
double *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision, size_t *outSize, double valueRangeSize, double medianValue_d) +{ + TightDataPointStorageD* tdps = SZ_compress_double_4D_MDQ(oriData, r1, r2, r3, r4, realPrecision, valueRangeSize, medianValue_d); + + convertTDPStoFlatBytes_double(tdps, newByteData, outSize); + + size_t dataLength = r1*r2*r3*r4; + if(*outSize>3 + MetaDataByteLength_double + exe_params->SZ_SIZE_TYPE + 1 + sizeof(double)*dataLength) + SZ_compress_args_double_StoreOriData(oriData, dataLength, newByteData, outSize); + + free_TightDataPointStorageD(tdps); + return 0; +} + +/*MSST19*/ +TightDataPointStorageD* SZ_compress_double_1D_MDQ_MSST19(double *oriData, +size_t dataLength, double realPrecision, double valueRangeSize, double medianValue_f) +{ +#ifdef HAVE_TIMECMPR + double* decData = NULL; + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData = (double*)(multisteps->hist_data); +#endif + + //struct ClockPoint clockPointBuild; + //TimeDurationStart("build", &clockPointBuild); + unsigned int quantization_intervals; + if(exe_params->optQuantMode==1) + quantization_intervals = optimize_intervals_double_1D_opt_MSST19(oriData, dataLength, realPrecision); + else + quantization_intervals = exe_params->intvCapacity; + //updateQuantizationInfo(quantization_intervals); + int intvRadius = quantization_intervals/2; + + double* precisionTable = (double*)malloc(sizeof(double) * quantization_intervals); + double inv = 2.0-pow(2, -(confparams_cpr->plus_bits)); + for(int i=0; iplus_bits); + + size_t i; + int reqLength; + double medianValue = medianValue_f; + //double medianInverse = 1 / medianValue_f; + //short radExpo = getExponent_double(realPrecision); + + reqLength = computeReqLength_double_MSST19(realPrecision); + + int* type = (int*) malloc(dataLength*sizeof(int)); + + double* spaceFillingValue = oriData; // + + DynamicIntArray *exactLeadNumArray; + new_DIA(&exactLeadNumArray, dataLength/2/8); + + DynamicByteArray 
*exactMidByteArray; + new_DBA(&exactMidByteArray, dataLength/2); + + DynamicIntArray *resiBitArray; + new_DIA(&resiBitArray, DynArrayInitLen); + + unsigned char preDataBytes[8]; + intToBytes_bigEndian(preDataBytes, 0); + + int reqBytesLength = reqLength/8; + int resiBitsLength = reqLength%8; + double last3CmprsData[3] = {0}; + + //size_t miss=0, hit=0; + + DoubleValueCompressElement *vce = (DoubleValueCompressElement*)malloc(sizeof(DoubleValueCompressElement)); + LossyCompressionElement *lce = (LossyCompressionElement*)malloc(sizeof(LossyCompressionElement)); + + //add the first data + type[0] = 0; + compressSingleDoubleValue_MSST19(vce, spaceFillingValue[0], realPrecision, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + listAdd_double(last3CmprsData, vce->data); + //miss++; +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[0] = vce->data; +#endif + + //add the second data + type[1] = 0; + compressSingleDoubleValue_MSST19(vce, spaceFillingValue[1], realPrecision, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + listAdd_double(last3CmprsData, vce->data); + //miss++; +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[1] = vce->data; +#endif + int state; + //double checkRadius; + double curData; + double pred = vce->data; + + double predRelErrRatio; + + const uint64_t top = levelTable.topIndex, base = levelTable.baseIndex; + const uint64_t range = top - base; + const int bits = levelTable.bits; + uint64_t* const buffer = (uint64_t*)&predRelErrRatio; + const int shift = 52-bits; + 
uint64_t expoIndex, mantiIndex; + uint16_t* tables[range+1]; + for(int i=0; i<=range; i++){ + tables[i] = levelTable.subTables[i].table; + } + + for(i=2;i> 52) - base; + if(expoIndex <= range){ + mantiIndex = (*buffer & 0x000fffffffffffff) >> shift; + state = tables[expoIndex][mantiIndex]; + }else{ + state = 0; + } + + if(state) + { + type[i] = state; + pred *= precisionTable[state]; + //hit++; + continue; + } + + //unpredictable data processing + type[i] = 0; + compressSingleDoubleValue_MSST19(vce, curData, realPrecision, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + pred = vce->data; + //miss++; +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[i] = vce->data; +#endif + + }//end of for + +// printf("miss:%d, hit:%d\n", miss, hit); + + size_t exactDataNum = exactLeadNumArray->size; + + TightDataPointStorageD* tdps; + + new_TightDataPointStorageD(&tdps, dataLength, exactDataNum, + type, exactMidByteArray->array, exactMidByteArray->size, + exactLeadNumArray->array, + resiBitArray->array, resiBitArray->size, + resiBitsLength, + realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0); + tdps->plus_bits = confparams_cpr->plus_bits; + + //free memory + free_DIA(exactLeadNumArray); + free_DIA(resiBitArray); + free(type); + free(vce); + free(lce); + free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps); + free(precisionTable); + freeTopLevelTableWideInterval(&levelTable); + return tdps; +} + +TightDataPointStorageD* SZ_compress_double_2D_MDQ_MSST19(double *oriData, size_t r1, size_t r2, double realPrecision, double valueRangeSize, double medianValue_f) +{ +#ifdef HAVE_TIMECMPR + double* decData = NULL; + if(confparams_cpr->szMode == 
SZ_TEMPORAL_COMPRESSION) + decData = (double*)(multisteps->hist_data); +#endif + + unsigned int quantization_intervals; + if(exe_params->optQuantMode==1) + { + quantization_intervals = optimize_intervals_double_2D_opt_MSST19(oriData, r1, r2, realPrecision); + updateQuantizationInfo(quantization_intervals); + } + else + quantization_intervals = exe_params->intvCapacity; + int intvRadius = quantization_intervals/2; + + double* precisionTable = (double*)malloc(sizeof(double) * quantization_intervals); + double inv = 2.0-pow(2, -(confparams_cpr->plus_bits)); + for(int i=0; iplus_bits); + + size_t i,j; + int reqLength; + double pred1D, pred2D; + //double diff = 0.0; + //double itvNum = 0; + double *P0, *P1; + double predRelErrRatio; + + size_t dataLength = r1*r2; + + P0 = (double*)malloc(r2*sizeof(double)); + memset(P0, 0, r2*sizeof(double)); + P1 = (double*)malloc(r2*sizeof(double)); + memset(P1, 0, r2*sizeof(double)); + + double medianValue = medianValue_f; + //double medianValueInverse = 1 / medianValue_f; + //short radExpo = getExponent_double(valueRangeSize/2); + reqLength = computeReqLength_double_MSST19(realPrecision); + + int* type = (int*) malloc(dataLength*sizeof(int)); + //type[dataLength]=0; + + double* spaceFillingValue = oriData; // + + DynamicIntArray *exactLeadNumArray; + new_DIA(&exactLeadNumArray, DynArrayInitLen); + + DynamicByteArray *exactMidByteArray; + new_DBA(&exactMidByteArray, DynArrayInitLen); + + DynamicIntArray *resiBitArray; + new_DIA(&resiBitArray, DynArrayInitLen); + + type[0] = 0; + unsigned char preDataBytes[8]; + intToBytes_bigEndian(preDataBytes, 0); + + int reqBytesLength = reqLength/8; + int resiBitsLength = reqLength%8; + + DoubleValueCompressElement *vce = (DoubleValueCompressElement*)malloc(sizeof(DoubleValueCompressElement)); + LossyCompressionElement *lce = (LossyCompressionElement*)malloc(sizeof(LossyCompressionElement)); + + const uint64_t top = levelTable.topIndex, base = levelTable.baseIndex; + const uint64_t range = top - 
base; + const int bits = levelTable.bits; + uint64_t* const buffer = (uint64_t*)&predRelErrRatio; + const int shift = 52-bits; + uint64_t expoIndex, mantiIndex; + uint16_t* tables[range+1]; + for(int i=0; i<=range; i++){ + tables[i] = levelTable.subTables[i].table; + } + + /* Process Row-0 data 0*/ + type[0] = 0; + compressSingleDoubleValue_MSST19(vce, spaceFillingValue[0], realPrecision, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[0] = vce->data; +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[0] = vce->data; +#endif + + double curData; + int state; + + /* Process Row-0 data 1*/ + pred1D = P1[0]; + + curData = spaceFillingValue[1]; + predRelErrRatio = curData / pred1D; + + expoIndex = ((*buffer & 0x7fffffffffffffff) >> 52) - base; + if(expoIndex <= range){ + mantiIndex = (*buffer & 0x000fffffffffffff) >> shift; + state = tables[expoIndex][mantiIndex]; + }else{ + state = 0; + } + + if (state) + { + type[1] = state; + P1[1] = fabs(pred1D) * precisionTable[state]; + } + else + { + type[1] = 0; + compressSingleDoubleValue_MSST19(vce, curData, realPrecision, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[1] = vce->data; + } +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[1] = P1[1]; +#endif + + /* Process Row-0 data 2 --> data r2-1 */ + for (j = 2; j < r2; j++) + { + pred1D = P1[j-1] * P1[j-1] / P1[j-2]; + curData = spaceFillingValue[j]; + predRelErrRatio = curData / pred1D; + + expoIndex = ((*buffer & 0x7fffffffffffffff) >> 52) - base; + if(expoIndex <= range){ + 
mantiIndex = (*buffer & 0x000fffffffffffff) >> shift; + state = tables[expoIndex][mantiIndex]; + }else{ + state = 0; + } + + if (state) + { + type[j] = state; + P1[j] = fabs(pred1D) * precisionTable[state]; + } + else + { + type[j] = 0; + compressSingleDoubleValue_MSST19(vce, curData, realPrecision, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[j] = vce->data; + } +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[j] = P1[j]; +#endif + } + + /* Process Row-1 --> Row-r1-1 */ + size_t index; + for (i = 1; i < r1; i++) + { + /* Process row-i data 0 */ + index = i*r2; + pred1D = P1[0]; + curData = spaceFillingValue[index]; + predRelErrRatio = curData / pred1D; + + expoIndex = ((*buffer & 0x7fffffffffffffff) >> 52) - base; + if(expoIndex <= range){ + mantiIndex = (*buffer & 0x000fffffffffffff) >> shift; + state = tables[expoIndex][mantiIndex]; + }else{ + state = 0; + } + + if (state) + { + type[index] = state; + P0[0] = fabs(pred1D) * precisionTable[state]; + } + else + { + type[index] = 0; + compressSingleDoubleValue_MSST19(vce, curData, realPrecision, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P0[0] = vce->data; + } +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[index] = P0[0]; +#endif + + /* Process row-i data 1 --> r2-1*/ + for (j = 1; j < r2; j++) + { + index = i*r2+j; + pred2D = P0[j-1] * P1[j] / P1[j-1]; + + curData = spaceFillingValue[index]; + predRelErrRatio = curData / pred2D; + + expoIndex = ((*buffer & 0x7fffffffffffffff) >> 52) - base; + if(expoIndex <= 
range){ + mantiIndex = (*buffer & 0x000fffffffffffff) >> shift; + state = tables[expoIndex][mantiIndex]; + }else{ + state = 0; + } + + if (state) + { + type[index] = state; + P0[j] = fabs(pred2D) * precisionTable[state]; + } + else + { + type[index] = 0; + compressSingleDoubleValue_MSST19(vce, curData, realPrecision, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P0[j] = vce->data; + } +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[index] = P0[j]; +#endif + } + + double *Pt; + Pt = P1; + P1 = P0; + P0 = Pt; + } + + if(r2!=1) + free(P0); + free(P1); + size_t exactDataNum = exactLeadNumArray->size; + + TightDataPointStorageD* tdps; + + new_TightDataPointStorageD(&tdps, dataLength, exactDataNum, + type, exactMidByteArray->array, exactMidByteArray->size, + exactLeadNumArray->array, + resiBitArray->array, resiBitArray->size, + resiBitsLength, + realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0); + tdps->plus_bits = confparams_cpr->plus_bits; + + //free memory + free_DIA(exactLeadNumArray); + free_DIA(resiBitArray); + free(type); + free(vce); + free(lce); + free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps); + free(precisionTable); + freeTopLevelTableWideInterval(&levelTable); + return tdps; +} + +TightDataPointStorageD* SZ_compress_double_3D_MDQ_MSST19(double *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, double valueRangeSize, double medianValue_f) +{ +#ifdef HAVE_TIMECMPR + double* decData = NULL; + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData = (double*)(multisteps->hist_data); +#endif + + unsigned int quantization_intervals; + if(exe_params->optQuantMode==1) + { + quantization_intervals = 
optimize_intervals_double_3D_opt_MSST19(oriData, r1, r2, r3, realPrecision); + updateQuantizationInfo(quantization_intervals); + } + else + quantization_intervals = exe_params->intvCapacity; + int intvRadius = quantization_intervals/2; + + double* precisionTable = (double*)malloc(sizeof(double) * quantization_intervals); + double inv = 2.0-pow(2, -(confparams_cpr->plus_bits)); + for(int i=0; iplus_bits); + + size_t i,j,k; + int reqLength; + double pred1D, pred2D, pred3D; + //double diff = 0.0; + //double itvNum = 0; + double *P0, *P1; + double predRelErrRatio; + + size_t dataLength = r1*r2*r3; + size_t r23 = r2*r3; + P0 = (double*)malloc(r23*sizeof(double)); + P1 = (double*)malloc(r23*sizeof(double)); + + double medianValue = medianValue_f; + //double medianValueInverse = 1/ medianValue_f; + //short radExpo = getExponent_double(valueRangeSize/2); + reqLength = computeReqLength_double_MSST19(realPrecision); + + int* type = (int*) malloc(dataLength*sizeof(int)); + + double* spaceFillingValue = oriData; // + + DynamicIntArray *exactLeadNumArray; + new_DIA(&exactLeadNumArray, DynArrayInitLen); + + DynamicByteArray *exactMidByteArray; + new_DBA(&exactMidByteArray, DynArrayInitLen); + + DynamicIntArray *resiBitArray; + new_DIA(&resiBitArray, DynArrayInitLen); + + unsigned char preDataBytes[8]; + longToBytes_bigEndian(preDataBytes, 0); + + int reqBytesLength = reqLength/8; + int resiBitsLength = reqLength%8; + + DoubleValueCompressElement *vce = (DoubleValueCompressElement*)malloc(sizeof(DoubleValueCompressElement)); + LossyCompressionElement *lce = (LossyCompressionElement*)malloc(sizeof(LossyCompressionElement)); + + const uint64_t top = levelTable.topIndex, base = levelTable.baseIndex; + const uint64_t range = top - base; + const int bits = levelTable.bits; + uint64_t* const buffer = (uint64_t*)&predRelErrRatio; + const int shift = 52-bits; + uint64_t expoIndex, mantiIndex; + uint16_t* tables[range+1]; + for(int i=0; i<=range; i++){ + tables[i] = 
levelTable.subTables[i].table; + } + int state; + + double temp, temp2; + + + //size_t miss=0, hit=0; + + /////////////////////////// Process layer-0 /////////////////////////// + /* Process Row-0 data 0*/ + type[0] = 0; + compressSingleDoubleValue_MSST19(vce, spaceFillingValue[0], realPrecision, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[0] = vce->data; + //miss++; +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[0] = P1[0]; +#endif + + double curData; + + /* Process Row-0 data 1*/ + pred1D = P1[0]; + curData = spaceFillingValue[1]; + predRelErrRatio = curData / pred1D; + + expoIndex = ((*buffer & 0x7fffffffffffffff) >> 52) - base; + if(expoIndex <= range){ + mantiIndex = (*buffer & 0x000fffffffffffff) >> shift; + state = tables[expoIndex][mantiIndex]; + }else{ + state = 0; + } + + if (state) + { + type[1] = state; + P1[1] = fabs(pred1D) * precisionTable[state]; + //hit++; + } + else + { + type[1] = 0; + compressSingleDoubleValue_MSST19(vce, curData, realPrecision, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[1] = vce->data; + //miss++; + } +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[1] = P1[1]; +#endif + + /* Process Row-0 data 2 --> data r3-1 */ + for (j = 2; j < r3; j++) + { + temp = P1[j-1]; + pred1D = temp * temp / P1[j-2]; + curData = spaceFillingValue[j]; + predRelErrRatio = curData / pred1D; + + expoIndex = ((*buffer & 0x7fffffffffffffff) >> 52) - base; + if(expoIndex <= range){ + mantiIndex = (*buffer & 0x000fffffffffffff) >> shift; + state = 
tables[expoIndex][mantiIndex]; + }else{ + state = 0; + } + + if (state) + { + type[j] = state; + P1[j] = fabs(pred1D) * precisionTable[state]; + //hit++; + } + else + { + type[j] = 0; + compressSingleDoubleValue_MSST19(vce, curData, realPrecision, reqLength, reqBytesLength, resiBitsLength);; + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[j] = vce->data; + //miss++; + } +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[j] = P1[j]; +#endif + } + + /* Process Row-1 --> Row-r2-1 */ + size_t index; + for (i = 1; i < r2; i++) + { + /* Process row-i data 0 */ + index = i*r3; + pred1D = P1[index-r3]; + curData = spaceFillingValue[index]; + predRelErrRatio = curData / pred1D; + + expoIndex = ((*buffer & 0x7fffffffffffffff) >> 52) - base; + if(expoIndex <= range){ + mantiIndex = (*buffer & 0x000fffffffffffff) >> shift; + state = tables[expoIndex][mantiIndex]; + }else{ + state = 0; + } + + if (state) + { + type[index] = state; + P1[index] = pred1D * precisionTable[state]; + //hit++; + } + else + { + type[index] = 0; + compressSingleDoubleValue_MSST19(vce, curData, realPrecision, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[index] = vce->data; + //miss++; + } +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[index] = P1[index]; +#endif + + /* Process row-i data 1 --> data r3-1*/ + for (j = 1; j < r3; j++) + { + index = i*r3+j; + temp = P1[index-1]; + pred2D = temp * P1[index-r3] / P1[index-r3-1]; + //double a = P1[index-1]; + //double b = P1[index-r3]; + //double c = P1[index-r3-1]; + + curData = spaceFillingValue[index]; + 
predRelErrRatio = curData / pred2D; + + expoIndex = ((*buffer & 0x7fffffffffffffff) >> 52) - base; + if(expoIndex <= range){ + mantiIndex = (*buffer & 0x000fffffffffffff) >> shift; + state = tables[expoIndex][mantiIndex]; + }else{ + state = 0; + } + + if (state) + { + type[index] = state; + P1[index] = fabs(pred2D) * precisionTable[state]; + //hit++; + } + else + { + type[index] = 0; + compressSingleDoubleValue_MSST19(vce, curData, realPrecision, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[index] = vce->data; + //miss++; + } +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[index] = P1[index]; +#endif + } + } + + + /////////////////////////// Process layer-1 --> layer-r1-1 /////////////////////////// + + for (k = 1; k < r1; k++) + { + /* Process Row-0 data 0*/ + index = k*r23; + pred1D = P1[0]; + curData = spaceFillingValue[index]; + predRelErrRatio = curData / pred1D; + + expoIndex = ((*buffer & 0x7fffffffffffffff) >> 52) - base; + if(expoIndex <= range){ + mantiIndex = (*buffer & 0x000fffffffffffff) >> shift; + state = tables[expoIndex][mantiIndex]; + }else{ + state = 0; + } + + if (state) + { + type[index] = state; + P0[0] = fabs(pred1D) * precisionTable[state]; + //hit++; + } + else + { + type[index] = 0; + compressSingleDoubleValue_MSST19(vce, curData, realPrecision, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P0[0] = vce->data; + //miss++; + } +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[index] = P0[0]; +#endif + + /* Process Row-0 data 1 --> data r3-1 */ + 
for (j = 1; j < r3; j++) + { + //index = k*r2*r3+j; + index ++; + temp = P0[j-1]; + pred2D = temp * P1[j] / P1[j-1]; + curData = spaceFillingValue[index]; + predRelErrRatio = curData / pred2D; + + expoIndex = ((*buffer & 0x7fffffffffffffff) >> 52) - base; + if(expoIndex <= range){ + mantiIndex = (*buffer & 0x000fffffffffffff) >> shift; + state = tables[expoIndex][mantiIndex]; + }else{ + state = 0; + } + + if (state) + { + type[index] = state; + P0[j] = fabs(pred2D) * precisionTable[state]; + //hit++; + } + else + { + type[index] = 0; + compressSingleDoubleValue_MSST19(vce, curData, realPrecision, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P0[j] = vce->data; + //miss++; + } +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[index] = P0[j]; +#endif + } + + /* Process Row-1 --> Row-r2-1 */ + size_t index2D; + for (i = 1; i < r2; i++) + { + /* Process Row-i data 0 */ + index = k*r23 + i*r3; + index2D = i*r3; + temp = P0[index2D-r3]; + pred2D = temp * P1[index2D] / P1[index2D-r3]; + curData = spaceFillingValue[index]; + predRelErrRatio = curData / pred2D; + + expoIndex = ((*buffer & 0x7fffffffffffffff) >> 52) - base; + if(expoIndex <= range){ + mantiIndex = (*buffer & 0x000fffffffffffff) >> shift; + state = tables[expoIndex][mantiIndex]; + }else{ + state = 0; + } + + if (state) + { + type[index] = state; + P0[index2D] = fabs(pred2D) * precisionTable[state]; + //hit++; + } + else + { + type[index] = 0; + compressSingleDoubleValue_MSST19(vce, curData, realPrecision, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + 
P0[index2D] = vce->data; + //miss++; + } +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[index] = P0[index2D]; +#endif + + /* Process Row-i data 1 --> data r3-1 */ + for (j = 1; j < r3; j++) + { + index ++; + index2D = i*r3 + j; + //pred3D = P0[index2D-1] * P0[index2D-r3] * P1[index2D] / P0[index2D-r3-1] / P1[index2D-r3] / P1[index2D-1] * P1[index2D-r3-1]; + temp = P0[index2D-1]; + temp2 = P0[index2D-r3-1]; + pred3D = temp * P0[index2D-r3] * P1[index2D] * P1[index2D-r3-1] / (temp2 * P1[index2D-r3] * P1[index2D-1]); + + curData = spaceFillingValue[index]; + predRelErrRatio = curData / pred3D; + + expoIndex = ((*buffer & 0x7fffffffffffffff) >> 52) - base; + if(expoIndex <= range){ + mantiIndex = (*buffer & 0x000fffffffffffff) >> shift; + state = tables[expoIndex][mantiIndex]; + }else{ + state = 0; + } + + if (state) + { + type[index] = state; + P0[index2D] = fabs(pred3D) * precisionTable[state]; + //hit++; + } + else + { + type[index] = 0; + compressSingleDoubleValue_MSST19(vce, curData, realPrecision, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P0[index2D] = vce->data; + //miss++; + } +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[index] = P0[index2D]; +#endif + } + } + + double *Pt; + Pt = P1; + P1 = P0; + P0 = Pt; + } + if(r23!=1) + free(P0); + free(P1); + size_t exactDataNum = exactLeadNumArray->size; + + TightDataPointStorageD* tdps; + + new_TightDataPointStorageD(&tdps, dataLength, exactDataNum, + type, exactMidByteArray->array, exactMidByteArray->size, + exactLeadNumArray->array, + resiBitArray->array, resiBitArray->size, + resiBitsLength, + realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0); + tdps->plus_bits = confparams_cpr->plus_bits; + 
+ //free memory + free_DIA(exactLeadNumArray); + free_DIA(resiBitArray); + free(type); + free(vce); + free(lce); + free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps); + free(precisionTable); + freeTopLevelTableWideInterval(&levelTable); + return tdps; +} +void SZ_compress_args_double_withinRange(unsigned char** newByteData, double *oriData, size_t dataLength, size_t *outSize) +{ + TightDataPointStorageD* tdps = (TightDataPointStorageD*) malloc(sizeof(TightDataPointStorageD)); + tdps->rtypeArray = NULL; + tdps->typeArray = NULL; + tdps->leadNumArray = NULL; + tdps->residualMidBits = NULL; + + tdps->allSameData = 1; + tdps->dataSeriesLength = dataLength; + tdps->exactMidBytes = (unsigned char*)malloc(sizeof(unsigned char)*8); + tdps->pwrErrBoundBytes = NULL; + tdps->isLossless = 0; + double value = oriData[0]; + doubleToBytes(tdps->exactMidBytes, value); + tdps->exactMidBytes_size = 8; + + size_t tmpOutSize; + //unsigned char *tmpByteData; + convertTDPStoFlatBytes_double(tdps, newByteData, &tmpOutSize); + //convertTDPStoFlatBytes_double(tdps, &tmpByteData, &tmpOutSize); + + //*newByteData = (unsigned char*)malloc(sizeof(unsigned char)*16); //for floating-point data (1+3+4+4) + //memcpy(*newByteData, tmpByteData, 16); + *outSize = tmpOutSize;//12==3+1+8(double_size)+MetaDataByteLength_double + free_TightDataPointStorageD(tdps); +} + +/*int SZ_compress_args_double_wRngeNoGzip(unsigned char** newByteData, double *oriData, +size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize, +int errBoundMode, double absErr_Bound, double relBoundRatio, double pwrErrRatio) +{ + int status = SZ_SCES; + size_t dataLength = computeDataLength(r5,r4,r3,r2,r1); + double valueRangeSize = 0, medianValue = 0; + + double min = computeRangeSize_double(oriData, dataLength, &valueRangeSize, &medianValue); + double max = min+valueRangeSize; + double realPrecision = getRealPrecision_double(valueRangeSize, errBoundMode, 
absErr_Bound, relBoundRatio, &status); + + if(valueRangeSize <= realPrecision) + { + SZ_compress_args_double_withinRange(newByteData, oriData, dataLength, outSize); + } + else + { + if(r5==0&&r4==0&&r3==0&&r2==0) + { + if(errBoundMode>=PW_REL) + { + SZ_compress_args_double_NoCkRngeNoGzip_1D_pwr(newByteData, oriData, pwrErrRatio, r1, outSize, min, max); + //SZ_compress_args_double_NoCkRngeNoGzip_1D_pwrgroup(newByteData, oriData, r1, absErr_Bound, relBoundRatio, pwrErrRatio, valueRangeSize, medianValue, outSize); + } + else + SZ_compress_args_double_NoCkRngeNoGzip_1D(newByteData, oriData, r1, realPrecision, outSize, valueRangeSize, medianValue); + } + else if(r5==0&&r4==0&&r3==0) + { + if(errBoundMode>=PW_REL) + SZ_compress_args_double_NoCkRngeNoGzip_2D_pwr(newByteData, oriData, realPrecision, r2, r1, outSize, min, max); + else + SZ_compress_args_double_NoCkRngeNoGzip_2D(newByteData, oriData, r2, r1, realPrecision, outSize, valueRangeSize, medianValue); + } + else if(r5==0&&r4==0) + { + if(errBoundMode>=PW_REL) + SZ_compress_args_double_NoCkRngeNoGzip_3D_pwr(newByteData, oriData, realPrecision, r3, r2, r1, outSize, min, max); + else + SZ_compress_args_double_NoCkRngeNoGzip_3D(newByteData, oriData, r3, r2, r1, realPrecision, outSize, valueRangeSize, medianValue); + } + else if(r5==0) + { + if(errBoundMode>=PW_REL) + SZ_compress_args_double_NoCkRngeNoGzip_3D_pwr(newByteData, oriData, realPrecision, r4*r3, r2, r1, outSize, min, max); + else + SZ_compress_args_double_NoCkRngeNoGzip_3D(newByteData, oriData, r4*r3, r2, r1, realPrecision, outSize, valueRangeSize, medianValue); + } + } + return status; +}*/ + +int SZ_compress_args_double(int cmprType, int withRegression, unsigned char** newByteData, double *oriData, +size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize, +int errBoundMode, double absErr_Bound, double relBoundRatio, double pwRelBoundRatio) +{ + confparams_cpr->errorBoundMode = errBoundMode; + if(errBoundMode==PW_REL) + { + 
confparams_cpr->pw_relBoundRatio = pwRelBoundRatio; + } + + int status = SZ_SCES; + size_t dataLength = computeDataLength(r5,r4,r3,r2,r1); + + if(dataLength <= MIN_NUM_OF_ELEMENTS) + { + *newByteData = SZ_skip_compress_double(oriData, dataLength, outSize); + return status; + } + + double valueRangeSize = 0, medianValue = 0; + + unsigned char * signs = NULL; + bool positive = true; + double nearZero = 0.0; + double min = 0; + if(pwRelBoundRatio < 0.000009999) + confparams_cpr->accelerate_pw_rel_compression = 0; + + if(confparams_cpr->errorBoundMode == PW_REL && confparams_cpr->accelerate_pw_rel_compression == 1) + { + signs = (unsigned char *) malloc(dataLength); + memset(signs, 0, dataLength); + min = computeRangeSize_double_MSST19(oriData, dataLength, &valueRangeSize, &medianValue, signs, &positive, &nearZero); + } + else + min = computeRangeSize_double(oriData, dataLength, &valueRangeSize, &medianValue); + double max = min+valueRangeSize; + confparams_cpr->dmin = min; + confparams_cpr->dmax = max; + + double realPrecision = 0; + + if(confparams_cpr->errorBoundMode==PSNR) + { + confparams_cpr->errorBoundMode = ABS; + realPrecision = confparams_cpr->absErrBound = computeABSErrBoundFromPSNR(confparams_cpr->psnr, (double)confparams_cpr->predThreshold, valueRangeSize); + } + else if(confparams_cpr->errorBoundMode==NORM) //norm error = sqrt(sum((xi-xi_)^2)) + { + confparams_cpr->errorBoundMode = ABS; + realPrecision = confparams_cpr->absErrBound = computeABSErrBoundFromNORM_ERR(confparams_cpr->normErr, dataLength); + //printf("realPrecision=%lf\n", realPrecision); + } + else + { + realPrecision = getRealPrecision_double(valueRangeSize, errBoundMode, absErr_Bound, relBoundRatio, &status); + confparams_cpr->absErrBound = realPrecision; + } + if(valueRangeSize <= realPrecision) + { + if(confparams_cpr->errorBoundMode>=PW_REL && confparams_cpr->accelerate_pw_rel_compression == 1) + free(signs); + SZ_compress_args_double_withinRange(newByteData, oriData, dataLength, 
outSize); + } + else + { + size_t tmpOutSize = 0; + unsigned char* tmpByteData; + if (r2==0) + { + if(confparams_cpr->errorBoundMode>=PW_REL) + { + if(confparams_cpr->accelerate_pw_rel_compression && confparams_cpr->maxRangeRadius <= 32768) + SZ_compress_args_double_NoCkRngeNoGzip_1D_pwr_pre_log_MSST19(&tmpByteData, oriData, pwRelBoundRatio, r1, &tmpOutSize, valueRangeSize, medianValue, signs, &positive, min, max, nearZero); + else + SZ_compress_args_double_NoCkRngeNoGzip_1D_pwr_pre_log(&tmpByteData, oriData, pwRelBoundRatio, r1, &tmpOutSize, min, max); + //SZ_compress_args_double_NoCkRngeNoGzip_1D_pwrgroup(&tmpByteData, oriData, r1, absErr_Bound, relBoundRatio, pwRelBoundRatio, valueRangeSize, medianValue, &tmpOutSize); + } + else +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + multisteps->compressionType = SZ_compress_args_double_NoCkRngeNoGzip_1D(cmprType, &tmpByteData, oriData, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue); + else +#endif + { + SZ_compress_args_double_NoCkRngeNoGzip_1D(cmprType, &tmpByteData, oriData, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue); + if(tmpOutSize>=dataLength*sizeof(double) + 3 + MetaDataByteLength_double + exe_params->SZ_SIZE_TYPE + 1) + SZ_compress_args_double_StoreOriData(oriData, dataLength, &tmpByteData, &tmpOutSize); + } + } + else + if (r3==0) + { + if(confparams_cpr->errorBoundMode>=PW_REL) + { + if(confparams_cpr->accelerate_pw_rel_compression && confparams_cpr->maxRangeRadius <= 32768) + SZ_compress_args_double_NoCkRngeNoGzip_2D_pwr_pre_log_MSST19(&tmpByteData, oriData, pwRelBoundRatio, r2, r1, &tmpOutSize, valueRangeSize, signs, &positive, min, max, nearZero); + else + SZ_compress_args_double_NoCkRngeNoGzip_2D_pwr_pre_log(&tmpByteData, oriData, pwRelBoundRatio, r2, r1, &tmpOutSize, min, max); + } + else +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + multisteps->compressionType = 
SZ_compress_args_double_NoCkRngeNoGzip_2D(cmprType, &tmpByteData, oriData, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue); + else +#endif + { + if(withRegression == SZ_NO_REGRESSION) + SZ_compress_args_double_NoCkRngeNoGzip_2D(cmprType, &tmpByteData, oriData, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue); + else + { + tmpByteData = SZ_compress_double_2D_MDQ_nonblocked_with_blocked_regression(oriData, r2, r1, realPrecision, &tmpOutSize); + if(tmpOutSize>=dataLength*sizeof(double) + 3 + MetaDataByteLength_double + exe_params->SZ_SIZE_TYPE + 1) + SZ_compress_args_double_StoreOriData(oriData, dataLength, &tmpByteData, &tmpOutSize); + } + } + } + else + if (r4==0) + { + if(confparams_cpr->errorBoundMode>=PW_REL) + { + if(confparams_cpr->accelerate_pw_rel_compression && confparams_cpr->maxRangeRadius <= 32768) + SZ_compress_args_double_NoCkRngeNoGzip_3D_pwr_pre_log_MSST19(&tmpByteData, oriData, pwRelBoundRatio, r3, r2, r1, &tmpOutSize, valueRangeSize, signs, &positive, min, max, nearZero); + else + SZ_compress_args_double_NoCkRngeNoGzip_3D_pwr_pre_log(&tmpByteData, oriData, pwRelBoundRatio, r3, r2, r1, &tmpOutSize, min, max); + } + else +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + multisteps->compressionType = SZ_compress_args_double_NoCkRngeNoGzip_3D(cmprType, &tmpByteData, oriData, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue); + else +#endif + { + if(withRegression == SZ_NO_REGRESSION) + SZ_compress_args_double_NoCkRngeNoGzip_3D(cmprType, &tmpByteData, oriData, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue); + else + { + tmpByteData = SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(oriData, r3, r2, r1, realPrecision, &tmpOutSize); + if(tmpOutSize>=dataLength*sizeof(double) + 3 + MetaDataByteLength_double + exe_params->SZ_SIZE_TYPE + 1) + SZ_compress_args_double_StoreOriData(oriData, dataLength, &tmpByteData, &tmpOutSize); + } + } + + + } + 
else + if (r5==0) + { + if(confparams_cpr->errorBoundMode>=PW_REL) + { + if(confparams_cpr->accelerate_pw_rel_compression && confparams_cpr->maxRangeRadius <= 32768) + SZ_compress_args_double_NoCkRngeNoGzip_3D_pwr_pre_log_MSST19(&tmpByteData, oriData, pwRelBoundRatio, r4*r3, r2, r1, &tmpOutSize, valueRangeSize, signs, &positive, min, max, nearZero); + else + SZ_compress_args_double_NoCkRngeNoGzip_3D_pwr_pre_log(&tmpByteData, oriData, pwRelBoundRatio, r4*r3, r2, r1, &tmpOutSize, min, max); + } + else +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + multisteps->compressionType = SZ_compress_args_double_NoCkRngeNoGzip_4D(&tmpByteData, oriData, r4, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue); + else +#endif + { + if(withRegression == SZ_NO_REGRESSION) + SZ_compress_args_double_NoCkRngeNoGzip_4D(&tmpByteData, oriData, r4, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue); + else + { + tmpByteData = SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(oriData, r4*r3, r2, r1, realPrecision, &tmpOutSize); + if(tmpOutSize>=dataLength*sizeof(double) + 3 + MetaDataByteLength_double + exe_params->SZ_SIZE_TYPE + 1) + SZ_compress_args_double_StoreOriData(oriData, dataLength, &tmpByteData, &tmpOutSize); + } + } + + } + else + { + printf("Error: doesn't support 5 dimensions for now.\n"); + status = SZ_DERR; + } + + //Call Gzip to do the further compression. 
+ if(confparams_cpr->szMode==SZ_BEST_SPEED) + { + *outSize = tmpOutSize; + *newByteData = tmpByteData; + } + else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION || confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION || confparams_cpr->szMode==SZ_TEMPORAL_COMPRESSION) + { + *outSize = sz_lossless_compress(confparams_cpr->losslessCompressor, confparams_cpr->gzipMode, tmpByteData, tmpOutSize, newByteData); + free(tmpByteData); + } + else + { + printf("Error: Wrong setting of confparams_cpr->szMode in the double compression.\n"); + status = SZ_MERR; + } + } + + return status; +} + +//TODO +int SZ_compress_args_double_subblock(unsigned char* compressedBytes, double *oriData, +size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, +size_t s5, size_t s4, size_t s3, size_t s2, size_t s1, +size_t e5, size_t e4, size_t e3, size_t e2, size_t e1, +size_t *outSize, int errBoundMode, double absErr_Bound, double relBoundRatio) +{ + int status = SZ_SCES; + double valueRangeSize = 0, medianValue = 0; + computeRangeSize_double_subblock(oriData, &valueRangeSize, &medianValue, r5, r4, r3, r2, r1, s5, s4, s3, s2, s1, e5, e4, e3, e2, e1); + + double realPrecision = getRealPrecision_double(valueRangeSize, errBoundMode, absErr_Bound, relBoundRatio, &status); + + if(valueRangeSize <= realPrecision) + { + //TODO + //SZ_compress_args_double_withinRange_subblock(); + } + else + { + if (r2==0) + { + //TODO + if(errBoundMode==PW_REL) + { + //TODO + //SZ_compress_args_double_NoCkRngeNoGzip_1D_pwr_subblock(); + printf ("Current subblock version does not support point-wise relative error bound.\n"); + } + else + SZ_compress_args_double_NoCkRnge_1D_subblock(compressedBytes, oriData, realPrecision, outSize, valueRangeSize, medianValue, r1, s1, e1); + } + else + if (r3==0) + { + if(errBoundMode>=PW_REL) + { + //TODO + //SZ_compress_args_double_NoCkRngeNoGzip_2D_pwr_subblock(); + printf ("Current subblock version does not support point-wise relative error bound.\n"); + } + else + 
SZ_compress_args_double_NoCkRnge_2D_subblock(compressedBytes, oriData, realPrecision, outSize, valueRangeSize, medianValue, r2, r1, s2, s1, e2, e1); + } + else + if (r4==0) + { + if(errBoundMode==PW_REL) + { + //TODO + //SZ_compress_args_double_NoCkRngeNoGzip_3D_pwr_subblock(); + printf ("Current subblock version does not support point-wise relative error bound.\n"); + } + else + SZ_compress_args_double_NoCkRnge_3D_subblock(compressedBytes, oriData, realPrecision, outSize, valueRangeSize, medianValue, r3, r2, r1, s3, s2, s1, e3, e2, e1); + } + else + if (r5==0) + { + if(errBoundMode==PW_REL) + { + //TODO + //SZ_compress_args_double_NoCkRngeNoGzip_4D_pwr_subblock(); + printf ("Current subblock version does not support point-wise relative error bound.\n"); + } + else + SZ_compress_args_double_NoCkRnge_4D_subblock(compressedBytes, oriData, realPrecision, outSize, valueRangeSize, medianValue, r4, r3, r2, r1, s4, s3, s2, s1, e4, e3, e2, e1); + } + else + { + printf("Error: doesn't support 5 dimensions for now.\n"); + status = SZ_DERR; //dimension error + } + } + return status; +} + +void SZ_compress_args_double_NoCkRnge_1D_subblock(unsigned char* compressedBytes, double *oriData, double realPrecision, size_t *outSize, double valueRangeSize, double medianValue_d, +size_t r1, size_t s1, size_t e1) +{ + TightDataPointStorageD* tdps = SZ_compress_double_1D_MDQ_subblock(oriData, realPrecision, valueRangeSize, medianValue_d, r1, s1, e1); + + if (confparams_cpr->szMode==SZ_BEST_SPEED) + convertTDPStoFlatBytes_double_args(tdps, compressedBytes, outSize); + else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION || confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION) + { + unsigned char *tmpCompBytes; + size_t tmpOutSize; + convertTDPStoFlatBytes_double(tdps, &tmpCompBytes, &tmpOutSize); + *outSize = zlib_compress3(tmpCompBytes, tmpOutSize, compressedBytes, confparams_cpr->gzipMode); + free(tmpCompBytes); + } + else + { + printf ("Error: Wrong setting of confparams_cpr->szMode in the 
double compression.\n"); + } + + //TODO +// if(*outSize>dataLength*sizeof(double)) +// SZ_compress_args_double_StoreOriData(oriData, dataLength, newByteData, outSize); + + free_TightDataPointStorageD(tdps); +} + +void SZ_compress_args_double_NoCkRnge_2D_subblock(unsigned char* compressedBytes, double *oriData, double realPrecision, size_t *outSize, double valueRangeSize, double medianValue_d, +size_t r2, size_t r1, size_t s2, size_t s1, size_t e2, size_t e1) +{ + TightDataPointStorageD* tdps = SZ_compress_double_2D_MDQ_subblock(oriData, realPrecision, valueRangeSize, medianValue_d, r2, r1, s2, s1, e2, e1); + + if (confparams_cpr->szMode==SZ_BEST_SPEED) + convertTDPStoFlatBytes_double_args(tdps, compressedBytes, outSize); + else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION || confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION) + { + unsigned char *tmpCompBytes; + size_t tmpOutSize; + convertTDPStoFlatBytes_double(tdps, &tmpCompBytes, &tmpOutSize); + *outSize = zlib_compress3(tmpCompBytes, tmpOutSize, compressedBytes, confparams_cpr->gzipMode); + free(tmpCompBytes); + } + else + { + printf ("Error: Wrong setting of confparams_cpr->szMode in the double compression.\n"); + } + + //TODO +// if(*outSize>dataLength*sizeof(double)) +// SZ_compress_args_double_StoreOriData(oriData, dataLength, newByteData, outSize); + + free_TightDataPointStorageD(tdps); +} + +void SZ_compress_args_double_NoCkRnge_3D_subblock(unsigned char* compressedBytes, double *oriData, double realPrecision, size_t *outSize, double valueRangeSize, double medianValue_d, +size_t r3, size_t r2, size_t r1, size_t s3, size_t s2, size_t s1, size_t e3, size_t e2, size_t e1) +{ + TightDataPointStorageD* tdps = SZ_compress_double_3D_MDQ_subblock(oriData, realPrecision, valueRangeSize, medianValue_d, r3, r2, r1, s3, s2, s1, e3, e2, e1); + + if (confparams_cpr->szMode==SZ_BEST_SPEED) + convertTDPStoFlatBytes_double_args(tdps, compressedBytes, outSize); + else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION || 
confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION) + { + unsigned char *tmpCompBytes; + size_t tmpOutSize; + convertTDPStoFlatBytes_double(tdps, &tmpCompBytes, &tmpOutSize); + *outSize = zlib_compress3(tmpCompBytes, tmpOutSize, compressedBytes, confparams_cpr->gzipMode); + free(tmpCompBytes); + } + else + { + printf ("Error: Wrong setting of confparams_cpr->szMode in the double compression.\n"); + } + + //TODO +// if(*outSize>dataLength*sizeof(double)) +// SZ_compress_args_double_StoreOriData(oriData, dataLength, newByteData, outSize); + + free_TightDataPointStorageD(tdps); +} + +void SZ_compress_args_double_NoCkRnge_4D_subblock(unsigned char* compressedBytes, double *oriData, double realPrecision, size_t *outSize, double valueRangeSize, double medianValue_d, +size_t r4, size_t r3, size_t r2, size_t r1, size_t s4, size_t s3, size_t s2, size_t s1, size_t e4, size_t e3, size_t e2, size_t e1) +{ + TightDataPointStorageD* tdps = SZ_compress_double_4D_MDQ_subblock(oriData, realPrecision, valueRangeSize, medianValue_d, r4, r3, r2, r1, s4, s3, s2, s1, e4, e3, e2, e1); + + if (confparams_cpr->szMode==SZ_BEST_SPEED) + convertTDPStoFlatBytes_double_args(tdps, compressedBytes, outSize); + else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION || confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION) + { + unsigned char *tmpCompBytes; + size_t tmpOutSize; + convertTDPStoFlatBytes_double(tdps, &tmpCompBytes, &tmpOutSize); + *outSize = zlib_compress3(tmpCompBytes, tmpOutSize, compressedBytes, confparams_cpr->gzipMode); + free(tmpCompBytes); + } + else + { + printf ("Error: Wrong setting of confparams_cpr->szMode in the double compression.\n"); + } + + //TODO +// if(*outSize>dataLength*sizeof(double)) +// SZ_compress_args_double_StoreOriData(oriData, dataLength, newByteData, outSize); + + free_TightDataPointStorageD(tdps); +} + + +unsigned int optimize_intervals_double_1D_subblock(double *oriData, double realPrecision, size_t r1, size_t s1, size_t e1) +{ + size_t dataLength = e1 - s1 + 1; + 
oriData = oriData + s1; + + size_t i = 0; + unsigned long radiusIndex; + double pred_value = 0, pred_err; + int *intervals = (int*)malloc(confparams_cpr->maxRangeRadius*sizeof(int)); + memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(int)); + size_t totalSampleSize = dataLength/confparams_cpr->sampleDistance; + for(i=2;isampleDistance==0) + { + pred_value = 2*oriData[i-1] - oriData[i-2]; + //pred_value = oriData[i-1]; + pred_err = fabs(pred_value - oriData[i]); + radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2); + if(radiusIndex>=confparams_cpr->maxRangeRadius) + radiusIndex = confparams_cpr->maxRangeRadius - 1; + intervals[radiusIndex]++; + } + } + //compute the appropriate number + size_t targetCount = totalSampleSize*confparams_cpr->predThreshold; + size_t sum = 0; + for(i=0;imaxRangeRadius;i++) + { + sum += intervals[i]; + if(sum>targetCount) + break; + } + + if(i>=confparams_cpr->maxRangeRadius) + i = confparams_cpr->maxRangeRadius-1; + unsigned int accIntervals = 2*(i+1); + unsigned int powerOf2 = roundUpToPowerOf2(accIntervals); + + if(powerOf2<32) + powerOf2 = 32; + + free(intervals); + return powerOf2; +} + +unsigned int optimize_intervals_double_2D_subblock(double *oriData, double realPrecision, size_t r1, size_t r2, size_t s1, size_t s2, size_t e1, size_t e2) +{ + size_t R1 = e1 - s1 + 1; + size_t R2 = e2 - s2 + 1; + + size_t i,j, index; + unsigned long radiusIndex; + double pred_value = 0, pred_err; + int *intervals = (int*)malloc(confparams_cpr->maxRangeRadius*sizeof(int)); + memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(int)); + size_t totalSampleSize = R1*R2/confparams_cpr->sampleDistance; + for(i=s1+1;i<=e1;i++) + { + for(j=s2+1;j<=e2;j++) + { + if((i+j)%confparams_cpr->sampleDistance==0) + { + index = i*r2+j; + pred_value = oriData[index-1] + oriData[index-r2] - oriData[index-r2-1]; + pred_err = fabs(pred_value - oriData[index]); + radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2); + 
if(radiusIndex>=confparams_cpr->maxRangeRadius) + radiusIndex = confparams_cpr->maxRangeRadius - 1; + intervals[radiusIndex]++; + } + } + } + //compute the appropriate number + size_t targetCount = totalSampleSize*confparams_cpr->predThreshold; + size_t sum = 0; + for(i=0;imaxRangeRadius;i++) + { + sum += intervals[i]; + if(sum>targetCount) + break; + } + if(i>=confparams_cpr->maxRangeRadius) + i = confparams_cpr->maxRangeRadius-1; + unsigned int accIntervals = 2*(i+1); + unsigned int powerOf2 = roundUpToPowerOf2(accIntervals); + + if(powerOf2<32) + powerOf2 = 32; + + free(intervals); + return powerOf2; +} + +unsigned int optimize_intervals_double_3D_subblock(double *oriData, double realPrecision, size_t r1, size_t r2, size_t r3, size_t s1, size_t s2, size_t s3, size_t e1, size_t e2, size_t e3) +{ + size_t R1 = e1 - s1 + 1; + size_t R2 = e2 - s2 + 1; + size_t R3 = e3 - s3 + 1; + + size_t r23 = r2*r3; + + size_t i,j,k, index; + unsigned long radiusIndex; + double pred_value = 0, pred_err; + int *intervals = (int*)malloc(confparams_cpr->maxRangeRadius*sizeof(int)); + memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(int)); + size_t totalSampleSize = R1*R2*R3/confparams_cpr->sampleDistance; + for(i=s1+1;i<=e1;i++) + { + for(j=s2+1;j<=e2;j++) + { + for(k=s3+1;k<=e3;k++) + { + if((i+j+k)%confparams_cpr->sampleDistance==0) + { + index = i*r23+j*r3+k; + pred_value = oriData[index-1] + oriData[index-r3] + oriData[index-r23] + - oriData[index-1-r23] - oriData[index-r3-1] - oriData[index-r3-r23] + oriData[index-r3-r23-1]; + pred_err = fabs(pred_value - oriData[index]); + radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2); + if(radiusIndex>=confparams_cpr->maxRangeRadius) + radiusIndex = confparams_cpr->maxRangeRadius - 1; + intervals[radiusIndex]++; + } + } + + } + } + //compute the appropriate number + size_t targetCount = totalSampleSize*confparams_cpr->predThreshold; + size_t sum = 0; + for(i=0;imaxRangeRadius;i++) + { + sum += intervals[i]; + 
if(sum>targetCount) + break; + } + if(i>=confparams_cpr->maxRangeRadius) + i = confparams_cpr->maxRangeRadius-1; + + unsigned int accIntervals = 2*(i+1); + unsigned int powerOf2 = roundUpToPowerOf2(accIntervals); + + if(powerOf2<32) + powerOf2 = 32; + + free(intervals); + return powerOf2; +} + +unsigned int optimize_intervals_double_4D_subblock(double *oriData, double realPrecision, +size_t r1, size_t r2, size_t r3, size_t r4, size_t s1, size_t s2, size_t s3, size_t s4, size_t e1, size_t e2, size_t e3, size_t e4) +{ + size_t R1 = e1 - s1 + 1; + size_t R2 = e2 - s2 + 1; + size_t R3 = e3 - s3 + 1; + size_t R4 = e4 - s4 + 1; + + size_t r34 = r3*r4; + size_t r234 = r2*r3*r4; + + size_t i,j,k,l, index; + unsigned long radiusIndex; + double pred_value = 0, pred_err; + int *intervals = (int*)malloc(confparams_cpr->maxRangeRadius*sizeof(int)); + memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(int)); + size_t totalSampleSize = R1*R2*R3*R4/confparams_cpr->sampleDistance; + for(i=s1+1;i<=e1;i++) + { + for(j=s2+1;j<=e2;j++) + { + for(k=s3+1;k<=e3;k++) + { + for(l=s4+1;l<=e4;l++) + { + if((i+j+k+l)%confparams_cpr->sampleDistance==0) + { + index = i*r234+j*r34+k*r4+l; + pred_value = oriData[index-1] + oriData[index-r4] + oriData[index-r34] + - oriData[index-1-r34] - oriData[index-r4-1] - oriData[index-r4-r34] + oriData[index-r4-r34-1]; + pred_err = fabs(pred_value - oriData[index]); + radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2); + if(radiusIndex>=confparams_cpr->maxRangeRadius) + radiusIndex = confparams_cpr->maxRangeRadius - 1; + intervals[radiusIndex]++; + } + } + } + + } + } + //compute the appropriate number + size_t targetCount = totalSampleSize*confparams_cpr->predThreshold; + size_t sum = 0; + for(i=0;imaxRangeRadius;i++) + { + sum += intervals[i]; + if(sum>targetCount) + break; + } + if(i>=confparams_cpr->maxRangeRadius) + i = confparams_cpr->maxRangeRadius-1; + + unsigned int accIntervals = 2*(i+1); + unsigned int powerOf2 = 
roundUpToPowerOf2(accIntervals); + + if(powerOf2<32) + powerOf2 = 32; + + free(intervals); + return powerOf2; +} + +TightDataPointStorageD* SZ_compress_double_1D_MDQ_subblock(double *oriData, double realPrecision, double valueRangeSize, double medianValue_d, +size_t r1, size_t s1, size_t e1) +{ + size_t dataLength = e1 - s1 + 1; + + unsigned int quantization_intervals; + if(exe_params->optQuantMode==1) + quantization_intervals = optimize_intervals_double_1D_subblock(oriData, realPrecision, r1, s1, e1); + else + quantization_intervals = exe_params->intvCapacity; + //updateQuantizationInfo(quantization_intervals); + int intvRadius = quantization_intervals/2; + + size_t i; + int reqLength; + double medianValue = medianValue_d; + short radExpo = getExponent_double(valueRangeSize/2); + + computeReqLength_double(realPrecision, radExpo, &reqLength, &medianValue); + + int* type = (int*) malloc(dataLength*sizeof(int)); + + double* spaceFillingValue = oriData + s1; // + + DynamicIntArray *exactLeadNumArray; + new_DIA(&exactLeadNumArray, DynArrayInitLen); + + DynamicByteArray *exactMidByteArray; + new_DBA(&exactMidByteArray, DynArrayInitLen); + + DynamicIntArray *resiBitArray; + new_DIA(&resiBitArray, DynArrayInitLen); + + type[0] = 0; + + unsigned char preDataBytes[8]; + longToBytes_bigEndian(preDataBytes, 0); + + int reqBytesLength = reqLength/8; + int resiBitsLength = reqLength%8; + double last3CmprsData[3] = {0}; + + DoubleValueCompressElement *vce = (DoubleValueCompressElement*)malloc(sizeof(DoubleValueCompressElement)); + LossyCompressionElement *lce = (LossyCompressionElement*)malloc(sizeof(LossyCompressionElement)); + + //add the first data + compressSingleDoubleValue(vce, spaceFillingValue[0], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, 
lce); + listAdd_double(last3CmprsData, vce->data); + + //add the second data + type[1] = 0; + compressSingleDoubleValue(vce, spaceFillingValue[1], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + listAdd_double(last3CmprsData, vce->data); + + int state; + double checkRadius; + double curData; + double pred; + double predAbsErr; + checkRadius = (quantization_intervals-1)*realPrecision; + double interval = 2*realPrecision; + + for(i=2;i=pred) + { + type[i] = intvRadius+state; + pred = pred + state*interval; + } + else //curDatacurBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + + listAdd_double(last3CmprsData, vce->data); + }//end of for + + size_t exactDataNum = exactLeadNumArray->size; + + TightDataPointStorageD* tdps; + + new_TightDataPointStorageD(&tdps, dataLength, exactDataNum, + type, exactMidByteArray->array, exactMidByteArray->size, + exactLeadNumArray->array, + resiBitArray->array, resiBitArray->size, + resiBitsLength, + realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0); + + //free memory + free_DIA(exactLeadNumArray); + free_DIA(resiBitArray); + free(type); + free(vce); + free(lce); + free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps); + + return tdps; +} + + +TightDataPointStorageD* SZ_compress_double_2D_MDQ_subblock(double *oriData, double realPrecision, double valueRangeSize, double medianValue_d, +size_t r1, size_t r2, size_t s1, size_t s2, size_t e1, size_t e2) +{ + unsigned int quantization_intervals; + if(exe_params->optQuantMode==1) + { + quantization_intervals = 
optimize_intervals_double_2D_subblock(oriData, realPrecision, r1, r2, s1, s2, e1, e2); + updateQuantizationInfo(quantization_intervals); + } + else + quantization_intervals = exe_params->intvCapacity; + int intvRadius = quantization_intervals/2; + + size_t i,j; + int reqLength; + double pred1D, pred2D; + double diff = 0.0; + double itvNum = 0; + double *P0, *P1; + + size_t R1 = e1 - s1 + 1; + size_t R2 = e2 - s2 + 1; + size_t dataLength = R1*R2; + + P0 = (double*)malloc(R2*sizeof(double)); + memset(P0, 0, R2*sizeof(double)); + P1 = (double*)malloc(R2*sizeof(double)); + memset(P1, 0, R2*sizeof(double)); + + double medianValue = medianValue_d; + short radExpo = getExponent_double(valueRangeSize/2); + computeReqLength_double(realPrecision, radExpo, &reqLength, &medianValue); + + int* type = (int*) malloc(dataLength*sizeof(int)); + + double* spaceFillingValue = oriData; // + + DynamicIntArray *exactLeadNumArray; + new_DIA(&exactLeadNumArray, DynArrayInitLen); + + DynamicByteArray *exactMidByteArray; + new_DBA(&exactMidByteArray, DynArrayInitLen); + + DynamicIntArray *resiBitArray; + new_DIA(&resiBitArray, DynArrayInitLen); + + unsigned char preDataBytes[8]; + longToBytes_bigEndian(preDataBytes, 0); + + int reqBytesLength = reqLength/8; + int resiBitsLength = reqLength%8; + + DoubleValueCompressElement *vce = (DoubleValueCompressElement*)malloc(sizeof(DoubleValueCompressElement)); + LossyCompressionElement *lce = (LossyCompressionElement*)malloc(sizeof(LossyCompressionElement)); + + /* Process Row-s1 data s2*/ + size_t gIndex; + size_t lIndex; + + gIndex = s1*r2+s2; + lIndex = 0; + + type[lIndex] = 0; + compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[0] = vce->data; + + /* 
Process Row-s1 data s2+1*/ + gIndex = s1*r2+(s2+1); + lIndex = 1; + + pred1D = P1[0]; + diff = spaceFillingValue[gIndex] - pred1D; + + itvNum = fabs(diff)/realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + type[lIndex] = (int) (itvNum/2) + intvRadius; + P1[1] = pred1D + 2 * (type[lIndex] - intvRadius) * realPrecision; + } + else + { + type[lIndex] = 0; + compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[1] = vce->data; + } + + /* Process Row-s1 data s2+2 --> data e2 */ + for (j = 2; j < R2; j++) + { + gIndex = s1*r2+(s2+j); + lIndex = j; + + pred1D = 2*P1[j-1] - P1[j-2]; + diff = spaceFillingValue[gIndex] - pred1D; + + itvNum = fabs(diff)/realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + type[lIndex] = (int) (itvNum/2) + intvRadius; + P1[j] = pred1D + 2 * (type[lIndex] - intvRadius) * realPrecision; + } + else + { + type[lIndex] = 0; + compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[j] = vce->data; + } + } + + /* Process Row-s1+1 --> Row-e1 */ + for (i = 1; i < R1; i++) + { + /* Process row-s1+i data s2 */ + gIndex = (s1+i)*r2+s2; + lIndex = i*R2; + + pred1D = P1[0]; + diff = spaceFillingValue[gIndex] - pred1D; + + itvNum = fabs(diff)/realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + type[lIndex] = (int) (itvNum/2) + intvRadius; + 
P0[0] = pred1D + 2 * (type[lIndex] - intvRadius) * realPrecision; + } + else + { + type[lIndex] = 0; + compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P0[0] = vce->data; + } + + /* Process row-s1+i data s2+1 --> e2 */ + for (j = 1; j < R2; j++) + { + gIndex = (s1+i)*r2+(s2+j); + lIndex = i*R2+j; + + pred2D = P0[j-1] + P1[j] - P1[j-1]; + diff = spaceFillingValue[gIndex] - pred2D; + + itvNum = fabs(diff)/realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + type[lIndex] = (int) (itvNum/2) + intvRadius; + P0[j] = pred2D + 2 * (type[lIndex] - intvRadius) * realPrecision; + } + else + { + type[lIndex] = 0; + compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P0[j] = vce->data; + } + } + + double *Pt; + Pt = P1; + P1 = P0; + P0 = Pt; + } + + free(P0); + free(P1); + size_t exactDataNum = exactLeadNumArray->size; + + TightDataPointStorageD* tdps; + + new_TightDataPointStorageD(&tdps, dataLength, exactDataNum, + type, exactMidByteArray->array, exactMidByteArray->size, + exactLeadNumArray->array, + resiBitArray->array, resiBitArray->size, + resiBitsLength, + realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0); + + //free memory + free_DIA(exactLeadNumArray); + free_DIA(resiBitArray); + free(type); + free(vce); + free(lce); + free(exactMidByteArray); //exactMidByteArray->array has been released in 
free_TightDataPointStorageF(tdps); + + return tdps; +} + +TightDataPointStorageD* SZ_compress_double_3D_MDQ_subblock(double *oriData, double realPrecision, double valueRangeSize, double medianValue_d, +size_t r1, size_t r2, size_t r3, size_t s1, size_t s2, size_t s3, size_t e1, size_t e2, size_t e3) +{ + unsigned int quantization_intervals; + if(exe_params->optQuantMode==1) + { + quantization_intervals = optimize_intervals_double_3D_subblock(oriData, realPrecision, r1, r2, r3, s1, s2, s3, e1, e2, e3); + updateQuantizationInfo(quantization_intervals); + } + else + quantization_intervals = exe_params->intvCapacity; + int intvRadius = quantization_intervals/2; + + size_t i,j,k; + int reqLength; + double pred1D, pred2D, pred3D; + double diff = 0.0; + double itvNum = 0; + double *P0, *P1; + + size_t R1 = e1 - s1 + 1; + size_t R2 = e2 - s2 + 1; + size_t R3 = e3 - s3 + 1; + size_t dataLength = R1*R2*R3; + + size_t r23 = r2*r3; + size_t R23 = R2*R3; + + P0 = (double*)malloc(R23*sizeof(double)); + P1 = (double*)malloc(R23*sizeof(double)); + + double medianValue = medianValue_d; + short radExpo = getExponent_double(valueRangeSize/2); + computeReqLength_double(realPrecision, radExpo, &reqLength, &medianValue); + + int* type = (int*) malloc(dataLength*sizeof(int)); + + double* spaceFillingValue = oriData; // + + DynamicIntArray *exactLeadNumArray; + new_DIA(&exactLeadNumArray, DynArrayInitLen); + + DynamicByteArray *exactMidByteArray; + new_DBA(&exactMidByteArray, DynArrayInitLen); + + DynamicIntArray *resiBitArray; + new_DIA(&resiBitArray, DynArrayInitLen); + + unsigned char preDataBytes[8]; + longToBytes_bigEndian(preDataBytes, 0); + + int reqBytesLength = reqLength/8; + int resiBitsLength = reqLength%8; + + DoubleValueCompressElement *vce = (DoubleValueCompressElement*)malloc(sizeof(DoubleValueCompressElement)); + LossyCompressionElement *lce = (LossyCompressionElement*)malloc(sizeof(LossyCompressionElement)); + + + /////////////////////////// Process layer-s1 
/////////////////////////// + /* Process Row-s2 data s3*/ + size_t gIndex; //global index + size_t lIndex; //local index + size_t index2D; //local 2D index + + gIndex = s1*r23+s2*r3+s3; + lIndex = 0; + index2D = 0; + + type[lIndex] = 0; + compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[index2D] = vce->data; + + /* Process Row-s2 data s3+1*/ + gIndex = s1*r23+s2*r3+s3+1; + lIndex = 1; + index2D = 1; + + pred1D = P1[index2D-1]; + diff = spaceFillingValue[gIndex] - pred1D; + + itvNum = fabs(diff)/realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + type[lIndex] = (int) (itvNum/2) + intvRadius; + P1[index2D] = pred1D + 2 * (type[lIndex] - intvRadius) * realPrecision; + } + else + { + type[lIndex] = 0; + compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[index2D] = vce->data; + } + + /* Process Row-s2 data s3+2 --> data e3 */ + for (j = 2; j < R3; j++) + { + gIndex = s1*r23+s2*r3+s3+j; + lIndex = j; + index2D = j; + + pred1D = 2*P1[index2D-1] - P1[index2D-2]; + diff = spaceFillingValue[gIndex] - pred1D; + + itvNum = fabs(diff)/realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + type[lIndex] = (int) (itvNum/2) + intvRadius; + P1[index2D] = pred1D + 2 * (type[lIndex] - intvRadius) * realPrecision; + } + else + { + type[lIndex] = 0; + compressSingleDoubleValue(vce, spaceFillingValue[gIndex], 
realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[index2D] = vce->data; + } + } + + /* Process Row-s2+1 --> Row-e2 */ + for (i = 1; i < R2; i++) + { + /* Process row-s2+i data s3 */ + gIndex = s1*r23+(s2+i)*r3+s3; + lIndex = i*R3; + index2D = i*R3; + + pred1D = P1[index2D-R3]; + diff = spaceFillingValue[gIndex] - pred1D; + + itvNum = fabs(diff)/realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + type[lIndex] = (int) (itvNum/2) + intvRadius; + P1[index2D] = pred1D + 2 * (type[lIndex] - intvRadius) * realPrecision; + } + else + { + type[lIndex] = 0; + compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[index2D] = vce->data; + } + + /* Process row-s2+i data s3+1 --> data e3*/ + for (j = 1; j < R3; j++) + { + gIndex = s1*r23+(s2+i)*r3+s3+j; + lIndex = i*R3+j; + index2D = i*R3+j; + + pred2D = P1[index2D-1] + P1[index2D-R3] - P1[index2D-R3-1]; + diff = spaceFillingValue[gIndex] - pred2D; + + itvNum = fabs(diff)/realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + type[lIndex] = (int) (itvNum/2) + intvRadius; + P1[index2D] = pred2D + 2 * (type[lIndex] - intvRadius) * realPrecision; + } + else + { + type[lIndex] = 0; + compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + 
memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[index2D] = vce->data; + } + } + } + + + /////////////////////////// Process layer-s1+1 --> layer-e1 /////////////////////////// + + for (k = 1; k < R1; k++) + { + /* Process Row-s2 data s3*/ + gIndex = (s1+k)*r23+s2*r3+s3; + lIndex = k*R23; + index2D = 0; + + pred1D = P1[index2D]; + diff = spaceFillingValue[gIndex] - pred1D; + + itvNum = fabs(diff)/realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + type[lIndex] = (int) (itvNum/2) + intvRadius; + P0[index2D] = pred1D + 2 * (type[lIndex] - intvRadius) * realPrecision; + } + else + { + type[lIndex] = 0; + compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P0[index2D] = vce->data; + } + + + /* Process Row-s2 data s3+1 --> data e3 */ + for (j = 1; j < R3; j++) + { + gIndex = (s1+k)*r23+s2*r3+s3+j; + lIndex = k*R23+j; + index2D = j; + + pred2D = P0[index2D-1] + P1[index2D] - P1[index2D-1]; + diff = spaceFillingValue[gIndex] - pred2D; + + itvNum = fabs(diff)/realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + type[lIndex] = (int) (itvNum/2) + intvRadius; + P0[index2D] = pred2D + 2 * (type[lIndex] - intvRadius) * realPrecision; + } + else + { + type[lIndex] = 0; + compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P0[index2D] = vce->data; 
+ } + } + + /* Process Row-s2+1 --> Row-e2 */ + for (i = 1; i < R2; i++) + { + /* Process Row-s2+i data s3 */ + gIndex = (s1+k)*r23+(s2+i)*r3+s3; + lIndex = k*R23+i*R3; + index2D = i*R3; + + pred2D = P0[index2D-R3] + P1[index2D] - P1[index2D-R3]; + diff = spaceFillingValue[gIndex] - pred2D; + + itvNum = fabs(diff)/realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + type[lIndex] = (int) (itvNum/2) + intvRadius; + P0[index2D] = pred2D + 2 * (type[lIndex] - intvRadius) * realPrecision; + } + else + { + type[lIndex] = 0; + compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P0[index2D] = vce->data; + } + + /* Process Row-s2+i data s3+1 --> data e3 */ + for (j = 1; j < R3; j++) + { + gIndex = (s1+k)*r23+(s2+i)*r3+s3+j; + lIndex = k*R23+i*R3+j; + index2D = i*R3+j; + + pred3D = P0[index2D-1] + P0[index2D-R3]+ P1[index2D] - P0[index2D-R3-1] - P1[index2D-R3] - P1[index2D-1] + P1[index2D-R3-1]; + diff = spaceFillingValue[gIndex] - pred3D; + + itvNum = fabs(diff)/realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + type[lIndex] = (int) (itvNum/2) + intvRadius; + P0[index2D] = pred3D + 2 * (type[lIndex] - intvRadius) * realPrecision; + } + else + { + type[lIndex] = 0; + compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P0[index2D] = vce->data; + } + } + } + + double *Pt; + Pt = P1; + P1 = P0; + P0 = Pt; + } + 
+ free(P0); + free(P1); + size_t exactDataNum = exactLeadNumArray->size; + + TightDataPointStorageD* tdps; + + new_TightDataPointStorageD(&tdps, dataLength, exactDataNum, + type, exactMidByteArray->array, exactMidByteArray->size, + exactLeadNumArray->array, + resiBitArray->array, resiBitArray->size, + resiBitsLength, + realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0); + + //free memory + free_DIA(exactLeadNumArray); + free_DIA(resiBitArray); + free(type); + free(vce); + free(lce); + free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps); + + return tdps; +} + +TightDataPointStorageD* SZ_compress_double_4D_MDQ_subblock(double *oriData, double realPrecision, double valueRangeSize, double medianValue_d, +size_t r1, size_t r2, size_t r3, size_t r4, size_t s1, size_t s2, size_t s3, size_t s4, size_t e1, size_t e2, size_t e3, size_t e4) +{ + unsigned int quantization_intervals; + if(exe_params->optQuantMode==1) + { + quantization_intervals = optimize_intervals_double_4D_subblock(oriData, realPrecision, r1, r2, r3, r4, s1, s2, s3, s4, e1, e2, e3, e4); + updateQuantizationInfo(quantization_intervals); + } + else + quantization_intervals = exe_params->intvCapacity; + int intvRadius = quantization_intervals/2; + + size_t i,j,k; + int reqLength; + double pred1D, pred2D, pred3D; + double diff = 0.0; + double itvNum = 0; + double *P0, *P1; + + size_t R1 = e1 - s1 + 1; + size_t R2 = e2 - s2 + 1; + size_t R3 = e3 - s3 + 1; + size_t R4 = e4 - s4 + 1; + + size_t dataLength = R1*R2*R3*R4; + + size_t r34 = r3*r4; + size_t r234 = r2*r3*r4; + size_t R34 = R3*R4; + size_t R234 = R2*R3*R4; + + P0 = (double*)malloc(R34*sizeof(double)); + P1 = (double*)malloc(R34*sizeof(double)); + + double medianValue = medianValue_d; + short radExpo = getExponent_double(valueRangeSize/2); + computeReqLength_double(realPrecision, radExpo, &reqLength, &medianValue); + + int* type = (int*) malloc(dataLength*sizeof(int)); + + 
double* spaceFillingValue = oriData; // + + DynamicIntArray *exactLeadNumArray; + new_DIA(&exactLeadNumArray, DynArrayInitLen); + + DynamicByteArray *exactMidByteArray; + new_DBA(&exactMidByteArray, DynArrayInitLen); + + DynamicIntArray *resiBitArray; + new_DIA(&resiBitArray, DynArrayInitLen); + + unsigned char preDataBytes[8]; + longToBytes_bigEndian(preDataBytes, 0); + + int reqBytesLength = reqLength/8; + int resiBitsLength = reqLength%8; + + DoubleValueCompressElement *vce = (DoubleValueCompressElement*)malloc(sizeof(DoubleValueCompressElement)); + LossyCompressionElement *lce = (LossyCompressionElement*)malloc(sizeof(LossyCompressionElement)); + + size_t l; + for (l = 0; l < R1; l++) + { + + /////////////////////////// Process layer-s2 /////////////////////////// + /* Process Row-s3 data s4*/ + size_t gIndex; //global index + size_t lIndex; //local index + size_t index2D; //local 2D index + + gIndex = (s1+l)*r234+s2*r34+s3*r4+s4; + lIndex = l*R234; + index2D = 0; + + type[lIndex] = 0; + compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[index2D] = vce->data; + + /* Process Row-s3 data s4+1*/ + gIndex = (s1+l)*r234+s2*r34+s3*r4+s4+1; + lIndex = l*R234+1; + index2D = 1; + + pred1D = P1[index2D-1]; + diff = spaceFillingValue[gIndex] - pred1D; + + itvNum = fabs(diff)/realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + type[lIndex] = (int) (itvNum/2) + intvRadius; + P1[index2D] = pred1D + 2 * (type[lIndex] - intvRadius) * realPrecision; + } + else + { + type[lIndex] = 0; + compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + 
updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[index2D] = vce->data; + } + + /* Process Row-s3 data s4+2 --> data e4 */ + for (j = 2; j < R4; j++) + { + gIndex = (s1+l)*r234+s2*r34+s3*r4+s4+j; + lIndex = l*R234+j; + index2D = j; + + pred1D = 2*P1[index2D-1] - P1[index2D-2]; + diff = spaceFillingValue[gIndex] - pred1D; + + itvNum = fabs(diff)/realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + type[lIndex] = (int) (itvNum/2) + intvRadius; + P1[index2D] = pred1D + 2 * (type[lIndex] - intvRadius) * realPrecision; + } + else + { + type[lIndex] = 0; + compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[index2D] = vce->data; + } + } + + /* Process Row-s3+1 --> Row-e3 */ + for (i = 1; i < R3; i++) + { + /* Process row-s2+i data s3 */ + gIndex = (s1+l)*r234+s2*r34+(s3+i)*r4+s4; + lIndex = l*R234+i*R4; + index2D = i*R4; + + pred1D = P1[index2D-R4]; + diff = spaceFillingValue[gIndex] - pred1D; + + itvNum = fabs(diff)/realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + type[lIndex] = (int) (itvNum/2) + intvRadius; + P1[index2D] = pred1D + 2 * (type[lIndex] - intvRadius) * realPrecision; + } + else + { + type[lIndex] = 0; + compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, 
exactLeadNumArray, resiBitArray, lce); + P1[index2D] = vce->data; + } + + /* Process row-s3+i data s4+1 --> data e4*/ + for (j = 1; j < R4; j++) + { + gIndex = (s1+l)*r234+s2*r34+(s3+i)*r4+s4+j; + lIndex = l*R234+i*R4+j; + index2D = i*R4+j; + + pred2D = P1[index2D-1] + P1[index2D-R4] - P1[index2D-R4-1]; + diff = spaceFillingValue[gIndex] - pred2D; + + itvNum = fabs(diff)/realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + type[lIndex] = (int) (itvNum/2) + intvRadius; + P1[index2D] = pred2D + 2 * (type[lIndex] - intvRadius) * realPrecision; + } + else + { + type[lIndex] = 0; + compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[index2D] = vce->data; + } + } + } + + + /////////////////////////// Process layer-s2+1 --> layer-e2 /////////////////////////// + + for (k = 1; k < R2; k++) + { + /* Process Row-s3 data s4*/ + gIndex = (s1+l)*r234+(s2+k)*r34+s3*r4+s4; + lIndex = l*R234+k*R34; + index2D = 0; + + pred1D = P1[index2D]; + diff = spaceFillingValue[gIndex] - pred1D; + + itvNum = fabs(diff)/realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + type[lIndex] = (int) (itvNum/2) + intvRadius; + P0[index2D] = pred1D + 2 * (type[lIndex] - intvRadius) * realPrecision; + } + else + { + type[lIndex] = 0; + compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P0[index2D] = vce->data; + } + + + /* Process 
Row-s3 data s4+1 --> data e4 */ + for (j = 1; j < R4; j++) + { + gIndex = (s1+l)*r234+(s2+k)*r34+s3*r4+s4+j; + lIndex = l*R234+k*R34+j; + index2D = j; + + pred2D = P0[index2D-1] + P1[index2D] - P1[index2D-1]; + diff = spaceFillingValue[gIndex] - pred2D; + + itvNum = fabs(diff)/realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + type[lIndex] = (int) (itvNum/2) + intvRadius; + P0[index2D] = pred2D + 2 * (type[lIndex] - intvRadius) * realPrecision; + } + else + { + type[lIndex] = 0; + compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P0[index2D] = vce->data; + } + } + + /* Process Row-s3+1 --> Row-e3 */ + for (i = 1; i < R3; i++) + { + /* Process Row-s3+i data s4 */ + gIndex = (s1+l)*r234+(s2+k)*r34+(s3+i)*r4+s4; + lIndex = l*R234+k*R34+i*R4; + index2D = i*R4; + + pred2D = P0[index2D-R4] + P1[index2D] - P1[index2D-R4]; + diff = spaceFillingValue[gIndex] - pred2D; + + itvNum = fabs(diff)/realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + type[lIndex] = (int) (itvNum/2) + intvRadius; + P0[index2D] = pred2D + 2 * (type[lIndex] - intvRadius) * realPrecision; + } + else + { + type[lIndex] = 0; + compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P0[index2D] = vce->data; + } + + /* Process Row-s3+i data s4+1 --> data e4 */ + for (j = 1; j < R4; j++) + { + gIndex = (s1+l)*r234+(s2+k)*r34+(s3+i)*r4+s4+j; 
+ lIndex = l*R234+k*R34+i*R4+j; + index2D = i*R4+j; + +// printf ("global index = %d, local index = %d\n", gIndex, lIndex); + + pred3D = P0[index2D-1] + P0[index2D-R4]+ P1[index2D] - P0[index2D-R4-1] - P1[index2D-R4] - P1[index2D-1] + P1[index2D-R4-1]; + diff = spaceFillingValue[gIndex] - pred3D; + + itvNum = fabs(diff)/realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + type[lIndex] = (int) (itvNum/2) + intvRadius; + P0[index2D] = pred3D + 2 * (type[lIndex] - intvRadius) * realPrecision; + } + else + { + type[lIndex] = 0; + compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P0[index2D] = vce->data; + } + } + } + + double *Pt; + Pt = P1; + P1 = P0; + P0 = Pt; + } + } + + free(P0); + free(P1); + size_t exactDataNum = exactLeadNumArray->size; + + TightDataPointStorageD* tdps; + + new_TightDataPointStorageD(&tdps, dataLength, exactDataNum, + type, exactMidByteArray->array, exactMidByteArray->size, + exactLeadNumArray->array, + resiBitArray->array, resiBitArray->size, + resiBitsLength, + realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0); + + //free memory + free_DIA(exactLeadNumArray); + free_DIA(resiBitArray); + free(type); + free(vce); + free(lce); + free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps); + + return tdps; +} + +unsigned int optimize_intervals_double_1D_opt_MSST19(double *oriData, size_t dataLength, double realPrecision) +{ + size_t i = 0, radiusIndex; + double pred_value = 0; + double pred_err; + size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t)); + memset(intervals, 0, 
confparams_cpr->maxRangeRadius*sizeof(size_t)); + size_t totalSampleSize = 0;//dataLength/confparams_cpr->sampleDistance; + + double * data_pos = oriData + 2; + double divider = log2(1+realPrecision)*2; + int tempIndex = 0; + while(data_pos - oriData < dataLength){ + if(*data_pos == 0){ + data_pos += confparams_cpr->sampleDistance; + continue; + } + tempIndex++; + totalSampleSize++; + pred_value = data_pos[-1]; + pred_err = fabs((double)*data_pos / pred_value); + radiusIndex = (unsigned long)fabs(log2(pred_err)/divider+0.5); + if(radiusIndex>=confparams_cpr->maxRangeRadius) + radiusIndex = confparams_cpr->maxRangeRadius - 1; + intervals[radiusIndex]++; + + data_pos += confparams_cpr->sampleDistance; + } + //compute the appropriate number + size_t targetCount = totalSampleSize*confparams_cpr->predThreshold; + size_t sum = 0; + for(i=0;imaxRangeRadius;i++) + { + sum += intervals[i]; + if(sum>targetCount) + break; + } + if(i>=confparams_cpr->maxRangeRadius) + i = confparams_cpr->maxRangeRadius-1; + + unsigned int accIntervals = 2*(i+1); + unsigned int powerOf2 = roundUpToPowerOf2(accIntervals); + + if(powerOf2<64) + powerOf2 = 64; + + free(intervals); + return powerOf2; +} + +unsigned int optimize_intervals_double_2D_opt_MSST19(double *oriData, size_t r1, size_t r2, double realPrecision) +{ + size_t i; + size_t radiusIndex; + double pred_value = 0, pred_err; + size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t)); + memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t)); + size_t totalSampleSize = 0; + + size_t offset_count = confparams_cpr->sampleDistance - 1; // count r2 offset + size_t offset_count_2; + double * data_pos = oriData + r2 + offset_count; + double divider = log2(1+realPrecision)*2; + size_t n1_count = 1; // count i sum + size_t len = r1 * r2; + while(data_pos - oriData < len){ + if(*data_pos == 0){ + data_pos += confparams_cpr->sampleDistance; + continue; + } + totalSampleSize++; + pred_value = data_pos[-1] + 
data_pos[-r2] - data_pos[-r2-1]; + pred_err = fabs(pred_value / *data_pos); + radiusIndex = (unsigned long)fabs(log2(pred_err)/divider+0.5); + if(radiusIndex>=confparams_cpr->maxRangeRadius) + radiusIndex = confparams_cpr->maxRangeRadius - 1; + intervals[radiusIndex]++; + + offset_count += confparams_cpr->sampleDistance; + if(offset_count >= r2){ + n1_count ++; + offset_count_2 = n1_count % confparams_cpr->sampleDistance; + data_pos += (r2 + confparams_cpr->sampleDistance - offset_count) + (confparams_cpr->sampleDistance - offset_count_2); + offset_count = (confparams_cpr->sampleDistance - offset_count_2); + if(offset_count == 0) offset_count ++; + } + else data_pos += confparams_cpr->sampleDistance; + } + + //compute the appropriate number + size_t targetCount = totalSampleSize*confparams_cpr->predThreshold; + size_t sum = 0; + for(i=0;imaxRangeRadius;i++) + { + sum += intervals[i]; + if(sum>targetCount) + break; + } + if(i>=confparams_cpr->maxRangeRadius) + i = confparams_cpr->maxRangeRadius-1; + unsigned int accIntervals = 2*(i+1); + unsigned int powerOf2 = roundUpToPowerOf2(accIntervals); + + if(powerOf2<64) + powerOf2 = 64; + + free(intervals); + return powerOf2; +} + +unsigned int optimize_intervals_double_3D_opt_MSST19(double *oriData, size_t r1, size_t r2, size_t r3, double realPrecision) +{ + size_t i; + size_t radiusIndex; + size_t r23=r2*r3; + double pred_value = 0, pred_err; + size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t)); + memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t)); + size_t totalSampleSize = 0; + + size_t offset_count = confparams_cpr->sampleDistance - 2; // count r3 offset + size_t offset_count_2; + double * data_pos = oriData + r23 + r3 + offset_count; + double divider = log2(1+realPrecision)*2; + size_t n1_count = 1, n2_count = 1; // count i,j sum + size_t len = r1 * r2 * r3; + while(data_pos - oriData < len){ + if(*data_pos == 0){ + data_pos += confparams_cpr->sampleDistance; + 
continue; + } + totalSampleSize++; + pred_value = data_pos[-1] + data_pos[-r3] + data_pos[-r23] - data_pos[-1-r23] - data_pos[-r3-1] - data_pos[-r3-r23] + data_pos[-r3-r23-1]; + pred_err = fabs(*data_pos / pred_value); + radiusIndex = fabs(log2(pred_err)/divider+0.5); + if(radiusIndex>=confparams_cpr->maxRangeRadius) + { + radiusIndex = confparams_cpr->maxRangeRadius - 1; + } + intervals[radiusIndex]++; + offset_count += confparams_cpr->sampleDistance; + if(offset_count >= r3){ + n2_count ++; + if(n2_count == r2){ + n1_count ++; + n2_count = 1; + data_pos += r3; + } + offset_count_2 = (n1_count + n2_count) % confparams_cpr->sampleDistance; + data_pos += (r3 + confparams_cpr->sampleDistance - offset_count) + (confparams_cpr->sampleDistance - offset_count_2); + offset_count = (confparams_cpr->sampleDistance - offset_count_2); + if(offset_count == 0) offset_count ++; + } + else data_pos += confparams_cpr->sampleDistance; + } + //compute the appropriate number + size_t targetCount = totalSampleSize*confparams_cpr->predThreshold; + size_t sum = 0; + for(i=0;imaxRangeRadius;i++) + { + sum += intervals[i]; + if(sum>targetCount) + break; + } + if(i>=confparams_cpr->maxRangeRadius) + i = confparams_cpr->maxRangeRadius-1; + unsigned int accIntervals = 2*(i+1); + unsigned int powerOf2 = roundUpToPowerOf2(accIntervals); + + if(powerOf2<64) + powerOf2 = 64; + free(intervals); + return powerOf2; +} +unsigned int optimize_intervals_double_3D_opt(double *oriData, size_t r1, size_t r2, size_t r3, double realPrecision){ + size_t i; + size_t radiusIndex; + size_t r23=r2*r3; + double pred_value = 0, pred_err; + size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t)); + memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t)); + size_t totalSampleSize = 0; + + size_t offset_count = confparams_cpr->sampleDistance - 2; // count r3 offset + size_t offset_count_2; + double * data_pos = oriData + r23 + r3 + offset_count; + size_t n1_count = 1, n2_count 
= 1; // count i,j sum + size_t len = r1 * r2 * r3; + while(data_pos - oriData < len){ + totalSampleSize++; + pred_value = data_pos[-1] + data_pos[-r3] + data_pos[-r23] - data_pos[-1-r23] - data_pos[-r3-1] - data_pos[-r3-r23] + data_pos[-r3-r23-1]; + pred_err = fabs(pred_value - *data_pos); + radiusIndex = (pred_err/realPrecision+1)/2; + if(radiusIndex>=confparams_cpr->maxRangeRadius) + { + radiusIndex = confparams_cpr->maxRangeRadius - 1; + } + intervals[radiusIndex]++; + offset_count += confparams_cpr->sampleDistance; + if(offset_count >= r3){ + n2_count ++; + if(n2_count == r2){ + n1_count ++; + n2_count = 1; + data_pos += r3; + } + offset_count_2 = (n1_count + n2_count) % confparams_cpr->sampleDistance; + data_pos += (r3 + confparams_cpr->sampleDistance - offset_count) + (confparams_cpr->sampleDistance - offset_count_2); + offset_count = (confparams_cpr->sampleDistance - offset_count_2); + if(offset_count == 0) offset_count ++; + } + else data_pos += confparams_cpr->sampleDistance; + } + //compute the appropriate number + size_t targetCount = totalSampleSize*confparams_cpr->predThreshold; + size_t sum = 0; + for(i=0;imaxRangeRadius;i++) + { + sum += intervals[i]; + if(sum>targetCount) + break; + } + if(i>=confparams_cpr->maxRangeRadius) + i = confparams_cpr->maxRangeRadius-1; + unsigned int accIntervals = 2*(i+1); + unsigned int powerOf2 = roundUpToPowerOf2(accIntervals); + + if(powerOf2<32) + powerOf2 = 32; + free(intervals); + return powerOf2; +} + +size_t SZ_compress_double_3D_MDQ_RA_block(double * block_ori_data, double * mean, size_t dim_0, size_t dim_1, size_t dim_2, size_t block_dim_0, size_t block_dim_1, size_t block_dim_2, double realPrecision, double * P0, double * P1, int * type, double * unpredictable_data) +{ + double recip_realPrecision = 1/realPrecision; + size_t dim0_offset = dim_1 * dim_2; + size_t dim1_offset = dim_2; + + mean[0] = block_ori_data[0]; + + size_t unpredictable_count = 0; + size_t r1, r2, r3; + r1 = block_dim_0; + r2 = 
block_dim_1; + r3 = block_dim_2; + + double * cur_data_pos = block_ori_data; + double curData; + double pred1D, pred2D, pred3D; + double itvNum; + double diff; + size_t i, j, k; + size_t r23 = r2*r3; + // Process Row-0 data 0 + pred1D = mean[0]; + curData = *cur_data_pos; + diff = curData - pred1D; + itvNum = fabs(diff)*recip_realPrecision + 1; + if (itvNum < exe_params->intvCapacity){ + if (diff < 0) itvNum = -itvNum; + type[0] = (int) (itvNum/2) + exe_params->intvRadius; + P1[0] = pred1D + 2 * (type[0] - exe_params->intvRadius) * realPrecision; + //ganrantee comporession error against the case of machine-epsilon + if(fabs(curData-P1[0])>realPrecision){ + type[0] = 0; + P1[0] = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + else{ + type[0] = 0; + P1[0] = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + + /* Process Row-0 data 1*/ + pred1D = P1[0]; + curData = cur_data_pos[1]; + diff = curData - pred1D; + itvNum = fabs(diff)*recip_realPrecision + 1; + if (itvNum < exe_params->intvCapacity){ + if (diff < 0) itvNum = -itvNum; + type[1] = (int) (itvNum/2) + exe_params->intvRadius; + P1[1] = pred1D + 2 * (type[1] - exe_params->intvRadius) * realPrecision; + //ganrantee comporession error against the case of machine-epsilon + if(fabs(curData-P1[1])>realPrecision){ + type[1] = 0; + P1[1] = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + else{ + type[1] = 0; + P1[1] = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + /* Process Row-0 data 2 --> data r3-1 */ + for (j = 2; j < r3; j++){ + pred1D = 2*P1[j-1] - P1[j-2]; + curData = cur_data_pos[j]; + diff = curData - pred1D; + itvNum = fabs(diff)*recip_realPrecision + 1; + if (itvNum < exe_params->intvCapacity){ + if (diff < 0) itvNum = -itvNum; + type[j] = (int) (itvNum/2) + exe_params->intvRadius; + P1[j] = pred1D + 2 * (type[j] - exe_params->intvRadius) * realPrecision; + //ganrantee comporession error against the case of 
machine-epsilon + if(fabs(curData-P1[j])>realPrecision){ + type[j] = 0; + P1[j] = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + else{ + type[j] = 0; + P1[j] = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + cur_data_pos += dim1_offset; + + /* Process Row-1 --> Row-r2-1 */ + size_t index; + for (i = 1; i < r2; i++) + { + /* Process row-i data 0 */ + index = i*r3; + pred1D = P1[index-r3]; + curData = *cur_data_pos; + diff = curData - pred1D; + + itvNum = fabs(diff)*recip_realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P1[index] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + + //ganrantee comporession error against the case of machine-epsilon + if(fabs(curData-P1[index])>realPrecision) + { + type[index] = 0; + P1[index] = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + else + { + type[index] = 0; + P1[index] = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + + /* Process row-i data 1 --> data r3-1*/ + for (j = 1; j < r3; j++) + { + index = i*r3+j; + pred2D = P1[index-1] + P1[index-r3] - P1[index-r3-1]; + + curData = cur_data_pos[j]; + diff = curData - pred2D; + + itvNum = fabs(diff)*recip_realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P1[index] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + + //ganrantee comporession error against the case of machine-epsilon + if(fabs(curData-P1[index])>realPrecision) + { + type[index] = 0; + P1[index] = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + else + { + type[index] = 0; + P1[index] = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + cur_data_pos += dim1_offset; + } + cur_data_pos += dim0_offset - 
r2 * dim1_offset; + + /////////////////////////// Process layer-1 --> layer-r1-1 /////////////////////////// + + for (k = 1; k < r1; k++) + { + /* Process Row-0 data 0*/ + index = k*r23; + pred1D = P1[0]; + curData = *cur_data_pos; + diff = curData - pred1D; + itvNum = fabs(diff)*recip_realPrecision + 1; + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P0[0] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + //ganrantee comporession error against the case of machine-epsilon + if(fabs(curData-P0[0])>realPrecision) + { + type[index] = 0; + P0[0] = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + else + { + type[index] = 0; + P0[0] = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + /* Process Row-0 data 1 --> data r3-1 */ + for (j = 1; j < r3; j++) + { + //index = k*r2*r3+j; + index ++; + pred2D = P0[j-1] + P1[j] - P1[j-1]; + curData = cur_data_pos[j]; + diff = curData - pred2D; + itvNum = fabs(diff)*recip_realPrecision + 1; + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P0[j] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + //ganrantee comporession error against the case of machine-epsilon + if(fabs(curData-P0[j])>realPrecision) + { + type[index] = 0; + P0[j] = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + else + { + type[index] = 0; + P0[j] = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + + cur_data_pos += dim1_offset; + /* Process Row-1 --> Row-r2-1 */ + size_t index2D; + for (i = 1; i < r2; i++) + { + /* Process Row-i data 0 */ + index = k*r23 + i*r3; + index2D = i*r3; + pred2D = P0[index2D-r3] + P1[index2D] - P1[index2D-r3]; + curData = *cur_data_pos; + diff = curData - pred2D; + + itvNum = fabs(diff)*recip_realPrecision + 1; + + if 
(itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P0[index2D] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + //ganrantee comporession error against the case of machine-epsilon + if(fabs(curData-P0[index2D])>realPrecision) + { + type[index] = 0; + P0[index2D] = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + else + { + type[index] = 0; + P0[index2D] = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + + /* Process Row-i data 1 --> data r3-1 */ + for (j = 1; j < r3; j++) + { + //index = k*r2*r3 + i*r3 + j; + index ++; + index2D = i*r3 + j; + pred3D = P0[index2D-1] + P0[index2D-r3]+ P1[index2D] - P0[index2D-r3-1] - P1[index2D-r3] - P1[index2D-1] + P1[index2D-r3-1]; + curData = cur_data_pos[j]; + diff = curData - pred3D; + + itvNum = fabs(diff)*recip_realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P0[index2D] = pred3D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + + //ganrantee comporession error against the case of machine-epsilon + if(fabs(curData-P0[index2D])>realPrecision) + { + type[index] = 0; + P0[index2D] = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + else + { + type[index] = 0; + P0[index2D] = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + cur_data_pos += dim1_offset; + } + cur_data_pos += dim0_offset - r2 * dim1_offset; + double *Pt; + Pt = P1; + P1 = P0; + P0 = Pt; + } + + return unpredictable_count; +} + +unsigned int optimize_intervals_double_2D_opt(double *oriData, size_t r1, size_t r2, double realPrecision) +{ + size_t i; + size_t radiusIndex; + double pred_value = 0, pred_err; + size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t)); + memset(intervals, 0, 
confparams_cpr->maxRangeRadius*sizeof(size_t)); + size_t totalSampleSize = 0; + + size_t offset_count = confparams_cpr->sampleDistance - 1; // count r2 offset + size_t offset_count_2; + double * data_pos = oriData + r2 + offset_count; + size_t n1_count = 1; // count i sum + size_t len = r1 * r2; + while(data_pos - oriData < len){ + totalSampleSize++; + pred_value = data_pos[-1] + data_pos[-r2] - data_pos[-r2-1]; + pred_err = fabs(pred_value - *data_pos); + radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2); + if(radiusIndex>=confparams_cpr->maxRangeRadius) + radiusIndex = confparams_cpr->maxRangeRadius - 1; + intervals[radiusIndex]++; + + offset_count += confparams_cpr->sampleDistance; + if(offset_count >= r2){ + n1_count ++; + offset_count_2 = n1_count % confparams_cpr->sampleDistance; + data_pos += (r2 + confparams_cpr->sampleDistance - offset_count) + (confparams_cpr->sampleDistance - offset_count_2); + offset_count = (confparams_cpr->sampleDistance - offset_count_2); + if(offset_count == 0) offset_count ++; + } + else data_pos += confparams_cpr->sampleDistance; + } + + //compute the appropriate number + size_t targetCount = totalSampleSize*confparams_cpr->predThreshold; + size_t sum = 0; + for(i=0;imaxRangeRadius;i++) + { + sum += intervals[i]; + if(sum>targetCount) + break; + } + if(i>=confparams_cpr->maxRangeRadius) + i = confparams_cpr->maxRangeRadius-1; + unsigned int accIntervals = 2*(i+1); + unsigned int powerOf2 = roundUpToPowerOf2(accIntervals); + + if(powerOf2<32) + powerOf2 = 32; + + free(intervals); + return powerOf2; +} + +unsigned int optimize_intervals_double_1D_opt(double *oriData, size_t dataLength, double realPrecision) +{ + size_t i = 0, radiusIndex; + double pred_value = 0, pred_err; + size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t)); + memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t)); + size_t totalSampleSize = 0; + + double * data_pos = oriData + 2; + while(data_pos - oriData < 
dataLength){ + totalSampleSize++; + pred_value = data_pos[-1]; + pred_err = fabs(pred_value - *data_pos); + radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2); + if(radiusIndex>=confparams_cpr->maxRangeRadius) + radiusIndex = confparams_cpr->maxRangeRadius - 1; + intervals[radiusIndex]++; + + data_pos += confparams_cpr->sampleDistance; + } + //compute the appropriate number + size_t targetCount = totalSampleSize*confparams_cpr->predThreshold; + size_t sum = 0; + for(i=0;imaxRangeRadius;i++) + { + sum += intervals[i]; + if(sum>targetCount) + break; + } + if(i>=confparams_cpr->maxRangeRadius) + i = confparams_cpr->maxRangeRadius-1; + + unsigned int accIntervals = 2*(i+1); + unsigned int powerOf2 = roundUpToPowerOf2(accIntervals); + + if(powerOf2<32) + powerOf2 = 32; + + free(intervals); + return powerOf2; +} + +/*The above code is for sz 1.4.13; the following code is for sz 2.0*/ +unsigned int optimize_intervals_double_2D_with_freq_and_dense_pos(double *oriData, size_t r1, size_t r2, double realPrecision, double * dense_pos, double * max_freq, double * mean_freq) +{ + double mean = 0.0; + size_t len = r1 * r2; + size_t mean_distance = (int) (sqrt(len)); + + double * data_pos = oriData; + size_t mean_count = 0; + while(data_pos - oriData < len){ + mean += *data_pos; + mean_count ++; + data_pos += mean_distance; + } + if(mean_count > 0) mean /= mean_count; + size_t range = 8192; + size_t radius = 4096; + size_t * freq_intervals = (size_t *) malloc(range*sizeof(size_t)); + memset(freq_intervals, 0, range*sizeof(size_t)); + + unsigned int maxRangeRadius = confparams_cpr->maxRangeRadius; + int sampleDistance = confparams_cpr->sampleDistance; + double predThreshold = confparams_cpr->predThreshold; + + size_t i; + size_t radiusIndex; + double pred_value = 0, pred_err; + size_t *intervals = (size_t*)malloc(maxRangeRadius*sizeof(size_t)); + memset(intervals, 0, maxRangeRadius*sizeof(size_t)); + + double mean_diff; + ptrdiff_t freq_index; + size_t freq_count = 0; + 
size_t n1_count = 1; + size_t offset_count = sampleDistance - 1; + size_t offset_count_2 = 0; + size_t sample_count = 0; + data_pos = oriData + r2 + offset_count; + while(data_pos - oriData < len){ + pred_value = data_pos[-1] + data_pos[-r2] - data_pos[-r2-1]; + pred_err = fabs(pred_value - *data_pos); + if(pred_err < realPrecision) freq_count ++; + radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2); + if(radiusIndex>=maxRangeRadius) + radiusIndex = maxRangeRadius - 1; + intervals[radiusIndex]++; + + mean_diff = *data_pos - mean; + if(mean_diff > 0) freq_index = (ptrdiff_t)(mean_diff/realPrecision) + radius; + else freq_index = (ptrdiff_t)(mean_diff/realPrecision) - 1 + radius; + if(freq_index <= 0){ + freq_intervals[0] ++; + } + else if(freq_index >= range){ + freq_intervals[range - 1] ++; + } + else{ + freq_intervals[freq_index] ++; + } + offset_count += sampleDistance; + if(offset_count >= r2){ + n1_count ++; + offset_count_2 = n1_count % sampleDistance; + data_pos += (r2 + sampleDistance - offset_count) + (sampleDistance - offset_count_2); + offset_count = (sampleDistance - offset_count_2); + if(offset_count == 0) offset_count ++; + } + else data_pos += sampleDistance; + sample_count ++; + } + *max_freq = freq_count * 1.0/ sample_count; + + //compute the appropriate number + size_t targetCount = sample_count*predThreshold; + size_t sum = 0; + for(i=0;itargetCount) + break; + } + if(i>=maxRangeRadius) + i = maxRangeRadius-1; + unsigned int accIntervals = 2*(i+1); + unsigned int powerOf2 = roundUpToPowerOf2(accIntervals); + + if(powerOf2<32) + powerOf2 = 32; + + // collect frequency + size_t max_sum = 0; + size_t max_index = 0; + size_t tmp_sum; + size_t * freq_pos = freq_intervals + 1; + for(size_t i=1; i max_sum){ + max_sum = tmp_sum; + max_index = i; + } + freq_pos ++; + } + *dense_pos = mean + realPrecision * (ptrdiff_t)(max_index + 1 - radius); + *mean_freq = max_sum * 1.0 / sample_count; + + free(freq_intervals); + free(intervals); + return 
powerOf2; +} + +#define MIN(a, b) aoptQuantMode==1) + { + quantization_intervals = optimize_intervals_double_2D_with_freq_and_dense_pos(oriData, r1, r2, realPrecision, &dense_pos, &sz_sample_correct_freq, &mean_flush_freq); + if(mean_flush_freq > 0.5 || mean_flush_freq > sz_sample_correct_freq) use_mean = 1; + updateQuantizationInfo(quantization_intervals); + } + else{ + quantization_intervals = exe_params->intvCapacity; + } + + // calculate block dims + size_t num_x, num_y; + size_t block_size = 16; + + SZ_COMPUTE_2D_NUMBER_OF_BLOCKS(r1, num_x, block_size); + SZ_COMPUTE_2D_NUMBER_OF_BLOCKS(r2, num_y, block_size); + + size_t split_index_x, split_index_y; + size_t early_blockcount_x, early_blockcount_y; + size_t late_blockcount_x, late_blockcount_y; + SZ_COMPUTE_BLOCKCOUNT(r1, num_x, split_index_x, early_blockcount_x, late_blockcount_x); + SZ_COMPUTE_BLOCKCOUNT(r2, num_y, split_index_y, early_blockcount_y, late_blockcount_y); + + size_t max_num_block_elements = early_blockcount_x * early_blockcount_y; + size_t num_blocks = num_x * num_y; + size_t num_elements = r1 * r2; + + size_t dim0_offset = r2; + + int * result_type = (int *) malloc(num_elements * sizeof(int)); + size_t unpred_data_max_size = max_num_block_elements; + double * result_unpredictable_data = (double *) malloc(unpred_data_max_size * sizeof(double) * num_blocks); + size_t total_unpred = 0; + size_t unpredictable_count; + double * data_pos = oriData; + int * type = result_type; + size_t offset_x, offset_y; + size_t current_blockcount_x, current_blockcount_y; + + double * reg_params = (double *) malloc(num_blocks * 4 * sizeof(double)); + double * reg_params_pos = reg_params; + // move regression part out + size_t params_offset_b = num_blocks; + size_t params_offset_c = 2*num_blocks; + for(size_t i=0; i 0) mean = sum / mean_count; + } + + // use two prediction buffers for higher performance + double * unpredictable_data = result_unpredictable_data; + unsigned char * indicator = (unsigned char *) 
malloc(num_blocks * sizeof(unsigned char)); + memset(indicator, 0, num_blocks * sizeof(unsigned char)); + size_t reg_count = 0; + size_t strip_dim_0 = early_blockcount_x + 1; + size_t strip_dim_1 = r2 + 1; + size_t strip_dim0_offset = strip_dim_1; + unsigned char * indicator_pos = indicator; + size_t prediction_buffer_size = strip_dim_0 * strip_dim0_offset * sizeof(double); + double * prediction_buffer_1 = (double *) malloc(prediction_buffer_size); + memset(prediction_buffer_1, 0, prediction_buffer_size); + double * prediction_buffer_2 = (double *) malloc(prediction_buffer_size); + memset(prediction_buffer_2, 0, prediction_buffer_size); + double * cur_pb_buf = prediction_buffer_1; + double * next_pb_buf = prediction_buffer_2; + double * cur_pb_buf_pos; + double * next_pb_buf_pos; + int intvCapacity = quantization_intervals; //exe_params->intvCapacity; + int intvRadius = intvCapacity/2; //exe_params->intvRadius; + int use_reg = 0; + + reg_params_pos = reg_params; + // compress the regression coefficients on the fly + double last_coeffcients[3] = {0.0}; + int coeff_intvCapacity_sz = 65536; + int coeff_intvRadius = coeff_intvCapacity_sz / 2; + int * coeff_type[3]; + int * coeff_result_type = (int *) malloc(num_blocks*3*sizeof(int)); + double * coeff_unpred_data[3]; + double * coeff_unpredictable_data = (double *) malloc(num_blocks*3*sizeof(double)); + double precision[3], recip_precision[3]; + precision[0] = precision_a, precision[1] = precision_b, precision[2] = precision_c; + recip_precision[0] = 1/precision_a, recip_precision[1] = 1/precision_b, recip_precision[2] = 1/precision_c; + for(int i=0; i<3; i++){ + coeff_type[i] = coeff_result_type + i * num_blocks; + coeff_unpred_data[i] = coeff_unpredictable_data + i * num_blocks; + } + int coeff_index = 0; + unsigned int coeff_unpredictable_count[3] = {0}; + double noise = realPrecision * 0.81; + if(use_mean){ + type = result_type; + int intvCapacity_sz = intvCapacity - 2; + for(size_t i=0; iprecision[e]){ + 
coeff_type[e][coeff_index] = 0; + last_coeffcients[e] = cur_coeff; + coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; + } + } + else{ + coeff_type[e][coeff_index] = 0; + last_coeffcients[e] = cur_coeff; + coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; + } + } + coeff_index ++; + } + double curData; + double pred; + double itvNum; + double diff; + size_t index = 0; + size_t block_unpredictable_count = 0; + double * cur_data_pos = data_pos; + for(size_t ii=0; iirealPrecision){ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + index ++; + cur_data_pos ++; + } + /*dealing with the last jj (boundary)*/ + { + size_t jj = current_blockcount_y - 1; + curData = *cur_data_pos; + pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2]; + diff = curData - pred; + itvNum = fabs(diff)*recip_realPrecision + 1; + if (itvNum < intvCapacity){ + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + pred = pred + 2 * (type[index] - intvRadius) * realPrecision; + //ganrantee comporession error against the case of machine-epsilon + if(fabs(curData - pred)>realPrecision){ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + + // assign value to block surfaces + pb_pos[ii * strip_dim0_offset + jj] = pred; + index ++; + cur_data_pos ++; + } + cur_data_pos += dim0_offset - current_blockcount_y; + } + /*dealing with the last ii (boundary)*/ + { + size_t ii = current_blockcount_x - 1; + for(size_t jj=0; jjrealPrecision){ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + pred = curData; + 
unpredictable_data[block_unpredictable_count ++] = curData; + } + // assign value to next prediction buffer + next_pb_pos[jj] = pred; + index ++; + cur_data_pos ++; + } + /*dealing with the last jj (boundary)*/ + { + size_t jj = current_blockcount_y - 1; + curData = *cur_data_pos; + pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2]; + diff = curData - pred; + itvNum = fabs(diff)*recip_realPrecision + 1; + if (itvNum < intvCapacity){ + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + pred = pred + 2 * (type[index] - intvRadius) * realPrecision; + //ganrantee comporession error against the case of machine-epsilon + if(fabs(curData - pred)>realPrecision){ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + + // assign value to block surfaces + pb_pos[ii * strip_dim0_offset + jj] = pred; + // assign value to next prediction buffer + next_pb_pos[jj] = pred; + + index ++; + cur_data_pos ++; + } + } // end ii == -1 + unpredictable_count = block_unpredictable_count; + total_unpred += unpredictable_count; + unpredictable_data += unpredictable_count; + reg_count ++; + }// end use_reg + else{ + // use SZ + // SZ predication + unpredictable_count = 0; + double * cur_pb_pos = pb_pos; + double * cur_data_pos = data_pos; + double curData; + double pred2D; + double itvNum, diff; + size_t index = 0; + for(size_t ii=0; iirealPrecision){ + type[index] = 0; + *cur_pb_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + *cur_pb_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + index ++; + cur_pb_pos ++; + cur_data_pos ++; + } + cur_pb_pos += strip_dim0_offset - current_blockcount_y; + cur_data_pos += dim0_offset - current_blockcount_y; + } + /*dealing with the last ii 
(boundary)*/ + { + // ii == current_blockcount_x - 1 + for(size_t jj=0; jjrealPrecision){ + type[index] = 0; + *cur_pb_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + *cur_pb_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + next_pb_pos[jj] = *cur_pb_pos; + index ++; + cur_pb_pos ++; + cur_data_pos ++; + } + } + total_unpred += unpredictable_count; + unpredictable_data += unpredictable_count; + // change indicator + indicator_pos[j] = 1; + }// end SZ + reg_params_pos ++; + data_pos += current_blockcount_y; + pb_pos += current_blockcount_y; + next_pb_pos += current_blockcount_y; + type += current_blockcount_x * current_blockcount_y; + }// end j + indicator_pos += num_y; + double * tmp; + tmp = cur_pb_buf; + cur_pb_buf = next_pb_buf; + next_pb_buf = tmp; + }// end i + }// end use mean + else{ + type = result_type; + int intvCapacity_sz = intvCapacity - 2; + for(size_t i=0; iprecision[e]){ + coeff_type[e][coeff_index] = 0; + last_coeffcients[e] = cur_coeff; + coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; + } + } + else{ + coeff_type[e][coeff_index] = 0; + last_coeffcients[e] = cur_coeff; + coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; + } + } + coeff_index ++; + } + double curData; + double pred; + double itvNum; + double diff; + size_t index = 0; + size_t block_unpredictable_count = 0; + double * cur_data_pos = data_pos; + for(size_t ii=0; iirealPrecision){ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + index ++; + cur_data_pos ++; + } + /*dealing with the last jj (boundary)*/ + { + // jj == current_blockcount_y - 1 + size_t jj = current_blockcount_y - 1; + curData = *cur_data_pos; + pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2]; + 
diff = curData - pred; + itvNum = fabs(diff)*recip_realPrecision + 1; + if (itvNum < intvCapacity){ + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + pred = pred + 2 * (type[index] - intvRadius) * realPrecision; + //ganrantee comporession error against the case of machine-epsilon + if(fabs(curData - pred)>realPrecision){ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + + // assign value to block surfaces + pb_pos[ii * strip_dim0_offset + jj] = pred; + index ++; + cur_data_pos ++; + } + cur_data_pos += dim0_offset - current_blockcount_y; + } + /*dealing with the last ii (boundary)*/ + { + size_t ii = current_blockcount_x - 1; + for(size_t jj=0; jjrealPrecision){ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + // assign value to next prediction buffer + next_pb_pos[jj] = pred; + index ++; + cur_data_pos ++; + } + /*dealing with the last jj (boundary)*/ + { + // jj == current_blockcount_y - 1 + size_t jj = current_blockcount_y - 1; + curData = *cur_data_pos; + pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2]; + diff = curData - pred; + itvNum = fabs(diff)*recip_realPrecision + 1; + if (itvNum < intvCapacity){ + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + pred = pred + 2 * (type[index] - intvRadius) * realPrecision; + //ganrantee comporession error against the case of machine-epsilon + if(fabs(curData - pred)>realPrecision){ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = 
curData; + } + + // assign value to block surfaces + pb_pos[ii * strip_dim0_offset + jj] = pred; + // assign value to next prediction buffer + next_pb_pos[jj] = pred; + + index ++; + cur_data_pos ++; + } + } // end ii == -1 + unpredictable_count = block_unpredictable_count; + total_unpred += unpredictable_count; + unpredictable_data += unpredictable_count; + reg_count ++; + }// end use_reg + else{ + // use SZ + // SZ predication + unpredictable_count = 0; + double * cur_pb_pos = pb_pos; + double * cur_data_pos = data_pos; + double curData; + double pred2D; + double itvNum, diff; + size_t index = 0; + for(size_t ii=0; iirealPrecision){ + type[index] = 0; + *cur_pb_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + *cur_pb_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + + index ++; + cur_pb_pos ++; + cur_data_pos ++; + } + cur_pb_pos += strip_dim0_offset - current_blockcount_y; + cur_data_pos += dim0_offset - current_blockcount_y; + } + /*dealing with the last ii (boundary)*/ + { + // ii == current_blockcount_x - 1 + for(size_t jj=0; jjrealPrecision){ + type[index] = 0; + *cur_pb_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + *cur_pb_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + next_pb_pos[jj] = *cur_pb_pos; + index ++; + cur_pb_pos ++; + cur_data_pos ++; + } + } + total_unpred += unpredictable_count; + unpredictable_data += unpredictable_count; + // change indicator + indicator_pos[j] = 1; + }// end SZ + reg_params_pos ++; + data_pos += current_blockcount_y; + pb_pos += current_blockcount_y; + next_pb_pos += current_blockcount_y; + type += current_blockcount_x * current_blockcount_y; + }// end j + indicator_pos += num_y; + double * tmp; + tmp = cur_pb_buf; + cur_pb_buf = next_pb_buf; + next_pb_buf = tmp; + }// end i + } + free(prediction_buffer_1); + free(prediction_buffer_2); + + int 
stateNum = 2*quantization_intervals; + HuffmanTree* huffmanTree = createHuffmanTree(stateNum); + + size_t nodeCount = 0; + size_t i = 0; + init(huffmanTree, result_type, num_elements); + for (i = 0; i < stateNum; i++) + if (huffmanTree->code[i]) nodeCount++; + nodeCount = nodeCount*2-1; + + unsigned char *treeBytes; + unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes); + + unsigned int meta_data_offset = 3 + 1 + MetaDataByteLength_double; + // total size metadata # elements real precision intervals nodeCount huffman block index unpredicatable count mean unpred size elements + unsigned char * result = (unsigned char *) calloc(meta_data_offset + exe_params->SZ_SIZE_TYPE + sizeof(double) + sizeof(int) + sizeof(int) + 5*treeByteSize + 3*num_blocks*sizeof(int) + num_blocks * sizeof(unsigned short) + num_blocks * sizeof(unsigned short) + num_blocks * sizeof(double) + total_unpred * sizeof(double) + num_elements * sizeof(int), 1); + unsigned char * result_pos = result; + initRandomAccessBytes(result_pos); + result_pos += meta_data_offset; + + sizeToBytes(result_pos, num_elements); + result_pos += exe_params->SZ_SIZE_TYPE; + + intToBytes_bigEndian(result_pos, block_size); + result_pos += sizeof(int); + doubleToBytes(result_pos, realPrecision); + result_pos += sizeof(double); + intToBytes_bigEndian(result_pos, quantization_intervals); + result_pos += sizeof(int); + intToBytes_bigEndian(result_pos, treeByteSize); + result_pos += sizeof(int); + intToBytes_bigEndian(result_pos, nodeCount); + result_pos += sizeof(int); + memcpy(result_pos, treeBytes, treeByteSize); + result_pos += treeByteSize; + free(treeBytes); + + memcpy(result_pos, &use_mean, sizeof(unsigned char)); + result_pos += sizeof(unsigned char); + memcpy(result_pos, &mean, sizeof(double)); + result_pos += sizeof(double); + + size_t indicator_size = convertIntArray2ByteArray_fast_1b_to_result(indicator, num_blocks, result_pos); + result_pos += indicator_size; + + 
//convert the lead/mid/resi to byte stream + if(reg_count>0){ + for(int e=0; e<3; e++){ + int stateNum = 2*coeff_intvCapacity_sz; + HuffmanTree* huffmanTree = createHuffmanTree(stateNum); + size_t nodeCount = 0; + init(huffmanTree, coeff_type[e], reg_count); + size_t i = 0; + for (i = 0; i < huffmanTree->stateNum; i++) + if (huffmanTree->code[i]) nodeCount++; + nodeCount = nodeCount*2-1; + unsigned char *treeBytes; + unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes); + doubleToBytes(result_pos, precision[e]); + result_pos += sizeof(double); + intToBytes_bigEndian(result_pos, coeff_intvRadius); + result_pos += sizeof(int); + intToBytes_bigEndian(result_pos, treeByteSize); + result_pos += sizeof(int); + intToBytes_bigEndian(result_pos, nodeCount); + result_pos += sizeof(int); + memcpy(result_pos, treeBytes, treeByteSize); + result_pos += treeByteSize; + free(treeBytes); + size_t typeArray_size = 0; + encode(huffmanTree, coeff_type[e], reg_count, result_pos + sizeof(size_t), &typeArray_size); + sizeToBytes(result_pos, typeArray_size); + result_pos += sizeof(size_t) + typeArray_size; + intToBytes_bigEndian(result_pos, coeff_unpredictable_count[e]); + result_pos += sizeof(int); + memcpy(result_pos, coeff_unpred_data[e], coeff_unpredictable_count[e]*sizeof(double)); + result_pos += coeff_unpredictable_count[e]*sizeof(double); + SZ_ReleaseHuffman(huffmanTree); + } + } + free(coeff_result_type); + free(coeff_unpredictable_data); + + //record the number of unpredictable data and also store them + memcpy(result_pos, &total_unpred, sizeof(size_t)); + result_pos += sizeof(size_t); + memcpy(result_pos, result_unpredictable_data, total_unpred * sizeof(double)); + result_pos += total_unpred * sizeof(double); + size_t typeArray_size = 0; + encode(huffmanTree, result_type, num_elements, result_pos, &typeArray_size); + result_pos += typeArray_size; +#ifdef HAVE_WRITESTATS + writeHuffmanInfo(treeByteSize, typeArray_size, 
num_elements*sizeof(float), nodeCount); + writeBlockInfo(use_mean, block_size, reg_count, num_blocks); + writeUnpredictDataCounts(total_unpred, num_elements); +#endif + + size_t totalEncodeSize = result_pos - result; + free(indicator); + free(result_unpredictable_data); + free(result_type); + free(reg_params); + + SZ_ReleaseHuffman(huffmanTree); + *comp_size = totalEncodeSize; + + + + return result; +} +unsigned int optimize_intervals_double_3D_with_freq_and_dense_pos(double *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, double * dense_pos, double * max_freq, double * mean_freq) +{ + double mean = 0.0; + size_t len = r1 * r2 * r3; + size_t mean_distance = (int) (sqrt(len)); + double * data_pos = oriData; + size_t offset_count = 0; + size_t offset_count_2 = 0; + size_t mean_count = 0; + while(data_pos - oriData < len){ + mean += *data_pos; + mean_count ++; + data_pos += mean_distance; + offset_count += mean_distance; + offset_count_2 += mean_distance; + if(offset_count >= r3){ + offset_count = 0; + data_pos -= 1; + } + if(offset_count_2 >= r2 * r3){ + offset_count_2 = 0; + data_pos -= 1; + } + } + if(mean_count > 0) mean /= mean_count; + size_t range = 8192; + size_t radius = 4096; + size_t * freq_intervals = (size_t *) malloc(range*sizeof(size_t)); + memset(freq_intervals, 0, range*sizeof(size_t)); + + unsigned int maxRangeRadius = confparams_cpr->maxRangeRadius; + int sampleDistance = confparams_cpr->sampleDistance; + double predThreshold = confparams_cpr->predThreshold; + + size_t i; + size_t radiusIndex; + size_t r23=r2*r3; + double pred_value = 0, pred_err; + size_t *intervals = (size_t*)malloc(maxRangeRadius*sizeof(size_t)); + memset(intervals, 0, maxRangeRadius*sizeof(size_t)); + + double mean_diff; + ptrdiff_t freq_index; + size_t freq_count = 0; + size_t sample_count = 0; + + offset_count = confparams_cpr->sampleDistance - 2; // count r3 offset + data_pos = oriData + r23 + r3 + offset_count; + size_t n1_count = 1, n2_count = 1; // count 
i,j sum + + while(data_pos - oriData < len){ + + pred_value = data_pos[-1] + data_pos[-r3] + data_pos[-r23] - data_pos[-1-r23] - data_pos[-r3-1] - data_pos[-r3-r23] + data_pos[-r3-r23-1]; + pred_err = fabs(pred_value - *data_pos); + if(pred_err < realPrecision) freq_count ++; + radiusIndex = (pred_err/realPrecision+1)/2; + if(radiusIndex>=maxRangeRadius) + { + radiusIndex = maxRangeRadius - 1; + } + intervals[radiusIndex]++; + + mean_diff = *data_pos - mean; + if(mean_diff > 0) freq_index = (ptrdiff_t)(mean_diff/realPrecision) + radius; + else freq_index = (ptrdiff_t)(mean_diff/realPrecision) - 1 + radius; + if(freq_index <= 0){ + freq_intervals[0] ++; + } + else if(freq_index >= range){ + freq_intervals[range - 1] ++; + } + else{ + freq_intervals[freq_index] ++; + } + offset_count += sampleDistance; + if(offset_count >= r3){ + n2_count ++; + if(n2_count == r2){ + n1_count ++; + n2_count = 1; + data_pos += r3; + } + offset_count_2 = (n1_count + n2_count) % sampleDistance; + data_pos += (r3 + sampleDistance - offset_count) + (sampleDistance - offset_count_2); + offset_count = (sampleDistance - offset_count_2); + if(offset_count == 0) offset_count ++; + } + else data_pos += sampleDistance; + sample_count ++; + } + *max_freq = freq_count * 1.0/ sample_count; + + //compute the appropriate number + size_t targetCount = sample_count*predThreshold; + size_t sum = 0; + for(i=0;itargetCount) + break; + } + if(i>=maxRangeRadius) + i = maxRangeRadius-1; + unsigned int accIntervals = 2*(i+1); + unsigned int powerOf2 = roundUpToPowerOf2(accIntervals); + + if(powerOf2<32) + powerOf2 = 32; + // collect frequency + size_t max_sum = 0; + size_t max_index = 0; + size_t tmp_sum; + size_t * freq_pos = freq_intervals + 1; + for(size_t i=1; i max_sum){ + max_sum = tmp_sum; + max_index = i; + } + freq_pos ++; + } + *dense_pos = mean + realPrecision * (ptrdiff_t)(max_index + 1 - radius); + *mean_freq = max_sum * 1.0 / sample_count; + + free(freq_intervals); + free(intervals); + return 
powerOf2; +} + + + +unsigned char * SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(double *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t * comp_size){ + +#ifdef HAVE_TIMECMPR + double* decData = NULL; + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData = (double*)(multisteps->hist_data); +#endif + + double recip_realPrecision = 1/realPrecision; + //printf("recip_realPrecision = %.20G\n", recip_realPrecision); + + unsigned int quantization_intervals; + double sz_sample_correct_freq = -1;//0.5; //-1 + double dense_pos; + double mean_flush_freq; + unsigned char use_mean = 0; + + // calculate block dims + size_t num_x, num_y, num_z; + size_t block_size = 6; + SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r1, num_x, block_size); + SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r2, num_y, block_size); + SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r3, num_z, block_size); + + size_t split_index_x, split_index_y, split_index_z; + size_t early_blockcount_x, early_blockcount_y, early_blockcount_z; + size_t late_blockcount_x, late_blockcount_y, late_blockcount_z; + SZ_COMPUTE_BLOCKCOUNT(r1, num_x, split_index_x, early_blockcount_x, late_blockcount_x); + SZ_COMPUTE_BLOCKCOUNT(r2, num_y, split_index_y, early_blockcount_y, late_blockcount_y); + SZ_COMPUTE_BLOCKCOUNT(r3, num_z, split_index_z, early_blockcount_z, late_blockcount_z); + + size_t max_num_block_elements = early_blockcount_x * early_blockcount_y * early_blockcount_z; + size_t num_blocks = num_x * num_y * num_z; + size_t num_elements = r1 * r2 * r3; + + size_t dim0_offset = r2 * r3; + size_t dim1_offset = r3; + + int * result_type = (int *) malloc(num_elements * sizeof(int)); + memset(result_type, 0, num_elements*sizeof(int)); + size_t unpred_data_max_size = max_num_block_elements; + double * result_unpredictable_data = (double *) malloc(unpred_data_max_size * sizeof(double) * num_blocks); + size_t total_unpred = 0; + size_t unpredictable_count; + size_t max_unpred_count = 0; + double * data_pos = oriData; + int 
* type = result_type; + size_t type_offset; + size_t offset_x, offset_y, offset_z; + size_t current_blockcount_x, current_blockcount_y, current_blockcount_z; + + double * reg_params = (double *) malloc(num_blocks * 4 * sizeof(double)); + double * reg_params_pos = reg_params; + // move regression part out + size_t params_offset_b = num_blocks; + size_t params_offset_c = 2*num_blocks; + size_t params_offset_d = 3*num_blocks; + for(size_t i=0; ioptQuantMode==1) + { + quantization_intervals = optimize_intervals_double_3D_with_freq_and_dense_pos(oriData, r1, r2, r3, realPrecision, &dense_pos, &sz_sample_correct_freq, &mean_flush_freq); + if(mean_flush_freq > 0.5 || mean_flush_freq > sz_sample_correct_freq) use_mean = 1; + updateQuantizationInfo(quantization_intervals); + } + else{ + quantization_intervals = exe_params->intvCapacity; + } + + double mean = 0; + if(use_mean){ + // compute mean + double sum = 0.0; + size_t mean_count = 0; + for(size_t i=0; i 0) mean = sum / mean_count; + } + + // use two prediction buffers for higher performance + double * unpredictable_data = result_unpredictable_data; + unsigned char * indicator = (unsigned char *) malloc(num_blocks * sizeof(unsigned char)); + memset(indicator, 0, num_blocks * sizeof(unsigned char)); + size_t reg_count = 0; + size_t strip_dim_0 = early_blockcount_x + 1; + size_t strip_dim_1 = r2 + 1; + size_t strip_dim_2 = r3 + 1; + size_t strip_dim0_offset = strip_dim_1 * strip_dim_2; + size_t strip_dim1_offset = strip_dim_2; + unsigned char * indicator_pos = indicator; + + size_t prediction_buffer_size = strip_dim_0 * strip_dim0_offset * sizeof(double); + double * prediction_buffer_1 = (double *) malloc(prediction_buffer_size); + memset(prediction_buffer_1, 0, prediction_buffer_size); + double * prediction_buffer_2 = (double *) malloc(prediction_buffer_size); + memset(prediction_buffer_2, 0, prediction_buffer_size); + double * cur_pb_buf = prediction_buffer_1; + double * next_pb_buf = prediction_buffer_2; + double * 
cur_pb_buf_pos; + double * next_pb_buf_pos; + int intvCapacity = quantization_intervals;// exe_params->intvCapacity; + int intvRadius = intvCapacity/2; //exe_params->intvRadius; + int use_reg = 0; + double noise = realPrecision * 1.22; + + reg_params_pos = reg_params; + // compress the regression coefficients on the fly + double last_coeffcients[4] = {0.0}; + int coeff_intvCapacity_sz = 65536; + int coeff_intvRadius = coeff_intvCapacity_sz / 2; + int * coeff_type[4]; + int * coeff_result_type = (int *) malloc(num_blocks*4*sizeof(int)); + double * coeff_unpred_data[4]; + double * coeff_unpredictable_data = (double *) malloc(num_blocks*4*sizeof(double)); + double precision[4], recip_precision[4]; + precision[0] = precision_a, precision[1] = precision_b, precision[2] = precision_c, precision[3] = precision_d; + recip_precision[0] = 1/precision_a, recip_precision[1] = 1/precision_b, recip_precision[2] = 1/precision_c, recip_precision[3] = 1/precision_d; + + for(int i=0; i<4; i++){ + coeff_type[i] = coeff_result_type + i * num_blocks; + coeff_unpred_data[i] = coeff_unpredictable_data + i * num_blocks; + } + int coeff_index = 0; + unsigned int coeff_unpredictable_count[4] = {0}; + + if(use_mean){ + int intvCapacity_sz = intvCapacity - 2; + for(size_t i=0; iprecision[e]){ + coeff_type[e][coeff_index] = 0; + last_coeffcients[e] = cur_coeff; + coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; + } + } + else{ + coeff_type[e][coeff_index] = 0; + last_coeffcients[e] = cur_coeff; + coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; + } + } + coeff_index ++; + } + double curData; + double pred; + double itvNum; + double diff; + size_t index = 0; + size_t block_unpredictable_count = 0; + double * cur_data_pos = data_pos; + for(size_t ii=0; iirealPrecision){ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + pred = curData; + 
unpredictable_data[block_unpredictable_count ++] = curData; + } + +#ifdef HAVE_TIMECMPR + size_t point_offset = ii*dim0_offset + jj*dim1_offset + kk; + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[block_offset + point_offset] = pred; +#endif + + if((jj == current_blockcount_y - 1) || (kk == current_blockcount_z - 1)){ + // assign value to block surfaces + pb_pos[ii * strip_dim0_offset + jj * strip_dim1_offset + kk] = pred; + } + index ++; + cur_data_pos ++; + } + cur_data_pos += dim1_offset - current_blockcount_z; + } + cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + /*dealing with the last ii (boundary)*/ + { + // ii == current_blockcount_x - 1 + size_t ii = current_blockcount_x - 1; + for(size_t jj=0; jjrealPrecision){ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + +#ifdef HAVE_TIMECMPR + size_t point_offset = ii*dim0_offset + jj*dim1_offset + kk; + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[block_offset + point_offset] = pred; +#endif + + if((jj == current_blockcount_y - 1) || (kk == current_blockcount_z - 1)){ + // assign value to block surfaces + pb_pos[ii * strip_dim0_offset + jj * strip_dim1_offset + kk] = pred; + } + // assign value to next prediction buffer + next_pb_pos[jj * strip_dim1_offset + kk] = pred; + index ++; + cur_data_pos ++; + } + cur_data_pos += dim1_offset - current_blockcount_z; + } + } + unpredictable_count = block_unpredictable_count; + strip_unpredictable_count += unpredictable_count; + unpredictable_data += unpredictable_count; + + reg_count ++; + } + else{ + // use SZ + // SZ predication + unpredictable_count = 0; + double * cur_pb_pos = pb_pos; + double * cur_data_pos = data_pos; + double curData; + double pred3D; + double itvNum, diff; + size_t index = 0; + for(size_t ii=0; iirealPrecision){ + 
type[index] = 0; + *cur_pb_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + *cur_pb_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + +#ifdef HAVE_TIMECMPR + size_t point_offset = ii*dim0_offset + jj*dim1_offset + kk; + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[block_offset + point_offset] = *cur_pb_pos; +#endif + + index ++; + cur_pb_pos ++; + cur_data_pos ++; + } + cur_pb_pos += strip_dim1_offset - current_blockcount_z; + cur_data_pos += dim1_offset - current_blockcount_z; + } + cur_pb_pos += strip_dim0_offset - current_blockcount_y * strip_dim1_offset; + cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + /*dealing with the last ii (boundary)*/ + { + // ii == current_blockcount_x - 1 + for(size_t jj=0; jjrealPrecision){ + type[index] = 0; + *cur_pb_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + *cur_pb_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + +#ifdef HAVE_TIMECMPR + size_t ii = current_blockcount_x - 1; + size_t point_offset = ii*dim0_offset + jj*dim1_offset + kk; + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[block_offset + point_offset] = *cur_pb_pos; +#endif + + next_pb_pos[jj * strip_dim1_offset + kk] = *cur_pb_pos; + index ++; + cur_pb_pos ++; + cur_data_pos ++; + } + cur_pb_pos += strip_dim1_offset - current_blockcount_z; + cur_data_pos += dim1_offset - current_blockcount_z; + } + } + strip_unpredictable_count += unpredictable_count; + unpredictable_data += unpredictable_count; + // change indicator + indicator_pos[k] = 1; + }// end SZ + + reg_params_pos ++; + data_pos += current_blockcount_z; + pb_pos += current_blockcount_z; + next_pb_pos += current_blockcount_z; + type += current_blockcount_x * current_blockcount_y * current_blockcount_z; + + } // end k + + if(strip_unpredictable_count > max_unpred_count){ 
+ max_unpred_count = strip_unpredictable_count; + } + total_unpred += strip_unpredictable_count; + indicator_pos += num_z; + }// end j + double * tmp; + tmp = cur_pb_buf; + cur_pb_buf = next_pb_buf; + next_pb_buf = tmp; + }// end i + } + else{ + int intvCapacity_sz = intvCapacity - 2; + for(size_t i=0; iprecision[e]){ + coeff_type[e][coeff_index] = 0; + last_coeffcients[e] = cur_coeff; + coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; + } + } + else{ + coeff_type[e][coeff_index] = 0; + last_coeffcients[e] = cur_coeff; + coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; + } + } + coeff_index ++; + } + double curData; + double pred; + double itvNum; + double diff; + size_t index = 0; + size_t block_unpredictable_count = 0; + double * cur_data_pos = data_pos; + for(size_t ii=0; iirealPrecision){ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + +#ifdef HAVE_TIMECMPR + size_t point_offset = ii*dim0_offset + jj*dim1_offset + kk; + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[block_offset + point_offset] = pred; +#endif + + if((jj == current_blockcount_y - 1) || (kk == current_blockcount_z - 1)){ + // assign value to block surfaces + pb_pos[ii * strip_dim0_offset + jj * strip_dim1_offset + kk] = pred; + } + index ++; + cur_data_pos ++; + } + cur_data_pos += dim1_offset - current_blockcount_z; + } + cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + /*dealing with the last ii (boundary)*/ + { + // ii == current_blockcount_x - 1 + size_t ii = current_blockcount_x - 1; + for(size_t jj=0; jjrealPrecision){ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + +#ifdef 
HAVE_TIMECMPR + size_t point_offset = ii*dim0_offset + jj*dim1_offset + kk; + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[block_offset + point_offset] = pred; +#endif + + if((jj == current_blockcount_y - 1) || (kk == current_blockcount_z - 1)){ + // assign value to block surfaces + pb_pos[ii * strip_dim0_offset + jj * strip_dim1_offset + kk] = pred; + } + // assign value to next prediction buffer + next_pb_pos[jj * strip_dim1_offset + kk] = pred; + index ++; + cur_data_pos ++; + } + cur_data_pos += dim1_offset - current_blockcount_z; + } + } + unpredictable_count = block_unpredictable_count; + strip_unpredictable_count += unpredictable_count; + unpredictable_data += unpredictable_count; + reg_count ++; + } + else{ + // use SZ + // SZ predication + unpredictable_count = 0; + double * cur_pb_pos = pb_pos; + double * cur_data_pos = data_pos; + double curData; + double pred3D; + double itvNum, diff; + size_t index = 0; + for(size_t ii=0; iirealPrecision){ + type[index] = 0; + *cur_pb_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + *cur_pb_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + +#ifdef HAVE_TIMECMPR + size_t point_offset = ii*dim0_offset + jj*dim1_offset + kk; + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[block_offset + point_offset] = *cur_pb_pos; +#endif + + index ++; + cur_pb_pos ++; + cur_data_pos ++; + } + cur_pb_pos += strip_dim1_offset - current_blockcount_z; + cur_data_pos += dim1_offset - current_blockcount_z; + } + cur_pb_pos += strip_dim0_offset - current_blockcount_y * strip_dim1_offset; + cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + /*dealing with the last ii (boundary)*/ + { + // ii == current_blockcount_x - 1 + for(size_t jj=0; jjrealPrecision){ + type[index] = 0; + *cur_pb_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + *cur_pb_pos = 
curData; + unpredictable_data[unpredictable_count ++] = curData; + } + +#ifdef HAVE_TIMECMPR + size_t ii = current_blockcount_x - 1; + size_t point_offset = ii*dim0_offset + jj*dim1_offset + kk; + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[block_offset + point_offset] = *cur_pb_pos; +#endif + + // assign value to next prediction buffer + next_pb_pos[jj * strip_dim1_offset + kk] = *cur_pb_pos; + index ++; + cur_pb_pos ++; + cur_data_pos ++; + } + cur_pb_pos += strip_dim1_offset - current_blockcount_z; + cur_data_pos += dim1_offset - current_blockcount_z; + } + } + strip_unpredictable_count += unpredictable_count; + unpredictable_data += unpredictable_count; + // change indicator + indicator_pos[k] = 1; + }// end SZ + + reg_params_pos ++; + data_pos += current_blockcount_z; + pb_pos += current_blockcount_z; + next_pb_pos += current_blockcount_z; + type += current_blockcount_x * current_blockcount_y * current_blockcount_z; + + } + + if(strip_unpredictable_count > max_unpred_count){ + max_unpred_count = strip_unpredictable_count; + } + total_unpred += strip_unpredictable_count; + indicator_pos += num_z; + } + double * tmp; + tmp = cur_pb_buf; + cur_pb_buf = next_pb_buf; + next_pb_buf = tmp; + } + } + + free(prediction_buffer_1); + free(prediction_buffer_2); + + int stateNum = 2*quantization_intervals; + HuffmanTree* huffmanTree = createHuffmanTree(stateNum); + + size_t nodeCount = 0; + init(huffmanTree, result_type, num_elements); + size_t i = 0; + for (i = 0; i < huffmanTree->stateNum; i++) + if (huffmanTree->code[i]) nodeCount++; + nodeCount = nodeCount*2-1; + + unsigned char *treeBytes; + unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes); + + unsigned int meta_data_offset = 3 + 1 + MetaDataByteLength_double; + // total size metadata # elements real precision intervals nodeCount huffman block index unpredicatable count mean unpred size elements + unsigned char * result = (unsigned char *) 
calloc(meta_data_offset + exe_params->SZ_SIZE_TYPE + sizeof(double) + sizeof(int) + sizeof(int) + 5*treeByteSize + 4*num_blocks*sizeof(int)+ num_blocks * sizeof(unsigned short) + num_blocks * sizeof(unsigned short) + num_blocks * sizeof(double) + total_unpred * sizeof(double) + num_elements * sizeof(int), 1); + unsigned char * result_pos = result; + initRandomAccessBytes(result_pos); + + result_pos += meta_data_offset; + + sizeToBytes(result_pos,num_elements); //SZ_SIZE_TYPE: 4 or 8 + result_pos += exe_params->SZ_SIZE_TYPE; + + intToBytes_bigEndian(result_pos, block_size); + result_pos += sizeof(int); + doubleToBytes(result_pos, realPrecision); + result_pos += sizeof(double); + intToBytes_bigEndian(result_pos, quantization_intervals); + result_pos += sizeof(int); + intToBytes_bigEndian(result_pos, treeByteSize); + result_pos += sizeof(int); + intToBytes_bigEndian(result_pos, nodeCount); + result_pos += sizeof(int); + memcpy(result_pos, treeBytes, treeByteSize); + result_pos += treeByteSize; + free(treeBytes); + + memcpy(result_pos, &use_mean, sizeof(unsigned char)); + result_pos += sizeof(unsigned char); + memcpy(result_pos, &mean, sizeof(double)); + result_pos += sizeof(double); + size_t indicator_size = convertIntArray2ByteArray_fast_1b_to_result(indicator, num_blocks, result_pos); + result_pos += indicator_size; + + //convert the lead/mid/resi to byte stream + if(reg_count > 0){ + for(int e=0; e<4; e++){ + int stateNum = 2*coeff_intvCapacity_sz; + HuffmanTree* huffmanTree = createHuffmanTree(stateNum); + size_t nodeCount = 0; + init(huffmanTree, coeff_type[e], reg_count); + size_t i = 0; + for (i = 0; i < huffmanTree->stateNum; i++) + if (huffmanTree->code[i]) nodeCount++; + nodeCount = nodeCount*2-1; + unsigned char *treeBytes; + unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes); + doubleToBytes(result_pos, precision[e]); + result_pos += sizeof(double); + intToBytes_bigEndian(result_pos, coeff_intvRadius); + 
result_pos += sizeof(int); + intToBytes_bigEndian(result_pos, treeByteSize); + result_pos += sizeof(int); + intToBytes_bigEndian(result_pos, nodeCount); + result_pos += sizeof(int); + memcpy(result_pos, treeBytes, treeByteSize); + result_pos += treeByteSize; + free(treeBytes); + size_t typeArray_size = 0; + encode(huffmanTree, coeff_type[e], reg_count, result_pos + sizeof(size_t), &typeArray_size); + sizeToBytes(result_pos, typeArray_size); + result_pos += sizeof(size_t) + typeArray_size; + intToBytes_bigEndian(result_pos, coeff_unpredictable_count[e]); + result_pos += sizeof(int); + memcpy(result_pos, coeff_unpred_data[e], coeff_unpredictable_count[e]*sizeof(double)); + result_pos += coeff_unpredictable_count[e]*sizeof(double); + SZ_ReleaseHuffman(huffmanTree); + } + } + free(coeff_result_type); + free(coeff_unpredictable_data); + + //record the number of unpredictable data and also store them + memcpy(result_pos, &total_unpred, sizeof(size_t)); + result_pos += sizeof(size_t); + memcpy(result_pos, result_unpredictable_data, total_unpred * sizeof(double)); + result_pos += total_unpred * sizeof(double); + size_t typeArray_size = 0; + encode(huffmanTree, result_type, num_elements, result_pos, &typeArray_size); + result_pos += typeArray_size; + size_t totalEncodeSize = result_pos - result; + free(indicator); + free(result_unpredictable_data); + free(result_type); + free(reg_params); + +#ifdef HAVE_WRITESTATS + writeHuffmanInfo(treeByteSize, typeArray_size, num_elements*sizeof(float), nodeCount); + writeBlockInfo(use_mean, block_size, reg_count, num_blocks); + writeUnpredictDataCounts(total_unpred, num_elements); +#endif + + SZ_ReleaseHuffman(huffmanTree); + *comp_size = totalEncodeSize; + return result; +} diff --git a/deps/SZ/sz/src/sz_double_pwr.c b/deps/SZ/sz/src/sz_double_pwr.c new file mode 100644 index 0000000000000000000000000000000000000000..dc037db7fbb2280df74c77aa324e91d42259d4d3 --- /dev/null +++ b/deps/SZ/sz/src/sz_double_pwr.c @@ -0,0 +1,2067 @@ +/** + * 
@file sz_double_pwr.c + * @author Sheng Di, Dingwen Tao, Xin Liang, Xiangyu Zou, Tao Lu, Wen Xia, Xuan Wang, Weizhe Zhang + * @date Aug, 2016 + * @brief SZ_Init, Compression and Decompression functions + * This file contains the compression/decompression functions related to point-wise relative errors + * (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. + */ + + +#include +#include +#include +#include +#include +#include "sz.h" +#include "CompressElement.h" +#include "DynamicByteArray.h" +#include "DynamicIntArray.h" +#include "TightDataPointStorageD.h" +#include "sz_double.h" +#include "sz_double_pwr.h" +#include "zlib.h" +#include "rw.h" +#include "utility.h" + +void compute_segment_precisions_double_1D(double *oriData, size_t dataLength, double* pwrErrBound, unsigned char* pwrErrBoundBytes, double globalPrecision) +{ + size_t i = 0, j = 0, k = 0; + double realPrecision = oriData[0]!=0?fabs(confparams_cpr->pw_relBoundRatio*oriData[0]):confparams_cpr->pw_relBoundRatio; + double approxPrecision; + unsigned char realPrecBytes[8]; + double curPrecision; + double curValue; + double sum = 0; + for(i=0;isegment_size==0&&i>0) + { + //get two first bytes of the realPrecision + if(confparams_cpr->pwr_type==SZ_PWR_AVG_TYPE) + { + realPrecision = sum/confparams_cpr->segment_size; + sum = 0; + } + realPrecision *= confparams_cpr->pw_relBoundRatio; + if(confparams_cpr->errorBoundMode==ABS_AND_PW_REL||confparams_cpr->errorBoundMode==REL_AND_PW_REL) + realPrecision = realPrecisionerrorBoundMode==ABS_OR_PW_REL||confparams_cpr->errorBoundMode==REL_OR_PW_REL) + realPrecision = realPrecisionpwr_type) + { + case SZ_PWR_MIN_TYPE: + if(realPrecision>curPrecision) + realPrecision = curPrecision; + break; + case SZ_PWR_AVG_TYPE: + sum += curPrecision; + break; + case SZ_PWR_MAX_TYPE: + if(realPrecisionpwr_type==SZ_PWR_AVG_TYPE) + { + int size = 
dataLength%confparams_cpr->segment_size==0?confparams_cpr->segment_size:dataLength%confparams_cpr->segment_size; + realPrecision = sum/size; + } + if(confparams_cpr->errorBoundMode==ABS_AND_PW_REL||confparams_cpr->errorBoundMode==REL_AND_PW_REL) + realPrecision = realPrecisionerrorBoundMode==ABS_OR_PW_REL||confparams_cpr->errorBoundMode==REL_OR_PW_REL) + realPrecision = realPrecisionmaxRangeRadius*sizeof(int)); + memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(int)); + int totalSampleSize = dataLength/confparams_cpr->sampleDistance; + for(i=2;isegment_size==0) + realPrecision = pwrErrBound[j++]; + if(i%confparams_cpr->sampleDistance==0) + { + //pred_value = 2*oriData[i-1] - oriData[i-2]; + pred_value = oriData[i-1]; + pred_err = fabs(pred_value - oriData[i]); + radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2); + if(radiusIndex>=confparams_cpr->maxRangeRadius) + radiusIndex = confparams_cpr->maxRangeRadius - 1; + intervals[radiusIndex]++; + } + } + //compute the appropriate number + size_t targetCount = totalSampleSize*confparams_cpr->predThreshold; + size_t sum = 0; + for(i=0;imaxRangeRadius;i++) + { + sum += intervals[i]; + if(sum>targetCount) + break; + } + if(i>=confparams_cpr->maxRangeRadius) + i = confparams_cpr->maxRangeRadius-1; + unsigned int accIntervals = 2*(i+1); + unsigned int powerOf2 = roundUpToPowerOf2(accIntervals); + + if(powerOf2<32) + powerOf2 = 32; + + free(intervals); + //printf("accIntervals=%d, powerOf2=%d\n", accIntervals, powerOf2); + return powerOf2; +} + +void compute_segment_precisions_double_2D(double *oriData, double* pwrErrBound, +size_t r1, size_t r2, size_t R2, size_t edgeSize, unsigned char* pwrErrBoundBytes, double Min, double Max, double globalPrecision) +{ + size_t i = 0, j = 0, k = 0, p = 0, index = 0, J = 0; //I=-1,J=-1 if they are needed + double realPrecision; + double approxPrecision; + unsigned char realPrecBytes[8]; + double curValue, curAbsValue; + double* statAbsValues = 
(double*)malloc(R2*sizeof(double)); + + double max = fabs(Min)pwr_type == SZ_PWR_MIN_TYPE) + statAbsValues[i] = max; + else if(confparams_cpr->pwr_type == SZ_PWR_MAX_TYPE) + statAbsValues[i] = min; + else + statAbsValues[i] = 0; //for SZ_PWR_AVG_TYPE + } + for(i=0;i0) || (i%edgeSize==0&&j==0&&i>0)) + { + if(confparams_cpr->pwr_type==SZ_PWR_AVG_TYPE) + { + int a = edgeSize, b = edgeSize; + if(j==0) + { + if(r2%edgeSize==0) + b = edgeSize; + else + b = r2%edgeSize; + } + if(i==r1-1) + { + if(r1%edgeSize==0) + a = edgeSize; + else + a = r1%edgeSize; + } + realPrecision = confparams_cpr->pw_relBoundRatio*statAbsValues[J]/(a*b); + } + else + realPrecision = confparams_cpr->pw_relBoundRatio*statAbsValues[J]; + + if(confparams_cpr->errorBoundMode==ABS_AND_PW_REL||confparams_cpr->errorBoundMode==REL_AND_PW_REL) + realPrecision = realPrecisionerrorBoundMode==ABS_OR_PW_REL||confparams_cpr->errorBoundMode==REL_OR_PW_REL) + realPrecision = realPrecisionpwr_type == SZ_PWR_MIN_TYPE) + statAbsValues[J] = max; + else if(confparams_cpr->pwr_type == SZ_PWR_MAX_TYPE) + statAbsValues[J] = min; + else + statAbsValues[J] = 0; //for SZ_PWR_AVG_TYPE + } + if(j==0) + J = 0; + else if(j%edgeSize==0) + J++; + if(curValue!=0) + { + curAbsValue = fabs(curValue); + + switch(confparams_cpr->pwr_type) + { + case SZ_PWR_MIN_TYPE: + if(statAbsValues[J]>curAbsValue) + statAbsValues[J] = curAbsValue; + break; + case SZ_PWR_AVG_TYPE: + statAbsValues[J] += curAbsValue; + break; + case SZ_PWR_MAX_TYPE: + if(statAbsValues[J]pwr_type==SZ_PWR_AVG_TYPE) + { + int a = edgeSize, b = edgeSize; + if(r2%edgeSize==0) + b = edgeSize; + else + b = r2%edgeSize; + if(r1%edgeSize==0) + a = edgeSize; + else + a = r1%edgeSize; + realPrecision = confparams_cpr->pw_relBoundRatio*statAbsValues[J]/(a*b); + } + else + realPrecision = confparams_cpr->pw_relBoundRatio*statAbsValues[J]; + + if(confparams_cpr->errorBoundMode==ABS_AND_PW_REL||confparams_cpr->errorBoundMode==REL_AND_PW_REL) + realPrecision = 
realPrecisionerrorBoundMode==ABS_OR_PW_REL||confparams_cpr->errorBoundMode==REL_OR_PW_REL) + realPrecision = realPrecisionmaxRangeRadius*sizeof(int)); + memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(int)); + size_t totalSampleSize = (r1-1)*(r2-1)/confparams_cpr->sampleDistance; + size_t ir2; + for(i=1;isampleDistance==0) + { + realPrecision = pwrErrBound[I*R2+J]; + pred_value = oriData[index-1] + oriData[index-r2] - oriData[index-r2-1]; + pred_err = fabs(pred_value - oriData[index]); + radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2); + if(radiusIndex>=confparams_cpr->maxRangeRadius) + radiusIndex = confparams_cpr->maxRangeRadius - 1; + intervals[radiusIndex]++; + } + } + } + //compute the appropriate number + size_t targetCount = totalSampleSize*confparams_cpr->predThreshold; + size_t sum = 0; + for(i=0;imaxRangeRadius;i++) + { + sum += intervals[i]; + if(sum>targetCount) + break; + } + if(i>=confparams_cpr->maxRangeRadius) + i = confparams_cpr->maxRangeRadius-1; + unsigned int accIntervals = 2*(i+1); + unsigned int powerOf2 = roundUpToPowerOf2(accIntervals); + + if(powerOf2<32) + powerOf2 = 32; + + free(intervals); + //printf("confparams_cpr->maxRangeRadius = %d, accIntervals=%d, powerOf2=%d\n", confparams_cpr->maxRangeRadius, accIntervals, powerOf2); + return powerOf2; +} + +void compute_segment_precisions_double_3D(double *oriData, double* pwrErrBound, +size_t r1, size_t r2, size_t r3, size_t R2, size_t R3, size_t edgeSize, unsigned char* pwrErrBoundBytes, double Min, double Max, double globalPrecision) +{ + size_t i = 0, j = 0, k = 0, p = 0, q = 0, index = 0, J = 0, K = 0; //I=-1,J=-1 if they are needed + size_t r23 = r2*r3, ir, jr; + double realPrecision; + double approxPrecision; + unsigned char realPrecBytes[8]; + double curValue, curAbsValue; + + double** statAbsValues = create2DArray_double(R2, R3); + double max = fabs(Min)pwr_type == SZ_PWR_MIN_TYPE) + statAbsValues[i][j] = max; + else if(confparams_cpr->pwr_type == 
SZ_PWR_MAX_TYPE) + statAbsValues[i][j] = min; + else + statAbsValues[i][j] = 0; + } + for(i=0;i0) + { + realPrecision = confparams_cpr->pw_relBoundRatio*statAbsValues[J][K]; + doubleToBytes(realPrecBytes, realPrecision); + memset(&realPrecBytes[2], 0, 6); + approxPrecision = bytesToDouble(realPrecBytes); + //put the realPrecision in double* pwrErBound + pwrErrBound[p++] = approxPrecision; + //put the two bytes in pwrErrBoundBytes + //printf("q=%d, i=%d, j=%d, k=%d\n",q,i,j,k); + pwrErrBoundBytes[q++] = realPrecBytes[0]; + pwrErrBoundBytes[q++] = realPrecBytes[1]; + if(confparams_cpr->pwr_type == SZ_PWR_MIN_TYPE) + statAbsValues[J][K] = max; + else if(confparams_cpr->pwr_type == SZ_PWR_MAX_TYPE) + statAbsValues[J][K] = min; + } + for(j=0;j0) + { + realPrecision = confparams_cpr->pw_relBoundRatio*statAbsValues[J][K]; + doubleToBytes(realPrecBytes, realPrecision); + memset(&realPrecBytes[2], 0, 6); + approxPrecision = bytesToDouble(realPrecBytes); + //put the realPrecision in double* pwrErBound + pwrErrBound[p++] = approxPrecision; + //put the two bytes in pwrErrBoundBytes + //printf("q=%d, i=%d, j=%d, k=%d\n",q,i,j,k); + pwrErrBoundBytes[q++] = realPrecBytes[0]; + pwrErrBoundBytes[q++] = realPrecBytes[1]; + if(confparams_cpr->pwr_type == SZ_PWR_MIN_TYPE) + statAbsValues[J][K] = max; + else if(confparams_cpr->pwr_type == SZ_PWR_MAX_TYPE) + statAbsValues[J][K] = min; + } + + if(j==0) + J = 0; + else if(j%edgeSize==0) + J++; + + for(k=0;k0) + { + realPrecision = confparams_cpr->pw_relBoundRatio*statAbsValues[J][K]; + doubleToBytes(realPrecBytes, realPrecision); + memset(&realPrecBytes[2], 0, 6); + approxPrecision = bytesToDouble(realPrecBytes); + //put the realPrecision in double* pwrErBound + pwrErrBound[p++] = approxPrecision; + //put the two bytes in pwrErrBoundBytes + //printf("q=%d, i=%d, j=%d, k=%d\n",q,i,j,k); + pwrErrBoundBytes[q++] = realPrecBytes[0]; + pwrErrBoundBytes[q++] = realPrecBytes[1]; + + if(confparams_cpr->pwr_type == SZ_PWR_MIN_TYPE) + 
statAbsValues[J][K] = max; + else if(confparams_cpr->pwr_type == SZ_PWR_MAX_TYPE) + statAbsValues[J][K] = min; + } + + if(k==0) + K = 0; + else if(k%edgeSize==0) + K++; + + if(curValue!=0) + { + curAbsValue = fabs(curValue); + if(confparams_cpr->pwr_type == SZ_PWR_MIN_TYPE) + { + if(statAbsValues[J][K]>curAbsValue) + { + statAbsValues[J][K] = curAbsValue; + } + } + else if(confparams_cpr->pwr_type == SZ_PWR_MAX_TYPE) + { + if(statAbsValues[J][K]pw_relBoundRatio*statAbsValues[J][K]; + doubleToBytes(realPrecBytes, realPrecision); + memset(&realPrecBytes[2], 0, 6); + approxPrecision = bytesToDouble(realPrecBytes); + //put the realPrecision in double* pwrErBound + pwrErrBound[p++] = approxPrecision; + //put the two bytes in pwrErrBoundBytes + pwrErrBoundBytes[q++] = realPrecBytes[0]; + pwrErrBoundBytes[q++] = realPrecBytes[1]; + + free2DArray_double(statAbsValues, R2); +} + +unsigned int optimize_intervals_double_3D_pwr(double *oriData, size_t r1, size_t r2, size_t r3, size_t R2, size_t R3, size_t edgeSize, double* pwrErrBound) +{ + size_t i,j,k, ir,jr,index, I = 0,J=0,K=0; + double realPrecision = pwrErrBound[0]; + unsigned long radiusIndex; + size_t r23=r2*r3; + size_t R23 = R2*R3; + double pred_value = 0, pred_err; + int *intervals = (int*)malloc(confparams_cpr->maxRangeRadius*sizeof(int)); + memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(int)); + size_t totalSampleSize = (r1-1)*(r2-1)*(r3-1)/confparams_cpr->sampleDistance; + for(i=1;isampleDistance==0) + { + realPrecision = pwrErrBound[I*R23+J*R2+K]; + pred_value = oriData[index-1] + oriData[index-r3] + oriData[index-r23] + - oriData[index-1-r23] - oriData[index-r3-1] - oriData[index-r3-r23] + oriData[index-r3-r23-1]; + pred_err = fabs(pred_value - oriData[index]); + radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2); + if(radiusIndex>=confparams_cpr->maxRangeRadius) + radiusIndex = confparams_cpr->maxRangeRadius - 1; + intervals[radiusIndex]++; + } + } + } + } + //compute the appropriate 
number + size_t targetCount = totalSampleSize*confparams_cpr->predThreshold; + size_t sum = 0; + for(i=0;imaxRangeRadius;i++) + { + sum += intervals[i]; + if(sum>targetCount) + break; + } + if(i>=confparams_cpr->maxRangeRadius) + i = confparams_cpr->maxRangeRadius-1; + unsigned int accIntervals = 2*(i+1); + unsigned int powerOf2 = roundUpToPowerOf2(accIntervals); + + if(powerOf2<32) + powerOf2 = 32; + + free(intervals); + //printf("accIntervals=%d, powerOf2=%d\n", accIntervals, powerOf2); + return powerOf2; +} + +void SZ_compress_args_double_NoCkRngeNoGzip_1D_pwr(unsigned char** newByteData, double *oriData, double globalPrecision, +size_t dataLength, size_t *outSize, double min, double max) +{ + size_t pwrLength = dataLength%confparams_cpr->segment_size==0?dataLength/confparams_cpr->segment_size:dataLength/confparams_cpr->segment_size+1; + double* pwrErrBound = (double*)malloc(sizeof(double)*pwrLength); + size_t pwrErrBoundBytes_size = sizeof(unsigned char)*pwrLength*2; + unsigned char* pwrErrBoundBytes = (unsigned char*)malloc(pwrErrBoundBytes_size); + + compute_segment_precisions_double_1D(oriData, dataLength, pwrErrBound, pwrErrBoundBytes, globalPrecision); + + unsigned int quantization_intervals; + if(exe_params->optQuantMode==1) + { + quantization_intervals = optimize_intervals_double_1D_pwr(oriData, dataLength, pwrErrBound); + updateQuantizationInfo(quantization_intervals); + } + else + quantization_intervals = exe_params->intvCapacity; + size_t i = 0, j = 0; + int reqLength; + double realPrecision = pwrErrBound[j++]; + double medianValue = 0; + double radius = fabs(max)curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + listAdd_double(last3CmprsData, vce->data); + //printf("%.30G\n",last3CmprsData[0]); + + //add the second data + type[1] = 0; + addDBA_Data(resiBitLengthArray, (unsigned char)resiBitsLength); + 
compressSingleDoubleValue(vce, spaceFillingValue[1], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + listAdd_double(last3CmprsData, vce->data); + //printf("%.30G\n",last3CmprsData[0]); + + int state; + double checkRadius; + double curData; + double pred; + double predAbsErr; + checkRadius = (exe_params->intvCapacity-1)*realPrecision; + double interval = 2*realPrecision; + int updateReqLength = 0; //a marker: 1 means already updated + + for(i=2;isegment_size==0) + { + realPrecision = pwrErrBound[j++]; + checkRadius = (exe_params->intvCapacity-1)*realPrecision; + interval = 2*realPrecision; + updateReqLength = 0; + } + //pred = 2*last3CmprsData[0] - last3CmprsData[1]; + pred = last3CmprsData[0]; + predAbsErr = fabs(curData - pred); + if(predAbsErr=pred) + { + type[i] = exe_params->intvRadius+state; + pred = pred + state*interval; + } + else //curDataintvRadius-state; + pred = pred - state*interval; + } + listAdd_double(last3CmprsData, pred); + continue; + } + + //unpredictable data processing + if(updateReqLength==0) + { + computeReqLength_double(realPrecision, radExpo, &reqLength, &medianValue); + reqBytesLength = reqLength/8; + resiBitsLength = reqLength%8; + updateReqLength = 1; + } + + type[i] = 0; + addDBA_Data(resiBitLengthArray, (unsigned char)resiBitsLength); + + compressSingleDoubleValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + + listAdd_double(last3CmprsData, vce->data); + }//end of for + +// char* expSegmentsInBytes; +// int expSegmentsInBytes_size = 
convertESCToBytes(esc, &expSegmentsInBytes); + int exactDataNum = exactLeadNumArray->size; + + TightDataPointStorageD* tdps; + + new_TightDataPointStorageD2(&tdps, dataLength, exactDataNum, + type, exactMidByteArray->array, exactMidByteArray->size, + exactLeadNumArray->array, + resiBitArray->array, resiBitArray->size, + resiBitLengthArray->array, resiBitLengthArray->size, + realPrecision, medianValue, (char)reqLength, quantization_intervals, pwrErrBoundBytes, pwrErrBoundBytes_size, radExpo); + +//sdi:Debug +/* int sum =0; + for(i=0;itypeArray, tdps->typeArray_size, type_); +// printf("tdps->typeArray_size=%d\n", tdps->typeArray_size); + + //free memory + free_DBA(resiBitLengthArray); + free_DIA(exactLeadNumArray); + free_DIA(resiBitArray); + free(type); + + convertTDPStoFlatBytes_double(tdps, newByteData, outSize); + + int doubleSize=sizeof(double); + if(*outSize>dataLength*doubleSize) + { + size_t k = 0, i; + tdps->isLossless = 1; + size_t totalByteLength = 3 + exe_params->SZ_SIZE_TYPE + 1 + doubleSize*dataLength; + *newByteData = (unsigned char*)malloc(totalByteLength); + + unsigned char dsLengthBytes[exe_params->SZ_SIZE_TYPE]; + intToBytes_bigEndian(dsLengthBytes, dataLength);//4 + for (i = 0; i < 3; i++)//3 + (*newByteData)[k++] = versionNumber[i]; + + if(exe_params->SZ_SIZE_TYPE==4) + { + (*newByteData)[k++] = 16; //=00010000 + } + else + { + (*newByteData)[k++] = 80; + } + for (i = 0; i < exe_params->SZ_SIZE_TYPE; i++)//4 or 8 + (*newByteData)[k++] = dsLengthBytes[i]; + + + if(sysEndianType==BIG_ENDIAN_SYSTEM) + memcpy((*newByteData)+4+exe_params->SZ_SIZE_TYPE, oriData, dataLength*doubleSize); + else + { + unsigned char* p = (*newByteData)+4+exe_params->SZ_SIZE_TYPE; + for(i=0;isegment_size); + size_t R1 = 1+(r1-1)/blockEdgeSize; + size_t R2 = 1+(r2-1)/blockEdgeSize; + double* pwrErrBound = (double*)malloc(sizeof(double)*R1*R2); + size_t pwrErrBoundBytes_size = sizeof(unsigned char)*R1*R2*2; + unsigned char* pwrErrBoundBytes = (unsigned 
char*)malloc(pwrErrBoundBytes_size); + + compute_segment_precisions_double_2D(oriData, pwrErrBound, r1, r2, R2, blockEdgeSize, pwrErrBoundBytes, min, max, globalPrecision); + unsigned int quantization_intervals; + if(exe_params->optQuantMode==1) + { + quantization_intervals = optimize_intervals_double_2D_pwr(oriData, r1, r2, R2, blockEdgeSize, pwrErrBound); + updateQuantizationInfo(quantization_intervals); + } + else + quantization_intervals = exe_params->intvCapacity; + //printf("quantization_intervals=%d\n",quantization_intervals); + + size_t i=0,j=0,I=0,J=0; + int reqLength; + double realPrecision = pwrErrBound[I*R2+J]; + double pred1D, pred2D; + double diff = 0.0; + double itvNum = 0; + double *P0, *P1; + + P0 = (double*)malloc(r2*sizeof(double)); + memset(P0, 0, r2*sizeof(double)); + P1 = (double*)malloc(r2*sizeof(double)); + memset(P1, 0, r2*sizeof(double)); + + double medianValue = 0; + double radius = fabs(max)curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[0] = vce->data; + + /* Process Row-0 data 1*/ + pred1D = P1[0]; + diff = spaceFillingValue[1] - pred1D; + + itvNum = fabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[1] = (int) (itvNum/2) + exe_params->intvRadius; + P1[1] = pred1D + 2 * (type[1] - exe_params->intvRadius) * realPrecision; + } + else + { + type[1] = 0; + + addDBA_Data(resiBitLengthArray, (unsigned char)resiBitsLength); + compressSingleDoubleValue(vce, spaceFillingValue[1], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[1] = vce->data; + } + + /* Process Row-0 data 2 --> data r2-1 */ + for (j = 2; j < r2; 
j++) + { + if(j%blockEdgeSize==0) + { + J++; + realPrecision = pwrErrBound[I*R2+J]; + updateReqLength = 0; + } + + pred1D = 2*P1[j-1] - P1[j-2]; + diff = spaceFillingValue[j] - pred1D; + + itvNum = fabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[j] = (int) (itvNum/2) + exe_params->intvRadius; + P1[j] = pred1D + 2 * (type[j] - exe_params->intvRadius) * realPrecision; + } + else + { + if(updateReqLength==0) + { + computeReqLength_double(realPrecision, radExpo, &reqLength, &medianValue); + reqBytesLength = reqLength/8; + resiBitsLength = reqLength%8; + updateReqLength = 1; + } + + type[j] = 0; + + addDBA_Data(resiBitLengthArray, (unsigned char)resiBitsLength); + compressSingleDoubleValue(vce, spaceFillingValue[j], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[j] = vce->data; + } + } + + /* Process Row-1 --> Row-r1-1 */ + size_t index; + for (i = 1; i < r1; i++) + { + /* Process row-i data 0 */ + index = i*r2; + J = 0; + if(i%blockEdgeSize==0) + I++; + realPrecision = pwrErrBound[I*R2+J]; //J==0 + updateReqLength = 0; + + pred1D = P1[0]; + diff = spaceFillingValue[index] - pred1D; + + itvNum = fabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P0[0] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + } + else + { + if(updateReqLength==0) + { + computeReqLength_double(realPrecision, radExpo, &reqLength, &medianValue); + reqBytesLength = reqLength/8; + resiBitsLength = reqLength%8; + updateReqLength = 1; + } + + type[index] = 0; + + addDBA_Data(resiBitLengthArray, (unsigned char)resiBitsLength); + 
compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P0[0] = vce->data; + } + + /* Process row-i data 1 --> r2-1*/ + for (j = 1; j < r2; j++) + { + index = i*r2+j; + if(j%blockEdgeSize==0) + { + J++; + realPrecision = pwrErrBound[I*R2+J]; + updateReqLength = 0; + } + pred2D = P0[j-1] + P1[j] - P1[j-1]; + + diff = spaceFillingValue[index] - pred2D; + + itvNum = fabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P0[j] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + } + else + { + if(updateReqLength==0) + { + computeReqLength_double(realPrecision, radExpo, &reqLength, &medianValue); + reqBytesLength = reqLength/8; + resiBitsLength = reqLength%8; + updateReqLength = 1; + } + + type[index] = 0; + + addDBA_Data(resiBitLengthArray, (unsigned char)resiBitsLength); + compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P0[j] = vce->data; + } + } + + double *Pt; + Pt = P1; + P1 = P0; + P0 = Pt; + } + + if(r2!=1) + free(P0); + free(P1); + int exactDataNum = exactLeadNumArray->size; + + TightDataPointStorageD* tdps; + + new_TightDataPointStorageD2(&tdps, dataLength, exactDataNum, + type, exactMidByteArray->array, exactMidByteArray->size, + exactLeadNumArray->array, + resiBitArray->array, resiBitArray->size, + resiBitLengthArray->array, resiBitLengthArray->size, + 
realPrecision, medianValue, (char)reqLength, quantization_intervals, pwrErrBoundBytes, pwrErrBoundBytes_size, radExpo); + + //free memory + free_DBA(resiBitLengthArray); + free_DIA(exactLeadNumArray); + free_DIA(resiBitArray); + free(type); + + convertTDPStoFlatBytes_double(tdps, newByteData, outSize); + + free(pwrErrBound); + + free(vce); + free(lce); + free_TightDataPointStorageD(tdps); + free(exactMidByteArray); +} + +void SZ_compress_args_double_NoCkRngeNoGzip_3D_pwr(unsigned char** newByteData, double *oriData, double globalPrecision, +size_t r1, size_t r2, size_t r3, size_t *outSize, double min, double max) +{ + size_t dataLength=r1*r2*r3; + + int blockEdgeSize = computeBlockEdgeSize_3D(confparams_cpr->segment_size); + size_t R1 = 1+(r1-1)/blockEdgeSize; + size_t R2 = 1+(r2-1)/blockEdgeSize; + size_t R3 = 1+(r3-1)/blockEdgeSize; + double* pwrErrBound = (double*)malloc(sizeof(double)*R1*R2*R3); + size_t pwrErrBoundBytes_size = sizeof(unsigned char)*R1*R2*R3*2; + unsigned char* pwrErrBoundBytes = (unsigned char*)malloc(pwrErrBoundBytes_size); + + compute_segment_precisions_double_3D(oriData, pwrErrBound, r1, r2, r3, R2, R3, blockEdgeSize, pwrErrBoundBytes, min, max, globalPrecision); + unsigned int quantization_intervals; + if(exe_params->optQuantMode==1) + { + quantization_intervals = optimize_intervals_double_3D_pwr(oriData, r1, r2, r3, R2, R3, blockEdgeSize, pwrErrBound); + updateQuantizationInfo(quantization_intervals); + } + else + quantization_intervals = exe_params->intvCapacity; + size_t i=0,j=0,k=0, I = 0, J = 0, K = 0; + int reqLength; + double realPrecision = pwrErrBound[0]; + double pred1D, pred2D, pred3D; + double diff = 0.0; + double itvNum = 0; + double *P0, *P1; + + size_t r23 = r2*r3; + size_t R23 = R2*R3; + P0 = (double*)malloc(r23*sizeof(double)); + P1 = (double*)malloc(r23*sizeof(double)); + double radius = fabs(max)curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + 
addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[0] = vce->data; + + /* Process Row-0 data 1*/ + pred1D = P1[0]; + diff = spaceFillingValue[1] - pred1D; + + itvNum = fabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[1] = (int) (itvNum/2) + exe_params->intvRadius; + P1[1] = pred1D + 2 * (type[1] - exe_params->intvRadius) * realPrecision; + } + else + { + if(updateReqLength==0) + { + computeReqLength_double(realPrecision, radExpo, &reqLength, &medianValue); + reqBytesLength = reqLength/8; + resiBitsLength = reqLength%8; + updateReqLength = 1; + } + + type[1] = 0; + + addDBA_Data(resiBitLengthArray, (unsigned char)resiBitsLength); + compressSingleDoubleValue(vce, spaceFillingValue[1], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[1] = vce->data; + } + + /* Process Row-0 data 2 --> data r3-1 */ + for (j = 2; j < r3; j++) + { + if(j%blockEdgeSize==0) + { + J++; + realPrecision = pwrErrBound[J]; + updateReqLength = 0; + } + pred1D = 2*P1[j-1] - P1[j-2]; + diff = spaceFillingValue[j] - pred1D; + + itvNum = fabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[j] = (int) (itvNum/2) + exe_params->intvRadius; + P1[j] = pred1D + 2 * (type[j] - exe_params->intvRadius) * realPrecision; + } + else + { + if(updateReqLength==0) + { + computeReqLength_double(realPrecision, radExpo, &reqLength, &medianValue); + reqBytesLength = reqLength/8; + resiBitsLength = reqLength%8; + updateReqLength = 1; + } + + type[j] = 0; + + addDBA_Data(resiBitLengthArray, (unsigned char)resiBitsLength); + compressSingleDoubleValue(vce, spaceFillingValue[j], realPrecision, medianValue, reqLength, 
reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[j] = vce->data; + } + } + + /* Process Row-1 --> Row-r2-1 */ + size_t index; + K = 0; + for (i = 1; i < r2; i++) + { + /* Process row-i data 0 */ + index = i*r3; + + J = 0; + if(i%blockEdgeSize==0) + I++; + realPrecision = pwrErrBound[I*R3+J]; //J==0 + updateReqLength = 0; + + pred1D = P1[index-r3]; + diff = spaceFillingValue[index] - pred1D; + + itvNum = fabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P1[index] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + } + else + { + if(updateReqLength==0) + { + computeReqLength_double(realPrecision, radExpo, &reqLength, &medianValue); + reqBytesLength = reqLength/8; + resiBitsLength = reqLength%8; + updateReqLength = 1; + } + + type[index] = 0; + + addDBA_Data(resiBitLengthArray, (unsigned char)resiBitsLength); + compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[index] = vce->data; + } + + /* Process row-i data 1 --> data r3-1*/ + for (j = 1; j < r3; j++) //note that this j refers to fastest dimension (lowest order) + { + index = i*r3+j; + if(j%blockEdgeSize==0) + { + J++; + realPrecision = pwrErrBound[I*R3+J]; + updateReqLength = 0; + } + + pred2D = P1[index-1] + P1[index-r3] - P1[index-r3-1]; + + diff = spaceFillingValue[index] - pred2D; + + itvNum = fabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 
0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P1[index] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + } + else + { + if(updateReqLength==0) + { + computeReqLength_double(realPrecision, radExpo, &reqLength, &medianValue); + reqBytesLength = reqLength/8; + resiBitsLength = reqLength%8; + updateReqLength = 1; + } + + type[index] = 0; + + addDBA_Data(resiBitLengthArray, (unsigned char)resiBitsLength); + compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[index] = vce->data; + } + } + } + + + /////////////////////////// Process layer-1 --> layer-r1-1 /////////////////////////// + + for (k = 1; k < r1; k++) + { + /* Process Row-0 data 0*/ + index = k*r23; + I = 0; + J = 0; + if(k%blockEdgeSize==0) + K++; + realPrecision = pwrErrBound[K*R23]; //J==0 + updateReqLength = 0; + + pred1D = P1[0]; + diff = spaceFillingValue[index] - pred1D; + + itvNum = fabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P0[0] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + } + else + { + if(updateReqLength==0) + { + computeReqLength_double(realPrecision, radExpo, &reqLength, &medianValue); + reqBytesLength = reqLength/8; + resiBitsLength = reqLength%8; + updateReqLength = 1; + } + + type[index] = 0; + + addDBA_Data(resiBitLengthArray, (unsigned char)resiBitsLength); + compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + 
memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P0[0] = vce->data; + } + + + /* Process Row-0 data 1 --> data r3-1 */ + for (j = 1; j < r3; j++) + { + index = k*r23+j; + + if(j%blockEdgeSize==0) + { + J++; + realPrecision = pwrErrBound[K*R23+J]; + updateReqLength = 0; + } + pred2D = P0[j-1] + P1[j] - P1[j-1]; + diff = spaceFillingValue[index] - pred2D; + + itvNum = fabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P0[j] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + } + else + { + if(updateReqLength==0) + { + computeReqLength_double(realPrecision, radExpo, &reqLength, &medianValue); + reqBytesLength = reqLength/8; + resiBitsLength = reqLength%8; + updateReqLength = 1; + } + + type[index] = 0; + + addDBA_Data(resiBitLengthArray, (unsigned char)resiBitsLength); + compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P0[j] = vce->data; + } + } + + /* Process Row-1 --> Row-r2-1 */ + size_t index2D; + for (i = 1; i < r2; i++) + { + /* Process Row-i data 0 */ + index = k*r23 + i*r3; + + J = 0; + if(i%blockEdgeSize==0) + I++; + realPrecision = pwrErrBound[K*R23+I*R3+J]; //J==0 + updateReqLength = 0; + + index2D = i*r3; + pred2D = P0[index2D-r3] + P1[index2D] - P1[index2D-r3]; + diff = spaceFillingValue[index] - pred2D; + + itvNum = fabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P0[index2D] = pred2D + 2 * (type[index] - exe_params->intvRadius) * 
realPrecision; + } + else + { + if(updateReqLength==0) + { + computeReqLength_double(realPrecision, radExpo, &reqLength, &medianValue); + reqBytesLength = reqLength/8; + resiBitsLength = reqLength%8; + updateReqLength = 1; + } + + type[index] = 0; + + addDBA_Data(resiBitLengthArray, (unsigned char)resiBitsLength); + compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P0[index2D] = vce->data; + } + + /* Process Row-i data 1 --> data r3-1 */ + for (j = 1; j < r3; j++) + { + index = k*r23 + i*r3 + j; + + if(j%blockEdgeSize==0) + { + J++; + realPrecision = pwrErrBound[K*R23+I*R3+J]; + updateReqLength = 0; + } + index2D = i*r3 + j; + pred3D = P0[index2D-1] + P0[index2D-r3]+ P1[index2D] - P0[index2D-r3-1] - P1[index2D-r3] - P1[index2D-1] + P1[index2D-r3-1]; + diff = spaceFillingValue[index] - pred3D; + + itvNum = fabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P0[index2D] = pred3D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + } + else + { + if(updateReqLength==0) + { + computeReqLength_double(realPrecision, radExpo, &reqLength, &medianValue); + reqBytesLength = reqLength/8; + resiBitsLength = reqLength%8; + updateReqLength = 1; + } + + type[index] = 0; + + addDBA_Data(resiBitLengthArray, (unsigned char)resiBitsLength); + compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, 
resiBitArray, lce); + P0[index2D] = vce->data; + } + } + } + + double *Pt; + Pt = P1; + P1 = P0; + P0 = Pt; + } + if(r23!=1) + free(P0); + free(P1); + int exactDataNum = exactLeadNumArray->size; + + TightDataPointStorageD* tdps; + + new_TightDataPointStorageD2(&tdps, dataLength, exactDataNum, + type, exactMidByteArray->array, exactMidByteArray->size, + exactLeadNumArray->array, + resiBitArray->array, resiBitArray->size, + resiBitLengthArray->array, resiBitLengthArray->size, + realPrecision, medianValue, (char)reqLength, quantization_intervals, pwrErrBoundBytes, pwrErrBoundBytes_size, radExpo); + + convertTDPStoFlatBytes_double(tdps, newByteData, outSize); + + //free memory + free_DBA(resiBitLengthArray); + free_DIA(exactLeadNumArray); + free_DIA(resiBitArray); + free(type); + + free(pwrErrBound); + + free(vce); + free(lce); + free_TightDataPointStorageD(tdps); + free(exactMidByteArray); +} + +void createRangeGroups_double(double** posGroups, double** negGroups, int** posFlags, int** negFlags) +{ + size_t size = GROUP_COUNT*sizeof(double); + size_t size2 = GROUP_COUNT*sizeof(int); + *posGroups = (double*)malloc(size); + *negGroups = (double*)malloc(size); + *posFlags = (int*)malloc(size2); + *negFlags = (int*)malloc(size2); + memset(*posGroups, 0, size); + memset(*negGroups, 0, size); + memset(*posFlags, 0, size2); + memset(*negFlags, 0, size2); +} + +void compressGroupIDArray_double(char* groupID, TightDataPointStorageD* tdps) +{ + size_t dataLength = tdps->dataSeriesLength; + int* standGroupID = (int*)malloc(dataLength*sizeof(int)); + + size_t i; + standGroupID[0] = groupID[0]+GROUP_COUNT; //plus an offset such that it would not be a negative number. 
+ char lastGroupIDValue = groupID[0], curGroupIDValue; + int offset = 2*(GROUP_COUNT + 2); + for(i=1; ipwrErrBoundBytes = out; //groupIDArray + tdps->pwrErrBoundBytes_size = outSize; + + free(standGroupID); +} + +TightDataPointStorageD* SZ_compress_double_1D_MDQ_pwrGroup(double* oriData, size_t dataLength, int errBoundMode, +double absErrBound, double relBoundRatio, double pwrErrRatio, double valueRangeSize, double medianValue_f) +{ + size_t i; + double *posGroups, *negGroups, *groups; + double pos_01_group = 0, neg_01_group = 0; //[0,1] and [-1,0] + int *posFlags, *negFlags, *flags; + int pos_01_flag = 0, neg_01_flag = 0; + createRangeGroups_double(&posGroups, &negGroups, &posFlags, &negFlags); + size_t nbBins = (size_t)(1/pwrErrRatio); + if(nbBins%2==1) + nbBins++; + exe_params->intvRadius = nbBins; + + int reqLength, status; + double medianValue = medianValue_f; + double realPrecision = (double)getRealPrecision_double(valueRangeSize, errBoundMode, absErrBound, relBoundRatio, &status); + if(realPrecision<0) + realPrecision = pwrErrRatio; + double realGroupPrecision; //precision (error) based on group ID + getPrecisionReqLength_double(realPrecision); + short radExpo = getExponent_double(valueRangeSize/2); + short lastGroupNum = 0, groupNum, grpNum = 0; + + double* groupErrorBounds = generateGroupErrBounds(errBoundMode, realPrecision, pwrErrRatio); + exe_params->intvRadius = generateGroupMaxIntervalCount(groupErrorBounds); + + computeReqLength_double(realPrecision, radExpo, &reqLength, &medianValue); + + int* type = (int*) malloc(dataLength*sizeof(int)); + char *groupID = (char*) malloc(dataLength*sizeof(char)); + char *gp = groupID; + + double* spaceFillingValue = oriData; + + DynamicIntArray *exactLeadNumArray; + new_DIA(&exactLeadNumArray, DynArrayInitLen); + + DynamicByteArray *exactMidByteArray; + new_DBA(&exactMidByteArray, DynArrayInitLen); + + DynamicIntArray *resiBitArray; + new_DIA(&resiBitArray, DynArrayInitLen); + + unsigned char preDataBytes[8]; + 
intToBytes_bigEndian(preDataBytes, 0); + + int reqBytesLength = reqLength/8; + int resiBitsLength = reqLength%8; + + DoubleValueCompressElement *vce = (DoubleValueCompressElement*)malloc(sizeof(DoubleValueCompressElement)); + LossyCompressionElement *lce = (LossyCompressionElement*)malloc(sizeof(LossyCompressionElement)); + + int state; + double curData, decValue; + double pred; + double predAbsErr; + double interval = 0; + + //add the first data + type[0] = 0; + compressSingleDoubleValue(vce, spaceFillingValue[0], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + + curData = spaceFillingValue[0]; + groupNum = computeGroupNum_double(vce->data); + + if(curData > 0 && groupNum >= 0) + { + groups = posGroups; + flags = posFlags; + grpNum = groupNum; + } + else if(curData < 0 && groupNum >= 0) + { + groups = negGroups; + flags = negFlags; + grpNum = groupNum; + } + else if(curData >= 0 && groupNum == -1) + { + groups = &pos_01_group; + flags = &pos_01_flag; + grpNum = 0; + } + else //curData < 0 && groupNum == -1 + { + groups = &neg_01_group; + flags = &neg_01_flag; + grpNum = 0; + } + + listAdd_double_group(groups, flags, groupNum, spaceFillingValue[0], vce->data, gp); + gp++; + + for(i=1;i 0 && groupNum >= 0) + { + groups = posGroups; + flags = posFlags; + grpNum = groupNum; + } + else if(curData < 0 && groupNum >= 0) + { + groups = negGroups; + flags = negFlags; + grpNum = groupNum; + } + else if(curData >= 0 && groupNum == -1) + { + groups = &pos_01_group; + flags = &pos_01_flag; + grpNum = 0; + } + else //curData < 0 && groupNum == -1 + { + groups = &neg_01_group; + flags = &neg_01_flag; + grpNum = 0; + } + + if(groupNum>=GROUP_COUNT) + { + type[i] = 0; + compressSingleDoubleValue(vce, curData, realPrecision, medianValue, reqLength, 
reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + listAdd_double_group(groups, flags, lastGroupNum, curData, vce->data, gp); //set the group number to be last one in order to get the groupID array as smooth as possible. + } + else if(flags[grpNum]==0) //the dec value may not be in the same group + { + type[i] = 0; + compressSingleDoubleValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + //decGroupNum = computeGroupNum_double(vce->data); + + //if(decGroupNum < groupNum) + // decValue = curData>0?pow(2, groupNum):-pow(2, groupNum); + //else if(decGroupNum > groupNum) + // decValue = curData>0?pow(2, groupNum+1):-pow(2, groupNum+1); + //else + // decValue = vce->data; + + decValue = vce->data; + listAdd_double_group(groups, flags, groupNum, curData, decValue, gp); + lastGroupNum = curData>0?groupNum + 2: -(groupNum+2); + } + else //if flags[groupNum]==1, the dec value must be in the same group + { + pred = groups[grpNum]; + predAbsErr = fabs(curData - pred); + realGroupPrecision = groupErrorBounds[grpNum]; //compute real error bound + interval = realGroupPrecision*2; + state = (predAbsErr/realGroupPrecision+1)/2; + if(curData>=pred) + { + type[i] = exe_params->intvRadius+state; + decValue = pred + state*interval; + } + else //curDataintvRadius-state; + decValue = pred - state*interval; + } + //decGroupNum = computeGroupNum_double(pred); + + if((decValue>0&&curData<0)||(decValue<0&&curData>=0)) + decValue = 0; + //else + //{ + // if(decGroupNum < groupNum) + // decValue = curData>0?pow(2, groupNum):-pow(2, groupNum); + // else 
if(decGroupNum > groupNum) + // decValue = curData>0?pow(2, groupNum+1):-pow(2, groupNum+1); + // else + // decValue = pred; + //} + + if(fabs(curData-decValue)>realGroupPrecision) + { + type[i] = 0; + compressSingleDoubleValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + + decValue = vce->data; + } + + listAdd_double_group(groups, flags, groupNum, curData, decValue, gp); + lastGroupNum = curData>=0?groupNum + 2: -(groupNum+2); + } + gp++; + + } + + int exactDataNum = exactLeadNumArray->size; + + TightDataPointStorageD* tdps; + + //combineTypeAndGroupIDArray(nbBins, dataLength, &type, groupID); + + new_TightDataPointStorageD(&tdps, dataLength, exactDataNum, + type, exactMidByteArray->array, exactMidByteArray->size, + exactLeadNumArray->array, + resiBitArray->array, resiBitArray->size, + resiBitsLength, + realPrecision, medianValue, (char)reqLength, nbBins, NULL, 0, radExpo); + + compressGroupIDArray_double(groupID, tdps); + + free(posGroups); + free(negGroups); + free(posFlags); + free(negFlags); + free(groupID); + free(groupErrorBounds); + + free_DIA(exactLeadNumArray); + free_DIA(resiBitArray); + free(type); + free(vce); + free(lce); + free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageD(tdps); + + return tdps; +} + +void SZ_compress_args_double_NoCkRngeNoGzip_1D_pwrgroup(unsigned char** newByteData, double *oriData, +size_t dataLength, double absErrBound, double relBoundRatio, double pwrErrRatio, double valueRangeSize, double medianValue_f, size_t *outSize) +{ + TightDataPointStorageD* tdps = SZ_compress_double_1D_MDQ_pwrGroup(oriData, dataLength, confparams_cpr->errorBoundMode, + absErrBound, relBoundRatio, pwrErrRatio, + valueRangeSize, medianValue_f); + + 
convertTDPStoFlatBytes_double(tdps, newByteData, outSize); + + if(*outSize>3 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 1 + sizeof(double)*dataLength) + SZ_compress_args_double_StoreOriData(oriData, dataLength, newByteData, outSize); + + free_TightDataPointStorageD(tdps); +} + +#include + +void SZ_compress_args_double_NoCkRngeNoGzip_1D_pwr_pre_log(unsigned char** newByteData, double *oriData, double pwrErrRatio, size_t dataLength, size_t *outSize, double min, double max){ + + double * log_data = (double *) malloc(dataLength * sizeof(double)); + + unsigned char * signs = (unsigned char *) malloc(dataLength); + memset(signs, 0, dataLength); + // preprocess + double max_abs_log_data; + if(min == 0) max_abs_log_data = fabs(log2(fabs(max))); + else if(max == 0) max_abs_log_data = fabs(log2(fabs(min))); + else max_abs_log_data = fabs(log2(fabs(min))) > fabs(log2(fabs(max))) ? fabs(log2(fabs(min))) : fabs(log2(fabs(max))); + double min_log_data = max_abs_log_data; + bool positive = true; + for(size_t i=0; i 0){ + log_data[i] = log2(log_data[i]); + if(log_data[i] > max_abs_log_data) max_abs_log_data = log_data[i]; + if(log_data[i] < min_log_data) min_log_data = log_data[i]; + } + } + + double valueRangeSize, medianValue_f; + computeRangeSize_double(log_data, dataLength, &valueRangeSize, &medianValue_f); + if(fabs(min_log_data) > max_abs_log_data) max_abs_log_data = fabs(min_log_data); + double realPrecision = log2(1.0 + pwrErrRatio) - max_abs_log_data * 2.23e-16; + for(size_t i=0; iminLogValue = min_log_data - 1.0001*realPrecision; + free(log_data); + if(!positive){ + unsigned char * comp_signs; + // compress signs + unsigned long signSize = sz_lossless_compress(ZSTD_COMPRESSOR, 3, signs, dataLength, &comp_signs); + tdps->pwrErrBoundBytes = comp_signs; + tdps->pwrErrBoundBytes_size = signSize; + } + else{ + tdps->pwrErrBoundBytes = NULL; + tdps->pwrErrBoundBytes_size = 0; + } + free(signs); + + convertTDPStoFlatBytes_double(tdps, newByteData, outSize); + 
if(*outSize>3 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 1 + sizeof(double)*dataLength) + SZ_compress_args_double_StoreOriData(oriData, dataLength, newByteData, outSize); + + free_TightDataPointStorageD(tdps); +} + +void SZ_compress_args_double_NoCkRngeNoGzip_2D_pwr_pre_log(unsigned char** newByteData, double *oriData, double pwrErrRatio, size_t r1, size_t r2, size_t *outSize, double min, double max){ + + size_t dataLength = r1 * r2; + double * log_data = (double *) malloc(dataLength * sizeof(double)); + + unsigned char * signs = (unsigned char *) malloc(dataLength); + memset(signs, 0, dataLength); + // preprocess + double max_abs_log_data; + if(min == 0) max_abs_log_data = fabs(log2(fabs(max))); + else if(max == 0) max_abs_log_data = fabs(log2(fabs(min))); + else max_abs_log_data = fabs(log2(fabs(min))) > fabs(log2(fabs(max))) ? fabs(log2(fabs(min))) : fabs(log2(fabs(max))); + double min_log_data = max_abs_log_data; + bool positive = true; + for(size_t i=0; i 0){ + log_data[i] = log2(log_data[i]); + if(log_data[i] > max_abs_log_data) max_abs_log_data = log_data[i]; + if(log_data[i] < min_log_data) min_log_data = log_data[i]; + } + } + + double valueRangeSize, medianValue_f; + computeRangeSize_double(log_data, dataLength, &valueRangeSize, &medianValue_f); + if(fabs(min_log_data) > max_abs_log_data) max_abs_log_data = fabs(min_log_data); + double realPrecision = log2(1.0 + pwrErrRatio) - max_abs_log_data * 2.23e-16; + for(size_t i=0; iminLogValue = min_log_data - 1.0001*realPrecision; + free(log_data); + + if(!positive){ + unsigned char * comp_signs; + // compress signs + unsigned long signSize = sz_lossless_compress(ZSTD_COMPRESSOR, 3, signs, dataLength, &comp_signs); + tdps->pwrErrBoundBytes = comp_signs; + tdps->pwrErrBoundBytes_size = signSize; + } + else{ + tdps->pwrErrBoundBytes = NULL; + tdps->pwrErrBoundBytes_size = 0; + } + free(signs); + + convertTDPStoFlatBytes_double(tdps, newByteData, outSize); + if(*outSize>3 + MetaDataByteLength + 
exe_params->SZ_SIZE_TYPE + 1 + sizeof(double)*dataLength) + SZ_compress_args_double_StoreOriData(oriData, dataLength, newByteData, outSize); + + free_TightDataPointStorageD(tdps); +} + +void SZ_compress_args_double_NoCkRngeNoGzip_3D_pwr_pre_log(unsigned char** newByteData, double *oriData, double pwrErrRatio, size_t r1, size_t r2, size_t r3, size_t *outSize, double min, double max){ + + size_t dataLength = r1 * r2 * r3; + double * log_data = (double *) malloc(dataLength * sizeof(double)); + + unsigned char * signs = (unsigned char *) malloc(dataLength); + memset(signs, 0, dataLength); + // preprocess + double max_abs_log_data; + if(min == 0) max_abs_log_data = fabs(log2(fabs(max))); + else if(max == 0) max_abs_log_data = fabs(log2(fabs(min))); + else max_abs_log_data = fabs(log2(fabs(min))) > fabs(log2(fabs(max))) ? fabs(log2(fabs(min))) : fabs(log2(fabs(max))); + double min_log_data = max_abs_log_data; + bool positive = true; + for(size_t i=0; i 0){ + log_data[i] = log2(log_data[i]); + if(log_data[i] > max_abs_log_data) max_abs_log_data = log_data[i]; + if(log_data[i] < min_log_data) min_log_data = log_data[i]; + } + } + + double valueRangeSize, medianValue_f; + computeRangeSize_double(log_data, dataLength, &valueRangeSize, &medianValue_f); + if(fabs(min_log_data) > max_abs_log_data) max_abs_log_data = fabs(min_log_data); + double realPrecision = log2(1.0 + pwrErrRatio) - max_abs_log_data * 2.23e-16; + for(size_t i=0; iminLogValue = min_log_data - 1.0001*realPrecision; + free(log_data); + if(!positive){ + unsigned char * comp_signs; + // compress signs + unsigned long signSize = sz_lossless_compress(ZSTD_COMPRESSOR, 3, signs, dataLength, &comp_signs); + tdps->pwrErrBoundBytes = comp_signs; + tdps->pwrErrBoundBytes_size = signSize; + } + else{ + tdps->pwrErrBoundBytes = NULL; + tdps->pwrErrBoundBytes_size = 0; + } + free(signs); + + convertTDPStoFlatBytes_double(tdps, newByteData, outSize); + if(*outSize>3 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 1 + 
sizeof(double)*dataLength) + SZ_compress_args_double_StoreOriData(oriData, dataLength, newByteData, outSize); + + free_TightDataPointStorageD(tdps); +} +void SZ_compress_args_double_NoCkRngeNoGzip_1D_pwr_pre_log_MSST19(unsigned char** newByteData, double *oriData, double pwrErrRatio, size_t dataLength, size_t *outSize, double valueRangeSize, double medianValue_f, + unsigned char* signs, bool* positive, double min, double max, double nearZero){ + double multiplier = pow((1+pwrErrRatio), -3.0001); + for(int i=0; iminLogValue = nearZero / ((1+pwrErrRatio)*(1+pwrErrRatio)); + if(!(*positive)){ + unsigned char * comp_signs; + // compress signs + unsigned long signSize = sz_lossless_compress(ZSTD_COMPRESSOR, 3, signs, dataLength, &comp_signs); + tdps->pwrErrBoundBytes = comp_signs; + tdps->pwrErrBoundBytes_size = signSize; + } + else{ + tdps->pwrErrBoundBytes = NULL; + tdps->pwrErrBoundBytes_size = 0; + } + free(signs); + + convertTDPStoFlatBytes_double(tdps, newByteData, outSize); + if(*outSize>3 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 1 + sizeof(double)*dataLength) + SZ_compress_args_double_StoreOriData(oriData, dataLength, newByteData, outSize); + + free_TightDataPointStorageD(tdps); +} + +void SZ_compress_args_double_NoCkRngeNoGzip_2D_pwr_pre_log_MSST19(unsigned char** newByteData, double *oriData, double pwrErrRatio, size_t r1, size_t r2, size_t *outSize, double valueRangeSize, + unsigned char* signs, bool* positive, double min, double max, double nearZero){ + + size_t dataLength = r1 * r2; + + double multiplier = pow((1+pwrErrRatio), -3.0001); + for(int i=0; iminLogValue = nearZero / ((1+pwrErrRatio)*(1+pwrErrRatio)); + + if(!*positive){ + unsigned char * comp_signs; + // compress signs + unsigned long signSize = sz_lossless_compress(confparams_cpr->losslessCompressor, confparams_cpr->gzipMode, signs, dataLength, &comp_signs); + tdps->pwrErrBoundBytes = comp_signs; + tdps->pwrErrBoundBytes_size = signSize; + } + else{ + tdps->pwrErrBoundBytes = NULL; + 
tdps->pwrErrBoundBytes_size = 0; + } + free(signs); + + convertTDPStoFlatBytes_double(tdps, newByteData, outSize); + if(*outSize>3 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 1 + sizeof(double)*dataLength) + SZ_compress_args_double_StoreOriData(oriData, dataLength, newByteData, outSize); + + free_TightDataPointStorageD(tdps); +} + +void SZ_compress_args_double_NoCkRngeNoGzip_3D_pwr_pre_log_MSST19(unsigned char** newByteData, double *oriData, double pwrErrRatio, size_t r1, size_t r2, size_t r3, size_t *outSize, double valueRangeSize, unsigned char* signs, bool* positive, double min, double max, double nearZero){ + + size_t dataLength = r1 * r2 * r3; + + double multiplier = pow((1+pwrErrRatio), -3.0001); + for(int i=0; iminLogValue = nearZero / ((1+pwrErrRatio)*(1+pwrErrRatio)); + + if(!*positive){ + unsigned char * comp_signs; + // compress signs + unsigned long signSize = sz_lossless_compress(confparams_cpr->losslessCompressor, confparams_cpr->gzipMode, signs, dataLength, &comp_signs); + tdps->pwrErrBoundBytes = comp_signs; + tdps->pwrErrBoundBytes_size = signSize; + } + else{ + tdps->pwrErrBoundBytes = NULL; + tdps->pwrErrBoundBytes_size = 0; + } + free(signs); + + + convertTDPStoFlatBytes_double(tdps, newByteData, outSize); + if(*outSize>3 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 1 + sizeof(double)*dataLength) + SZ_compress_args_double_StoreOriData(oriData, dataLength, newByteData, outSize); + + free_TightDataPointStorageD(tdps); +} diff --git a/deps/SZ/sz/src/sz_double_ts.c b/deps/SZ/sz/src/sz_double_ts.c new file mode 100644 index 0000000000000000000000000000000000000000..3c9b184ee6f515a2188b980e052575c0e0635d93 --- /dev/null +++ b/deps/SZ/sz/src/sz_double_ts.c @@ -0,0 +1,191 @@ +/** + * @file sz_double_ts.c + * @author Sheng Di and Dingwen Tao + * @date Aug, 2016 + * @brief + * (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. 
+ */ + + +#include +#include +#include +#include +#include +#include "sz.h" +#include "CompressElement.h" +#include "DynamicByteArray.h" +#include "DynamicIntArray.h" +#include "TightDataPointStorageD.h" +#include "zlib.h" +#include "rw.h" +#include "sz_double_ts.h" + +unsigned int optimize_intervals_double_1D_ts(double *oriData, size_t dataLength, double* preData, double realPrecision) +{ + size_t i = 0, radiusIndex; + double pred_value = 0, pred_err; + size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t)); + memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t)); + size_t totalSampleSize = dataLength/confparams_cpr->sampleDistance; + for(i=2;isampleDistance==0) + { + pred_value = preData[i]; + pred_err = fabs(pred_value - oriData[i]); + radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2); + if(radiusIndex>=confparams_cpr->maxRangeRadius) + radiusIndex = confparams_cpr->maxRangeRadius - 1; + intervals[radiusIndex]++; + } + } + //compute the appropriate number + size_t targetCount = totalSampleSize*confparams_cpr->predThreshold; + size_t sum = 0; + for(i=0;imaxRangeRadius;i++) + { + sum += intervals[i]; + if(sum>targetCount) + break; + } + if(i>=confparams_cpr->maxRangeRadius) + i = confparams_cpr->maxRangeRadius-1; + + unsigned int accIntervals = 2*(i+1); + unsigned int powerOf2 = roundUpToPowerOf2(accIntervals); + + if(powerOf2<32) + powerOf2 = 32; + + free(intervals); + return powerOf2; +} + +TightDataPointStorageD* SZ_compress_double_1D_MDQ_ts(double *oriData, size_t dataLength, sz_multisteps* multisteps, +double realPrecision, double valueRangeSize, double medianValue_d) +{ + double* preStepData = (double*)(multisteps->hist_data); + //store the decompressed data + //double* decData = (double*)malloc(sizeof(double)*dataLength); + //memset(decData, 0, sizeof(double)*dataLength); + double* decData = preStepData; + + unsigned int quantization_intervals; + if(exe_params->optQuantMode==1) + quantization_intervals = 
optimize_intervals_double_1D_ts(oriData, dataLength, preStepData, realPrecision); + else + quantization_intervals = exe_params->intvCapacity; + updateQuantizationInfo(quantization_intervals); + + size_t i; + int reqLength; + double medianValue = medianValue_d; + short radExpo = getExponent_double(valueRangeSize/2); + + computeReqLength_double(realPrecision, radExpo, &reqLength, &medianValue); + + int* type = (int*) malloc(dataLength*sizeof(int)); + + double* spaceFillingValue = oriData; // + + DynamicIntArray *exactLeadNumArray; + new_DIA(&exactLeadNumArray, DynArrayInitLen); + + DynamicByteArray *exactMidByteArray; + new_DBA(&exactMidByteArray, DynArrayInitLen); + + DynamicIntArray *resiBitArray; + new_DIA(&resiBitArray, DynArrayInitLen); + + unsigned char preDataBytes[8]; + longToBytes_bigEndian(preDataBytes, 0); + + int reqBytesLength = reqLength/8; + int resiBitsLength = reqLength%8; + + DoubleValueCompressElement *vce = (DoubleValueCompressElement*)malloc(sizeof(DoubleValueCompressElement)); + LossyCompressionElement *lce = (LossyCompressionElement*)malloc(sizeof(LossyCompressionElement)); + + //add the first data + type[0] = 0; + compressSingleDoubleValue(vce, spaceFillingValue[0], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + decData[0] = vce->data; + + //add the second data + type[1] = 0; + compressSingleDoubleValue(vce, spaceFillingValue[1], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + decData[1] = vce->data; + + int state = 0; + double checkRadius = 0; + double curData = 0; + double 
pred = 0; + double predAbsErr = 0; + checkRadius = (exe_params->intvCapacity-1)*realPrecision; + double interval = 2*realPrecision; + + for(i=2;i=pred) + { + type[i] = exe_params->intvRadius+state; + pred = pred + state*interval; + } + else //curDataintvRadius-state; + pred = pred - state*interval; + } + decData[i] = pred; + continue; + } + + //unpredictable data processing + type[i] = 0; + compressSingleDoubleValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + decData[i] = vce->data; + }//end of for + + size_t exactDataNum = exactLeadNumArray->size; + + TightDataPointStorageD* tdps; + + new_TightDataPointStorageD(&tdps, dataLength, exactDataNum, + type, exactMidByteArray->array, exactMidByteArray->size, + exactLeadNumArray->array, + resiBitArray->array, resiBitArray->size, + resiBitsLength, + realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0); + + //free memory + free_DIA(exactLeadNumArray); + free_DIA(resiBitArray); + free(type); + free(vce); + free(lce); + free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps); + + //memcpy(preStepData, decData, dataLength*sizeof(double)); //update the data + //free(decData); + + return tdps; +} + + diff --git a/deps/SZ/sz/src/sz_float.c b/deps/SZ/sz/src/sz_float.c new file mode 100644 index 0000000000000000000000000000000000000000..118bf11c346e52a74ed8c3c96571b23a375ae2cc --- /dev/null +++ b/deps/SZ/sz/src/sz_float.c @@ -0,0 +1,10079 @@ +/** + * @file sz_float.c + * @author Sheng Di, Dingwen Tao, Xin Liang, Xiangyu Zou, Tao Lu, Wen Xia, Xuan Wang, Weizhe Zhang + * @date Aug, 2016 + * @brief SZ_Init, Compression and Decompression functions + * (C) 2016 by Mathematics and Computer Science (MCS), 
Argonne National Laboratory. + * See COPYRIGHT in top-level directory. + */ + + +#include +#include +#include +#include +#include +#include +#include "sz.h" +#include "CompressElement.h" +#include "DynamicByteArray.h" +#include "DynamicIntArray.h" +#include "TightDataPointStorageF.h" +#include "sz_float.h" +#include "sz_float_pwr.h" +#include "szd_float.h" +#include "szd_float_pwr.h" +#include "zlib.h" +#include "rw.h" +#include "sz_float_ts.h" +#include "utility.h" +#include "CacheTable.h" +#include "MultiLevelCacheTableWideInterval.h" +#include "sz_stats.h" + +unsigned char* SZ_skip_compress_float(float* data, size_t dataLength, size_t* outSize) +{ + *outSize = dataLength*sizeof(float); + unsigned char* out = (unsigned char*)malloc(dataLength*sizeof(float)); + memcpy(out, data, dataLength*sizeof(float)); + return out; +} + +void computeReqLength_float(double realPrecision, short radExpo, int* reqLength, float* medianValue) +{ + short reqExpo = getPrecisionReqLength_double(realPrecision); + *reqLength = 9+radExpo - reqExpo+1; //radExpo-reqExpo == reqMantiLength + if(*reqLength<9) + *reqLength = 9; + if(*reqLength>32) + { + *reqLength = 32; + *medianValue = 0; + } +} + +inline short computeReqLength_float_MSST19(double realPrecision) +{ + short reqExpo = getPrecisionReqLength_float(realPrecision); + return 9-reqExpo; +} + +unsigned int optimize_intervals_float_1D(float *oriData, size_t dataLength, double realPrecision) +{ + size_t i = 0, radiusIndex; + float pred_value = 0, pred_err; + size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t)); + memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t)); + size_t totalSampleSize = dataLength/confparams_cpr->sampleDistance; + for(i=2;isampleDistance==0) + { + //pred_value = 2*oriData[i-1] - oriData[i-2]; + pred_value = oriData[i-1]; + pred_err = fabs(pred_value - oriData[i]); + radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2); + 
if(radiusIndex>=confparams_cpr->maxRangeRadius) + radiusIndex = confparams_cpr->maxRangeRadius - 1; + intervals[radiusIndex]++; + } + } + //compute the appropriate number + size_t targetCount = totalSampleSize*confparams_cpr->predThreshold; + size_t sum = 0; + for(i=0;imaxRangeRadius;i++) + { + sum += intervals[i]; + if(sum>targetCount) + break; + } + if(i>=confparams_cpr->maxRangeRadius) + i = confparams_cpr->maxRangeRadius-1; + + unsigned int accIntervals = 2*(i+1); + unsigned int powerOf2 = roundUpToPowerOf2(accIntervals); + + if(powerOf2<32) + powerOf2 = 32; + + free(intervals); + //printf("accIntervals=%d, powerOf2=%d\n", accIntervals, powerOf2); + return powerOf2; +} + +unsigned int optimize_intervals_float_2D(float *oriData, size_t r1, size_t r2, double realPrecision) +{ + size_t i,j, index; + size_t radiusIndex; + float pred_value = 0, pred_err; + size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t)); + memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t)); + size_t totalSampleSize = (r1-1)*(r2-1)/confparams_cpr->sampleDistance; + + //float max = oriData[0]; + //float min = oriData[0]; + + for(i=1;isampleDistance==0) + { + index = i*r2+j; + pred_value = oriData[index-1] + oriData[index-r2] - oriData[index-r2-1]; + pred_err = fabs(pred_value - oriData[index]); + radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2); + if(radiusIndex>=confparams_cpr->maxRangeRadius) + radiusIndex = confparams_cpr->maxRangeRadius - 1; + intervals[radiusIndex]++; + + // if (max < oriData[index]) max = oriData[index]; + // if (min > oriData[index]) min = oriData[index]; + } + } + } + //compute the appropriate number + size_t targetCount = totalSampleSize*confparams_cpr->predThreshold; + size_t sum = 0; + for(i=0;imaxRangeRadius;i++) + { + sum += intervals[i]; + if(sum>targetCount) + break; + } + if(i>=confparams_cpr->maxRangeRadius) + i = confparams_cpr->maxRangeRadius-1; + unsigned int accIntervals = 2*(i+1); + unsigned int 
powerOf2 = roundUpToPowerOf2(accIntervals); + + if(powerOf2<32) + powerOf2 = 32; + + // struct timeval costStart, costEnd; + // double cost_est = 0; + // + // gettimeofday(&costStart, NULL); + // + // //compute estimate of bit-rate and distortion + // double est_br = 0; + // double est_psnr = 0; + // double c1 = log2(targetCount)+1; + // double c2 = -20.0*log10(realPrecision) + 20.0*log10(max-min) + 10.0*log10(3); + // + // for (i = 0; i < powerOf2/2; i++) + // { + // int count = intervals[i]; + // if (count != 0) + // est_br += count*log2(count); + // est_psnr += count; + // } + // + // //compute estimate of bit-rate + // est_br -= c1*est_psnr; + // est_br /= totalSampleSize; + // est_br = -est_br; + // + // //compute estimate of psnr + // est_psnr /= totalSampleSize; + // printf ("sum of P(i) = %lf\n", est_psnr); + // est_psnr = -10.0*log10(est_psnr); + // est_psnr += c2; + // + // printf ("estimate bitrate = %.2f\n", est_br); + // printf ("estimate psnr = %.2f\n",est_psnr); + // + // gettimeofday(&costEnd, NULL); + // cost_est = ((costEnd.tv_sec*1000000+costEnd.tv_usec)-(costStart.tv_sec*1000000+costStart.tv_usec))/1000000.0; + // + // printf ("analysis time = %f\n", cost_est); + + free(intervals); + //printf("confparams_cpr->maxRangeRadius = %d, accIntervals=%d, powerOf2=%d\n", confparams_cpr->maxRangeRadius, accIntervals, powerOf2); + return powerOf2; +} + +unsigned int optimize_intervals_float_3D(float *oriData, size_t r1, size_t r2, size_t r3, double realPrecision) +{ + size_t i,j,k, index; + size_t radiusIndex; + size_t r23=r2*r3; + float pred_value = 0, pred_err; + size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t)); + memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t)); + size_t totalSampleSize = (r1-1)*(r2-1)*(r3-1)/confparams_cpr->sampleDistance; + + //float max = oriData[0]; + //float min = oriData[0]; + + for(i=1;isampleDistance==0) + { + index = i*r23+j*r3+k; + pred_value = oriData[index-1] + 
oriData[index-r3] + oriData[index-r23] + - oriData[index-1-r23] - oriData[index-r3-1] - oriData[index-r3-r23] + oriData[index-r3-r23-1]; + pred_err = fabs(pred_value - oriData[index]); + radiusIndex = (pred_err/realPrecision+1)/2; + if(radiusIndex>=confparams_cpr->maxRangeRadius) + { + radiusIndex = confparams_cpr->maxRangeRadius - 1; + //printf("radiusIndex=%d\n", radiusIndex); + } + intervals[radiusIndex]++; + + // if (max < oriData[index]) max = oriData[index]; + // if (min > oriData[index]) min = oriData[index]; + } + } + } + } + //compute the appropriate number + size_t targetCount = totalSampleSize*confparams_cpr->predThreshold; + size_t sum = 0; + for(i=0;imaxRangeRadius;i++) + { + sum += intervals[i]; + if(sum>targetCount) + break; + } + if(i>=confparams_cpr->maxRangeRadius) + i = confparams_cpr->maxRangeRadius-1; + unsigned int accIntervals = 2*(i+1); + unsigned int powerOf2 = roundUpToPowerOf2(accIntervals); + + if(powerOf2<32) + powerOf2 = 32; + + // struct timeval costStart, costEnd; + // double cost_est = 0; + // + // gettimeofday(&costStart, NULL); + // + // //compute estimate of bit-rate and distortion + // double est_br = 0; + // double est_psnr = 0; + // double c1 = log2(targetCount)+1; + // double c2 = -20.0*log10(realPrecision) + 20.0*log10(max-min) + 10.0*log10(3); + // + // for (i = 0; i < powerOf2/2; i++) + // { + // int count = intervals[i]; + // if (count != 0) + // est_br += count*log2(count); + // est_psnr += count; + // } + // + // //compute estimate of bit-rate + // est_br -= c1*est_psnr; + // est_br /= totalSampleSize; + // est_br = -est_br; + // + // //compute estimate of psnr + // est_psnr /= totalSampleSize; + // printf ("sum of P(i) = %lf\n", est_psnr); + // est_psnr = -10.0*log10(est_psnr); + // est_psnr += c2; + // + // printf ("estimate bitrate = %.2f\n", est_br); + // printf ("estimate psnr = %.2f\n",est_psnr); + // + // gettimeofday(&costEnd, NULL); + // cost_est = 
((costEnd.tv_sec*1000000+costEnd.tv_usec)-(costStart.tv_sec*1000000+costStart.tv_usec))/1000000.0; + // + // printf ("analysis time = %f\n", cost_est); + + free(intervals); + //printf("targetCount=%d, sum=%d, totalSampleSize=%d, ratio=%f, accIntervals=%d, powerOf2=%d\n", targetCount, sum, totalSampleSize, (double)sum/(double)totalSampleSize, accIntervals, powerOf2); + return powerOf2; +} + + +unsigned int optimize_intervals_float_4D(float *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision) +{ + size_t i,j,k,l, index; + size_t radiusIndex; + size_t r234=r2*r3*r4; + size_t r34=r3*r4; + float pred_value = 0, pred_err; + size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t)); + memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t)); + size_t totalSampleSize = (r1-1)*(r2-1)*(r3-1)*(r4-1)/confparams_cpr->sampleDistance; + for(i=1;isampleDistance==0) + { + index = i*r234+j*r34+k*r4+l; + pred_value = oriData[index-1] + oriData[index-r3] + oriData[index-r34] + - oriData[index-1-r34] - oriData[index-r4-1] - oriData[index-r4-r34] + oriData[index-r4-r34-1]; + pred_err = fabs(pred_value - oriData[index]); + radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2); + if(radiusIndex>=confparams_cpr->maxRangeRadius) + radiusIndex = confparams_cpr->maxRangeRadius - 1; + intervals[radiusIndex]++; + } + } + } + } + } + //compute the appropriate number + size_t targetCount = totalSampleSize*confparams_cpr->predThreshold; + size_t sum = 0; + for(i=0;imaxRangeRadius;i++) + { + sum += intervals[i]; + if(sum>targetCount) + break; + } + if(i>=confparams_cpr->maxRangeRadius) + i = confparams_cpr->maxRangeRadius-1; + + unsigned int accIntervals = 2*(i+1); + unsigned int powerOf2 = roundUpToPowerOf2(accIntervals); + + if(powerOf2<32) + powerOf2 = 32; + + free(intervals); + return powerOf2; +} + +TightDataPointStorageF* SZ_compress_float_1D_MDQ(float *oriData, +size_t dataLength, float realPrecision, float valueRangeSize, 
float medianValue_f) +{ +#ifdef HAVE_TIMECMPR + float* decData = NULL; + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData = (float*)(multisteps->hist_data); +#endif + + unsigned int quantization_intervals; + if(exe_params->optQuantMode==1) + quantization_intervals = optimize_intervals_float_1D_opt(oriData, dataLength, realPrecision); + else + quantization_intervals = exe_params->intvCapacity; + //updateQuantizationInfo(quantization_intervals); + int intvRadius = quantization_intervals/2; + + size_t i; + int reqLength; + float medianValue = medianValue_f; + short radExpo = getExponent_float(valueRangeSize/2); + + computeReqLength_float(realPrecision, radExpo, &reqLength, &medianValue); + + int* type = (int*) malloc(dataLength*sizeof(int)); + + float* spaceFillingValue = oriData; // + + DynamicIntArray *exactLeadNumArray; + new_DIA(&exactLeadNumArray, DynArrayInitLen); + + DynamicByteArray *exactMidByteArray; + new_DBA(&exactMidByteArray, DynArrayInitLen); + + DynamicIntArray *resiBitArray; + new_DIA(&resiBitArray, DynArrayInitLen); + + unsigned char preDataBytes[4]; + intToBytes_bigEndian(preDataBytes, 0); + + int reqBytesLength = reqLength/8; + int resiBitsLength = reqLength%8; + float last3CmprsData[3] = {0}; + + FloatValueCompressElement *vce = (FloatValueCompressElement*)malloc(sizeof(FloatValueCompressElement)); + LossyCompressionElement *lce = (LossyCompressionElement*)malloc(sizeof(LossyCompressionElement)); + + //add the first data + type[0] = 0; + compressSingleFloatValue(vce, spaceFillingValue[0], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + listAdd_float(last3CmprsData, vce->data); +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[0] = vce->data; +#endif + + //add the 
second data + type[1] = 0; + compressSingleFloatValue(vce, spaceFillingValue[1], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + listAdd_float(last3CmprsData, vce->data); +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[1] = vce->data; +#endif + int state; + float checkRadius; + float curData; + float pred = last3CmprsData[0]; + float predAbsErr; + checkRadius = (quantization_intervals-1)*realPrecision; + float interval = 2*realPrecision; + + float recip_precision = 1/realPrecision; + + for(i=2;i>1; + if(curData>=pred) + { + type[i] = intvRadius+state; + pred = pred + state*interval; + } + else //curDatarealPrecision) + { + type[i] = 0; + compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + + //listAdd_float(last3CmprsData, vce->data); + pred = vce->data; +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[i] = vce->data; +#endif + } + else + { + //listAdd_float(last3CmprsData, pred); +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[i] = pred; +#endif + } + continue; + } + + //unpredictable data processing + type[i] = 0; + compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + + 
//listAdd_float(last3CmprsData, vce->data); + pred = vce->data; +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[i] = vce->data; +#endif + + }//end of for + +// char* expSegmentsInBytes; +// int expSegmentsInBytes_size = convertESCToBytes(esc, &expSegmentsInBytes); + size_t exactDataNum = exactLeadNumArray->size; + + TightDataPointStorageF* tdps; + + new_TightDataPointStorageF(&tdps, dataLength, exactDataNum, + type, exactMidByteArray->array, exactMidByteArray->size, + exactLeadNumArray->array, + resiBitArray->array, resiBitArray->size, + resiBitsLength, + realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0); + +//sdi:Debug +/* int sum =0; + for(i=0;iarray has been released in free_TightDataPointStorageF(tdps); + + return tdps; +} + +void SZ_compress_args_float_StoreOriData(float* oriData, size_t dataLength, unsigned char** newByteData, size_t *outSize) +{ + int floatSize=sizeof(float); + size_t k = 0, i; + size_t totalByteLength = 3 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 1 + floatSize*dataLength; + /*No need to malloc because newByteData should always already be allocated with no less totalByteLength.*/ + //*newByteData = (unsigned char*)malloc(totalByteLength); + + unsigned char dsLengthBytes[8]; + for (i = 0; i < 3; i++)//3 + (*newByteData)[k++] = versionNumber[i]; + + if(exe_params->SZ_SIZE_TYPE==4)//1 + (*newByteData)[k++] = 16; //00010000 + else + (*newByteData)[k++] = 80; //01010000: 01000000 indicates the SZ_SIZE_TYPE=8 + + convertSZParamsToBytes(confparams_cpr, &((*newByteData)[k])); + k = k + MetaDataByteLength; + + sizeToBytes(dsLengthBytes,dataLength); //SZ_SIZE_TYPE: 4 or 8 + for (i = 0; i < exe_params->SZ_SIZE_TYPE; i++) + (*newByteData)[k++] = dsLengthBytes[i]; + + if(sysEndianType==BIG_ENDIAN_SYSTEM) + memcpy((*newByteData)+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE, oriData, dataLength*floatSize); + else + { + unsigned char* p = 
(*newByteData)+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE; + for(i=0;iszMode == SZ_TEMPORAL_COMPRESSION) + { + int timestep = sz_tsc->currentStep; + if(cmprType == SZ_PERIO_TEMPORAL_COMPRESSION) + { + if(timestep % confparams_cpr->snapshotCmprStep != 0) + { + tdps = SZ_compress_float_1D_MDQ_ts(oriData, dataLength, multisteps, realPrecision, valueRangeSize, medianValue_f); + compressionType = 1; //time-series based compression + } + else + { + tdps = SZ_compress_float_1D_MDQ(oriData, dataLength, realPrecision, valueRangeSize, medianValue_f); + compressionType = 0; //snapshot-based compression + multisteps->lastSnapshotStep = timestep; + } + } + else if(cmprType == SZ_FORCE_SNAPSHOT_COMPRESSION) + { + tdps = SZ_compress_float_1D_MDQ(oriData, dataLength, realPrecision, valueRangeSize, medianValue_f); + compressionType = 0; //snapshot-based compression + multisteps->lastSnapshotStep = timestep; + } + else if(cmprType == SZ_FORCE_TEMPORAL_COMPRESSION) + { + tdps = SZ_compress_float_1D_MDQ_ts(oriData, dataLength, multisteps, realPrecision, valueRangeSize, medianValue_f); + compressionType = 1; //time-series based compression + } + } + else +#endif + tdps = SZ_compress_float_1D_MDQ(oriData, dataLength, realPrecision, valueRangeSize, medianValue_f); + + convertTDPStoFlatBytes_float(tdps, newByteData, outSize); + + if(*outSize>3 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 1 + sizeof(float)*dataLength) + SZ_compress_args_float_StoreOriData(oriData, dataLength, newByteData, outSize); + + free_TightDataPointStorageF(tdps); + return compressionType; +} + +TightDataPointStorageF* SZ_compress_float_2D_MDQ(float *oriData, size_t r1, size_t r2, float realPrecision, float valueRangeSize, float medianValue_f) +{ +#ifdef HAVE_TIMECMPR + float* decData = NULL; + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData = (float*)(multisteps->hist_data); +#endif + + float recip_realPrecision = 1/realPrecision; + unsigned int quantization_intervals; + 
if(exe_params->optQuantMode==1) + { + quantization_intervals = optimize_intervals_float_2D_opt(oriData, r1, r2, realPrecision); + updateQuantizationInfo(quantization_intervals); + } + else + quantization_intervals = exe_params->intvCapacity; + int intvRadius = quantization_intervals/2; + + size_t i,j; + int reqLength; + float pred1D, pred2D; + float diff = 0.0; + float itvNum = 0; + float *P0, *P1; + + size_t dataLength = r1*r2; + + P0 = (float*)malloc(r2*sizeof(float)); + memset(P0, 0, r2*sizeof(float)); + P1 = (float*)malloc(r2*sizeof(float)); + memset(P1, 0, r2*sizeof(float)); + + float medianValue = medianValue_f; + short radExpo = getExponent_float(valueRangeSize/2); + computeReqLength_float(realPrecision, radExpo, &reqLength, &medianValue); + + int* type = (int*) malloc(dataLength*sizeof(int)); + //type[dataLength]=0; + + float* spaceFillingValue = oriData; // + + DynamicIntArray *exactLeadNumArray; + new_DIA(&exactLeadNumArray, DynArrayInitLen); + + DynamicByteArray *exactMidByteArray; + new_DBA(&exactMidByteArray, DynArrayInitLen); + + DynamicIntArray *resiBitArray; + new_DIA(&resiBitArray, DynArrayInitLen); + + type[0] = 0; + unsigned char preDataBytes[4]; + intToBytes_bigEndian(preDataBytes, 0); + + int reqBytesLength = reqLength/8; + int resiBitsLength = reqLength%8; + + FloatValueCompressElement *vce = (FloatValueCompressElement*)malloc(sizeof(FloatValueCompressElement)); + LossyCompressionElement *lce = (LossyCompressionElement*)malloc(sizeof(LossyCompressionElement)); + + /* Process Row-0 data 0*/ + type[0] = 0; + compressSingleFloatValue(vce, spaceFillingValue[0], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[0] = vce->data; +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + 
decData[0] = vce->data; +#endif + + float curData; + + /* Process Row-0 data 1*/ + pred1D = P1[0]; + curData = spaceFillingValue[1]; + diff = curData - pred1D; + + itvNum = fabs(diff)*recip_realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + type[1] = (int) (itvNum/2) + intvRadius; + P1[1] = pred1D + 2 * (type[1] - intvRadius) * realPrecision; + + //ganrantee comporession error against the case of machine-epsilon + if(fabs(spaceFillingValue[1]-P1[1])>realPrecision) + { + type[1] = 0; + compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + + P1[1] = vce->data; + } + } + else + { + type[1] = 0; + compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[1] = vce->data; + } +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[1] = P1[1]; +#endif + + /* Process Row-0 data 2 --> data r2-1 */ + for (j = 2; j < r2; j++) + { + pred1D = 2*P1[j-1] - P1[j-2]; + curData = spaceFillingValue[j]; + diff = curData - pred1D; + + itvNum = fabs(diff)*recip_realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + type[j] = (int) (itvNum/2) + intvRadius; + P1[j] = pred1D + 2 * (type[j] - intvRadius) * realPrecision; + + //ganrantee comporession error against the case of machine-epsilon + if(fabs(curData-P1[j])>realPrecision) + { + type[j] = 0; + compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, 
reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + + P1[j] = vce->data; + } + } + else + { + type[j] = 0; + compressSingleFloatValue(vce,curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[j] = vce->data; + } +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[j] = P1[j]; +#endif + } + + /* Process Row-1 --> Row-r1-1 */ + size_t index; + for (i = 1; i < r1; i++) + { + /* Process row-i data 0 */ + index = i*r2; + pred1D = P1[0]; + curData = spaceFillingValue[index]; + diff = curData - pred1D; + + itvNum = fabs(diff)*recip_realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + P0[0] = pred1D + 2 * (type[index] - intvRadius) * realPrecision; + + //ganrantee comporession error against the case of machine-epsilon + if(fabs(curData-P0[0])>realPrecision) + { + type[index] = 0; + compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + + P0[0] = vce->data; + } + } + else + { + type[index] = 0; + compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + 
addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P0[0] = vce->data; + } +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[index] = P0[0]; +#endif + + /* Process row-i data 1 --> r2-1*/ + for (j = 1; j < r2; j++) + { + index = i*r2+j; + pred2D = P0[j-1] + P1[j] - P1[j-1]; + + curData = spaceFillingValue[index]; + diff = curData - pred2D; + + itvNum = fabs(diff)*recip_realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + P0[j] = pred2D + 2 * (type[index] - intvRadius) * realPrecision; + + //ganrantee comporession error against the case of machine-epsilon + if(fabs(curData-P0[j])>realPrecision) + { + type[index] = 0; + compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + + P0[j] = vce->data; + } + } + else + { + type[index] = 0; + compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P0[j] = vce->data; + } +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[index] = P0[j]; +#endif + } + + float *Pt; + Pt = P1; + P1 = P0; + P0 = Pt; + } + + if(r2!=1) + free(P0); + free(P1); + size_t exactDataNum = exactLeadNumArray->size; + + TightDataPointStorageF* tdps; + + new_TightDataPointStorageF(&tdps, dataLength, exactDataNum, + type, exactMidByteArray->array, exactMidByteArray->size, + exactLeadNumArray->array, + resiBitArray->array, resiBitArray->size, + 
resiBitsLength, + realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0); + +// printf("exactDataNum=%d, expSegmentsInBytes_size=%d, exactMidByteArray->size=%d\n", +// exactDataNum, expSegmentsInBytes_size, exactMidByteArray->size); + +// for(i = 3800;i<3844;i++) +// printf("exactLeadNumArray->array[%d]=%d\n",i,exactLeadNumArray->array[i]); + + //free memory + free_DIA(exactLeadNumArray); + free_DIA(resiBitArray); + free(type); + free(vce); + free(lce); + free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps); + + return tdps; +} + +/** + * + * Note: @r1 is high dimension + * @r2 is low dimension + * */ +char SZ_compress_args_float_NoCkRngeNoGzip_2D(int cmprType, unsigned char** newByteData, float *oriData, size_t r1, size_t r2, double realPrecision, size_t *outSize, float valueRangeSize, float medianValue_f) +{ + size_t dataLength = r1*r2; + char compressionType = 0; + TightDataPointStorageF* tdps = NULL; + +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + { + int timestep = sz_tsc->currentStep; + if(cmprType == SZ_PERIO_TEMPORAL_COMPRESSION) + { + if(timestep % confparams_cpr->snapshotCmprStep != 0) + { + tdps = SZ_compress_float_1D_MDQ_ts(oriData, dataLength, multisteps, realPrecision, valueRangeSize, medianValue_f); + compressionType = 1; //time-series based compression + } + else + { + tdps = SZ_compress_float_2D_MDQ(oriData, r1, r2, realPrecision, valueRangeSize, medianValue_f); + compressionType = 0; //snapshot-based compression + multisteps->lastSnapshotStep = timestep; + } + } + else if(cmprType == SZ_FORCE_SNAPSHOT_COMPRESSION) + { + tdps = SZ_compress_float_2D_MDQ(oriData, r1, r2, realPrecision, valueRangeSize, medianValue_f); + compressionType = 0; //snapshot-based compression + multisteps->lastSnapshotStep = timestep; + } + else if(cmprType == SZ_FORCE_TEMPORAL_COMPRESSION) + { + tdps = SZ_compress_float_1D_MDQ_ts(oriData, dataLength, multisteps, 
realPrecision, valueRangeSize, medianValue_f); + compressionType = 1; //time-series based compression + } + } + else +#endif + tdps = SZ_compress_float_2D_MDQ(oriData, r1, r2, realPrecision, valueRangeSize, medianValue_f); + + convertTDPStoFlatBytes_float(tdps, newByteData, outSize); + + if(*outSize>3 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 1 + sizeof(float)*dataLength) + SZ_compress_args_float_StoreOriData(oriData, dataLength, newByteData, outSize); + + free_TightDataPointStorageF(tdps); + + return compressionType; +} + +TightDataPointStorageF* SZ_compress_float_3D_MDQ(float *oriData, size_t r1, size_t r2, size_t r3, float realPrecision, float valueRangeSize, float medianValue_f) +{ +#ifdef HAVE_TIMECMPR + float* decData = NULL; + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData = (float*)(multisteps->hist_data); +#endif + + float recip_realPrecision = 1/realPrecision; + unsigned int quantization_intervals; + if(exe_params->optQuantMode==1) + { + quantization_intervals = optimize_intervals_float_3D_opt(oriData, r1, r2, r3, realPrecision); + updateQuantizationInfo(quantization_intervals); + } + else + quantization_intervals = exe_params->intvCapacity; + int intvRadius = quantization_intervals/2; + + size_t i,j,k; + int reqLength; + float pred1D, pred2D, pred3D; + float diff = 0.0; + float itvNum = 0; + float *P0, *P1; + + size_t dataLength = r1*r2*r3; + size_t r23 = r2*r3; + P0 = (float*)malloc(r23*sizeof(float)); + P1 = (float*)malloc(r23*sizeof(float)); + + float medianValue = medianValue_f; + short radExpo = getExponent_float(valueRangeSize/2); + computeReqLength_float(realPrecision, radExpo, &reqLength, &medianValue); + + int* type = (int*) malloc(dataLength*sizeof(int)); + + float* spaceFillingValue = oriData; // + + DynamicIntArray *exactLeadNumArray; + new_DIA(&exactLeadNumArray, DynArrayInitLen); + + DynamicByteArray *exactMidByteArray; + new_DBA(&exactMidByteArray, DynArrayInitLen); + + DynamicIntArray *resiBitArray; + 
new_DIA(&resiBitArray, DynArrayInitLen); + + type[0] = 0; + + unsigned char preDataBytes[8]; + longToBytes_bigEndian(preDataBytes, 0); + + int reqBytesLength = reqLength/8; + int resiBitsLength = reqLength%8; + + FloatValueCompressElement *vce = (FloatValueCompressElement*)malloc(sizeof(FloatValueCompressElement)); + LossyCompressionElement *lce = (LossyCompressionElement*)malloc(sizeof(LossyCompressionElement)); + + + /////////////////////////// Process layer-0 /////////////////////////// + /* Process Row-0 data 0*/ + type[0] = 0; + compressSingleFloatValue(vce, spaceFillingValue[0], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[0] = vce->data; +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[0] = P1[0]; +#endif + + float curData; + + /* Process Row-0 data 1*/ + pred1D = P1[0]; + curData = spaceFillingValue[1]; + diff = curData - pred1D; + + itvNum = fabs(diff)*recip_realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + type[1] = (int) (itvNum/2) + intvRadius; + P1[1] = pred1D + 2 * (type[1] - intvRadius) * realPrecision; + + //ganrantee comporession error against the case of machine-epsilon + if(fabs(curData-P1[1])>realPrecision) + { + type[1] = 0; + compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + + P1[1] = vce->data; + } + } + else + { + type[1] = 0; + compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + 
updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[1] = vce->data; + } +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[1] = P1[1]; +#endif + + /* Process Row-0 data 2 --> data r3-1 */ + for (j = 2; j < r3; j++) + { + pred1D = 2*P1[j-1] - P1[j-2]; + curData = spaceFillingValue[j]; + diff = curData - pred1D; + + itvNum = fabs(diff)*recip_realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + type[j] = (int) (itvNum/2) + intvRadius; + P1[j] = pred1D + 2 * (type[j] - intvRadius) * realPrecision; + + //ganrantee comporession error against the case of machine-epsilon + if(fabs(curData-P1[j])>realPrecision) + { + type[j] = 0; + compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + + P1[j] = vce->data; + } + } + else + { + type[j] = 0; + compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[j] = vce->data; + } +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[j] = P1[j]; +#endif + } + + /* Process Row-1 --> Row-r2-1 */ + size_t index; + for (i = 1; i < r2; i++) + { + /* Process row-i data 0 */ + index = i*r3; + pred1D = P1[index-r3]; + curData = spaceFillingValue[index]; + diff = curData - pred1D; + + itvNum = fabs(diff)*recip_realPrecision + 1; + + if (itvNum 
< quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + P1[index] = pred1D + 2 * (type[index] - intvRadius) * realPrecision; + + //ganrantee comporession error against the case of machine-epsilon + if(fabs(curData-P1[index])>realPrecision) + { + type[index] = 0; + compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + + P1[index] = vce->data; + } + } + else + { + type[index] = 0; + compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[index] = vce->data; + } +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[index] = P1[index]; +#endif + + /* Process row-i data 1 --> data r3-1*/ + for (j = 1; j < r3; j++) + { + index = i*r3+j; + pred2D = P1[index-1] + P1[index-r3] - P1[index-r3-1]; + + curData = spaceFillingValue[index]; + diff = curData - pred2D; + + itvNum = fabs(diff)*recip_realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + P1[index] = pred2D + 2 * (type[index] - intvRadius) * realPrecision; + + //ganrantee comporession error against the case of machine-epsilon + if(fabs(curData-P1[index])>realPrecision) + { + type[index] = 0; + compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + 
memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + + P1[index] = vce->data; + } + } + else + { + type[index] = 0; + compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[index] = vce->data; + } +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[index] = P1[index]; +#endif + } + } + + + /////////////////////////// Process layer-1 --> layer-r1-1 /////////////////////////// + + for (k = 1; k < r1; k++) + { + /* Process Row-0 data 0*/ + index = k*r23; + pred1D = P1[0]; + curData = spaceFillingValue[index]; + diff = curData - pred1D; + + itvNum = fabs(diff)*recip_realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + P0[0] = pred1D + 2 * (type[index] - intvRadius) * realPrecision; + + //ganrantee comporession error against the case of machine-epsilon + if(fabs(curData-P0[0])>realPrecision) + { + type[index] = 0; + compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + + P0[0] = vce->data; + } + } + else + { + type[index] = 0; + compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P0[0] 
= vce->data; + } +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[index] = P0[0]; +#endif + + /* Process Row-0 data 1 --> data r3-1 */ + for (j = 1; j < r3; j++) + { + //index = k*r2*r3+j; + index ++; + pred2D = P0[j-1] + P1[j] - P1[j-1]; + curData = spaceFillingValue[index]; + diff = spaceFillingValue[index] - pred2D; + + itvNum = fabs(diff)*recip_realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + P0[j] = pred2D + 2 * (type[index] - intvRadius) * realPrecision; + //ganrantee comporession error against the case of machine-epsilon + if(fabs(curData-P0[j])>realPrecision) + { + type[index] = 0; + compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + + P0[j] = vce->data; + } + } + else + { + type[index] = 0; + compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P0[j] = vce->data; + } +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[index] = P0[j]; +#endif + } + + /* Process Row-1 --> Row-r2-1 */ + size_t index2D; + for (i = 1; i < r2; i++) + { + /* Process Row-i data 0 */ + index = k*r23 + i*r3; + index2D = i*r3; + pred2D = P0[index2D-r3] + P1[index2D] - P1[index2D-r3]; + curData = spaceFillingValue[index]; + diff = spaceFillingValue[index] - pred2D; + + itvNum = fabs(diff)*recip_realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = 
-itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + P0[index2D] = pred2D + 2 * (type[index] - intvRadius) * realPrecision; + //ganrantee comporession error against the case of machine-epsilon + if(fabs(curData-P0[index2D])>realPrecision) + { + type[index] = 0; + compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + + P0[index2D] = vce->data; + } + } + else + { + type[index] = 0; + compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P0[index2D] = vce->data; + } +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[index] = P0[index2D]; +#endif + + /* Process Row-i data 1 --> data r3-1 */ + for (j = 1; j < r3; j++) + { +// if(k==63&&i==43&&j==27) +// printf("i=%d\n", i); + //index = k*r2*r3 + i*r3 + j; + index ++; + index2D = i*r3 + j; + pred3D = P0[index2D-1] + P0[index2D-r3]+ P1[index2D] - P0[index2D-r3-1] - P1[index2D-r3] - P1[index2D-1] + P1[index2D-r3-1]; + curData = spaceFillingValue[index]; + diff = curData - pred3D; + + itvNum = fabs(diff)*recip_realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + P0[index2D] = pred3D + 2 * (type[index] - intvRadius) * realPrecision; + + //ganrantee comporession error against the case of machine-epsilon + if(fabs(curData-P0[index2D])>realPrecision) + { + type[index] = 0; + compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, 
reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + + P0[index2D] = vce->data; + } + } + else + { + type[index] = 0; + compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P0[index2D] = vce->data; + } +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[index] = P0[index2D]; +#endif + } + } + + float *Pt; + Pt = P1; + P1 = P0; + P0 = Pt; + } + if(r23!=1) + free(P0); + free(P1); + size_t exactDataNum = exactLeadNumArray->size; + + TightDataPointStorageF* tdps; + + new_TightDataPointStorageF(&tdps, dataLength, exactDataNum, + type, exactMidByteArray->array, exactMidByteArray->size, + exactLeadNumArray->array, + resiBitArray->array, resiBitArray->size, + resiBitsLength, + realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0); + +//sdi:Debug +/* int sum =0; + for(i=0;isize=%d\n", +// exactDataNum, expSegmentsInBytes_size, exactMidByteArray->size); + + //free memory + free_DIA(exactLeadNumArray); + free_DIA(resiBitArray); + free(type); + free(vce); + free(lce); + free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps); + + return tdps; +} + +/** + * + * @cmprType compressionType (SZ_FORCE_SNAPSHOT_COMPRESSION, SZ_FORCE_TEMPORAL_COMPRESSION or SZ_PEORI_TEMPORAL_COMPRESSION) + * + * */ +char SZ_compress_args_float_NoCkRngeNoGzip_3D(int cmprType, unsigned char** newByteData, float *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t *outSize, float valueRangeSize, float medianValue_f) +{ 
+ size_t dataLength = r1*r2*r3; + char compressionType = 0; + TightDataPointStorageF* tdps = NULL; + +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + { + int timestep = sz_tsc->currentStep; + if(cmprType == SZ_PERIO_TEMPORAL_COMPRESSION) + { + if(timestep % confparams_cpr->snapshotCmprStep != 0) + { + tdps = SZ_compress_float_1D_MDQ_ts(oriData, dataLength, multisteps, realPrecision, valueRangeSize, medianValue_f); + compressionType = 1; //time-series based compression + } + else + { + if(confparams_cpr->withRegression == SZ_NO_REGRESSION) + tdps = SZ_compress_float_3D_MDQ(oriData, r1, r2, r3, realPrecision, valueRangeSize, medianValue_f); + else + *newByteData = SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(oriData, r1, r2, r3, realPrecision, outSize); + compressionType = 0; //snapshot-based compression + multisteps->lastSnapshotStep = timestep; + } + } + else if(cmprType == SZ_FORCE_SNAPSHOT_COMPRESSION) + { + if(confparams_cpr->withRegression == SZ_NO_REGRESSION) + tdps = SZ_compress_float_3D_MDQ(oriData, r1, r2, r3, realPrecision, valueRangeSize, medianValue_f); + else + *newByteData = SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(oriData, r1, r2, r3, realPrecision, outSize); + compressionType = 0; //snapshot-based compression + multisteps->lastSnapshotStep = timestep; + } + else if(cmprType == SZ_FORCE_TEMPORAL_COMPRESSION) + { + tdps = SZ_compress_float_1D_MDQ_ts(oriData, dataLength, multisteps, realPrecision, valueRangeSize, medianValue_f); + compressionType = 1; //time-series based compression + } + } + else +#endif + tdps = SZ_compress_float_3D_MDQ(oriData, r1, r2, r3, realPrecision, valueRangeSize, medianValue_f); + + if(tdps!=NULL) + { + convertTDPStoFlatBytes_float(tdps, newByteData, outSize); + if(*outSize>3 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 1 + sizeof(float)*dataLength) + SZ_compress_args_float_StoreOriData(oriData, dataLength, newByteData, outSize); + 
free_TightDataPointStorageF(tdps); + } + + return compressionType; +} + +TightDataPointStorageF* SZ_compress_float_4D_MDQ(float *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision, float valueRangeSize, float medianValue_f) +{ + float recip_realPrecision = 1/realPrecision; + unsigned int quantization_intervals; + if(exe_params->optQuantMode==1) + { + quantization_intervals = optimize_intervals_float_4D(oriData, r1, r2, r3, r4, realPrecision); + updateQuantizationInfo(quantization_intervals); + } + else + quantization_intervals = exe_params->intvCapacity; + int intvRadius = quantization_intervals/2; + + size_t i,j,k; + int reqLength; + float pred1D, pred2D, pred3D; + float diff = 0.0; + double itvNum = 0; + float *P0, *P1; + + size_t dataLength = r1*r2*r3*r4; + + size_t r234 = r2*r3*r4; + size_t r34 = r3*r4; + + P0 = (float*)malloc(r34*sizeof(float)); + P1 = (float*)malloc(r34*sizeof(float)); + + float medianValue = medianValue_f; + short radExpo = getExponent_float(valueRangeSize/2); + computeReqLength_float(realPrecision, radExpo, &reqLength, &medianValue); + + int* type = (int*) malloc(dataLength*sizeof(int)); + + float* spaceFillingValue = oriData; // + + DynamicIntArray *exactLeadNumArray; + new_DIA(&exactLeadNumArray, DynArrayInitLen); + + DynamicByteArray *exactMidByteArray; + new_DBA(&exactMidByteArray, DynArrayInitLen); + + DynamicIntArray *resiBitArray; + new_DIA(&resiBitArray, DynArrayInitLen); + + unsigned char preDataBytes[4]; + intToBytes_bigEndian(preDataBytes, 0); + + int reqBytesLength = reqLength/8; + int resiBitsLength = reqLength%8; + + FloatValueCompressElement *vce = (FloatValueCompressElement*)malloc(sizeof(FloatValueCompressElement)); + LossyCompressionElement *lce = (LossyCompressionElement*)malloc(sizeof(LossyCompressionElement)); + + + size_t l; + for (l = 0; l < r1; l++) + { + + /////////////////////////// Process layer-0 /////////////////////////// + /* Process Row-0 data 0*/ + size_t index = l*r234; + size_t 
index2D = 0; + + type[index] = 0; + compressSingleFloatValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[index2D] = vce->data; + + /* Process Row-0 data 1*/ + index = l*r234+1; + index2D = 1; + + pred1D = P1[index2D-1]; + diff = spaceFillingValue[index] - pred1D; + + itvNum = fabs(diff)/realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + P1[index2D] = pred1D + 2 * (type[index] - intvRadius) * realPrecision; + } + else + { + type[index] = 0; + compressSingleFloatValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[index2D] = vce->data; + } + + /* Process Row-0 data 2 --> data r4-1 */ + for (j = 2; j < r4; j++) + { + index = l*r234+j; + index2D = j; + + pred1D = 2*P1[index2D-1] - P1[index2D-2]; + diff = spaceFillingValue[index] - pred1D; + + itvNum = fabs(diff)*recip_realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + P1[index2D] = pred1D + 2 * (type[index] - intvRadius) * realPrecision; + } + else + { + type[index] = 0; + compressSingleFloatValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, 
resiBitArray, lce); + P1[index2D] = vce->data; + } + } + + /* Process Row-1 --> Row-r3-1 */ + for (i = 1; i < r3; i++) + { + /* Process row-i data 0 */ + index = l*r234+i*r4; + index2D = i*r4; + + pred1D = P1[index2D-r4]; + diff = spaceFillingValue[index] - pred1D; + + itvNum = fabs(diff)*recip_realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + P1[index2D] = pred1D + 2 * (type[index] - intvRadius) * realPrecision; + } + else + { + type[index] = 0; + compressSingleFloatValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[index2D] = vce->data; + } + + /* Process row-i data 1 --> data r4-1*/ + for (j = 1; j < r4; j++) + { + index = l*r234+i*r4+j; + index2D = i*r4+j; + + pred2D = P1[index2D-1] + P1[index2D-r4] - P1[index2D-r4-1]; + + diff = spaceFillingValue[index] - pred2D; + + itvNum = fabs(diff)*recip_realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + P1[index2D] = pred2D + 2 * (type[index] - intvRadius) * realPrecision; + } + else + { + type[index] = 0; + compressSingleFloatValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[index2D] = vce->data; + } + } + } + + + /////////////////////////// Process layer-1 --> layer-r2-1 /////////////////////////// + + for (k = 1; k < r2; k++) + { + /* Process Row-0 data 0*/ + index = l*r234+k*r34; + index2D 
= 0; + + pred1D = P1[index2D]; + diff = spaceFillingValue[index] - pred1D; + + itvNum = fabs(diff)*recip_realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + P0[index2D] = pred1D + 2 * (type[index] - intvRadius) * realPrecision; + } + else + { + type[index] = 0; + compressSingleFloatValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P0[index2D] = vce->data; + } + + /* Process Row-0 data 1 --> data r4-1 */ + for (j = 1; j < r4; j++) + { + index = l*r234+k*r34+j; + index2D = j; + + pred2D = P0[index2D-1] + P1[index2D] - P1[index2D-1]; + diff = spaceFillingValue[index] - pred2D; + + itvNum = fabs(diff)*recip_realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + P0[index2D] = pred2D + 2 * (type[index] - intvRadius) * realPrecision; + } + else + { + type[index] = 0; + compressSingleFloatValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P0[index2D] = vce->data; + } + } + + /* Process Row-1 --> Row-r3-1 */ + for (i = 1; i < r3; i++) + { + /* Process Row-i data 0 */ + index = l*r234+k*r34+i*r4; + index2D = i*r4; + + pred2D = P0[index2D-r4] + P1[index2D] - P1[index2D-r4]; + diff = spaceFillingValue[index] - pred2D; + + itvNum = fabs(diff)*recip_realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + type[index] = 
(int) (itvNum/2) + intvRadius; + P0[index2D] = pred2D + 2 * (type[index] - intvRadius) * realPrecision; + } + else + { + type[index] = 0; + compressSingleFloatValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P0[index2D] = vce->data; + } + + /* Process Row-i data 1 --> data r4-1 */ + for (j = 1; j < r4; j++) + { + index = l*r234+k*r34+i*r4+j; + index2D = i*r4+j; + + pred3D = P0[index2D-1] + P0[index2D-r4]+ P1[index2D] - P0[index2D-r4-1] - P1[index2D-r4] - P1[index2D-1] + P1[index2D-r4-1]; + diff = spaceFillingValue[index] - pred3D; + + + itvNum = fabs(diff)*recip_realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + P0[index2D] = pred3D + 2 * (type[index] - intvRadius) * realPrecision; + } + else + { + type[index] = 0; + compressSingleFloatValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P0[index2D] = vce->data; + } + } + } + + float *Pt; + Pt = P1; + P1 = P0; + P0 = Pt; + } + } + + free(P0); + free(P1); + size_t exactDataNum = exactLeadNumArray->size; + + TightDataPointStorageF* tdps; + + new_TightDataPointStorageF(&tdps, dataLength, exactDataNum, + type, exactMidByteArray->array, exactMidByteArray->size, + exactLeadNumArray->array, + resiBitArray->array, resiBitArray->size, + resiBitsLength, + realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0); + + //free memory + free_DIA(exactLeadNumArray); + 
free_DIA(resiBitArray); + free(type); + free(vce); + free(lce); + free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps); + + return tdps; +} + +char SZ_compress_args_float_NoCkRngeNoGzip_4D(unsigned char** newByteData, float *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision, size_t *outSize, float valueRangeSize, float medianValue_f) +{ + TightDataPointStorageF* tdps = SZ_compress_float_4D_MDQ(oriData, r1, r2, r3, r4, realPrecision, valueRangeSize, medianValue_f); + + convertTDPStoFlatBytes_float(tdps, newByteData, outSize); + + int dataLength = r1*r2*r3*r4; + if(*outSize>3 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 1 + sizeof(float)*dataLength) + SZ_compress_args_float_StoreOriData(oriData, dataLength, newByteData, outSize); + + free_TightDataPointStorageF(tdps); + + return 0; +} + +/*MSST19*/ +TightDataPointStorageF* SZ_compress_float_1D_MDQ_MSST19(float *oriData, +size_t dataLength, double realPrecision, float valueRangeSize, float medianValue_f) +{ +#ifdef HAVE_TIMECMPR + float* decData = NULL; + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData = (float*)(multisteps->hist_data); +#endif + + //struct ClockPoint clockPointBuild; + //TimeDurationStart("build", &clockPointBuild); + unsigned int quantization_intervals; + if(exe_params->optQuantMode==1) + quantization_intervals = optimize_intervals_float_1D_opt_MSST19(oriData, dataLength, realPrecision); + else + quantization_intervals = exe_params->intvCapacity; + //updateQuantizationInfo(quantization_intervals); + int intvRadius = quantization_intervals/2; + + double* precisionTable = (double*)malloc(sizeof(double) * quantization_intervals); + double inv = 2.0-pow(2, -(confparams_cpr->plus_bits)); + for(int i=0; i30000 && i<40000) +// printf("%d %.30G\n", i, test); + } + //float smallest_precision = precisionTable[0], largest_precision = precisionTable[quantization_intervals-1]; + struct TopLevelTableWideInterval 
levelTable; + MultiLevelCacheTableWideIntervalBuild(&levelTable, precisionTable, quantization_intervals, realPrecision, confparams_cpr->plus_bits); + + size_t i; + int reqLength; + float medianValue = medianValue_f; + //float medianInverse = 1 / medianValue_f; + //short radExpo = getExponent_float(valueRangeSize/2); + + reqLength = computeReqLength_float_MSST19(realPrecision); + + int* type = (int*) malloc(dataLength*sizeof(int)); + + float* spaceFillingValue = oriData; // + + DynamicIntArray *exactLeadNumArray; + new_DIA(&exactLeadNumArray, dataLength/2/8); + + DynamicByteArray *exactMidByteArray; + new_DBA(&exactMidByteArray, dataLength/2); + + DynamicIntArray *resiBitArray; + new_DIA(&resiBitArray, DynArrayInitLen); + + unsigned char preDataBytes[4]; + intToBytes_bigEndian(preDataBytes, 0); + + int reqBytesLength = reqLength/8; + int resiBitsLength = reqLength%8; + float last3CmprsData[3] = {0}; + + //size_t miss=0, hit=0; + + FloatValueCompressElement *vce = (FloatValueCompressElement*)malloc(sizeof(FloatValueCompressElement)); + LossyCompressionElement *lce = (LossyCompressionElement*)malloc(sizeof(LossyCompressionElement)); + + //add the first data + type[0] = 0; + compressSingleFloatValue_MSST19(vce, spaceFillingValue[0], realPrecision, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + listAdd_float(last3CmprsData, vce->data); + //miss++; +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[0] = vce->data; +#endif + + //add the second data + type[1] = 0; + compressSingleFloatValue_MSST19(vce, spaceFillingValue[1], realPrecision, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + 
addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + listAdd_float(last3CmprsData, vce->data); + //miss++; +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[1] = vce->data; +#endif + int state; + //double checkRadius; + float curData; + float pred = vce->data; + + double predRelErrRatio; + + const uint64_t top = levelTable.topIndex, base = levelTable.baseIndex; + const uint64_t range = top - base; + const int bits = levelTable.bits; + uint64_t* const buffer = (uint64_t*)&predRelErrRatio; + const int shift = 52-bits; + uint64_t expoIndex, mantiIndex; + uint16_t* tables[range+1]; + for(int i=0; i<=range; i++){ + tables[i] = levelTable.subTables[i].table; + } + + for(i=2;i> 52) - base; + if(expoIndex <= range){ + mantiIndex = (*buffer & 0x000fffffffffffff) >> shift; + state = tables[expoIndex][mantiIndex]; + }else{ + state = 0; + } + + if(state) + { + type[i] = state; + pred *= precisionTable[state]; + //hit++; + continue; + } + + //unpredictable data processing + type[i] = 0; + compressSingleFloatValue_MSST19(vce, curData, realPrecision, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + pred = vce->data; + //miss++; +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[i] = vce->data; +#endif + + }//end of for + +// printf("miss:%d, hit:%d\n", miss, hit); + + size_t exactDataNum = exactLeadNumArray->size; + + TightDataPointStorageF* tdps; + + new_TightDataPointStorageF(&tdps, dataLength, exactDataNum, + type, exactMidByteArray->array, exactMidByteArray->size, + exactLeadNumArray->array, + resiBitArray->array, resiBitArray->size, + resiBitsLength, + realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0); + tdps->plus_bits = 
confparams_cpr->plus_bits; + + //free memory + free_DIA(exactLeadNumArray); + free_DIA(resiBitArray); + free(type); + free(vce); + free(lce); + free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps); + free(precisionTable); + freeTopLevelTableWideInterval(&levelTable); + return tdps; +} + +TightDataPointStorageF* SZ_compress_float_2D_MDQ_MSST19(float *oriData, size_t r1, size_t r2, double realPrecision, float valueRangeSize, float medianValue_f) +{ +#ifdef HAVE_TIMECMPR + float* decData = NULL; + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData = (float*)(multisteps->hist_data); +#endif + + unsigned int quantization_intervals; + if(exe_params->optQuantMode==1) + { + quantization_intervals = optimize_intervals_float_2D_opt_MSST19(oriData, r1, r2, realPrecision); + updateQuantizationInfo(quantization_intervals); + } + else + quantization_intervals = exe_params->intvCapacity; + int intvRadius = quantization_intervals/2; + + double* precisionTable = (double*)malloc(sizeof(double) * quantization_intervals); + double inv = 2.0-pow(2, -(confparams_cpr->plus_bits)); + for(int i=0; iplus_bits); + + size_t i,j; + int reqLength; + float pred1D, pred2D; + //float diff = 0.0; + //double itvNum = 0; + float *P0, *P1; + double predRelErrRatio; + + size_t dataLength = r1*r2; + + P0 = (float*)malloc(r2*sizeof(float)); + memset(P0, 0, r2*sizeof(float)); + P1 = (float*)malloc(r2*sizeof(float)); + memset(P1, 0, r2*sizeof(float)); + + float medianValue = medianValue_f; + //float medianValueInverse = 1 / medianValue_f; + //short radExpo = getExponent_float(valueRangeSize/2); + reqLength = computeReqLength_double_MSST19(realPrecision); + + int* type = (int*) malloc(dataLength*sizeof(int)); + //type[dataLength]=0; + + float* spaceFillingValue = oriData; // + + DynamicIntArray *exactLeadNumArray; + new_DIA(&exactLeadNumArray, DynArrayInitLen); + + DynamicByteArray *exactMidByteArray; + new_DBA(&exactMidByteArray, 
DynArrayInitLen); + + DynamicIntArray *resiBitArray; + new_DIA(&resiBitArray, DynArrayInitLen); + + type[0] = 0; + unsigned char preDataBytes[4]; + intToBytes_bigEndian(preDataBytes, 0); + + int reqBytesLength = reqLength/8; + int resiBitsLength = reqLength%8; + + FloatValueCompressElement *vce = (FloatValueCompressElement*)malloc(sizeof(FloatValueCompressElement)); + LossyCompressionElement *lce = (LossyCompressionElement*)malloc(sizeof(LossyCompressionElement)); + + const uint64_t top = levelTable.topIndex, base = levelTable.baseIndex; + const uint64_t range = top - base; + const int bits = levelTable.bits; + uint64_t* const buffer = (uint64_t*)&predRelErrRatio; + const int shift = 52-bits; + uint64_t expoIndex, mantiIndex; + uint16_t* tables[range+1]; + for(int i=0; i<=range; i++){ + tables[i] = levelTable.subTables[i].table; + } + + /* Process Row-0 data 0*/ + type[0] = 0; + compressSingleFloatValue_MSST19(vce, spaceFillingValue[0], realPrecision, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[0] = vce->data; +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[0] = vce->data; +#endif + + float curData; + int state; + + /* Process Row-0 data 1*/ + pred1D = P1[0]; + + curData = spaceFillingValue[1]; + predRelErrRatio = curData / pred1D; + + expoIndex = ((*buffer & 0x7fffffffffffffff) >> 52) - base; + if(expoIndex <= range){ + mantiIndex = (*buffer & 0x000fffffffffffff) >> shift; + state = tables[expoIndex][mantiIndex]; + }else{ + state = 0; + } + + if (state) + { + type[1] = state; + P1[1] = fabs(pred1D) * precisionTable[state]; + } + else + { + type[1] = 0; + compressSingleFloatValue_MSST19(vce, curData, realPrecision, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, 
preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[1] = vce->data; + } +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[1] = P1[1]; +#endif + + /* Process Row-0 data 2 --> data r2-1 */ + for (j = 2; j < r2; j++) + { + pred1D = P1[j-1] * P1[j-1] / P1[j-2]; + curData = spaceFillingValue[j]; + predRelErrRatio = curData / pred1D; + + expoIndex = ((*buffer & 0x7fffffffffffffff) >> 52) - base; + if(expoIndex <= range){ + mantiIndex = (*buffer & 0x000fffffffffffff) >> shift; + state = tables[expoIndex][mantiIndex]; + }else{ + state = 0; + } + + if (state) + { + type[j] = state; + P1[j] = fabs(pred1D) * precisionTable[state]; + } + else + { + type[j] = 0; + compressSingleFloatValue_MSST19(vce, curData, realPrecision, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[j] = vce->data; + } +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[j] = P1[j]; +#endif + } + + /* Process Row-1 --> Row-r1-1 */ + size_t index; + for (i = 1; i < r1; i++) + { + /* Process row-i data 0 */ + index = i*r2; + pred1D = P1[0]; + curData = spaceFillingValue[index]; + predRelErrRatio = curData / pred1D; + + expoIndex = ((*buffer & 0x7fffffffffffffff) >> 52) - base; + if(expoIndex <= range){ + mantiIndex = (*buffer & 0x000fffffffffffff) >> shift; + state = tables[expoIndex][mantiIndex]; + }else{ + state = 0; + } + + if (state) + { + type[index] = state; + P0[0] = fabs(pred1D) * precisionTable[state]; + } + else + { + type[index] = 0; + compressSingleFloatValue_MSST19(vce, curData, realPrecision, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, 
reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P0[0] = vce->data; + } +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[index] = P0[0]; +#endif + + /* Process row-i data 1 --> r2-1*/ + for (j = 1; j < r2; j++) + { + index = i*r2+j; + pred2D = P0[j-1] * P1[j] / P1[j-1]; + + curData = spaceFillingValue[index]; + predRelErrRatio = curData / pred2D; + + expoIndex = ((*buffer & 0x7fffffffffffffff) >> 52) - base; + if(expoIndex <= range){ + mantiIndex = (*buffer & 0x000fffffffffffff) >> shift; + state = tables[expoIndex][mantiIndex]; + }else{ + state = 0; + } + + if (state) + { + type[index] = state; + P0[j] = fabs(pred2D) * precisionTable[state]; + } + else + { + type[index] = 0; + compressSingleFloatValue_MSST19(vce, curData, realPrecision, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P0[j] = vce->data; + } +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[index] = P0[j]; +#endif + } + + float *Pt; + Pt = P1; + P1 = P0; + P0 = Pt; + } + + if(r2!=1) + free(P0); + free(P1); + size_t exactDataNum = exactLeadNumArray->size; + + TightDataPointStorageF* tdps; + + new_TightDataPointStorageF(&tdps, dataLength, exactDataNum, + type, exactMidByteArray->array, exactMidByteArray->size, + exactLeadNumArray->array, + resiBitArray->array, resiBitArray->size, + resiBitsLength, + realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0); + tdps->plus_bits = confparams_cpr->plus_bits; + + //free memory + free_DIA(exactLeadNumArray); + free_DIA(resiBitArray); + free(type); + free(vce); + free(lce); + free(exactMidByteArray); //exactMidByteArray->array has been released in 
free_TightDataPointStorageF(tdps); + free(precisionTable); + freeTopLevelTableWideInterval(&levelTable); + return tdps; +} + +TightDataPointStorageF* SZ_compress_float_3D_MDQ_MSST19(float *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, float valueRangeSize, float medianValue_f) +{ +#ifdef HAVE_TIMECMPR + float* decData = NULL; + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData = (float*)(multisteps->hist_data); +#endif + + unsigned int quantization_intervals; + if(exe_params->optQuantMode==1) + { + quantization_intervals = optimize_intervals_float_3D_opt_MSST19(oriData, r1, r2, r3, realPrecision); + updateQuantizationInfo(quantization_intervals); + } + else + quantization_intervals = exe_params->intvCapacity; + int intvRadius = quantization_intervals/2; + + double* precisionTable = (double*)malloc(sizeof(double) * quantization_intervals); + double inv = 2.0-pow(2, -(confparams_cpr->plus_bits)); + for(int i=0; iplus_bits); + + size_t i,j,k; + int reqLength; + float pred1D, pred2D, pred3D; + //float diff = 0.0; + //double itvNum = 0; + float *P0, *P1; + double predRelErrRatio; + + size_t dataLength = r1*r2*r3; + size_t r23 = r2*r3; + P0 = (float*)malloc(r23*sizeof(float)); + P1 = (float*)malloc(r23*sizeof(float)); + + float medianValue = medianValue_f; + //float medianValueInverse = 1/ medianValue_f; + //short radExpo = getExponent_float(valueRangeSize/2); + reqLength = computeReqLength_float_MSST19(realPrecision); + + int* type = (int*) malloc(dataLength*sizeof(int)); + + float* spaceFillingValue = oriData; // + + DynamicIntArray *exactLeadNumArray; + new_DIA(&exactLeadNumArray, DynArrayInitLen); + + DynamicByteArray *exactMidByteArray; + new_DBA(&exactMidByteArray, DynArrayInitLen); + + DynamicIntArray *resiBitArray; + new_DIA(&resiBitArray, DynArrayInitLen); + + unsigned char preDataBytes[4]; + intToBytes_bigEndian(preDataBytes, 0); + + int reqBytesLength = reqLength/8; + int resiBitsLength = reqLength%8; + + 
FloatValueCompressElement *vce = (FloatValueCompressElement*)malloc(sizeof(FloatValueCompressElement)); + LossyCompressionElement *lce = (LossyCompressionElement*)malloc(sizeof(LossyCompressionElement)); + + const uint64_t top = levelTable.topIndex, base = levelTable.baseIndex; + const uint64_t range = top - base; + const int bits = levelTable.bits; + uint64_t* const buffer = (uint64_t*)&predRelErrRatio; + const int shift = 52-bits; + uint64_t expoIndex, mantiIndex; + uint16_t* tables[range+1]; + for(int i=0; i<=range; i++){ + tables[i] = levelTable.subTables[i].table; + } + int state; + + double temp, temp2; + + + //size_t miss=0, hit=0; + + /////////////////////////// Process layer-0 /////////////////////////// + /* Process Row-0 data 0*/ + type[0] = 0; + compressSingleFloatValue_MSST19(vce, spaceFillingValue[0], realPrecision, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[0] = vce->data; + //miss++; +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[0] = P1[0]; +#endif + + float curData; + + /* Process Row-0 data 1*/ + pred1D = P1[0]; + curData = spaceFillingValue[1]; + predRelErrRatio = curData / pred1D; + + expoIndex = ((*buffer & 0x7fffffffffffffff) >> 52) - base; + if(expoIndex <= range){ + mantiIndex = (*buffer & 0x000fffffffffffff) >> shift; + state = tables[expoIndex][mantiIndex]; + }else{ + state = 0; + } + + if (state) + { + type[1] = state; + P1[1] = fabsf(pred1D) * precisionTable[state]; + //hit++; + } + else + { + type[1] = 0; + compressSingleFloatValue_MSST19(vce, curData, realPrecision, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, 
exactLeadNumArray, resiBitArray, lce); + P1[1] = vce->data; + //miss++; + } +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[1] = P1[1]; +#endif + + /* Process Row-0 data 2 --> data r3-1 */ + for (j = 2; j < r3; j++) + { + temp = P1[j-1]; + pred1D = temp * temp / P1[j-2]; + curData = spaceFillingValue[j]; + predRelErrRatio = curData / pred1D; + + expoIndex = ((*buffer & 0x7fffffffffffffff) >> 52) - base; + if(expoIndex <= range){ + mantiIndex = (*buffer & 0x000fffffffffffff) >> shift; + state = tables[expoIndex][mantiIndex]; + }else{ + state = 0; + } + + if (state) + { + type[j] = state; + P1[j] = fabsf(pred1D) * precisionTable[state]; + //hit++; + } + else + { + type[j] = 0; + compressSingleFloatValue_MSST19(vce, curData, realPrecision, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[j] = vce->data; + //miss++; + } +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[j] = P1[j]; +#endif + } + + /* Process Row-1 --> Row-r2-1 */ + size_t index; + for (i = 1; i < r2; i++) + { + /* Process row-i data 0 */ + index = i*r3; + pred1D = P1[index-r3]; + curData = spaceFillingValue[index]; + predRelErrRatio = curData / pred1D; + + expoIndex = ((*buffer & 0x7fffffffffffffff) >> 52) - base; + if(expoIndex <= range){ + mantiIndex = (*buffer & 0x000fffffffffffff) >> shift; + state = tables[expoIndex][mantiIndex]; + }else{ + state = 0; + } + + if (state) + { + type[index] = state; + P1[index] = pred1D * precisionTable[state]; + //hit++; + } + else + { + type[index] = 0; + compressSingleFloatValue_MSST19(vce, curData, realPrecision, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + 
memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[index] = vce->data; + //miss++; + } +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[index] = P1[index]; +#endif + + /* Process row-i data 1 --> data r3-1*/ + for (j = 1; j < r3; j++) + { + index = i*r3+j; + temp = P1[index-1]; + pred2D = temp * P1[index-r3] / P1[index-r3-1]; + //float a = P1[index-1]; + //float b = P1[index-r3]; + //float c = P1[index-r3-1]; + + curData = spaceFillingValue[index]; + predRelErrRatio = curData / pred2D; + + expoIndex = ((*buffer & 0x7fffffffffffffff) >> 52) - base; + if(expoIndex <= range){ + mantiIndex = (*buffer & 0x000fffffffffffff) >> shift; + state = tables[expoIndex][mantiIndex]; + }else{ + state = 0; + } + + if (state) + { + type[index] = state; + //float temp1 = precisionTable[state]; + //float temp = fabsf(pred2D) * precisionTable[state]; + P1[index] = fabsf(pred2D) * precisionTable[state]; + //hit++; + } + else + { + type[index] = 0; + compressSingleFloatValue_MSST19(vce, curData, realPrecision, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[index] = vce->data; + //miss++; + } +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[index] = P1[index]; +#endif + } + } + + + /////////////////////////// Process layer-1 --> layer-r1-1 /////////////////////////// + + for (k = 1; k < r1; k++) + { + /* Process Row-0 data 0*/ + index = k*r23; + pred1D = P1[0]; + curData = spaceFillingValue[index]; + predRelErrRatio = curData / pred1D; + + expoIndex = ((*buffer & 0x7fffffffffffffff) >> 52) - base; + if(expoIndex <= range){ + mantiIndex = (*buffer & 0x000fffffffffffff) >> shift; + state = tables[expoIndex][mantiIndex]; + }else{ + state = 0; 
+ } + + if (state) + { + type[index] = state; + P0[0] = fabsf(pred1D) * precisionTable[state]; + //hit++; + } + else + { + type[index] = 0; + compressSingleFloatValue_MSST19(vce, curData, realPrecision, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P0[0] = vce->data; + //miss++; + } +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[index] = P0[0]; +#endif + + /* Process Row-0 data 1 --> data r3-1 */ + for (j = 1; j < r3; j++) + { + //index = k*r2*r3+j; + index ++; + temp = P0[j-1]; + pred2D = temp * P1[j] / P1[j-1]; + curData = spaceFillingValue[index]; + predRelErrRatio = curData / pred2D; + + expoIndex = ((*buffer & 0x7fffffffffffffff) >> 52) - base; + if(expoIndex <= range){ + mantiIndex = (*buffer & 0x000fffffffffffff) >> shift; + state = tables[expoIndex][mantiIndex]; + }else{ + state = 0; + } + + if (state) + { + type[index] = state; + P0[j] = fabsf(pred2D) * precisionTable[state]; + //hit++; + } + else + { + type[index] = 0; + compressSingleFloatValue_MSST19(vce, curData, realPrecision, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P0[j] = vce->data; + //miss++; + } +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[index] = P0[j]; +#endif + } + + /* Process Row-1 --> Row-r2-1 */ + size_t index2D; + for (i = 1; i < r2; i++) + { + /* Process Row-i data 0 */ + index = k*r23 + i*r3; + index2D = i*r3; + temp = P0[index2D-r3]; + pred2D = temp * P1[index2D] / P1[index2D-r3]; + curData = spaceFillingValue[index]; + predRelErrRatio = curData / pred2D; + + expoIndex = ((*buffer & 
0x7fffffffffffffff) >> 52) - base; + if(expoIndex <= range){ + mantiIndex = (*buffer & 0x000fffffffffffff) >> shift; + state = tables[expoIndex][mantiIndex]; + }else{ + state = 0; + } + + if (state) + { + type[index] = state; + P0[index2D] = fabsf(pred2D) * precisionTable[state]; + //hit++; + } + else + { + type[index] = 0; + compressSingleFloatValue_MSST19(vce, curData, realPrecision, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P0[index2D] = vce->data; + //miss++; + } +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[index] = P0[index2D]; +#endif + + /* Process Row-i data 1 --> data r3-1 */ + for (j = 1; j < r3; j++) + { + index ++; + index2D = i*r3 + j; + //pred3D = P0[index2D-1] * P0[index2D-r3] * P1[index2D] / P0[index2D-r3-1] / P1[index2D-r3] / P1[index2D-1] * P1[index2D-r3-1]; + temp = P0[index2D-1]; + temp2 = P0[index2D-r3-1]; + pred3D = temp * P0[index2D-r3] * P1[index2D] * P1[index2D-r3-1] / (temp2 * P1[index2D-r3] * P1[index2D-1]); + + curData = spaceFillingValue[index]; + predRelErrRatio = curData / pred3D; + + expoIndex = ((*buffer & 0x7fffffffffffffff) >> 52) - base; + if(expoIndex <= range){ + mantiIndex = (*buffer & 0x000fffffffffffff) >> shift; + state = tables[expoIndex][mantiIndex]; + }else{ + state = 0; + } + + if (state) + { + type[index] = state; + P0[index2D] = fabsf(pred3D) * precisionTable[state]; + //hit++; + } + else + { + type[index] = 0; + compressSingleFloatValue_MSST19(vce, curData, realPrecision, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P0[index2D] = vce->data; + //miss++; + } 
+#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[index] = P0[index2D]; +#endif + } + } + + float *Pt; + Pt = P1; + P1 = P0; + P0 = Pt; + } + if(r23!=1) + free(P0); + free(P1); + size_t exactDataNum = exactLeadNumArray->size; + + TightDataPointStorageF* tdps; + + new_TightDataPointStorageF(&tdps, dataLength, exactDataNum, + type, exactMidByteArray->array, exactMidByteArray->size, + exactLeadNumArray->array, + resiBitArray->array, resiBitArray->size, + resiBitsLength, + realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0); + tdps->plus_bits = confparams_cpr->plus_bits; + + //free memory + free_DIA(exactLeadNumArray); + free_DIA(resiBitArray); + free(type); + free(vce); + free(lce); + free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps); + free(precisionTable); + freeTopLevelTableWideInterval(&levelTable); + return tdps; +} + + +void SZ_compress_args_float_withinRange(unsigned char** newByteData, float *oriData, size_t dataLength, size_t *outSize) +{ + TightDataPointStorageF* tdps = (TightDataPointStorageF*) malloc(sizeof(TightDataPointStorageF)); + tdps->rtypeArray = NULL; + tdps->typeArray = NULL; + tdps->leadNumArray = NULL; + tdps->residualMidBits = NULL; + + tdps->allSameData = 1; + tdps->dataSeriesLength = dataLength; + tdps->exactMidBytes = (unsigned char*)malloc(sizeof(unsigned char)*4); + tdps->pwrErrBoundBytes = NULL; + tdps->isLossless = 0; + float value = oriData[0]; + floatToBytes(tdps->exactMidBytes, value); + tdps->exactMidBytes_size = 4; + + size_t tmpOutSize; + //unsigned char *tmpByteData; + convertTDPStoFlatBytes_float(tdps, newByteData, &tmpOutSize); + + //*newByteData = (unsigned char*)malloc(sizeof(unsigned char)*12); //for floating-point data (1+3+4+4) + //memcpy(*newByteData, tmpByteData, 12); + *outSize = tmpOutSize; //8+SZ_SIZE_TYPE; //8==3+1+4(float_size) + free_TightDataPointStorageF(tdps); +} + +/* +int 
SZ_compress_args_float_wRngeNoGzip(unsigned char** newByteData, float *oriData, +size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize, +int errBoundMode, double absErr_Bound, double relBoundRatio, double pwrErrRatio) +{ + int status = SZ_SCES; + size_t dataLength = computeDataLength(r5,r4,r3,r2,r1); + float valueRangeSize = 0, medianValue = 0; + + float min = computeRangeSize_float(oriData, dataLength, &valueRangeSize, &medianValue); + float max = min+valueRangeSize; + double realPrecision = getRealPrecision_float(valueRangeSize, errBoundMode, absErr_Bound, relBoundRatio, &status); + + if(valueRangeSize <= realPrecision) + { + SZ_compress_args_float_withinRange(newByteData, oriData, dataLength, outSize); + } + else + { +// SZ_compress_args_float_NoCkRngeNoGzip_2D(newByteData, oriData, r2, r1, realPrecision, outSize); + if(r5==0&&r4==0&&r3==0&&r2==0) + { + if(errBoundMode>=PW_REL) + { + SZ_compress_args_float_NoCkRngeNoGzip_1D_pwr_pre_log(newByteData, oriData, pwrErrRatio, r1, outSize, min, max); + //SZ_compress_args_float_NoCkRngeNoGzip_1D_pwrgroup(newByteData, oriData, r1, absErr_Bound, relBoundRatio, pwrErrRatio, valueRangeSize, medianValue, outSize); + } + else + SZ_compress_args_float_NoCkRngeNoGzip_1D(newByteData, oriData, r1, realPrecision, outSize, valueRangeSize, medianValue); + } + else if(r5==0&&r4==0&&r3==0) + { + if(errBoundMode>=PW_REL) + SZ_compress_args_float_NoCkRngeNoGzip_2D_pwr_pre_log(newByteData, oriData, pwrErrRatio, r2, r1, outSize, min, max); + else + SZ_compress_args_float_NoCkRngeNoGzip_2D(newByteData, oriData, r2, r1, realPrecision, outSize, valueRangeSize, medianValue); + } + else if(r5==0&&r4==0) + { + if(errBoundMode>=PW_REL) + SZ_compress_args_float_NoCkRngeNoGzip_3D_pwr_pre_log(newByteData, oriData, pwrErrRatio, r3, r2, r1, outSize, min, max); + else + SZ_compress_args_float_NoCkRngeNoGzip_3D(newByteData, oriData, r3, r2, r1, realPrecision, outSize, valueRangeSize, medianValue); + } + else if(r5==0) + { + 
if(errBoundMode>=PW_REL) + SZ_compress_args_float_NoCkRngeNoGzip_3D_pwr_pre_log(newByteData, oriData, pwrErrRatio, r4*r3, r2, r1, outSize, min, max); + else + SZ_compress_args_float_NoCkRngeNoGzip_3D(newByteData, oriData, r4*r3, r2, r1, realPrecision, outSize, valueRangeSize, medianValue); + } + } + return status; +} +*/ + +int SZ_compress_args_float(int cmprType, int withRegression, unsigned char** newByteData, float *oriData, +size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize, +int errBoundMode, double absErr_Bound, double relBoundRatio, double pwRelBoundRatio) +{ + confparams_cpr->errorBoundMode = errBoundMode; //this is used to print the metadata if needed... + if(errBoundMode==PW_REL) + { + confparams_cpr->pw_relBoundRatio = pwRelBoundRatio; + } + int status = SZ_SCES; + size_t dataLength = computeDataLength(r5,r4,r3,r2,r1); + + if(dataLength <= MIN_NUM_OF_ELEMENTS) + { + *newByteData = SZ_skip_compress_float(oriData, dataLength, outSize); + return status; + } + + float valueRangeSize = 0, medianValue = 0; + + unsigned char * signs = NULL; + bool positive = true; + float nearZero = 0.0; + float min = 0; + if(pwRelBoundRatio < 0.000009999) + confparams_cpr->accelerate_pw_rel_compression = 0; + if(confparams_cpr->errorBoundMode == PW_REL && confparams_cpr->accelerate_pw_rel_compression) + { + signs = (unsigned char *) malloc(dataLength); + memset(signs, 0, dataLength); + min = computeRangeSize_float_MSST19(oriData, dataLength, &valueRangeSize, &medianValue, signs, &positive, &nearZero); + } + else + min = computeRangeSize_float(oriData, dataLength, &valueRangeSize, &medianValue); + float max = min+valueRangeSize; + confparams_cpr->fmin = min; + confparams_cpr->fmax = max; + + double realPrecision = 0; + + if(confparams_cpr->errorBoundMode==PSNR) + { + confparams_cpr->errorBoundMode = ABS; + realPrecision = confparams_cpr->absErrBound = computeABSErrBoundFromPSNR(confparams_cpr->psnr, (double)confparams_cpr->predThreshold, 
(double)valueRangeSize); + //printf("realPrecision=%lf\n", realPrecision); + } + else if(confparams_cpr->errorBoundMode==NORM) //norm error = sqrt(sum((xi-xi_)^2)) + { + confparams_cpr->errorBoundMode = ABS; + realPrecision = confparams_cpr->absErrBound = computeABSErrBoundFromNORM_ERR(confparams_cpr->normErr, dataLength); + //printf("realPrecision=%lf\n", realPrecision); + } + else + { + realPrecision = getRealPrecision_float(valueRangeSize, errBoundMode, absErr_Bound, relBoundRatio, &status); + confparams_cpr->absErrBound = realPrecision; + } + if(valueRangeSize <= realPrecision) + { + if(confparams_cpr->errorBoundMode>=PW_REL && confparams_cpr->accelerate_pw_rel_compression == 1) + free(signs); + SZ_compress_args_float_withinRange(newByteData, oriData, dataLength, outSize); + } + else + { + size_t tmpOutSize = 0; + unsigned char* tmpByteData; + + if (r2==0) + { + if(confparams_cpr->errorBoundMode>=PW_REL) + { + if(confparams_cpr->accelerate_pw_rel_compression && confparams_cpr->maxRangeRadius <= 32768) + SZ_compress_args_float_NoCkRngeNoGzip_1D_pwr_pre_log_MSST19(&tmpByteData, oriData, pwRelBoundRatio, r1, &tmpOutSize, valueRangeSize, medianValue, signs, &positive, min, max, nearZero); + else + SZ_compress_args_float_NoCkRngeNoGzip_1D_pwr_pre_log(&tmpByteData, oriData, pwRelBoundRatio, r1, &tmpOutSize, min, max); + //SZ_compress_args_float_NoCkRngeNoGzip_1D_pwrgroup(&tmpByteData, oriData, r1, absErr_Bound, relBoundRatio, pwRelBoundRatio, valueRangeSize, medianValue, &tmpOutSize); + } + else +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + multisteps->compressionType = SZ_compress_args_float_NoCkRngeNoGzip_1D(cmprType, &tmpByteData, oriData, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue); + else +#endif + { +#ifdef HAVE_RANDOMACCESS + if(confparams_cpr->randomAccess == 0) + { +#endif + SZ_compress_args_float_NoCkRngeNoGzip_1D(cmprType, &tmpByteData, oriData, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue); 
+ if(tmpOutSize>=dataLength*sizeof(float) + 3 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 1) + SZ_compress_args_float_StoreOriData(oriData, dataLength, &tmpByteData, &tmpOutSize); +#ifdef HAVE_RANDOMACCESS + } + else + tmpByteData = SZ_compress_float_1D_MDQ_decompression_random_access_with_blocked_regression(oriData, r1, realPrecision, &tmpOutSize); +#endif + } + } + else + if (r3==0) + { + if(confparams_cpr->errorBoundMode>=PW_REL) + { + if(confparams_cpr->accelerate_pw_rel_compression && confparams_cpr->maxRangeRadius <= 32768) + SZ_compress_args_float_NoCkRngeNoGzip_2D_pwr_pre_log_MSST19(&tmpByteData, oriData, pwRelBoundRatio, r2, r1, &tmpOutSize, valueRangeSize, signs, &positive, min, max, nearZero); + else + SZ_compress_args_float_NoCkRngeNoGzip_2D_pwr_pre_log(&tmpByteData, oriData, pwRelBoundRatio, r2, r1, &tmpOutSize, min, max); + } + else +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + multisteps->compressionType = SZ_compress_args_float_NoCkRngeNoGzip_2D(cmprType, &tmpByteData, oriData, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue); + else +#endif + { +#ifdef HAVE_RANDOMACCESS + if(confparams_cpr->randomAccess == 0) + { +#endif + if(withRegression == SZ_NO_REGRESSION) + SZ_compress_args_float_NoCkRngeNoGzip_2D(cmprType, &tmpByteData, oriData, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue); + else // SZ 2.1 (2D) + { + tmpByteData = SZ_compress_float_2D_MDQ_nonblocked_with_blocked_regression(oriData, r2, r1, realPrecision, &tmpOutSize);//SZ 2.1 (2D) + if(tmpOutSize>=dataLength*sizeof(float) + 3 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 1) + SZ_compress_args_float_StoreOriData(oriData, dataLength, &tmpByteData, &tmpOutSize); + } +#ifdef HAVE_RANDOMACCESS + } + else + tmpByteData = SZ_compress_float_2D_MDQ_decompression_random_access_with_blocked_regression(oriData, r2, r1, realPrecision, &tmpOutSize); +#endif + } + } + else + if (r4==0) + { + 
if(confparams_cpr->errorBoundMode>=PW_REL) + { + if(confparams_cpr->accelerate_pw_rel_compression && confparams_cpr->maxRangeRadius <= 32768) + SZ_compress_args_float_NoCkRngeNoGzip_3D_pwr_pre_log_MSST19(&tmpByteData, oriData, pwRelBoundRatio, r3, r2, r1, &tmpOutSize, valueRangeSize, signs, &positive, min, max, nearZero); + else + SZ_compress_args_float_NoCkRngeNoGzip_3D_pwr_pre_log(&tmpByteData, oriData, pwRelBoundRatio, r3, r2, r1, &tmpOutSize, min, max); + } + else +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + multisteps->compressionType = SZ_compress_args_float_NoCkRngeNoGzip_3D(cmprType, &tmpByteData, oriData, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue); + else +#endif + { +#ifdef HAVE_RANDOMACCESS + if(confparams_cpr->randomAccess == 0) + { +#endif + if(withRegression == SZ_NO_REGRESSION) + SZ_compress_args_float_NoCkRngeNoGzip_3D(cmprType, &tmpByteData, oriData, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue); + else //SZ 2.1 (3D) + { + tmpByteData = SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(oriData, r3, r2, r1, realPrecision, &tmpOutSize); //SZ 2.1 (3D) + if(tmpOutSize>=dataLength*sizeof(float) + 3 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 1) + SZ_compress_args_float_StoreOriData(oriData, dataLength, &tmpByteData, &tmpOutSize); + } +#ifdef HAVE_RANDOMACCESS + } + else + tmpByteData = SZ_compress_float_3D_MDQ_decompression_random_access_with_blocked_regression(oriData, r3, r2, r1, realPrecision, &tmpOutSize); +#endif + } + } + else + if (r5==0) + { + if(confparams_cpr->errorBoundMode>=PW_REL) + { + if(confparams_cpr->accelerate_pw_rel_compression && confparams_cpr->maxRangeRadius <= 32768) + SZ_compress_args_float_NoCkRngeNoGzip_3D_pwr_pre_log_MSST19(&tmpByteData, oriData, pwRelBoundRatio, r4*r3, r2, r1, &tmpOutSize, valueRangeSize, signs, &positive, min, max, nearZero); + else + SZ_compress_args_float_NoCkRngeNoGzip_3D_pwr_pre_log(&tmpByteData, oriData, 
pwRelBoundRatio, r4*r3, r2, r1, &tmpOutSize, min, max); + } + else +#ifdef HAVE_TIMECMPR + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + multisteps->compressionType = SZ_compress_args_float_NoCkRngeNoGzip_4D(&tmpByteData, oriData, r4, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue); + else +#endif + { + if(withRegression == SZ_NO_REGRESSION) + SZ_compress_args_float_NoCkRngeNoGzip_4D(&tmpByteData, oriData, r4, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue); + else + { + tmpByteData = SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(oriData, r4*r3, r2, r1, realPrecision, &tmpOutSize); //SZ 2.1 4D + if(tmpOutSize>=dataLength*sizeof(float) + 3 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 1) + SZ_compress_args_float_StoreOriData(oriData, dataLength, &tmpByteData, &tmpOutSize); + } + } + } + else + { + printf("Error: doesn't support 5 dimensions for now.\n"); + status = SZ_DERR; //dimension error + } + //Call Zstd or Gzip to do the further compression. 
+ if(confparams_cpr->szMode==SZ_BEST_SPEED) + { + *outSize = tmpOutSize; + *newByteData = tmpByteData; + } + else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION || confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION || confparams_cpr->szMode==SZ_TEMPORAL_COMPRESSION) + { + *outSize = sz_lossless_compress(confparams_cpr->losslessCompressor, confparams_cpr->gzipMode, tmpByteData, tmpOutSize, newByteData); + free(tmpByteData); + } + else + { + printf("Error: Wrong setting of confparams_cpr->szMode in the float compression.\n"); + status = SZ_MERR; //mode error + } + } + + return status; +} + +//TODO +int SZ_compress_args_float_subblock(unsigned char* compressedBytes, float *oriData, +size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, +size_t s5, size_t s4, size_t s3, size_t s2, size_t s1, +size_t e5, size_t e4, size_t e3, size_t e2, size_t e1, +size_t *outSize, int errBoundMode, double absErr_Bound, double relBoundRatio) +{ + int status = SZ_SCES; + float valueRangeSize = 0, medianValue = 0; + computeRangeSize_float_subblock(oriData, &valueRangeSize, &medianValue, r5, r4, r3, r2, r1, s5, s4, s3, s2, s1, e5, e4, e3, e2, e1); + + double realPrecision = getRealPrecision_float(valueRangeSize, errBoundMode, absErr_Bound, relBoundRatio, &status); + + if(valueRangeSize <= realPrecision) + { + //TODO + //SZ_compress_args_float_withinRange_subblock(); + } + else + { + if (r2==0) + { + if(errBoundMode>=PW_REL) + { + //TODO + //SZ_compress_args_float_NoCkRngeNoGzip_1D_pwr_subblock(); + printf ("Current subblock version does not support point-wise relative error bound.\n"); + } + else + SZ_compress_args_float_NoCkRnge_1D_subblock(compressedBytes, oriData, realPrecision, outSize, valueRangeSize, medianValue, r1, s1, e1); + } + else + if (r3==0) + { + //TODO + if(errBoundMode>=PW_REL) + { + //TODO + //SZ_compress_args_float_NoCkRngeNoGzip_2D_pwr_subblock(); + printf ("Current subblock version does not support point-wise relative error bound.\n"); + } + else + 
SZ_compress_args_float_NoCkRnge_2D_subblock(compressedBytes, oriData, realPrecision, outSize, valueRangeSize, medianValue, r2, r1, s2, s1, e2, e1); + } + else + if (r4==0) + { + if(errBoundMode>=PW_REL) + { + //TODO + //SZ_compress_args_float_NoCkRngeNoGzip_3D_pwr_subblock(); + printf ("Current subblock version does not support point-wise relative error bound.\n"); + } + else + SZ_compress_args_float_NoCkRnge_3D_subblock(compressedBytes, oriData, realPrecision, outSize, valueRangeSize, medianValue, r3, r2, r1, s3, s2, s1, e3, e2, e1); + } + else + if (r5==0) + { + if(errBoundMode>=PW_REL) + { + //TODO + //SZ_compress_args_float_NoCkRngeNoGzip_4D_pwr_subblock(); + printf ("Current subblock version does not support point-wise relative error bound.\n"); + } + else + SZ_compress_args_float_NoCkRnge_4D_subblock(compressedBytes, oriData, realPrecision, outSize, valueRangeSize, medianValue, r4, r3, r2, r1, s4, s3, s2, s1, e4, e3, e2, e1); + } + else + { + printf("Error: doesn't support 5 dimensions for now.\n"); + status = SZ_DERR; //dimension error + } + } + return status; +} + +void SZ_compress_args_float_NoCkRnge_1D_subblock(unsigned char* compressedBytes, float *oriData, double realPrecision, size_t *outSize, float valueRangeSize, float medianValue_f, +size_t r1, size_t s1, size_t e1) +{ + TightDataPointStorageF* tdps = SZ_compress_float_1D_MDQ_subblock(oriData, realPrecision, valueRangeSize, medianValue_f, r1, s1, e1); + + if (confparams_cpr->szMode==SZ_BEST_SPEED) + convertTDPStoFlatBytes_float_args(tdps, compressedBytes, outSize); + else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION || confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION) + { + unsigned char *tmpCompBytes; + size_t tmpOutSize; + convertTDPStoFlatBytes_float(tdps, &tmpCompBytes, &tmpOutSize); + *outSize = zlib_compress3(tmpCompBytes, tmpOutSize, compressedBytes, confparams_cpr->gzipMode); + free(tmpCompBytes); + } + else + { + printf ("Error: Wrong setting of confparams_cpr->szMode in the double 
compression.\n"); + } + + //TODO +// if(*outSize>dataLength*sizeof(float)) +// SZ_compress_args_float_StoreOriData(oriData, dataLength, newByteData, outSize); + + free_TightDataPointStorageF(tdps); +} + +void SZ_compress_args_float_NoCkRnge_2D_subblock(unsigned char* compressedBytes, float *oriData, double realPrecision, size_t *outSize, float valueRangeSize, float medianValue_f, +size_t r2, size_t r1, size_t s2, size_t s1, size_t e2, size_t e1) +{ + TightDataPointStorageF* tdps = SZ_compress_float_2D_MDQ_subblock(oriData, realPrecision, valueRangeSize, medianValue_f, r2, r1, s2, s1, e2, e1); + + if (confparams_cpr->szMode==SZ_BEST_SPEED) + convertTDPStoFlatBytes_float_args(tdps, compressedBytes, outSize); + else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION || confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION) + { + unsigned char *tmpCompBytes; + size_t tmpOutSize; + convertTDPStoFlatBytes_float(tdps, &tmpCompBytes, &tmpOutSize); + *outSize = zlib_compress3(tmpCompBytes, tmpOutSize, compressedBytes, confparams_cpr->gzipMode); + free(tmpCompBytes); + } + else + { + printf ("Error: Wrong setting of confparams_cpr->szMode in the double compression.\n"); + } + + //TODO +// if(*outSize>dataLength*sizeof(float)) +// SZ_compress_args_float_StoreOriData(oriData, dataLength, newByteData, outSize); + + free_TightDataPointStorageF(tdps); +} + +void SZ_compress_args_float_NoCkRnge_3D_subblock(unsigned char* compressedBytes, float *oriData, double realPrecision, size_t *outSize, float valueRangeSize, float medianValue_f, +size_t r3, size_t r2, size_t r1, size_t s3, size_t s2, size_t s1, size_t e3, size_t e2, size_t e1) +{ + TightDataPointStorageF* tdps = SZ_compress_float_3D_MDQ_subblock(oriData, realPrecision, valueRangeSize, medianValue_f, r3, r2, r1, s3, s2, s1, e3, e2, e1); + + if (confparams_cpr->szMode==SZ_BEST_SPEED) + convertTDPStoFlatBytes_float_args(tdps, compressedBytes, outSize); + else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION || 
confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION) + { + unsigned char *tmpCompBytes; + size_t tmpOutSize; + convertTDPStoFlatBytes_float(tdps, &tmpCompBytes, &tmpOutSize); + *outSize = zlib_compress3(tmpCompBytes, tmpOutSize, compressedBytes, confparams_cpr->gzipMode); + free(tmpCompBytes); + } + else + { + printf ("Error: Wrong setting of confparams_cpr->szMode in the double compression.\n"); + } + + //TODO +// if(*outSize>dataLength*sizeof(float)) +// SZ_compress_args_float_StoreOriData(oriData, dataLength, newByteData, outSize); + + free_TightDataPointStorageF(tdps); +} + +void SZ_compress_args_float_NoCkRnge_4D_subblock(unsigned char* compressedBytes, float *oriData, double realPrecision, size_t *outSize, float valueRangeSize, float medianValue_f, +size_t r4, size_t r3, size_t r2, size_t r1, size_t s4, size_t s3, size_t s2, size_t s1, size_t e4, size_t e3, size_t e2, size_t e1) +{ + TightDataPointStorageF* tdps = SZ_compress_float_4D_MDQ_subblock(oriData, realPrecision, valueRangeSize, medianValue_f, r4, r3, r2, r1, s4, s3, s2, s1, e4, e3, e2, e1); + + if (confparams_cpr->szMode==SZ_BEST_SPEED) + convertTDPStoFlatBytes_float_args(tdps, compressedBytes, outSize); + else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION || confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION) + { + unsigned char *tmpCompBytes; + size_t tmpOutSize; + convertTDPStoFlatBytes_float(tdps, &tmpCompBytes, &tmpOutSize); + *outSize = zlib_compress3(tmpCompBytes, tmpOutSize, compressedBytes, confparams_cpr->gzipMode); + free(tmpCompBytes); + } + else + { + printf ("Error: Wrong setting of confparams_cpr->szMode in the double compression.\n"); + } + + //TODO +// if(*outSize>dataLength*sizeof(float)) +// SZ_compress_args_float_StoreOriData(oriData, dataLength, newByteData, outSize); + + free_TightDataPointStorageF(tdps); + +} + +unsigned int optimize_intervals_float_1D_subblock(float *oriData, double realPrecision, size_t r1, size_t s1, size_t e1) +{ + size_t dataLength = e1 - s1 + 1; + oriData = 
oriData + s1; + + size_t i = 0; + unsigned long radiusIndex; + float pred_value = 0, pred_err; + int *intervals = (int*)malloc(confparams_cpr->maxRangeRadius*sizeof(int)); + memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(int)); + size_t totalSampleSize = dataLength/confparams_cpr->sampleDistance; + for(i=2;isampleDistance==0) + { + pred_value = 2*oriData[i-1] - oriData[i-2]; + //pred_value = oriData[i-1]; + pred_err = fabs(pred_value - oriData[i]); + radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2); + if(radiusIndex>=confparams_cpr->maxRangeRadius) + radiusIndex = confparams_cpr->maxRangeRadius - 1; + intervals[radiusIndex]++; + } + } + //compute the appropriate number + size_t targetCount = totalSampleSize*confparams_cpr->predThreshold; + size_t sum = 0; + for(i=0;imaxRangeRadius;i++) + { + sum += intervals[i]; + if(sum>targetCount) + break; + } + if(i>=confparams_cpr->maxRangeRadius) + i = confparams_cpr->maxRangeRadius-1; + + unsigned int accIntervals = 2*(i+1); + unsigned int powerOf2 = roundUpToPowerOf2(accIntervals); + + if(powerOf2<32) + powerOf2 = 32; + + free(intervals); + //printf("accIntervals=%d, powerOf2=%d\n", accIntervals, powerOf2); + return powerOf2; +} + +unsigned int optimize_intervals_float_2D_subblock(float *oriData, double realPrecision, size_t r1, size_t r2, size_t s1, size_t s2, size_t e1, size_t e2) +{ + size_t R1 = e1 - s1 + 1; + size_t R2 = e2 - s2 + 1; + + size_t i,j, index; + unsigned long radiusIndex; + float pred_value = 0, pred_err; + int *intervals = (int*)malloc(confparams_cpr->maxRangeRadius*sizeof(int)); + memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(int)); + size_t totalSampleSize = R1*R2/confparams_cpr->sampleDistance; + for(i=s1+1;i<=e1;i++) + { + for(j=s2+1;j<=e2;j++) + { + if((i+j)%confparams_cpr->sampleDistance==0) + { + index = i*r2+j; + pred_value = oriData[index-1] + oriData[index-r2] - oriData[index-r2-1]; + pred_err = fabs(pred_value - oriData[index]); + radiusIndex = (unsigned 
long)((pred_err/realPrecision+1)/2); + if(radiusIndex>=confparams_cpr->maxRangeRadius) + radiusIndex = confparams_cpr->maxRangeRadius - 1; + intervals[radiusIndex]++; + } + } + } + //compute the appropriate number + size_t targetCount = totalSampleSize*confparams_cpr->predThreshold; + size_t sum = 0; + for(i=0;imaxRangeRadius;i++) + { + sum += intervals[i]; + if(sum>targetCount) + break; + } + if(i>=confparams_cpr->maxRangeRadius) + i = confparams_cpr->maxRangeRadius-1; + unsigned int accIntervals = 2*(i+1); + unsigned int powerOf2 = roundUpToPowerOf2(accIntervals); + + if(powerOf2<32) + powerOf2 = 32; + + free(intervals); + //printf("confparams_cpr->maxRangeRadius = %d, accIntervals=%d, powerOf2=%d\n", confparams_cpr->maxRangeRadius, accIntervals, powerOf2); + return powerOf2; +} + +unsigned int optimize_intervals_float_3D_subblock(float *oriData, double realPrecision, size_t r1, size_t r2, size_t r3, size_t s1, size_t s2, size_t s3, size_t e1, size_t e2, size_t e3) +{ + size_t R1 = e1 - s1 + 1; + size_t R2 = e2 - s2 + 1; + size_t R3 = e3 - s3 + 1; + + size_t r23 = r2*r3; + + size_t i,j,k, index; + unsigned long radiusIndex; + float pred_value = 0, pred_err; + int *intervals = (int*)malloc(confparams_cpr->maxRangeRadius*sizeof(int)); + memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(int)); + size_t totalSampleSize = R1*R2*R3/confparams_cpr->sampleDistance; + for(i=s1+1;i<=e1;i++) + { + for(j=s2+1;j<=e2;j++) + { + for(k=s3+1;k<=e3;k++) + { + if((i+j+k)%confparams_cpr->sampleDistance==0) + { + index = i*r23+j*r3+k; + pred_value = oriData[index-1] + oriData[index-r3] + oriData[index-r23] + - oriData[index-1-r23] - oriData[index-r3-1] - oriData[index-r3-r23] + oriData[index-r3-r23-1]; + pred_err = fabs(pred_value - oriData[index]); + radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2); + if(radiusIndex>=confparams_cpr->maxRangeRadius) + radiusIndex = confparams_cpr->maxRangeRadius - 1; + intervals[radiusIndex]++; + } + } + } + } + //compute the 
appropriate number + size_t targetCount = totalSampleSize*confparams_cpr->predThreshold; + size_t sum = 0; + for(i=0;imaxRangeRadius;i++) + { + sum += intervals[i]; + if(sum>targetCount) + break; + } + if(i>=confparams_cpr->maxRangeRadius) + i = confparams_cpr->maxRangeRadius-1; + unsigned int accIntervals = 2*(i+1); + unsigned int powerOf2 = roundUpToPowerOf2(accIntervals); + + if(powerOf2<32) + powerOf2 = 32; + + free(intervals); + return powerOf2; +} + +unsigned int optimize_intervals_float_4D_subblock(float *oriData, double realPrecision, +size_t r1, size_t r2, size_t r3, size_t r4, size_t s1, size_t s2, size_t s3, size_t s4, size_t e1, size_t e2, size_t e3, size_t e4) +{ + size_t R1 = e1 - s1 + 1; + size_t R2 = e2 - s2 + 1; + size_t R3 = e3 - s3 + 1; + size_t R4 = e4 - s4 + 1; + + size_t r34 = r3*r4; + size_t r234 = r2*r3*r4; + + size_t i,j,k,l, index; + unsigned long radiusIndex; + float pred_value = 0, pred_err; + int *intervals = (int*)malloc(confparams_cpr->maxRangeRadius*sizeof(int)); + memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(int)); + size_t totalSampleSize = R1*R2*R3*R4/confparams_cpr->sampleDistance; + for(i=s1+1;i<=e1;i++) + { + for(j=s2+1;j<=e2;j++) + { + for(k=s3+1;k<=e3;k++) + { + for (l=s4+1;l<=e4;l++) + { + if((i+j+k+l)%confparams_cpr->sampleDistance==0) + { + index = i*r234+j*r34+k*r4+l; + pred_value = oriData[index-1] + oriData[index-r4] + oriData[index-r34] + - oriData[index-1-r34] - oriData[index-r4-1] - oriData[index-r4-r34] + oriData[index-r4-r34-1]; + pred_err = fabs(pred_value - oriData[index]); + radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2); + if(radiusIndex>=confparams_cpr->maxRangeRadius) + radiusIndex = confparams_cpr->maxRangeRadius - 1; + intervals[radiusIndex]++; + } + } + } + } + } + //compute the appropriate number + size_t targetCount = totalSampleSize*confparams_cpr->predThreshold; + size_t sum = 0; + for(i=0;imaxRangeRadius;i++) + { + sum += intervals[i]; + if(sum>targetCount) + break; + } + 
if(i>=confparams_cpr->maxRangeRadius) + i = confparams_cpr->maxRangeRadius-1; + + unsigned int accIntervals = 2*(i+1); + unsigned int powerOf2 = roundUpToPowerOf2(accIntervals); + + if(powerOf2<32) + powerOf2 = 32; + + free(intervals); + return powerOf2; +} + +TightDataPointStorageF* SZ_compress_float_1D_MDQ_subblock(float *oriData, double realPrecision, float valueRangeSize, float medianValue_f, +size_t r1, size_t s1, size_t e1) +{ + size_t dataLength = e1 - s1 + 1; + unsigned int quantization_intervals; + if(exe_params->optQuantMode==1) + quantization_intervals = optimize_intervals_float_1D_subblock(oriData, realPrecision, r1, s1, e1); + else + quantization_intervals = exe_params->intvCapacity; + //updateQuantizationInfo(quantization_intervals); + int intvRadius = quantization_intervals/2; + + size_t i; + int reqLength; + float medianValue = medianValue_f; + short radExpo = getExponent_float(valueRangeSize/2); + + computeReqLength_float(realPrecision, radExpo, &reqLength, &medianValue); + + int* type = (int*) malloc(dataLength*sizeof(int)); + + float* spaceFillingValue = oriData + s1; + + DynamicIntArray *exactLeadNumArray; + new_DIA(&exactLeadNumArray, DynArrayInitLen); + + DynamicByteArray *exactMidByteArray; + new_DBA(&exactMidByteArray, DynArrayInitLen); + + DynamicIntArray *resiBitArray; + new_DIA(&resiBitArray, DynArrayInitLen); + + type[0] = 0; + + unsigned char preDataBytes[4]; + intToBytes_bigEndian(preDataBytes, 0); + + int reqBytesLength = reqLength/8; + int resiBitsLength = reqLength%8; + float last3CmprsData[3] = {0}; + + FloatValueCompressElement *vce = (FloatValueCompressElement*)malloc(sizeof(FloatValueCompressElement)); + LossyCompressionElement *lce = (LossyCompressionElement*)malloc(sizeof(LossyCompressionElement)); + + //add the first data + compressSingleFloatValue(vce, spaceFillingValue[0], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, 
resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + listAdd_float(last3CmprsData, vce->data); + + //add the second data + type[1] = 0; + compressSingleFloatValue(vce, spaceFillingValue[1], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + listAdd_float(last3CmprsData, vce->data); + + int state; + double checkRadius; + float curData; + float pred; + float predAbsErr; + checkRadius = (quantization_intervals-1)*realPrecision; + double interval = 2*realPrecision; + + for(i=2;i=pred) + { + type[i] = intvRadius+state; + pred = pred + state*interval; + } + else + { + type[i] = intvRadius-state; + pred = pred - state*interval; + } + + listAdd_float(last3CmprsData, pred); + continue; + } + + //unpredictable data processing + type[i] = 0; + compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + + listAdd_float(last3CmprsData, vce->data); + } + + size_t exactDataNum = exactLeadNumArray->size; + + TightDataPointStorageF* tdps; + + new_TightDataPointStorageF(&tdps, dataLength, exactDataNum, + type, exactMidByteArray->array, exactMidByteArray->size, + exactLeadNumArray->array, + resiBitArray->array, resiBitArray->size, + resiBitsLength, + realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0); + + //free memory + free_DIA(exactLeadNumArray); + free_DIA(resiBitArray); + free(type); + free(vce); + free(lce); + free(exactMidByteArray); //exactMidByteArray->array has been 
released in free_TightDataPointStorageF(tdps); + + return tdps; +} + +TightDataPointStorageF* SZ_compress_float_2D_MDQ_subblock(float *oriData, double realPrecision, float valueRangeSize, float medianValue_f, +size_t r1, size_t r2, size_t s1, size_t s2, size_t e1, size_t e2) +{ + unsigned int quantization_intervals; + if(exe_params->optQuantMode==1) + { + quantization_intervals = optimize_intervals_float_2D_subblock(oriData, realPrecision, r1, r2, s1, s2, e1, e2); + updateQuantizationInfo(quantization_intervals); + } + else + quantization_intervals = exe_params->intvCapacity; + int intvRadius = quantization_intervals/2; + + size_t i,j; + int reqLength; + float pred1D, pred2D; + float diff = 0.0; + double itvNum = 0; + float *P0, *P1; + + size_t R1 = e1 - s1 + 1; + size_t R2 = e2 - s2 + 1; + size_t dataLength = R1*R2; + + P0 = (float*)malloc(R2*sizeof(float)); + memset(P0, 0, R2*sizeof(float)); + P1 = (float*)malloc(R2*sizeof(float)); + memset(P1, 0, R2*sizeof(float)); + + float medianValue = medianValue_f; + short radExpo = getExponent_float(valueRangeSize/2); + computeReqLength_float(realPrecision, radExpo, &reqLength, &medianValue); + + int* type = (int*) malloc(dataLength*sizeof(int)); + + float* spaceFillingValue = oriData; // + + DynamicIntArray *exactLeadNumArray; + new_DIA(&exactLeadNumArray, DynArrayInitLen); + + DynamicByteArray *exactMidByteArray; + new_DBA(&exactMidByteArray, DynArrayInitLen); + + DynamicIntArray *resiBitArray; + new_DIA(&resiBitArray, DynArrayInitLen); + + unsigned char preDataBytes[4]; + intToBytes_bigEndian(preDataBytes, 0); + + int reqBytesLength = reqLength/8; + int resiBitsLength = reqLength%8; + + FloatValueCompressElement *vce = (FloatValueCompressElement*)malloc(sizeof(FloatValueCompressElement)); + LossyCompressionElement *lce = (LossyCompressionElement*)malloc(sizeof(LossyCompressionElement)); + + /* Process Row-s1 data s2*/ + size_t gIndex; + size_t lIndex; + + gIndex = s1*r2+s2; + lIndex = 0; + + type[lIndex] = 0; + 
compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[0] = vce->data; + + /* Process Row-s1 data s2+1*/ + gIndex = s1*r2+(s2+1); + lIndex = 1; + + pred1D = P1[0]; + diff = spaceFillingValue[gIndex] - pred1D; + + itvNum = fabs(diff)/realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + type[lIndex] = (int) (itvNum/2) + intvRadius; + P1[1] = pred1D + 2 * (type[lIndex] - intvRadius) * realPrecision; + } + else + { + type[lIndex] = 0; + compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[1] = vce->data; + } + + /* Process Row-s1 data s2+2 --> data e2 */ + for (j = 2; j < R2; j++) + { + gIndex = s1*r2+(s2+j); + lIndex = j; + + pred1D = 2*P1[j-1] - P1[j-2]; + diff = spaceFillingValue[gIndex] - pred1D; + + itvNum = fabs(diff)/realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + type[lIndex] = (int) (itvNum/2) + intvRadius; + P1[j] = pred1D + 2 * (type[lIndex] - intvRadius) * realPrecision; + } + else + { + type[lIndex] = 0; + compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[j] = vce->data; + } + } + + /* Process 
Row-s1+1 --> Row-e1 */ + for (i = 1; i < R1; i++) + { + /* Process row-s1+i data s2 */ + gIndex = (s1+i)*r2+s2; + lIndex = i*R2; + + pred1D = P1[0]; + diff = spaceFillingValue[gIndex] - pred1D; + + itvNum = fabs(diff)/realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + type[lIndex] = (int) (itvNum/2) + intvRadius; + P0[0] = pred1D + 2 * (type[lIndex] - intvRadius) * realPrecision; + } + else + { + type[lIndex] = 0; + compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P0[0] = vce->data; + } + + /* Process row-s1+i data s2+1 --> e2 */ + for (j = 1; j < R2; j++) + { + gIndex = (s1+i)*r2+(s2+j); + lIndex = i*R2+j; + +// printf ("global index = %d, local index = %d\n", gIndex, lIndex); + + pred2D = P0[j-1] + P1[j] - P1[j-1]; + + diff = spaceFillingValue[gIndex] - pred2D; + + itvNum = fabs(diff)/realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + type[lIndex] = (int) (itvNum/2) + intvRadius; + P0[j] = pred2D + 2 * (type[lIndex] - intvRadius) * realPrecision; + } + else + { + type[lIndex] = 0; + compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P0[j] = vce->data; + } + } + + float *Pt; + Pt = P1; + P1 = P0; + P0 = Pt; + } + + free(P0); + free(P1); + size_t exactDataNum = exactLeadNumArray->size; + + TightDataPointStorageF* tdps; + + new_TightDataPointStorageF(&tdps, dataLength, exactDataNum, + type, 
exactMidByteArray->array, exactMidByteArray->size, + exactLeadNumArray->array, + resiBitArray->array, resiBitArray->size, + resiBitsLength, + realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0); + + //free memory + free_DIA(exactLeadNumArray); + free_DIA(resiBitArray); + free(type); + free(vce); + free(lce); + free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps); + + return tdps; +} + +TightDataPointStorageF* SZ_compress_float_3D_MDQ_subblock(float *oriData, double realPrecision, float valueRangeSize, float medianValue_f, +size_t r1, size_t r2, size_t r3, size_t s1, size_t s2, size_t s3, size_t e1, size_t e2, size_t e3) +{ + unsigned int quantization_intervals; + if(exe_params->optQuantMode==1) + { + quantization_intervals = optimize_intervals_float_3D_subblock(oriData, realPrecision, r1, r2, r3, s1, s2, s3, e1, e2, e3); + updateQuantizationInfo(quantization_intervals); + } + else + quantization_intervals = exe_params->intvCapacity; + int intvRadius = quantization_intervals/2; + + size_t i,j,k; + int reqLength; + float pred1D, pred2D, pred3D; + float diff = 0.0; + double itvNum = 0; + float *P0, *P1; + + size_t R1 = e1 - s1 + 1; + size_t R2 = e2 - s2 + 1; + size_t R3 = e3 - s3 + 1; + size_t dataLength = R1*R2*R3; + + size_t r23 = r2*r3; + size_t R23 = R2*R3; + + P0 = (float*)malloc(R23*sizeof(float)); + P1 = (float*)malloc(R23*sizeof(float)); + + float medianValue = medianValue_f; + short radExpo = getExponent_float(valueRangeSize/2); + computeReqLength_float(realPrecision, radExpo, &reqLength, &medianValue); + + int* type = (int*) malloc(dataLength*sizeof(int)); + //type[dataLength]=0; + + float* spaceFillingValue = oriData; // + + DynamicIntArray *exactLeadNumArray; + new_DIA(&exactLeadNumArray, DynArrayInitLen); + + DynamicByteArray *exactMidByteArray; + new_DBA(&exactMidByteArray, DynArrayInitLen); + + DynamicIntArray *resiBitArray; + new_DIA(&resiBitArray, DynArrayInitLen); + + 
unsigned char preDataBytes[4]; + intToBytes_bigEndian(preDataBytes, 0); + + int reqBytesLength = reqLength/8; + int resiBitsLength = reqLength%8; + + FloatValueCompressElement *vce = (FloatValueCompressElement*)malloc(sizeof(FloatValueCompressElement)); + LossyCompressionElement *lce = (LossyCompressionElement*)malloc(sizeof(LossyCompressionElement)); + + + /////////////////////////// Process layer-s1 /////////////////////////// + /* Process Row-s2 data s3*/ + size_t gIndex; //global index + size_t lIndex; //local index + size_t index2D; //local 2D index + + gIndex = s1*r23+s2*r3+s3; + lIndex = 0; + index2D = 0; + + type[lIndex] = 0; + compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[index2D] = vce->data; + + /* Process Row-s2 data s3+1*/ + gIndex = s1*r23+s2*r3+s3+1; + lIndex = 1; + index2D = 1; + + pred1D = P1[index2D-1]; + diff = spaceFillingValue[gIndex] - pred1D; + + itvNum = fabs(diff)/realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + type[lIndex] = (int) (itvNum/2) + intvRadius; + P1[index2D] = pred1D + 2 * (type[lIndex] - intvRadius) * realPrecision; + } + else + { + type[lIndex] = 0; + compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[index2D] = vce->data; + } + + /* Process Row-s2 data s3+2 --> data e3 */ + for (j = 2; j < R3; j++) + { + gIndex = s1*r23+s2*r3+s3+j; + lIndex = j; + index2D = j; + + pred1D = 2*P1[index2D-1] - 
P1[index2D-2]; + diff = spaceFillingValue[gIndex] - pred1D; + + itvNum = fabs(diff)/realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + type[lIndex] = (int) (itvNum/2) + intvRadius; + P1[index2D] = pred1D + 2 * (type[lIndex] - intvRadius) * realPrecision; + } + else + { + type[lIndex] = 0; + compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[index2D] = vce->data; + } + } + + /* Process Row-s2+1 --> Row-e2 */ + for (i = 1; i < R2; i++) + { + /* Process row-s2+i data s3 */ + gIndex = s1*r23+(s2+i)*r3+s3; + lIndex = i*R3; + index2D = i*R3; + + pred1D = P1[index2D-R3]; + diff = spaceFillingValue[gIndex] - pred1D; + + itvNum = fabs(diff)/realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + type[lIndex] = (int) (itvNum/2) + intvRadius; + P1[index2D] = pred1D + 2 * (type[lIndex] - intvRadius) * realPrecision; + } + else + { + type[lIndex] = 0; + compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[index2D] = vce->data; + } + + /* Process row-s2+i data s3+1 --> data e3*/ + for (j = 1; j < R3; j++) + { + gIndex = s1*r23+(s2+i)*r3+s3+j; + lIndex = i*R3+j; + index2D = i*R3+j; + + pred2D = P1[index2D-1] + P1[index2D-R3] - P1[index2D-R3-1]; + diff = spaceFillingValue[gIndex] - pred2D; + + itvNum = fabs(diff)/realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + 
type[lIndex] = (int) (itvNum/2) + intvRadius; + P1[index2D] = pred2D + 2 * (type[lIndex] - intvRadius) * realPrecision; + } + else + { + type[lIndex] = 0; + compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[index2D] = vce->data; + } + } + } + + + /////////////////////////// Process layer-s1+1 --> layer-e1 /////////////////////////// + + for (k = 1; k < R1; k++) + { + /* Process Row-s2 data s3*/ + gIndex = (s1+k)*r23+s2*r3+s3; + lIndex = k*R23; + index2D = 0; + + pred1D = P1[index2D]; + diff = spaceFillingValue[gIndex] - pred1D; + + itvNum = fabs(diff)/realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + type[lIndex] = (int) (itvNum/2) + intvRadius; + P0[index2D] = pred1D + 2 * (type[lIndex] - intvRadius) * realPrecision; + } + else + { + type[lIndex] = 0; + compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P0[index2D] = vce->data; + } + + /* Process Row-s2 data s3+1 --> data e3 */ + for (j = 1; j < R3; j++) + { + gIndex = (s1+k)*r23+s2*r3+s3+j; + lIndex = k*R23+j; + index2D = j; + + pred2D = P0[index2D-1] + P1[index2D] - P1[index2D-1]; + diff = spaceFillingValue[gIndex] - pred2D; + + itvNum = fabs(diff)/realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + type[lIndex] = (int) (itvNum/2) + intvRadius; + P0[index2D] = pred2D + 2 * (type[lIndex] - intvRadius) * realPrecision; + } + else + { + 
type[lIndex] = 0; + compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P0[index2D] = vce->data; + } + } + + /* Process Row-s2+1 --> Row-e2 */ + for (i = 1; i < R2; i++) + { + /* Process Row-s2+i data s3 */ + gIndex = (s1+k)*r23+(s2+i)*r3+s3; + lIndex = k*R23+i*R3; + index2D = i*R3; + + pred2D = P0[index2D-R3] + P1[index2D] - P1[index2D-R3]; + diff = spaceFillingValue[gIndex] - pred2D; + + itvNum = fabs(diff)/realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + type[lIndex] = (int) (itvNum/2) + intvRadius; + P0[index2D] = pred2D + 2 * (type[lIndex] - intvRadius) * realPrecision; + } + else + { + type[lIndex] = 0; + compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P0[index2D] = vce->data; + } + + /* Process Row-s2+i data s3+1 --> data e3 */ + for (j = 1; j < R3; j++) + { + gIndex = (s1+k)*r23+(s2+i)*r3+s3+j; + lIndex = k*R23+i*R3+j; + index2D = i*R3+j; + +// printf ("global index = %d, local index = %d\n", gIndex, lIndex); + + pred3D = P0[index2D-1] + P0[index2D-R3]+ P1[index2D] - P0[index2D-R3-1] - P1[index2D-R3] - P1[index2D-1] + P1[index2D-R3-1]; + diff = spaceFillingValue[gIndex] - pred3D; + + itvNum = fabs(diff)/realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + type[lIndex] = (int) (itvNum/2) + intvRadius; + P0[index2D] = pred3D + 2 * (type[lIndex] - intvRadius) * realPrecision; + } + else + { 
+ type[lIndex] = 0; + compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P0[index2D] = vce->data; + } + } + } + + float *Pt; + Pt = P1; + P1 = P0; + P0 = Pt; + } + + free(P0); + free(P1); + size_t exactDataNum = exactLeadNumArray->size; + + TightDataPointStorageF* tdps; + + new_TightDataPointStorageF(&tdps, dataLength, exactDataNum, + type, exactMidByteArray->array, exactMidByteArray->size, + exactLeadNumArray->array, + resiBitArray->array, resiBitArray->size, + resiBitsLength, + realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0); + + //free memory + free_DIA(exactLeadNumArray); + free_DIA(resiBitArray); + free(type); + free(vce); + free(lce); + free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps); + + return tdps; +} + +TightDataPointStorageF* SZ_compress_float_4D_MDQ_subblock(float *oriData, double realPrecision, float valueRangeSize, float medianValue_f, +size_t r1, size_t r2, size_t r3, size_t r4, size_t s1, size_t s2, size_t s3, size_t s4, size_t e1, size_t e2, size_t e3, size_t e4) +{ + unsigned int quantization_intervals; + if(exe_params->optQuantMode==1) + { + quantization_intervals = optimize_intervals_float_4D_subblock(oriData, realPrecision, r1, r2, r3, r4, s1, s2, s3, s4, e1, e2, e3, e4); + updateQuantizationInfo(quantization_intervals); + } + else + quantization_intervals = exe_params->intvCapacity; + int intvRadius = quantization_intervals/2; + + size_t i,j,k; + int reqLength; + float pred1D, pred2D, pred3D; + float diff = 0.0; + double itvNum = 0; + float *P0, *P1; + + size_t R1 = e1 - s1 + 1; + size_t R2 = e2 - s2 + 1; + size_t R3 = e3 - s3 + 1; + size_t R4 = e4 - s4 + 1; + + 
size_t dataLength = R1*R2*R3*R4; + + size_t r34 = r3*r4; + size_t r234 = r2*r3*r4; + size_t R34 = R3*R4; + size_t R234 = R2*R3*R4; + + P0 = (float*)malloc(R34*sizeof(float)); + P1 = (float*)malloc(R34*sizeof(float)); + + float medianValue = medianValue_f; + short radExpo = getExponent_float(valueRangeSize/2); + computeReqLength_float(realPrecision, radExpo, &reqLength, &medianValue); + + int* type = (int*) malloc(dataLength*sizeof(int)); + + float* spaceFillingValue = oriData; // + + DynamicIntArray *exactLeadNumArray; + new_DIA(&exactLeadNumArray, DynArrayInitLen); + + DynamicByteArray *exactMidByteArray; + new_DBA(&exactMidByteArray, DynArrayInitLen); + + DynamicIntArray *resiBitArray; + new_DIA(&resiBitArray, DynArrayInitLen); + + unsigned char preDataBytes[4]; + intToBytes_bigEndian(preDataBytes, 0); + + int reqBytesLength = reqLength/8; + int resiBitsLength = reqLength%8; + + FloatValueCompressElement *vce = (FloatValueCompressElement*)malloc(sizeof(FloatValueCompressElement)); + LossyCompressionElement *lce = (LossyCompressionElement*)malloc(sizeof(LossyCompressionElement)); + + + size_t l; + for (l = 0; l < R1; l++) + { + + /////////////////////////// Process layer-s2 /////////////////////////// + /* Process Row-s3 data s4*/ + size_t gIndex; //global index + size_t lIndex; //local index + size_t index2D; //local 2D index + + gIndex = (s1+l)*r234+s2*r34+s3*r4+s4; + lIndex = l*R234; + index2D = 0; + + type[lIndex] = 0; + compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[index2D] = vce->data; + + /* Process Row-s3 data s4+1*/ + gIndex = (s1+l)*r234+s2*r34+s3*r4+s4+1; + lIndex = l*R234+1; + index2D = 1; + + pred1D = P1[index2D-1]; + diff = spaceFillingValue[gIndex] - 
pred1D; + + itvNum = fabs(diff)/realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + type[lIndex] = (int) (itvNum/2) + intvRadius; + P1[index2D] = pred1D + 2 * (type[lIndex] - intvRadius) * realPrecision; + } + else + { + type[lIndex] = 0; + compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[index2D] = vce->data; + } + + /* Process Row-s3 data s4+2 --> data e4 */ + for (j = 2; j < R4; j++) + { + gIndex = (s1+l)*r234+s2*r34+s3*r4+s4+j; + lIndex = l*R234+j; + index2D = j; + + pred1D = 2*P1[index2D-1] - P1[index2D-2]; + diff = spaceFillingValue[gIndex] - pred1D; + + itvNum = fabs(diff)/realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + type[lIndex] = (int) (itvNum/2) + intvRadius; + P1[index2D] = pred1D + 2 * (type[lIndex] - intvRadius) * realPrecision; + } + else + { + type[lIndex] = 0; + compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[index2D] = vce->data; + } + } + + /* Process Row-s3+1 --> Row-e3 */ + for (i = 1; i < R3; i++) + { + /* Process row-s2+i data s3 */ + gIndex = (s1+l)*r234+s2*r34+(s3+i)*r4+s4; + lIndex = l*R234+i*R4; + index2D = i*R4; + + pred1D = P1[index2D-R4]; + diff = spaceFillingValue[gIndex] - pred1D; + + itvNum = fabs(diff)/realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + type[lIndex] = (int) (itvNum/2) + intvRadius; + 
P1[index2D] = pred1D + 2 * (type[lIndex] - intvRadius) * realPrecision; + } + else + { + type[lIndex] = 0; + compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[index2D] = vce->data; + } + + /* Process row-s3+i data s4+1 --> data e4*/ + for (j = 1; j < R4; j++) + { + gIndex = (s1+l)*r234+s2*r34+(s3+i)*r4+s4+j; + lIndex = l*R234+i*R4+j; + index2D = i*R4+j; + + pred2D = P1[index2D-1] + P1[index2D-R4] - P1[index2D-R4-1]; + diff = spaceFillingValue[gIndex] - pred2D; + + itvNum = fabs(diff)/realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + type[lIndex] = (int) (itvNum/2) + intvRadius; + P1[index2D] = pred2D + 2 * (type[lIndex] - intvRadius) * realPrecision; + } + else + { + type[lIndex] = 0; + compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[index2D] = vce->data; + } + } + } + + + /////////////////////////// Process layer-s2+1 --> layer-e2 /////////////////////////// + + for (k = 1; k < R2; k++) + { + /* Process Row-s3 data s4*/ + gIndex = (s1+l)*r234+(s2+k)*r34+s3*r4+s4; + lIndex = l*R234+k*R34; + index2D = 0; + + pred1D = P1[index2D]; + diff = spaceFillingValue[gIndex] - pred1D; + + itvNum = fabs(diff)/realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + type[lIndex] = (int) (itvNum/2) + intvRadius; + P0[index2D] = pred1D + 2 * (type[lIndex] - intvRadius) * realPrecision; + } + else + { + 
type[lIndex] = 0; + compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P0[index2D] = vce->data; + } + + /* Process Row-s3 data s4+1 --> data e4 */ + for (j = 1; j < R4; j++) + { + gIndex = (s1+l)*r234+(s2+k)*r34+s3*r4+s4+j; + lIndex = l*R234+k*R34+j; + index2D = j; + + pred2D = P0[index2D-1] + P1[index2D] - P1[index2D-1]; + diff = spaceFillingValue[gIndex] - pred2D; + + itvNum = fabs(diff)/realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + type[lIndex] = (int) (itvNum/2) + intvRadius; + P0[index2D] = pred2D + 2 * (type[lIndex] - intvRadius) * realPrecision; + } + else + { + type[lIndex] = 0; + compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P0[index2D] = vce->data; + } + } + + /* Process Row-s3+1 --> Row-e3 */ + for (i = 1; i < R3; i++) + { + /* Process Row-s3+i data s4 */ + gIndex = (s1+l)*r234+(s2+k)*r34+(s3+i)*r4+s4; + lIndex = l*R234+k*R34+i*R4; + index2D = i*R4; + + pred2D = P0[index2D-R4] + P1[index2D] - P1[index2D-R4]; + diff = spaceFillingValue[gIndex] - pred2D; + + itvNum = fabs(diff)/realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + type[lIndex] = (int) (itvNum/2) + intvRadius; + P0[index2D] = pred2D + 2 * (type[lIndex] - intvRadius) * realPrecision; + } + else + { + type[lIndex] = 0; + compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, 
reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P0[index2D] = vce->data; + } + + /* Process Row-s3+i data s4+1 --> data e4 */ + for (j = 1; j < R4; j++) + { + gIndex = (s1+l)*r234+(s2+k)*r34+(s3+i)*r4+s4+j; + lIndex = l*R234+k*R34+i*R4+j; + index2D = i*R4+j; + +// printf ("global index = %d, local index = %d\n", gIndex, lIndex); + + pred3D = P0[index2D-1] + P0[index2D-R4]+ P1[index2D] - P0[index2D-R4-1] - P1[index2D-R4] - P1[index2D-1] + P1[index2D-R4-1]; + diff = spaceFillingValue[gIndex] - pred3D; + + itvNum = fabs(diff)/realPrecision + 1; + + if (itvNum < quantization_intervals) + { + if (diff < 0) itvNum = -itvNum; + type[lIndex] = (int) (itvNum/2) + intvRadius; + P0[index2D] = pred3D + 2 * (type[lIndex] - intvRadius) * realPrecision; + } + else + { + type[lIndex] = 0; + compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P0[index2D] = vce->data; + } + } + } + + float *Pt; + Pt = P1; + P1 = P0; + P0 = Pt; + } + + } + + free(P0); + free(P1); + size_t exactDataNum = exactLeadNumArray->size; + + TightDataPointStorageF* tdps; + + new_TightDataPointStorageF(&tdps, dataLength, exactDataNum, + type, exactMidByteArray->array, exactMidByteArray->size, + exactLeadNumArray->array, + resiBitArray->array, resiBitArray->size, + resiBitsLength, + realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0); + + //free memory + free_DIA(exactLeadNumArray); + free_DIA(resiBitArray); + free(type); + free(vce); + free(lce); + free(exactMidByteArray); //exactMidByteArray->array has 
been released in free_TightDataPointStorageF(tdps); + + return tdps; +} + +unsigned int optimize_intervals_float_1D_opt_MSST19(float *oriData, size_t dataLength, double realPrecision) +{ + size_t i = 0, radiusIndex; + float pred_value = 0; + double pred_err; + size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t)); + memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t)); + size_t totalSampleSize = 0;//dataLength/confparams_cpr->sampleDistance; + + float * data_pos = oriData + 2; + float divider = log2(1+realPrecision)*2; + int tempIndex = 0; + while(data_pos - oriData < dataLength){ + if(*data_pos == 0){ + data_pos += confparams_cpr->sampleDistance; + continue; + } + tempIndex++; + totalSampleSize++; + pred_value = data_pos[-1]; + pred_err = fabs((double)*data_pos / pred_value); + radiusIndex = (unsigned long)fabs(log2(pred_err)/divider+0.5); + if(radiusIndex>=confparams_cpr->maxRangeRadius) + radiusIndex = confparams_cpr->maxRangeRadius - 1; + intervals[radiusIndex]++; + + data_pos += confparams_cpr->sampleDistance; + } + //compute the appropriate number + size_t targetCount = totalSampleSize*confparams_cpr->predThreshold; + size_t sum = 0; + for(i=0;imaxRangeRadius;i++) + { + sum += intervals[i]; + if(sum>targetCount) + break; + } + if(i>=confparams_cpr->maxRangeRadius) + i = confparams_cpr->maxRangeRadius-1; + + unsigned int accIntervals = 2*(i+1); + unsigned int powerOf2 = roundUpToPowerOf2(accIntervals); + + if(powerOf2<32) + powerOf2 = 32; + + free(intervals); + return powerOf2; +} + +unsigned int optimize_intervals_float_2D_opt_MSST19(float *oriData, size_t r1, size_t r2, double realPrecision) +{ + size_t i; + size_t radiusIndex; + float pred_value = 0, pred_err; + size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t)); + memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t)); + size_t totalSampleSize = 0; + + size_t offset_count = confparams_cpr->sampleDistance - 1; // count 
r2 offset + size_t offset_count_2; + float * data_pos = oriData + r2 + offset_count; + float divider = log2(1+realPrecision)*2; + size_t n1_count = 1; // count i sum + size_t len = r1 * r2; + while(data_pos - oriData < len){ + if(*data_pos == 0){ + data_pos += confparams_cpr->sampleDistance; + continue; + } + totalSampleSize++; + pred_value = data_pos[-1] + data_pos[-r2] - data_pos[-r2-1]; + pred_err = fabs(pred_value / *data_pos); + radiusIndex = (unsigned long)fabs(log2(pred_err)/divider+0.5); + if(radiusIndex>=confparams_cpr->maxRangeRadius) + radiusIndex = confparams_cpr->maxRangeRadius - 1; + intervals[radiusIndex]++; + + offset_count += confparams_cpr->sampleDistance; + if(offset_count >= r2){ + n1_count ++; + offset_count_2 = n1_count % confparams_cpr->sampleDistance; + data_pos += (r2 + confparams_cpr->sampleDistance - offset_count) + (confparams_cpr->sampleDistance - offset_count_2); + offset_count = (confparams_cpr->sampleDistance - offset_count_2); + if(offset_count == 0) offset_count ++; + } + else data_pos += confparams_cpr->sampleDistance; + } + + //compute the appropriate number + size_t targetCount = totalSampleSize*confparams_cpr->predThreshold; + size_t sum = 0; + for(i=0;imaxRangeRadius;i++) + { + sum += intervals[i]; + if(sum>targetCount) + break; + } + if(i>=confparams_cpr->maxRangeRadius) + i = confparams_cpr->maxRangeRadius-1; + unsigned int accIntervals = 2*(i+1); + unsigned int powerOf2 = roundUpToPowerOf2(accIntervals); + + if(powerOf2<32) + powerOf2 = 32; + + free(intervals); + return powerOf2; +} + +unsigned int optimize_intervals_float_3D_opt_MSST19(float *oriData, size_t r1, size_t r2, size_t r3, double realPrecision) +{ + size_t i; + size_t radiusIndex; + size_t r23=r2*r3; + float pred_value = 0, pred_err; + size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t)); + memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t)); + size_t totalSampleSize = 0; + + size_t offset_count = 
confparams_cpr->sampleDistance - 2; // count r3 offset + size_t offset_count_2; + float * data_pos = oriData + r23 + r3 + offset_count; + float divider = log2(1+realPrecision)*2; + size_t n1_count = 1, n2_count = 1; // count i,j sum + size_t len = r1 * r2 * r3; + while(data_pos - oriData < len){ + if(*data_pos == 0){ + data_pos += confparams_cpr->sampleDistance; + continue; + } + totalSampleSize++; + pred_value = data_pos[-1] + data_pos[-r3] + data_pos[-r23] - data_pos[-1-r23] - data_pos[-r3-1] - data_pos[-r3-r23] + data_pos[-r3-r23-1]; + pred_err = fabsf(*data_pos / pred_value); + radiusIndex = fabs(log2(pred_err)/divider+0.5); + if(radiusIndex>=confparams_cpr->maxRangeRadius) + { + radiusIndex = confparams_cpr->maxRangeRadius - 1; + } + intervals[radiusIndex]++; + offset_count += confparams_cpr->sampleDistance; + if(offset_count >= r3){ + n2_count ++; + if(n2_count == r2){ + n1_count ++; + n2_count = 1; + data_pos += r3; + } + offset_count_2 = (n1_count + n2_count) % confparams_cpr->sampleDistance; + data_pos += (r3 + confparams_cpr->sampleDistance - offset_count) + (confparams_cpr->sampleDistance - offset_count_2); + offset_count = (confparams_cpr->sampleDistance - offset_count_2); + if(offset_count == 0) offset_count ++; + } + else data_pos += confparams_cpr->sampleDistance; + } + //compute the appropriate number + size_t targetCount = totalSampleSize*confparams_cpr->predThreshold; + size_t sum = 0; + for(i=0;imaxRangeRadius;i++) + { + sum += intervals[i]; + if(sum>targetCount) + break; + } + if(i>=confparams_cpr->maxRangeRadius) + i = confparams_cpr->maxRangeRadius-1; + unsigned int accIntervals = 2*(i+1); + unsigned int powerOf2 = roundUpToPowerOf2(accIntervals); + + if(powerOf2<32) + powerOf2 = 32; + free(intervals); + return powerOf2; +} + + +unsigned int optimize_intervals_float_3D_opt(float *oriData, size_t r1, size_t r2, size_t r3, double realPrecision) +{ + size_t i; + size_t radiusIndex; + size_t r23=r2*r3; + float pred_value = 0, pred_err; + size_t 
*intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t)); + memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t)); + size_t totalSampleSize = 0; + + size_t offset_count = confparams_cpr->sampleDistance - 2; // count r3 offset + size_t offset_count_2; + float * data_pos = oriData + r23 + r3 + offset_count; + size_t n1_count = 1, n2_count = 1; // count i,j sum + size_t len = r1 * r2 * r3; + while(data_pos - oriData < len){ + totalSampleSize++; + pred_value = data_pos[-1] + data_pos[-r3] + data_pos[-r23] - data_pos[-1-r23] - data_pos[-r3-1] - data_pos[-r3-r23] + data_pos[-r3-r23-1]; + pred_err = fabs(pred_value - *data_pos); + radiusIndex = (pred_err/realPrecision+1)/2; + if(radiusIndex>=confparams_cpr->maxRangeRadius) + { + radiusIndex = confparams_cpr->maxRangeRadius - 1; + } + intervals[radiusIndex]++; + offset_count += confparams_cpr->sampleDistance; + if(offset_count >= r3){ + n2_count ++; + if(n2_count == r2){ + n1_count ++; + n2_count = 1; + data_pos += r3; + } + offset_count_2 = (n1_count + n2_count) % confparams_cpr->sampleDistance; + data_pos += (r3 + confparams_cpr->sampleDistance - offset_count) + (confparams_cpr->sampleDistance - offset_count_2); + offset_count = (confparams_cpr->sampleDistance - offset_count_2); + if(offset_count == 0) offset_count ++; + } + else data_pos += confparams_cpr->sampleDistance; + } + //compute the appropriate number + size_t targetCount = totalSampleSize*confparams_cpr->predThreshold; + size_t sum = 0; + for(i=0;imaxRangeRadius;i++) + { + sum += intervals[i]; + if(sum>targetCount) + break; + } + if(i>=confparams_cpr->maxRangeRadius) + i = confparams_cpr->maxRangeRadius-1; + unsigned int accIntervals = 2*(i+1); + unsigned int powerOf2 = roundUpToPowerOf2(accIntervals); + + if(powerOf2<32) + powerOf2 = 32; + free(intervals); + return powerOf2; +} + +size_t SZ_compress_float_3D_MDQ_RA_block(float * block_ori_data, float * mean, size_t dim_0, size_t dim_1, size_t dim_2, size_t block_dim_0, size_t 
block_dim_1, size_t block_dim_2, float realPrecision, float * P0, float * P1, int * type, float * unpredictable_data){ + + float recip_realPrecision = 1/realPrecision; + size_t dim0_offset = dim_1 * dim_2; + size_t dim1_offset = dim_2; + + // data_pos = block_ori_data; + // for(size_t i=0; i 0) mean[0] = sum / num_elements; + // else mean[0] = 0.0; + mean[0] = block_ori_data[0]; + + size_t unpredictable_count = 0; + size_t r1, r2, r3; + r1 = block_dim_0; + r2 = block_dim_1; + r3 = block_dim_2; + + float * cur_data_pos = block_ori_data; + float curData; + float pred1D, pred2D, pred3D; + float itvNum; + float diff; + size_t i, j, k; + size_t r23 = r2*r3; + // Process Row-0 data 0 + pred1D = mean[0]; + curData = *cur_data_pos; + diff = curData - pred1D; + itvNum = fabsf(diff)*recip_realPrecision + 1; + if (itvNum < exe_params->intvCapacity){ + if (diff < 0) itvNum = -itvNum; + type[0] = (int) (itvNum/2) + exe_params->intvRadius; + P1[0] = pred1D + 2 * (type[0] - exe_params->intvRadius) * realPrecision; + //ganrantee comporession error against the case of machine-epsilon + if(fabsf(curData-P1[0])>realPrecision){ + type[0] = 0; + P1[0] = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + else{ + type[0] = 0; + P1[0] = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + + /* Process Row-0 data 1*/ + pred1D = P1[0]; + curData = cur_data_pos[1]; + diff = curData - pred1D; + itvNum = fabsf(diff)*recip_realPrecision + 1; + if (itvNum < exe_params->intvCapacity){ + if (diff < 0) itvNum = -itvNum; + type[1] = (int) (itvNum/2) + exe_params->intvRadius; + P1[1] = pred1D + 2 * (type[1] - exe_params->intvRadius) * realPrecision; + //ganrantee comporession error against the case of machine-epsilon + if(fabsf(curData-P1[1])>realPrecision){ + type[1] = 0; + P1[1] = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + else{ + type[1] = 0; + P1[1] = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + /* 
Process Row-0 data 2 --> data r3-1 */ + for (j = 2; j < r3; j++){ + pred1D = 2*P1[j-1] - P1[j-2]; + curData = cur_data_pos[j]; + diff = curData - pred1D; + itvNum = fabsf(diff)*recip_realPrecision + 1; + if (itvNum < exe_params->intvCapacity){ + if (diff < 0) itvNum = -itvNum; + type[j] = (int) (itvNum/2) + exe_params->intvRadius; + P1[j] = pred1D + 2 * (type[j] - exe_params->intvRadius) * realPrecision; + //ganrantee comporession error against the case of machine-epsilon + if(fabsf(curData-P1[j])>realPrecision){ + type[j] = 0; + P1[j] = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + else{ + type[j] = 0; + P1[j] = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + cur_data_pos += dim1_offset; + + /* Process Row-1 --> Row-r2-1 */ + size_t index; + for (i = 1; i < r2; i++) + { + /* Process row-i data 0 */ + index = i*r3; + pred1D = P1[index-r3]; + curData = *cur_data_pos; + diff = curData - pred1D; + + itvNum = fabsf(diff)*recip_realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P1[index] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + + //ganrantee comporession error against the case of machine-epsilon + if(fabsf(curData-P1[index])>realPrecision) + { + type[index] = 0; + P1[index] = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + else + { + type[index] = 0; + P1[index] = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + + /* Process row-i data 1 --> data r3-1*/ + for (j = 1; j < r3; j++) + { + index = i*r3+j; + pred2D = P1[index-1] + P1[index-r3] - P1[index-r3-1]; + + curData = cur_data_pos[j]; + diff = curData - pred2D; + + itvNum = fabsf(diff)*recip_realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P1[index] = pred2D + 2 * 
(type[index] - exe_params->intvRadius) * realPrecision; + + //ganrantee comporession error against the case of machine-epsilon + if(fabsf(curData-P1[index])>realPrecision) + { + type[index] = 0; + P1[index] = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + else + { + type[index] = 0; + P1[index] = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + cur_data_pos += dim1_offset; + } + cur_data_pos += dim0_offset - r2 * dim1_offset; + + /////////////////////////// Process layer-1 --> layer-r1-1 /////////////////////////// + + for (k = 1; k < r1; k++) + { + /* Process Row-0 data 0*/ + index = k*r23; + pred1D = P1[0]; + curData = *cur_data_pos; + diff = curData - pred1D; + itvNum = fabsf(diff)*recip_realPrecision + 1; + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P0[0] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + //ganrantee comporession error against the case of machine-epsilon + if(fabsf(curData-P0[0])>realPrecision) + { + type[index] = 0; + P0[0] = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + else + { + type[index] = 0; + P0[0] = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + /* Process Row-0 data 1 --> data r3-1 */ + for (j = 1; j < r3; j++) + { + //index = k*r2*r3+j; + index ++; + pred2D = P0[j-1] + P1[j] - P1[j-1]; + curData = cur_data_pos[j]; + diff = curData - pred2D; + itvNum = fabsf(diff)*recip_realPrecision + 1; + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P0[j] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + //ganrantee comporession error against the case of machine-epsilon + if(fabsf(curData-P0[j])>realPrecision) + { + type[index] = 0; + P0[j] = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } 
+ else + { + type[index] = 0; + P0[j] = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + + cur_data_pos += dim1_offset; + /* Process Row-1 --> Row-r2-1 */ + size_t index2D; + for (i = 1; i < r2; i++) + { + /* Process Row-i data 0 */ + index = k*r23 + i*r3; + index2D = i*r3; + pred2D = P0[index2D-r3] + P1[index2D] - P1[index2D-r3]; + curData = *cur_data_pos; + diff = curData - pred2D; + + itvNum = fabsf(diff)*recip_realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P0[index2D] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + //ganrantee comporession error against the case of machine-epsilon + if(fabsf(curData-P0[index2D])>realPrecision) + { + type[index] = 0; + P0[index2D] = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + else + { + type[index] = 0; + P0[index2D] = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + + /* Process Row-i data 1 --> data r3-1 */ + for (j = 1; j < r3; j++) + { + //index = k*r2*r3 + i*r3 + j; + index ++; + index2D = i*r3 + j; + pred3D = P0[index2D-1] + P0[index2D-r3]+ P1[index2D] - P0[index2D-r3-1] - P1[index2D-r3] - P1[index2D-1] + P1[index2D-r3-1]; + curData = cur_data_pos[j]; + diff = curData - pred3D; + + itvNum = fabsf(diff)*recip_realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P0[index2D] = pred3D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + + //ganrantee comporession error against the case of machine-epsilon + if(fabsf(curData-P0[index2D])>realPrecision) + { + type[index] = 0; + P0[index2D] = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + else + { + type[index] = 0; + P0[index2D] = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + cur_data_pos += 
dim1_offset; + } + cur_data_pos += dim0_offset - r2 * dim1_offset; + float *Pt; + Pt = P1; + P1 = P0; + P0 = Pt; + } + + return unpredictable_count; +} + + +unsigned int optimize_intervals_float_2D_opt(float *oriData, size_t r1, size_t r2, double realPrecision) +{ + size_t i; + size_t radiusIndex; + float pred_value = 0, pred_err; + size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t)); + memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t)); + size_t totalSampleSize = 0; + + size_t offset_count = confparams_cpr->sampleDistance - 1; // count r2 offset + size_t offset_count_2; + float * data_pos = oriData + r2 + offset_count; + size_t n1_count = 1; // count i sum + size_t len = r1 * r2; + while(data_pos - oriData < len){ + totalSampleSize++; + pred_value = data_pos[-1] + data_pos[-r2] - data_pos[-r2-1]; + pred_err = fabs(pred_value - *data_pos); + radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2); + if(radiusIndex>=confparams_cpr->maxRangeRadius) + radiusIndex = confparams_cpr->maxRangeRadius - 1; + intervals[radiusIndex]++; + + offset_count += confparams_cpr->sampleDistance; + if(offset_count >= r2){ + n1_count ++; + offset_count_2 = n1_count % confparams_cpr->sampleDistance; + data_pos += (r2 + confparams_cpr->sampleDistance - offset_count) + (confparams_cpr->sampleDistance - offset_count_2); + offset_count = (confparams_cpr->sampleDistance - offset_count_2); + if(offset_count == 0) offset_count ++; + } + else data_pos += confparams_cpr->sampleDistance; + } + + //compute the appropriate number + size_t targetCount = totalSampleSize*confparams_cpr->predThreshold; + size_t sum = 0; + for(i=0;imaxRangeRadius;i++) + { + sum += intervals[i]; + if(sum>targetCount) + break; + } + if(i>=confparams_cpr->maxRangeRadius) + i = confparams_cpr->maxRangeRadius-1; + unsigned int accIntervals = 2*(i+1); + unsigned int powerOf2 = roundUpToPowerOf2(accIntervals); + + if(powerOf2<32) + powerOf2 = 32; + + free(intervals); + return 
powerOf2; +} + +unsigned int optimize_intervals_float_1D_opt(float *oriData, size_t dataLength, double realPrecision) +{ + size_t i = 0, radiusIndex; + float pred_value = 0, pred_err; + size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t)); + memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t)); + size_t totalSampleSize = 0;//dataLength/confparams_cpr->sampleDistance; + + float * data_pos = oriData + 2; + while(data_pos - oriData < dataLength){ + totalSampleSize++; + pred_value = data_pos[-1]; + pred_err = fabs(pred_value - *data_pos); + radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2); + if(radiusIndex>=confparams_cpr->maxRangeRadius) + radiusIndex = confparams_cpr->maxRangeRadius - 1; + intervals[radiusIndex]++; + + data_pos += confparams_cpr->sampleDistance; + } + //compute the appropriate number + size_t targetCount = totalSampleSize*confparams_cpr->predThreshold; + size_t sum = 0; + for(i=0;imaxRangeRadius;i++) + { + sum += intervals[i]; + if(sum>targetCount) + break; + } + if(i>=confparams_cpr->maxRangeRadius) + i = confparams_cpr->maxRangeRadius-1; + + unsigned int accIntervals = 2*(i+1); + unsigned int powerOf2 = roundUpToPowerOf2(accIntervals); + + if(powerOf2<32) + powerOf2 = 32; + + free(intervals); + return powerOf2; +} + + + +size_t SZ_compress_float_1D_MDQ_RA_block(float * block_ori_data, float * mean, size_t dim_0, size_t block_dim_0, double realPrecision, int * type, float * unpredictable_data){ + + mean[0] = block_ori_data[0]; + unsigned short unpredictable_count = 0; + + float curData; + double itvNum; + double diff; + float last_over_thres = mean[0]; + float pred1D; + size_t type_index = 0; + float * data_pos = block_ori_data; + for(size_t i=0; iintvCapacity){ + if (diff < 0) itvNum = -itvNum; + type[type_index] = (int) (itvNum/2) + exe_params->intvRadius; + last_over_thres = pred1D + 2 * (type[type_index] - exe_params->intvRadius) * realPrecision; + 
if(fabs(curData-last_over_thres)>realPrecision){ + type[type_index] = 0; + last_over_thres = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + + } + else{ + type[type_index] = 0; + unpredictable_data[unpredictable_count ++] = curData; + last_over_thres = curData; + } + type_index ++; + data_pos ++; + } + return unpredictable_count; + +} + +size_t SZ_compress_float_2D_MDQ_RA_block(float * block_ori_data, float * mean, size_t dim_0, size_t dim_1, size_t block_dim_0, size_t block_dim_1, double realPrecision, float * P0, float * P1, int * type, float * unpredictable_data){ + + size_t dim0_offset = dim_1; + mean[0] = block_ori_data[0]; + + size_t unpredictable_count = 0; + size_t r1, r2; + r1 = block_dim_0; + r2 = block_dim_1; + + float * cur_data_pos = block_ori_data; + float curData; + float pred1D, pred2D; + double itvNum; + double diff; + size_t i, j; + /* Process Row-0 data 0*/ + curData = *cur_data_pos; + pred1D = mean[0]; + diff = curData - pred1D; + itvNum = fabs(diff)/realPrecision + 1; + if (itvNum < exe_params->intvCapacity){ + if (diff < 0) itvNum = -itvNum; + type[0] = (int) (itvNum/2) + exe_params->intvRadius; + P1[0] = pred1D + 2 * (type[0] - exe_params->intvRadius) * realPrecision; + //ganrantee comporession error against the case of machine-epsilon + if(fabs(curData-P1[0])>realPrecision){ + type[0] = 0; + P1[0] = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + else{ + type[0] = 0; + P1[0] = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + + /* Process Row-0 data 1*/ + curData = cur_data_pos[1]; + pred1D = P1[0]; + diff = curData - pred1D; + itvNum = fabs(diff)/realPrecision + 1; + if (itvNum < exe_params->intvCapacity){ + if (diff < 0) itvNum = -itvNum; + type[1] = (int) (itvNum/2) + exe_params->intvRadius; + P1[1] = pred1D + 2 * (type[1] - exe_params->intvRadius) * realPrecision; + //ganrantee comporession error against the case of machine-epsilon + 
if(fabs(curData-P1[1])>realPrecision){ + type[1] = 0; + P1[1] = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + else{ + type[1] = 0; + P1[1] = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + + /* Process Row-0 data 2 --> data r2-1 */ + for (j = 2; j < r2; j++) + { + curData = cur_data_pos[j]; + pred1D = 2*P1[j-1] - P1[j-2]; + diff = curData - pred1D; + itvNum = fabs(diff)/realPrecision + 1; + if (itvNum < exe_params->intvCapacity){ + if (diff < 0) itvNum = -itvNum; + type[j] = (int) (itvNum/2) + exe_params->intvRadius; + P1[j] = pred1D + 2 * (type[j] - exe_params->intvRadius) * realPrecision; + //ganrantee comporession error against the case of machine-epsilon + if(fabs(curData-P1[j])>realPrecision){ + type[j] = 0; + P1[j] = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + else{ + type[j] = 0; + P1[j] = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + cur_data_pos += dim0_offset; + /* Process Row-1 --> Row-r1-1 */ + size_t index; + for (i = 1; i < r1; i++) + { + /* Process row-i data 0 */ + index = i*r2; + curData = *cur_data_pos; + pred1D = P1[0]; + diff = curData - pred1D; + itvNum = fabs(diff)/realPrecision + 1; + if (itvNum < exe_params->intvCapacity){ + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P0[0] = pred1D + 2 * (type[j] - exe_params->intvRadius) * realPrecision; + //ganrantee comporession error against the case of machine-epsilon + if(fabs(curData-P0[0])>realPrecision){ + type[index] = 0; + P0[0] = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + P0[0] = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + + /* Process row-i data 1 --> r2-1*/ + for (j = 1; j < r2; j++) + { + index = i*r2+j; + curData = cur_data_pos[j]; + pred2D = P0[j-1] + P1[j] - P1[j-1]; + diff = curData - pred2D; + itvNum = fabs(diff)/realPrecision + 1; + if (itvNum 
< exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P0[j] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + + //ganrantee comporession error against the case of machine-epsilon + if(fabs(curData-P0[j])>realPrecision) + { + type[index] = 0; + P0[j] = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + else + { + type[index] = 0; + P0[j] = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + cur_data_pos += dim0_offset; + + float *Pt; + Pt = P1; + P1 = P0; + P0 = Pt; + } + return unpredictable_count; +} + +/*The above code is for sz 1.4.13; the following code is for sz 2.0*/ +static unsigned int optimize_intervals_float_1D_with_freq_and_dense_pos(float *oriData, size_t r1, double realPrecision, float * dense_pos, float * max_freq, float * mean_freq) +{ + float mean = 0.0; + size_t len = r1; + size_t mean_distance = (int) (sqrt(len)); + + float * data_pos = oriData; + size_t mean_count = 0; + while(data_pos - oriData < len){ + mean += *data_pos; + mean_count ++; + data_pos += mean_distance; + } + if(mean_count > 0) mean /= mean_count; + size_t range = 8192; + size_t radius = 4096; + size_t * freq_intervals = (size_t *) malloc(range*sizeof(size_t)); + memset(freq_intervals, 0, range*sizeof(size_t)); + + unsigned int maxRangeRadius = confparams_cpr->maxRangeRadius; + int sampleDistance = confparams_cpr->sampleDistance; + float predThreshold = confparams_cpr->predThreshold; + + size_t i; + size_t radiusIndex; + float pred_value = 0, pred_err; + size_t *intervals = (size_t*)malloc(maxRangeRadius*sizeof(size_t)); + memset(intervals, 0, maxRangeRadius*sizeof(size_t)); + + float mean_diff; + ptrdiff_t freq_index; + size_t freq_count = 0; + size_t sample_count = 0; + data_pos = oriData + 1; + while(data_pos - oriData < len){ + pred_value = data_pos[-1]; + pred_err = fabs(pred_value - *data_pos); + if(pred_err < realPrecision) 
freq_count ++; + radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2); + if(radiusIndex>=maxRangeRadius) + radiusIndex = maxRangeRadius - 1; + intervals[radiusIndex]++; + + mean_diff = *data_pos - mean; + if(mean_diff > 0) freq_index = (ptrdiff_t)(mean_diff/realPrecision) + radius; + else freq_index = (ptrdiff_t)(mean_diff/realPrecision) - 1 + radius; + if(freq_index <= 0){ + freq_intervals[0] ++; + } + else if(freq_index >= range){ + freq_intervals[range - 1] ++; + } + else{ + freq_intervals[freq_index] ++; + } + data_pos += sampleDistance; + sample_count ++; + } + *max_freq = freq_count * 1.0/ sample_count; + + //compute the appropriate number + size_t targetCount = sample_count*predThreshold; + size_t sum = 0; + for(i=0;itargetCount) + break; + } + if(i>=maxRangeRadius) + i = maxRangeRadius-1; + unsigned int accIntervals = 2*(i+1); + unsigned int powerOf2 = roundUpToPowerOf2(accIntervals); + + if(powerOf2<32) + powerOf2 = 32; + + // collect frequency + size_t max_sum = 0; + size_t max_index = 0; + size_t tmp_sum; + size_t * freq_pos = freq_intervals + 1; + for(size_t i=1; i max_sum){ + max_sum = tmp_sum; + max_index = i; + } + freq_pos ++; + } + *dense_pos = mean + realPrecision * (ptrdiff_t)(max_index + 1 - radius); + *mean_freq = max_sum * 1.0 / sample_count; + + free(freq_intervals); + free(intervals); + return powerOf2; +} + +unsigned int optimize_intervals_float_2D_with_freq_and_dense_pos(float *oriData, size_t r1, size_t r2, double realPrecision, float * dense_pos, float * max_freq, float * mean_freq) +{ + float mean = 0.0; + size_t len = r1 * r2; + size_t mean_distance = (int) (sqrt(len)); + + float * data_pos = oriData; + size_t mean_count = 0; + while(data_pos - oriData < len){ + mean += *data_pos; + mean_count ++; + data_pos += mean_distance; + } + if(mean_count > 0) mean /= mean_count; + size_t range = 8192; + size_t radius = 4096; + size_t * freq_intervals = (size_t *) malloc(range*sizeof(size_t)); + memset(freq_intervals, 0, 
range*sizeof(size_t)); + + unsigned int maxRangeRadius = confparams_cpr->maxRangeRadius; + int sampleDistance = confparams_cpr->sampleDistance; + float predThreshold = confparams_cpr->predThreshold; + + size_t i; + size_t radiusIndex; + float pred_value = 0, pred_err; + size_t *intervals = (size_t*)malloc(maxRangeRadius*sizeof(size_t)); + memset(intervals, 0, maxRangeRadius*sizeof(size_t)); + + float mean_diff; + ptrdiff_t freq_index; + size_t freq_count = 0; + size_t n1_count = 1; + size_t offset_count = sampleDistance - 1; + size_t offset_count_2 = 0; + size_t sample_count = 0; + data_pos = oriData + r2 + offset_count; + while(data_pos - oriData < len){ + pred_value = data_pos[-1] + data_pos[-r2] - data_pos[-r2-1]; + pred_err = fabs(pred_value - *data_pos); + if(pred_err < realPrecision) freq_count ++; + radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2); + if(radiusIndex>=maxRangeRadius) + radiusIndex = maxRangeRadius - 1; + intervals[radiusIndex]++; + + mean_diff = *data_pos - mean; + if(mean_diff > 0) freq_index = (ptrdiff_t)(mean_diff/realPrecision) + radius; + else freq_index = (ptrdiff_t)(mean_diff/realPrecision) - 1 + radius; + if(freq_index <= 0){ + freq_intervals[0] ++; + } + else if(freq_index >= range){ + freq_intervals[range - 1] ++; + } + else{ + freq_intervals[freq_index] ++; + } + offset_count += sampleDistance; + if(offset_count >= r2){ + n1_count ++; + offset_count_2 = n1_count % sampleDistance; + data_pos += (r2 + sampleDistance - offset_count) + (sampleDistance - offset_count_2); + offset_count = (sampleDistance - offset_count_2); + if(offset_count == 0) offset_count ++; + } + else data_pos += sampleDistance; + sample_count ++; + } + *max_freq = freq_count * 1.0/ sample_count; + + //compute the appropriate number + size_t targetCount = sample_count*predThreshold; + size_t sum = 0; + for(i=0;itargetCount) + break; + } + if(i>=maxRangeRadius) + i = maxRangeRadius-1; + unsigned int accIntervals = 2*(i+1); + unsigned int powerOf2 = 
roundUpToPowerOf2(accIntervals); + + if(powerOf2<32) + powerOf2 = 32; + + // collect frequency + size_t max_sum = 0; + size_t max_index = 0; + size_t tmp_sum; + size_t * freq_pos = freq_intervals + 1; + for(size_t i=1; i max_sum){ + max_sum = tmp_sum; + max_index = i; + } + freq_pos ++; + } + *dense_pos = mean + realPrecision * (ptrdiff_t)(max_index + 1 - radius); + *mean_freq = max_sum * 1.0 / sample_count; + + free(freq_intervals); + free(intervals); + return powerOf2; +} + +// 2D: modified for higher performance +#define MIN(a, b) aoptQuantMode==1) + { + quantization_intervals = optimize_intervals_float_2D_with_freq_and_dense_pos(oriData, r1, r2, realPrecision, &dense_pos, &sz_sample_correct_freq, &mean_flush_freq); + if(mean_flush_freq > 0.5 || mean_flush_freq > sz_sample_correct_freq) use_mean = 1; + updateQuantizationInfo(quantization_intervals); + } + else{ + quantization_intervals = exe_params->intvCapacity; + } + + // calculate block dims + size_t num_x, num_y; + size_t block_size = 16; + + SZ_COMPUTE_2D_NUMBER_OF_BLOCKS(r1, num_x, block_size); + SZ_COMPUTE_2D_NUMBER_OF_BLOCKS(r2, num_y, block_size); + + size_t split_index_x, split_index_y; + size_t early_blockcount_x, early_blockcount_y; + size_t late_blockcount_x, late_blockcount_y; + SZ_COMPUTE_BLOCKCOUNT(r1, num_x, split_index_x, early_blockcount_x, late_blockcount_x); + SZ_COMPUTE_BLOCKCOUNT(r2, num_y, split_index_y, early_blockcount_y, late_blockcount_y); + + size_t max_num_block_elements = early_blockcount_x * early_blockcount_y; + size_t num_blocks = num_x * num_y; + size_t num_elements = r1 * r2; + + size_t dim0_offset = r2; + + int * result_type = (int *) malloc(num_elements * sizeof(int)); + size_t unpred_data_max_size = max_num_block_elements; + float * result_unpredictable_data = (float *) malloc(unpred_data_max_size * sizeof(float) * num_blocks); + size_t total_unpred = 0; + size_t unpredictable_count; + float * data_pos = oriData; + int * type = result_type; + size_t offset_x, offset_y; + 
size_t current_blockcount_x, current_blockcount_y; + + float * reg_params = (float *) malloc(num_blocks * 4 * sizeof(float)); + float * reg_params_pos = reg_params; + // move regression part out + size_t params_offset_b = num_blocks; + size_t params_offset_c = 2*num_blocks; + for(size_t i=0; i 0) mean = sum / mean_count; + } + + + float tmp_realPrecision = realPrecision; + + // use two prediction buffers for higher performance + float * unpredictable_data = result_unpredictable_data; + unsigned char * indicator = (unsigned char *) malloc(num_blocks * sizeof(unsigned char)); + memset(indicator, 0, num_blocks * sizeof(unsigned char)); + size_t reg_count = 0; + size_t strip_dim_0 = early_blockcount_x + 1; + size_t strip_dim_1 = r2 + 1; + size_t strip_dim0_offset = strip_dim_1; + unsigned char * indicator_pos = indicator; + size_t prediction_buffer_size = strip_dim_0 * strip_dim0_offset * sizeof(float); + float * prediction_buffer_1 = (float *) malloc(prediction_buffer_size); + memset(prediction_buffer_1, 0, prediction_buffer_size); + float * prediction_buffer_2 = (float *) malloc(prediction_buffer_size); + memset(prediction_buffer_2, 0, prediction_buffer_size); + float * cur_pb_buf = prediction_buffer_1; + float * next_pb_buf = prediction_buffer_2; + float * cur_pb_buf_pos; + float * next_pb_buf_pos; + int intvCapacity = quantization_intervals; //exe_params->intvCapacity; + int intvRadius = intvCapacity/2; //exe_params->intvRadius; + int use_reg = 0; + + reg_params_pos = reg_params; + // compress the regression coefficients on the fly + float last_coeffcients[3] = {0.0}; + int coeff_intvCapacity_sz = 65536; + int coeff_intvRadius = coeff_intvCapacity_sz / 2; + int * coeff_type[3]; + int * coeff_result_type = (int *) malloc(num_blocks*3*sizeof(int)); + float * coeff_unpred_data[3]; + float * coeff_unpredictable_data = (float *) malloc(num_blocks*3*sizeof(float)); + float precision[3], recip_precision[3]; + precision[0] = precision_a, precision[1] = precision_b, 
precision[2] = precision_c; + //compute the recip_precision + recip_precision[0] = 1/precision_a, recip_precision[1] = 1/precision_b, recip_precision[2] = 1/precision_c; + + for(int i=0; i<3; i++){ + coeff_type[i] = coeff_result_type + i * num_blocks; + coeff_unpred_data[i] = coeff_unpredictable_data + i * num_blocks; + } + int coeff_index = 0; + unsigned int coeff_unpredictable_count[3] = {0}; + float noise = realPrecision * 0.81; + if(use_mean){ + type = result_type; + int intvCapacity_sz = intvCapacity - 2; + for(size_t i=0; iprecision[e]){ + coeff_type[e][coeff_index] = 0; + last_coeffcients[e] = cur_coeff; + coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; + } + } + else{ + coeff_type[e][coeff_index] = 0; + last_coeffcients[e] = cur_coeff; + coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; + } + } + coeff_index ++; + } + float curData; + float pred; + float itvNum; + float diff; + size_t index = 0; + size_t block_unpredictable_count = 0; + float * cur_data_pos = data_pos; + for(size_t ii=0; iirealPrecision){ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + index ++; + cur_data_pos ++; + } + /*dealing with the last jj (boundary)*/ + { + size_t jj = current_blockcount_y - 1; + curData = *cur_data_pos; + pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2]; + diff = curData - pred; + itvNum = fabsf(diff)*recip_realPrecision + 1; + if (itvNum < intvCapacity){ + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + pred = pred + 2 * (type[index] - intvRadius) * realPrecision; + //ganrantee comporession error against the case of machine-epsilon + if(fabsf(curData - pred)>realPrecision){ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + } + else{ + 
type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + + // assign value to block surfaces + pb_pos[ii * strip_dim0_offset + jj] = pred; + index ++; + cur_data_pos ++; + } + cur_data_pos += dim0_offset - current_blockcount_y; + } + /*dealing with the last ii (boundary)*/ + { + size_t ii = current_blockcount_x - 1; + for(size_t jj=0; jjrealPrecision){ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + // assign value to next prediction buffer + next_pb_pos[jj] = pred; + index ++; + cur_data_pos ++; + } + /*dealing with the last jj (boundary)*/ + { + size_t jj = current_blockcount_y - 1; + curData = *cur_data_pos; + pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2]; + diff = curData - pred; + itvNum = fabsf(diff)*recip_realPrecision + 1; + if (itvNum < intvCapacity){ + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + pred = pred + 2 * (type[index] - intvRadius) * realPrecision; + //ganrantee comporession error against the case of machine-epsilon + if(fabsf(curData - pred)>realPrecision){ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + + // assign value to block surfaces + pb_pos[ii * strip_dim0_offset + jj] = pred; + // assign value to next prediction buffer + next_pb_pos[jj] = pred; + + index ++; + cur_data_pos ++; + } + } // end ii == -1 + unpredictable_count = block_unpredictable_count; + total_unpred += unpredictable_count; + unpredictable_data += unpredictable_count; + reg_count ++; + }// end use_reg + else{ + // use SZ + // SZ predication + unpredictable_count = 0; + float * cur_pb_pos = pb_pos; + float * cur_data_pos 
= data_pos; + float curData; + float pred2D; + float itvNum, diff; + size_t index = 0; + for(size_t ii=0; iitmp_realPrecision){ + type[index] = 0; + *cur_pb_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + *cur_pb_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + index ++; + cur_pb_pos ++; + cur_data_pos ++; + } + cur_pb_pos += strip_dim0_offset - current_blockcount_y; + cur_data_pos += dim0_offset - current_blockcount_y; + } + /*dealing with the last ii (boundary)*/ + { + // ii == current_blockcount_x - 1 + for(size_t jj=0; jjtmp_realPrecision){ + type[index] = 0; + *cur_pb_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + *cur_pb_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + next_pb_pos[jj] = *cur_pb_pos; + index ++; + cur_pb_pos ++; + cur_data_pos ++; + } + } + total_unpred += unpredictable_count; + unpredictable_data += unpredictable_count; + // change indicator + indicator_pos[j] = 1; + }// end SZ + reg_params_pos ++; + data_pos += current_blockcount_y; + pb_pos += current_blockcount_y; + next_pb_pos += current_blockcount_y; + type += current_blockcount_x * current_blockcount_y; + }// end j + indicator_pos += num_y; + float * tmp; + tmp = cur_pb_buf; + cur_pb_buf = next_pb_buf; + next_pb_buf = tmp; + }// end i + }// end use mean + else{ + type = result_type; + int intvCapacity_sz = intvCapacity - 2; + for(size_t i=0; iprecision[e]){ + coeff_type[e][coeff_index] = 0; + last_coeffcients[e] = cur_coeff; + coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; + } + } + else{ + coeff_type[e][coeff_index] = 0; + last_coeffcients[e] = cur_coeff; + coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; + } + } + coeff_index ++; + } + float curData; + float pred; + float itvNum; + float diff; + size_t index = 0; + size_t block_unpredictable_count = 0; + float * 
cur_data_pos = data_pos; + for(size_t ii=0; iirealPrecision){ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + index ++; + cur_data_pos ++; + } + /*dealing with the last jj (boundary)*/ + { + // jj == current_blockcount_y - 1 + size_t jj = current_blockcount_y - 1; + curData = *cur_data_pos; + pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2]; + diff = curData - pred; + itvNum = fabsf(diff)*recip_realPrecision + 1; + if (itvNum < intvCapacity){ + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + pred = pred + 2 * (type[index] - intvRadius) * realPrecision; + //ganrantee comporession error against the case of machine-epsilon + if(fabsf(curData - pred)>realPrecision){ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + + // assign value to block surfaces + pb_pos[ii * strip_dim0_offset + jj] = pred; + index ++; + cur_data_pos ++; + } + cur_data_pos += dim0_offset - current_blockcount_y; + } + /*dealing with the last ii (boundary)*/ + { + size_t ii = current_blockcount_x - 1; + for(size_t jj=0; jjrealPrecision){ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + // assign value to next prediction buffer + next_pb_pos[jj] = pred; + index ++; + cur_data_pos ++; + } + /*dealing with the last jj (boundary)*/ + { + // jj == current_blockcount_y - 1 + size_t jj = current_blockcount_y - 1; + curData = *cur_data_pos; + pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2]; + diff = curData 
- pred; + itvNum = fabsf(diff)*recip_realPrecision + 1; + if (itvNum < intvCapacity){ + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + pred = pred + 2 * (type[index] - intvRadius) * realPrecision; + //ganrantee comporession error against the case of machine-epsilon + if(fabsf(curData - pred)>realPrecision){ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + + // assign value to block surfaces + pb_pos[ii * strip_dim0_offset + jj] = pred; + // assign value to next prediction buffer + next_pb_pos[jj] = pred; + + index ++; + cur_data_pos ++; + } + } // end ii == -1 + unpredictable_count = block_unpredictable_count; + total_unpred += unpredictable_count; + unpredictable_data += unpredictable_count; + reg_count ++; + }// end use_reg + else{ + // use SZ + // SZ predication + unpredictable_count = 0; + float * cur_pb_pos = pb_pos; + float * cur_data_pos = data_pos; + float curData; + float pred2D; + float itvNum, diff; + size_t index = 0; + for(size_t ii=0; iitmp_realPrecision){ + type[index] = 0; + *cur_pb_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + *cur_pb_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + + index ++; + cur_pb_pos ++; + cur_data_pos ++; + } + cur_pb_pos += strip_dim0_offset - current_blockcount_y; + cur_data_pos += dim0_offset - current_blockcount_y; + } + /*dealing with the last ii (boundary)*/ + { + // ii == current_blockcount_x - 1 + for(size_t jj=0; jjtmp_realPrecision){ + type[index] = 0; + *cur_pb_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + *cur_pb_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + next_pb_pos[jj] = *cur_pb_pos; + index ++; + cur_pb_pos ++; + cur_data_pos 
++; + } + } + total_unpred += unpredictable_count; + unpredictable_data += unpredictable_count; + // change indicator + indicator_pos[j] = 1; + }// end SZ + reg_params_pos ++; + data_pos += current_blockcount_y; + pb_pos += current_blockcount_y; + next_pb_pos += current_blockcount_y; + type += current_blockcount_x * current_blockcount_y; + }// end j + indicator_pos += num_y; + float * tmp; + tmp = cur_pb_buf; + cur_pb_buf = next_pb_buf; + next_pb_buf = tmp; + }// end i + } + free(prediction_buffer_1); + free(prediction_buffer_2); + + int stateNum = 2*quantization_intervals; + HuffmanTree* huffmanTree = createHuffmanTree(stateNum); + + size_t nodeCount = 0; + size_t i = 0; + init(huffmanTree, result_type, num_elements); + for (i = 0; i < stateNum; i++) + if (huffmanTree->code[i]) nodeCount++; + nodeCount = nodeCount*2-1; + + unsigned char *treeBytes; + unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes); + + unsigned int meta_data_offset = 3 + 1 + MetaDataByteLength; + // total size metadata # elements real precision intervals nodeCount huffman block index unpredicatable count mean unpred size elements + unsigned char * result = (unsigned char *) calloc(meta_data_offset + exe_params->SZ_SIZE_TYPE + sizeof(float) + sizeof(int) + sizeof(int) + 5*treeByteSize + 3*num_blocks*sizeof(int) + num_blocks * sizeof(unsigned short) + num_blocks * sizeof(unsigned short) + num_blocks * sizeof(float) + total_unpred * sizeof(float) + num_elements * sizeof(int), 1); + unsigned char * result_pos = result; + initRandomAccessBytes(result_pos); + result_pos += meta_data_offset; + + sizeToBytes(result_pos, num_elements); + result_pos += exe_params->SZ_SIZE_TYPE; + + intToBytes_bigEndian(result_pos, block_size); + result_pos += sizeof(int); + floatToBytes(result_pos, realPrecision); + result_pos += sizeof(float); + intToBytes_bigEndian(result_pos, quantization_intervals); + result_pos += sizeof(int); + intToBytes_bigEndian(result_pos, 
treeByteSize); + result_pos += sizeof(int); + intToBytes_bigEndian(result_pos, nodeCount); + result_pos += sizeof(int); + memcpy(result_pos, treeBytes, treeByteSize); + result_pos += treeByteSize; + free(treeBytes); + + memcpy(result_pos, &use_mean, sizeof(unsigned char)); + result_pos += sizeof(unsigned char); + memcpy(result_pos, &mean, sizeof(float)); + result_pos += sizeof(float); + + size_t indicator_size = convertIntArray2ByteArray_fast_1b_to_result(indicator, num_blocks, result_pos); + result_pos += indicator_size; + + //convert the lead/mid/resi to byte stream + if(reg_count>0){ + for(int e=0; e<3; e++){ + int stateNum = 2*coeff_intvCapacity_sz; + HuffmanTree* huffmanTree = createHuffmanTree(stateNum); + size_t nodeCount = 0; + init(huffmanTree, coeff_type[e], reg_count); + size_t i = 0; + for (i = 0; i < huffmanTree->stateNum; i++) + if (huffmanTree->code[i]) nodeCount++; + nodeCount = nodeCount*2-1; + unsigned char *treeBytes; + unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes); + floatToBytes(result_pos, precision[e]); + result_pos += sizeof(float); + intToBytes_bigEndian(result_pos, coeff_intvRadius); + result_pos += sizeof(int); + intToBytes_bigEndian(result_pos, treeByteSize); + result_pos += sizeof(int); + intToBytes_bigEndian(result_pos, nodeCount); + result_pos += sizeof(int); + memcpy(result_pos, treeBytes, treeByteSize); + result_pos += treeByteSize; + free(treeBytes); + size_t typeArray_size = 0; + encode(huffmanTree, coeff_type[e], reg_count, result_pos + sizeof(size_t), &typeArray_size); + sizeToBytes(result_pos, typeArray_size); + result_pos += sizeof(size_t) + typeArray_size; + intToBytes_bigEndian(result_pos, coeff_unpredictable_count[e]); + result_pos += sizeof(int); + memcpy(result_pos, coeff_unpred_data[e], coeff_unpredictable_count[e]*sizeof(float)); + result_pos += coeff_unpredictable_count[e]*sizeof(float); + SZ_ReleaseHuffman(huffmanTree); + } + } + free(coeff_result_type); + 
free(coeff_unpredictable_data); + + //record the number of unpredictable data and also store them + memcpy(result_pos, &total_unpred, sizeof(size_t)); + result_pos += sizeof(size_t); + memcpy(result_pos, result_unpredictable_data, total_unpred * sizeof(float)); + result_pos += total_unpred * sizeof(float); + size_t typeArray_size = 0; + encode(huffmanTree, result_type, num_elements, result_pos, &typeArray_size); + result_pos += typeArray_size; + +#ifdef HAVE_WRITESTATS + writeHuffmanInfo(treeByteSize, typeArray_size, num_elements*sizeof(float), nodeCount); + writeBlockInfo(use_mean, block_size, reg_count, num_blocks); + writeUnpredictDataCounts(total_unpred, num_elements); +#endif + + size_t totalEncodeSize = result_pos - result; + free(indicator); + free(result_unpredictable_data); + free(result_type); + free(reg_params); + + SZ_ReleaseHuffman(huffmanTree); + *comp_size = totalEncodeSize; + + return result; +} + + + +unsigned int optimize_intervals_float_3D_with_freq_and_dense_pos(float *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, float * dense_pos, float * max_freq, float * mean_freq) +{ + float mean = 0.0; + size_t len = r1 * r2 * r3; + size_t mean_distance = (int) (sqrt(len)); + float * data_pos = oriData; + size_t offset_count = 0; + size_t offset_count_2 = 0; + size_t mean_count = 0; + while(data_pos - oriData < len){ + mean += *data_pos; + mean_count ++; + data_pos += mean_distance; + offset_count += mean_distance; + offset_count_2 += mean_distance; + if(offset_count >= r3){ + offset_count = 0; + data_pos -= 1; + } + if(offset_count_2 >= r2 * r3){ + offset_count_2 = 0; + data_pos -= 1; + } + } + if(mean_count > 0) mean /= mean_count; + size_t range = 8192; + size_t radius = 4096; + size_t * freq_intervals = (size_t *) malloc(range*sizeof(size_t)); + memset(freq_intervals, 0, range*sizeof(size_t)); + + unsigned int maxRangeRadius = confparams_cpr->maxRangeRadius; + int sampleDistance = confparams_cpr->sampleDistance; + float predThreshold 
= confparams_cpr->predThreshold; + + size_t i; + size_t radiusIndex; + size_t r23=r2*r3; + float pred_value = 0, pred_err; + size_t *intervals = (size_t*)malloc(maxRangeRadius*sizeof(size_t)); + memset(intervals, 0, maxRangeRadius*sizeof(size_t)); + + float mean_diff; + ptrdiff_t freq_index; + size_t freq_count = 0; + size_t sample_count = 0; + + offset_count = confparams_cpr->sampleDistance - 2; // count r3 offset + data_pos = oriData + r23 + r3 + offset_count; + size_t n1_count = 1, n2_count = 1; // count i,j sum + + while(data_pos - oriData < len){ + + pred_value = data_pos[-1] + data_pos[-r3] + data_pos[-r23] - data_pos[-1-r23] - data_pos[-r3-1] - data_pos[-r3-r23] + data_pos[-r3-r23-1]; + pred_err = fabs(pred_value - *data_pos); + if(pred_err < realPrecision) freq_count ++; + radiusIndex = (pred_err/realPrecision+1)/2; + if(radiusIndex>=maxRangeRadius) + { + radiusIndex = maxRangeRadius - 1; + } + intervals[radiusIndex]++; + + mean_diff = *data_pos - mean; + if(mean_diff > 0) freq_index = (ptrdiff_t)(mean_diff/realPrecision) + radius; + else freq_index = (ptrdiff_t)(mean_diff/realPrecision) - 1 + radius; + if(freq_index <= 0){ + freq_intervals[0] ++; + } + else if(freq_index >= range){ + freq_intervals[range - 1] ++; + } + else{ + freq_intervals[freq_index] ++; + } + offset_count += sampleDistance; + if(offset_count >= r3){ + n2_count ++; + if(n2_count == r2){ + n1_count ++; + n2_count = 1; + data_pos += r3; + } + offset_count_2 = (n1_count + n2_count) % sampleDistance; + data_pos += (r3 + sampleDistance - offset_count) + (sampleDistance - offset_count_2); + offset_count = (sampleDistance - offset_count_2); + if(offset_count == 0) offset_count ++; + } + else data_pos += sampleDistance; + sample_count ++; + } + *max_freq = freq_count * 1.0/ sample_count; + + //compute the appropriate number + size_t targetCount = sample_count*predThreshold; + size_t sum = 0; + for(i=0;itargetCount) + break; + } + if(i>=maxRangeRadius) + i = maxRangeRadius-1; + unsigned int 
accIntervals = 2*(i+1); + unsigned int powerOf2 = roundUpToPowerOf2(accIntervals); + + if(powerOf2<32) + powerOf2 = 32; + // collect frequency + size_t max_sum = 0; + size_t max_index = 0; + size_t tmp_sum; + size_t * freq_pos = freq_intervals + 1; + for(size_t i=1; i max_sum){ + max_sum = tmp_sum; + max_index = i; + } + freq_pos ++; + } + *dense_pos = mean + realPrecision * (ptrdiff_t)(max_index + 1 - radius); + *mean_freq = max_sum * 1.0 / sample_count; + + free(freq_intervals); + free(intervals); + return powerOf2; +} + + +// 3D: modified for higher performance +unsigned char * SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(float *oriData, size_t r1, size_t r2, size_t r3, float realPrecision, size_t * comp_size){ + +#ifdef HAVE_TIMECMPR + float* decData = NULL; + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData = (float*)(multisteps->hist_data); +#endif + + float recip_realPrecision = 1/realPrecision; + //printf("recip_realPrecision = %.20G\n", recip_realPrecision); + + unsigned int quantization_intervals; + float sz_sample_correct_freq = -1;//0.5; //-1 + float dense_pos; + float mean_flush_freq; + unsigned char use_mean = 0; + + // calculate block dims + size_t num_x, num_y, num_z; + size_t block_size = 6; + SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r1, num_x, block_size); + SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r2, num_y, block_size); + SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r3, num_z, block_size); + + //printf("num_x=%zu, num_y=%zu, num_z=%zu\n", num_x, num_y, num_z); + + size_t split_index_x, split_index_y, split_index_z; + size_t early_blockcount_x, early_blockcount_y, early_blockcount_z; + size_t late_blockcount_x, late_blockcount_y, late_blockcount_z; + SZ_COMPUTE_BLOCKCOUNT(r1, num_x, split_index_x, early_blockcount_x, late_blockcount_x); + SZ_COMPUTE_BLOCKCOUNT(r2, num_y, split_index_y, early_blockcount_y, late_blockcount_y); + SZ_COMPUTE_BLOCKCOUNT(r3, num_z, split_index_z, early_blockcount_z, late_blockcount_z); + + size_t max_num_block_elements = 
early_blockcount_x * early_blockcount_y * early_blockcount_z; + size_t num_blocks = num_x * num_y * num_z; + size_t num_elements = r1 * r2 * r3; + + size_t dim0_offset = r2 * r3; + size_t dim1_offset = r3; + + int * result_type = (int *) malloc(num_elements * sizeof(int)); + memset(result_type, 0, num_elements*sizeof(int)); + size_t unpred_data_max_size = max_num_block_elements; + float * result_unpredictable_data = (float *) malloc(unpred_data_max_size * sizeof(float) * num_blocks); + size_t total_unpred = 0; + size_t unpredictable_count; + size_t max_unpred_count = 0; + float * data_pos = oriData; + int * type = result_type; + size_t type_offset; + size_t offset_x, offset_y, offset_z; + size_t current_blockcount_x, current_blockcount_y, current_blockcount_z; + + float * reg_params = (float *) malloc(num_blocks * 4 * sizeof(float)); + float * reg_params_pos = reg_params; + // move regression part out + size_t params_offset_b = num_blocks; + size_t params_offset_c = 2*num_blocks; + size_t params_offset_d = 3*num_blocks; + for(size_t i=0; ioptQuantMode==1) + { + quantization_intervals = optimize_intervals_float_3D_with_freq_and_dense_pos(oriData, r1, r2, r3, realPrecision, &dense_pos, &sz_sample_correct_freq, &mean_flush_freq); + if(mean_flush_freq > 0.5 || mean_flush_freq > sz_sample_correct_freq) use_mean = 1; + updateQuantizationInfo(quantization_intervals); + } + else{ + quantization_intervals = exe_params->intvCapacity; + } + + float mean = 0; + if(use_mean){ + // compute mean + float sum = 0.0; + size_t mean_count = 0; + for(size_t i=0; i 0) mean = sum / mean_count; + } + + float tmp_realPrecision = realPrecision; + + // use two prediction buffers for higher performance + float * unpredictable_data = result_unpredictable_data; + unsigned char * indicator = (unsigned char *) malloc(num_blocks * sizeof(unsigned char)); + memset(indicator, 0, num_blocks * sizeof(unsigned char)); + size_t reg_count = 0; + size_t strip_dim_0 = early_blockcount_x + 1; + size_t 
strip_dim_1 = r2 + 1; + size_t strip_dim_2 = r3 + 1; + size_t strip_dim0_offset = strip_dim_1 * strip_dim_2; + size_t strip_dim1_offset = strip_dim_2; + unsigned char * indicator_pos = indicator; + + size_t prediction_buffer_size = strip_dim_0 * strip_dim0_offset * sizeof(float); + float * prediction_buffer_1 = (float *) malloc(prediction_buffer_size); + memset(prediction_buffer_1, 0, prediction_buffer_size); + float * prediction_buffer_2 = (float *) malloc(prediction_buffer_size); + memset(prediction_buffer_2, 0, prediction_buffer_size); + float * cur_pb_buf = prediction_buffer_1; + float * next_pb_buf = prediction_buffer_2; + float * cur_pb_buf_pos; + float * next_pb_buf_pos; + int intvCapacity = quantization_intervals;// exe_params->intvCapacity; + int intvRadius = intvCapacity/2; //exe_params->intvRadius; + int use_reg = 0; + float noise = realPrecision * 1.22; + + reg_params_pos = reg_params; + // compress the regression coefficients on the fly + float last_coeffcients[4] = {0.0}; + int coeff_intvCapacity_sz = 65536; + int coeff_intvRadius = coeff_intvCapacity_sz / 2; + int * coeff_type[4]; + int * coeff_result_type = (int *) malloc(num_blocks*4*sizeof(int)); + float * coeff_unpred_data[4]; + float * coeff_unpredictable_data = (float *) malloc(num_blocks*4*sizeof(float)); + float precision[4], recip_precision[4]; + precision[0] = precision_a, precision[1] = precision_b, precision[2] = precision_c, precision[3] = precision_d; + recip_precision[0] = 1/precision_a, recip_precision[1] = 1/precision_b, recip_precision[2] = 1/precision_c, recip_precision[3] = 1/precision_d; + + for(int i=0; i<4; i++){ + coeff_type[i] = coeff_result_type + i * num_blocks; + coeff_unpred_data[i] = coeff_unpredictable_data + i * num_blocks; + } + int coeff_index = 0; + unsigned int coeff_unpredictable_count[4] = {0}; + + if(use_mean){ + int intvCapacity_sz = intvCapacity - 2; + for(size_t i=0; iprecision[e]){ + coeff_type[e][coeff_index] = 0; + last_coeffcients[e] = cur_coeff; + 
coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; + } + } + else{ + coeff_type[e][coeff_index] = 0; + last_coeffcients[e] = cur_coeff; + coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; + } + } + coeff_index ++; + } + float curData; + float pred; + float itvNum; + float diff; + size_t index = 0; + size_t block_unpredictable_count = 0; + float * cur_data_pos = data_pos; + for(size_t ii=0; iitmp_realPrecision){ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + +#ifdef HAVE_TIMECMPR + size_t point_offset = ii*dim0_offset + jj*dim1_offset + kk; + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[block_offset + point_offset] = pred; +#endif + + if((jj == current_blockcount_y - 1) || (kk == current_blockcount_z - 1)){ + // assign value to block surfaces + pb_pos[ii * strip_dim0_offset + jj * strip_dim1_offset + kk] = pred; + } + index ++; + cur_data_pos ++; + } + cur_data_pos += dim1_offset - current_blockcount_z; + } + cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + /*dealing with the last ii (boundary)*/ + { + // ii == current_blockcount_x - 1 + size_t ii = current_blockcount_x - 1; + for(size_t jj=0; jjrealPrecision){ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + +#ifdef HAVE_TIMECMPR + size_t point_offset = ii*dim0_offset + jj*dim1_offset + kk; + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[block_offset + point_offset] = pred; +#endif + + if((jj == current_blockcount_y - 1) || (kk == current_blockcount_z - 1)){ + // assign value to block surfaces + pb_pos[ii * strip_dim0_offset + jj * strip_dim1_offset + kk] = pred; + } + // assign value 
to next prediction buffer + next_pb_pos[jj * strip_dim1_offset + kk] = pred; + index ++; + cur_data_pos ++; + } + cur_data_pos += dim1_offset - current_blockcount_z; + } + } + unpredictable_count = block_unpredictable_count; + strip_unpredictable_count += unpredictable_count; + unpredictable_data += unpredictable_count; + + reg_count ++; + } + else{ + // use SZ + // SZ predication + unpredictable_count = 0; + float * cur_pb_pos = pb_pos; + float * cur_data_pos = data_pos; + float curData; + float pred3D; + float itvNum, diff; + size_t index = 0; + for(size_t ii=0; iirealPrecision){ + type[index] = 0; + *cur_pb_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + *cur_pb_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } +#ifdef HAVE_TIMECMPR + size_t point_offset = ii*dim0_offset + jj*dim1_offset + kk; + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[block_offset + point_offset] = *cur_pb_pos; +#endif + + index ++; + cur_pb_pos ++; + cur_data_pos ++; + } + cur_pb_pos += strip_dim1_offset - current_blockcount_z; + cur_data_pos += dim1_offset - current_blockcount_z; + } + cur_pb_pos += strip_dim0_offset - current_blockcount_y * strip_dim1_offset; + cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + /*dealing with the last ii (boundary)*/ + { + // ii == current_blockcount_x - 1 + for(size_t jj=0; jjrealPrecision){ + type[index] = 0; + *cur_pb_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + *cur_pb_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } +#ifdef HAVE_TIMECMPR + size_t ii = current_blockcount_x - 1; + size_t point_offset = ii*dim0_offset + jj*dim1_offset + kk; + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[block_offset + point_offset] = *cur_pb_pos; +#endif + + next_pb_pos[jj * strip_dim1_offset + kk] = *cur_pb_pos; + index ++; + 
cur_pb_pos ++; + cur_data_pos ++; + } + cur_pb_pos += strip_dim1_offset - current_blockcount_z; + cur_data_pos += dim1_offset - current_blockcount_z; + } + } + strip_unpredictable_count += unpredictable_count; + unpredictable_data += unpredictable_count; + // change indicator + indicator_pos[k] = 1; + }// end SZ + + reg_params_pos ++; + data_pos += current_blockcount_z; + pb_pos += current_blockcount_z; + next_pb_pos += current_blockcount_z; + type += current_blockcount_x * current_blockcount_y * current_blockcount_z; + + } // end k + + if(strip_unpredictable_count > max_unpred_count){ + max_unpred_count = strip_unpredictable_count; + } + total_unpred += strip_unpredictable_count; + indicator_pos += num_z; + }// end j + float * tmp; + tmp = cur_pb_buf; + cur_pb_buf = next_pb_buf; + next_pb_buf = tmp; + }// end i + } + else{ + int intvCapacity_sz = intvCapacity - 2; + for(size_t i=0; iprecision[e]){ + coeff_type[e][coeff_index] = 0; + last_coeffcients[e] = cur_coeff; + coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; + } + } + else{ + coeff_type[e][coeff_index] = 0; + last_coeffcients[e] = cur_coeff; + coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; + } + } + coeff_index ++; + } + float curData; + float pred; + float itvNum; + float diff; + size_t index = 0; + size_t block_unpredictable_count = 0; + float * cur_data_pos = data_pos; + for(size_t ii=0; iirealPrecision){ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + +#ifdef HAVE_TIMECMPR + size_t point_offset = ii*dim0_offset + jj*dim1_offset + kk; + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[block_offset + point_offset] = pred; +#endif + + + if((jj == current_blockcount_y - 1) || (kk == current_blockcount_z - 1)){ + // assign value to block surfaces + pb_pos[ii * strip_dim0_offset + jj * 
strip_dim1_offset + kk] = pred; + } + index ++; + cur_data_pos ++; + } + cur_data_pos += dim1_offset - current_blockcount_z; + } + cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + /*dealing with the last ii (boundary)*/ + { + // ii == current_blockcount_x - 1 + size_t ii = current_blockcount_x - 1; + for(size_t jj=0; jjrealPrecision){ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + +#ifdef HAVE_TIMECMPR + size_t point_offset = ii*dim0_offset + jj*dim1_offset + kk; + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[block_offset + point_offset] = pred; +#endif + + if((jj == current_blockcount_y - 1) || (kk == current_blockcount_z - 1)){ + // assign value to block surfaces + pb_pos[ii * strip_dim0_offset + jj * strip_dim1_offset + kk] = pred; + } + // assign value to next prediction buffer + next_pb_pos[jj * strip_dim1_offset + kk] = pred; + index ++; + cur_data_pos ++; + } + cur_data_pos += dim1_offset - current_blockcount_z; + } + } + unpredictable_count = block_unpredictable_count; + strip_unpredictable_count += unpredictable_count; + unpredictable_data += unpredictable_count; + reg_count ++; + } + else{ + // use SZ + // SZ predication + unpredictable_count = 0; + float * cur_pb_pos = pb_pos; + float * cur_data_pos = data_pos; + float curData; + float pred3D; + float itvNum, diff; + size_t index = 0; + for(size_t ii=0; iirealPrecision){ + type[index] = 0; + *cur_pb_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + *cur_pb_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + +#ifdef HAVE_TIMECMPR + size_t point_offset = ii*dim0_offset + jj*dim1_offset + kk; + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[block_offset + point_offset] = *cur_pb_pos; +#endif + 
index ++; + cur_pb_pos ++; + cur_data_pos ++; + } + cur_pb_pos += strip_dim1_offset - current_blockcount_z; + cur_data_pos += dim1_offset - current_blockcount_z; + } + cur_pb_pos += strip_dim0_offset - current_blockcount_y * strip_dim1_offset; + cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + /*dealing with the last ii (boundary)*/ + { + // ii == current_blockcount_x - 1 + for(size_t jj=0; jjrealPrecision){ + type[index] = 0; + *cur_pb_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + *cur_pb_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + +#ifdef HAVE_TIMECMPR + size_t ii = current_blockcount_x - 1; + size_t point_offset = ii*dim0_offset + jj*dim1_offset + kk; + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[block_offset + point_offset] = *cur_pb_pos; +#endif + + // assign value to next prediction buffer + next_pb_pos[jj * strip_dim1_offset + kk] = *cur_pb_pos; + index ++; + cur_pb_pos ++; + cur_data_pos ++; + } + cur_pb_pos += strip_dim1_offset - current_blockcount_z; + cur_data_pos += dim1_offset - current_blockcount_z; + } + } + strip_unpredictable_count += unpredictable_count; + unpredictable_data += unpredictable_count; + // change indicator + indicator_pos[k] = 1; + }// end SZ + + reg_params_pos ++; + data_pos += current_blockcount_z; + pb_pos += current_blockcount_z; + next_pb_pos += current_blockcount_z; + type += current_blockcount_x * current_blockcount_y * current_blockcount_z; + + } + + if(strip_unpredictable_count > max_unpred_count){ + max_unpred_count = strip_unpredictable_count; + } + total_unpred += strip_unpredictable_count; + indicator_pos += num_z; + } + float * tmp; + tmp = cur_pb_buf; + cur_pb_buf = next_pb_buf; + next_pb_buf = tmp; + } + } + + free(prediction_buffer_1); + free(prediction_buffer_2); + + int stateNum = 2*quantization_intervals; + HuffmanTree* huffmanTree = createHuffmanTree(stateNum); + + size_t 
nodeCount = 0; + init(huffmanTree, result_type, num_elements); + size_t i = 0; + for (i = 0; i < huffmanTree->stateNum; i++) + if (huffmanTree->code[i]) nodeCount++; + nodeCount = nodeCount*2-1; + + unsigned char *treeBytes; + unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes); + + unsigned int meta_data_offset = 3 + 1 + MetaDataByteLength; + // total size metadata # elements real precision intervals nodeCount huffman block index unpredicatable count mean unpred size elements + unsigned char * result = (unsigned char *) calloc(meta_data_offset + exe_params->SZ_SIZE_TYPE + sizeof(float) + sizeof(int) + sizeof(int) + 5*treeByteSize + 4*num_blocks*sizeof(int) + num_blocks * sizeof(unsigned short) + num_blocks * sizeof(unsigned short) + num_blocks * sizeof(float) + total_unpred * sizeof(float) + num_elements * sizeof(int), 1); + unsigned char * result_pos = result; + initRandomAccessBytes(result_pos); + + result_pos += meta_data_offset; + + sizeToBytes(result_pos,num_elements); //SZ_SIZE_TYPE: 4 or 8 + result_pos += exe_params->SZ_SIZE_TYPE; + + intToBytes_bigEndian(result_pos, block_size); + result_pos += sizeof(int); + floatToBytes(result_pos, realPrecision); + result_pos += sizeof(float); + intToBytes_bigEndian(result_pos, quantization_intervals); + result_pos += sizeof(int); + intToBytes_bigEndian(result_pos, treeByteSize); + result_pos += sizeof(int); + intToBytes_bigEndian(result_pos, nodeCount); + result_pos += sizeof(int); + memcpy(result_pos, treeBytes, treeByteSize); + result_pos += treeByteSize; + free(treeBytes); + + memcpy(result_pos, &use_mean, sizeof(unsigned char)); + result_pos += sizeof(unsigned char); + memcpy(result_pos, &mean, sizeof(float)); + result_pos += sizeof(float); + + size_t indicator_size = convertIntArray2ByteArray_fast_1b_to_result(indicator, num_blocks, result_pos); + result_pos += indicator_size; + + //convert the lead/mid/resi to byte stream + + //printf("reg_count = %d, num_blocks = 
%d\n", reg_count, num_blocks); + if(reg_count > 0){ + for(int e=0; e<4; e++){ + int stateNum = 2*coeff_intvCapacity_sz; + HuffmanTree* huffmanTree = createHuffmanTree(stateNum); + size_t nodeCount = 0; + init(huffmanTree, coeff_type[e], reg_count); + size_t i = 0; + for (i = 0; i < huffmanTree->stateNum; i++) + if (huffmanTree->code[i]) nodeCount++; + nodeCount = nodeCount*2-1; + unsigned char *treeBytes; + unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes); + floatToBytes(result_pos, precision[e]); + result_pos += sizeof(float); + intToBytes_bigEndian(result_pos, coeff_intvRadius); + result_pos += sizeof(int); + intToBytes_bigEndian(result_pos, treeByteSize); + result_pos += sizeof(int); + intToBytes_bigEndian(result_pos, nodeCount); + result_pos += sizeof(int); + memcpy(result_pos, treeBytes, treeByteSize); + result_pos += treeByteSize; + free(treeBytes); + size_t typeArray_size = 0; + encode(huffmanTree, coeff_type[e], reg_count, result_pos + sizeof(size_t), &typeArray_size); + sizeToBytes(result_pos, typeArray_size); + result_pos += sizeof(size_t) + typeArray_size; + intToBytes_bigEndian(result_pos, coeff_unpredictable_count[e]); + result_pos += sizeof(int); + memcpy(result_pos, coeff_unpred_data[e], coeff_unpredictable_count[e]*sizeof(float)); + result_pos += coeff_unpredictable_count[e]*sizeof(float); + SZ_ReleaseHuffman(huffmanTree); + } + } + free(coeff_result_type); + free(coeff_unpredictable_data); + + //record the number of unpredictable data and also store them + memcpy(result_pos, &total_unpred, sizeof(size_t)); + result_pos += sizeof(size_t); + memcpy(result_pos, result_unpredictable_data, total_unpred * sizeof(float)); + result_pos += total_unpred * sizeof(float); + size_t typeArray_size = 0; + encode(huffmanTree, result_type, num_elements, result_pos, &typeArray_size); + result_pos += typeArray_size; + size_t totalEncodeSize = result_pos - result; + free(indicator); + free(result_unpredictable_data); 
+ free(result_type); + free(reg_params); + +#ifdef HAVE_WRITESTATS + writeHuffmanInfo(treeByteSize, typeArray_size, num_elements*sizeof(float), nodeCount); + writeBlockInfo(use_mean, block_size, reg_count, num_blocks); + writeUnpredictDataCounts(total_unpred, num_elements); +#endif + + SZ_ReleaseHuffman(huffmanTree); + *comp_size = totalEncodeSize; + return result; +} + + +unsigned char * SZ_compress_float_3D_MDQ_random_access_with_blocked_regression(float *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t * comp_size){ + + unsigned int quantization_intervals; + float sz_sample_correct_freq = -1;//0.5; //-1 + float dense_pos; + float mean_flush_freq; + unsigned char use_mean = 0; + + // calculate block dims + size_t num_x, num_y, num_z; + size_t block_size = 6; + num_x = (r1 - 1) / block_size + 1; + num_y = (r2 - 1) / block_size + 1; + num_z = (r3 - 1) / block_size + 1; + + size_t max_num_block_elements = block_size * block_size * block_size; + size_t num_blocks = num_x * num_y * num_z; + size_t num_elements = r1 * r2 * r3; + + size_t dim0_offset = r2 * r3; + size_t dim1_offset = r3; + + int * result_type = (int *) malloc(num_blocks*max_num_block_elements * sizeof(int)); + size_t unpred_data_max_size = max_num_block_elements; + float * result_unpredictable_data = (float *) malloc(unpred_data_max_size * sizeof(float) * num_blocks); + size_t total_unpred = 0; + size_t unpredictable_count; + float * data_pos = oriData; + int * type = result_type; + float * reg_params = (float *) malloc(num_blocks * 4 * sizeof(float)); + float * reg_params_pos = reg_params; + // move regression part out + size_t params_offset_b = num_blocks; + size_t params_offset_c = 2*num_blocks; + size_t params_offset_d = 3*num_blocks; + float * pred_buffer = (float *) malloc((block_size+1)*(block_size+1)*(block_size+1)*sizeof(float)); + float * pred_buffer_pos = NULL; + float * block_data_pos_x = NULL; + float * block_data_pos_y = NULL; + float * block_data_pos_z = NULL; + 
for(size_t i=0; ioptQuantMode==1) + { + quantization_intervals = optimize_intervals_float_3D_with_freq_and_dense_pos(oriData, r1, r2, r3, realPrecision, &dense_pos, &sz_sample_correct_freq, &mean_flush_freq); + if(mean_flush_freq > 0.5 || mean_flush_freq > sz_sample_correct_freq) use_mean = 1; + updateQuantizationInfo(quantization_intervals); + } + else{ + quantization_intervals = exe_params->intvCapacity; + } + + float mean = 0; + if(use_mean){ + // compute mean + double sum = 0.0; + size_t mean_count = 0; + for(size_t i=0; i 0) mean = sum / mean_count; + } + + double tmp_realPrecision = realPrecision; + + // use two prediction buffers for higher performance + float * unpredictable_data = result_unpredictable_data; + unsigned char * indicator = (unsigned char *) malloc(num_blocks * sizeof(unsigned char)); + memset(indicator, 0, num_blocks * sizeof(unsigned char)); + size_t reg_count = 0; + unsigned char * indicator_pos = indicator; + + int intvCapacity = quantization_intervals; //exe_params->intvCapacity; + int intvRadius = intvCapacity/2; //exe_params->intvRadius; + int use_reg = 0; + float noise = realPrecision * 1.22; + + reg_params_pos = reg_params; + // compress the regression coefficients on the fly + float last_coeffcients[4] = {0.0}; + int coeff_intvCapacity_sz = 65536; + int coeff_intvRadius = coeff_intvCapacity_sz / 2; + int * coeff_type[4]; + int * coeff_result_type = (int *) malloc(num_blocks*4*sizeof(int)); + float * coeff_unpred_data[4]; + float * coeff_unpredictable_data = (float *) malloc(num_blocks*4*sizeof(float)); + double precision[4]; + precision[0] = precision_a, precision[1] = precision_b, precision[2] = precision_c, precision[3] = precision_d; + for(int i=0; i<4; i++){ + coeff_type[i] = coeff_result_type + i * num_blocks; + coeff_unpred_data[i] = coeff_unpredictable_data + i * num_blocks; + } + int coeff_index = 0; + unsigned int coeff_unpredictable_count[4] = {0}; + + memset(pred_buffer, 0, 
(block_size+1)*(block_size+1)*(block_size+1)*sizeof(float)); + int pred_buffer_block_size = block_size + 1; + int strip_dim0_offset = pred_buffer_block_size * pred_buffer_block_size; + int strip_dim1_offset = pred_buffer_block_size; + + if(use_mean){ + int intvCapacity_sz = intvCapacity - 2; + type = result_type; + for(size_t i=0; iprecision[e]){ + coeff_type[e][coeff_index] = 0; + last_coeffcients[e] = cur_coeff; + coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; + } + } + else{ + coeff_type[e][coeff_index] = 0; + last_coeffcients[e] = cur_coeff; + coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; + } + } + coeff_index ++; + } + float curData; + float pred; + double itvNum; + double diff; + size_t index = 0; + size_t block_unpredictable_count = 0; + float * cur_data_pos = pred_buffer + pred_buffer_block_size*pred_buffer_block_size + pred_buffer_block_size + 1; + for(size_t ii=0; iitmp_realPrecision){ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + index ++; + cur_data_pos ++; + } + cur_data_pos ++; + } + cur_data_pos += pred_buffer_block_size; + } + + total_unpred += block_unpredictable_count; + unpredictable_data += block_unpredictable_count; + reg_count ++; + } + else{ + // use SZ + // SZ predication + unpredictable_count = 0; + float * cur_data_pos = pred_buffer + pred_buffer_block_size*pred_buffer_block_size + pred_buffer_block_size + 1; + float curData; + float pred3D; + double itvNum, diff; + size_t index = 0; + for(size_t ii=0; iitmp_realPrecision){ + type[index] = 0; + *cur_data_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + *cur_data_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + index ++; + cur_data_pos ++; + } + cur_data_pos ++; + } + cur_data_pos += 
pred_buffer_block_size; + } + total_unpred += unpredictable_count; + unpredictable_data += unpredictable_count; + // change indicator + indicator_pos[k] = 1; + }// end SZ + reg_params_pos ++; + type += block_size * block_size * block_size; + } // end k + indicator_pos += num_z; + }// end j + }// end i + } + else{ + int intvCapacity_sz = intvCapacity - 2; + type = result_type; + for(size_t i=0; iprecision[e]){ + coeff_type[e][coeff_index] = 0; + last_coeffcients[e] = cur_coeff; + coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; + } + } + else{ + coeff_type[e][coeff_index] = 0; + last_coeffcients[e] = cur_coeff; + coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; + } + } + coeff_index ++; + } + float curData; + float pred; + double itvNum; + double diff; + size_t index = 0; + size_t block_unpredictable_count = 0; + float * cur_data_pos = pred_buffer + pred_buffer_block_size*pred_buffer_block_size + pred_buffer_block_size + 1; + for(size_t ii=0; iitmp_realPrecision){ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + index ++; + cur_data_pos ++; + } + cur_data_pos ++; + } + cur_data_pos += pred_buffer_block_size; + } + total_unpred += block_unpredictable_count; + unpredictable_data += block_unpredictable_count; + reg_count ++; + } + else{ + // use SZ + // SZ predication + unpredictable_count = 0; + float * cur_data_pos = pred_buffer + pred_buffer_block_size*pred_buffer_block_size + pred_buffer_block_size + 1; + float curData; + float pred3D; + double itvNum, diff; + size_t index = 0; + for(size_t ii=0; iitmp_realPrecision){ + type[index] = 0; + *cur_data_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + *cur_data_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + index ++; + 
cur_data_pos ++; + } + cur_data_pos ++; + } + cur_data_pos += pred_buffer_block_size; + } + total_unpred += unpredictable_count; + unpredictable_data += unpredictable_count; + // change indicator + indicator_pos[k] = 1; + }// end SZ + reg_params_pos ++; + type += block_size * block_size * block_size; + } + indicator_pos += num_z; + } + } + } + free(pred_buffer); + int stateNum = 2*quantization_intervals; + HuffmanTree* huffmanTree = createHuffmanTree(stateNum); + + size_t nodeCount = 0; + init(huffmanTree, result_type, num_blocks*max_num_block_elements); + size_t i = 0; + for (i = 0; i < huffmanTree->stateNum; i++) + if (huffmanTree->code[i]) nodeCount++; + nodeCount = nodeCount*2-1; + + unsigned char *treeBytes; + unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes); + + unsigned int meta_data_offset = 3 + 1 + MetaDataByteLength; + // total size metadata # elements real precision intervals nodeCount huffman block index unpredicatable count mean unpred size elements + unsigned char * result = (unsigned char *) calloc(meta_data_offset + exe_params->SZ_SIZE_TYPE + sizeof(double) + sizeof(int) + sizeof(int) + 5*treeByteSize + 4*num_blocks*sizeof(int)+ num_blocks * sizeof(unsigned short) + num_blocks * sizeof(unsigned short) + num_blocks * sizeof(float) + total_unpred * sizeof(float) + num_elements * sizeof(int), 1); + unsigned char * result_pos = result; + initRandomAccessBytes(result_pos); + + result_pos += meta_data_offset; + + sizeToBytes(result_pos,num_elements); //SZ_SIZE_TYPE: 4 or 8 + result_pos += exe_params->SZ_SIZE_TYPE; + + intToBytes_bigEndian(result_pos, block_size); + result_pos += sizeof(int); + doubleToBytes(result_pos, realPrecision); + result_pos += sizeof(double); + intToBytes_bigEndian(result_pos, quantization_intervals); + result_pos += sizeof(int); + intToBytes_bigEndian(result_pos, treeByteSize); + result_pos += sizeof(int); + intToBytes_bigEndian(result_pos, nodeCount); + result_pos += sizeof(int); 
+ memcpy(result_pos, treeBytes, treeByteSize); + result_pos += treeByteSize; + free(treeBytes); + + memcpy(result_pos, &use_mean, sizeof(unsigned char)); + result_pos += sizeof(unsigned char); + memcpy(result_pos, &mean, sizeof(float)); + result_pos += sizeof(float); + size_t indicator_size = convertIntArray2ByteArray_fast_1b_to_result(indicator, num_blocks, result_pos); + result_pos += indicator_size; + + //convert the lead/mid/resi to byte stream + if(reg_count > 0){ + for(int e=0; e<4; e++){ + int stateNum = 2*coeff_intvCapacity_sz; + HuffmanTree* huffmanTree = createHuffmanTree(stateNum); + size_t nodeCount = 0; + init(huffmanTree, coeff_type[e], reg_count); + size_t i = 0; + for (i = 0; i < huffmanTree->stateNum; i++) + if (huffmanTree->code[i]) nodeCount++; + nodeCount = nodeCount*2-1; + unsigned char *treeBytes; + unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes); + doubleToBytes(result_pos, precision[e]); + result_pos += sizeof(double); + intToBytes_bigEndian(result_pos, coeff_intvRadius); + result_pos += sizeof(int); + intToBytes_bigEndian(result_pos, treeByteSize); + result_pos += sizeof(int); + intToBytes_bigEndian(result_pos, nodeCount); + result_pos += sizeof(int); + memcpy(result_pos, treeBytes, treeByteSize); + result_pos += treeByteSize; + free(treeBytes); + size_t typeArray_size = 0; + encode(huffmanTree, coeff_type[e], reg_count, result_pos + sizeof(size_t), &typeArray_size); + sizeToBytes(result_pos, typeArray_size); + result_pos += sizeof(size_t) + typeArray_size; + intToBytes_bigEndian(result_pos, coeff_unpredictable_count[e]); + result_pos += sizeof(int); + memcpy(result_pos, coeff_unpred_data[e], coeff_unpredictable_count[e]*sizeof(float)); + result_pos += coeff_unpredictable_count[e]*sizeof(float); + SZ_ReleaseHuffman(huffmanTree); + } + } + free(coeff_result_type); + free(coeff_unpredictable_data); + + //record the number of unpredictable data and also store them + memcpy(result_pos, 
&total_unpred, sizeof(size_t)); + result_pos += sizeof(size_t); + memcpy(result_pos, result_unpredictable_data, total_unpred * sizeof(float)); + result_pos += total_unpred * sizeof(float); + size_t typeArray_size = 0; + encode(huffmanTree, result_type, num_blocks*max_num_block_elements, result_pos, &typeArray_size); + result_pos += typeArray_size; + size_t totalEncodeSize = result_pos - result; + free(indicator); + free(result_unpredictable_data); + free(result_type); + free(reg_params); + + + SZ_ReleaseHuffman(huffmanTree); + *comp_size = totalEncodeSize; + return result; +} + +// random access +unsigned char * SZ_compress_float_1D_MDQ_decompression_random_access_with_blocked_regression(float *oriData, size_t r1, double realPrecision, size_t * comp_size){ + + unsigned int quantization_intervals; + float sz_sample_correct_freq = -1;//0.5; //-1 + float dense_pos; + float mean_flush_freq; + unsigned char use_mean = 0; + + // calculate block dims + size_t num_x; + size_t block_size = 256; + num_x = (r1 - 1) / block_size + 1; + + size_t max_num_block_elements = block_size; + size_t num_blocks = num_x; + size_t num_elements = r1; + + int * result_type = (int *) malloc(num_blocks*max_num_block_elements * sizeof(int)); + size_t unpred_data_max_size = max_num_block_elements; + float * result_unpredictable_data = (float *) malloc(unpred_data_max_size * sizeof(float) * num_blocks); + size_t total_unpred = 0; + size_t unpredictable_count; + float * data_pos = oriData; + int * type = result_type; + float * reg_params = (float *) malloc(num_blocks * 2 * sizeof(float)); + float * reg_params_pos = reg_params; + // move regression part out + size_t params_offset_b = num_blocks; + float * pred_buffer = (float *) malloc((block_size+1)*sizeof(float)); + float * pred_buffer_pos = NULL; + float * block_data_pos_x = NULL; + for(size_t i=0; ioptQuantMode==1) + { + quantization_intervals = optimize_intervals_float_1D_with_freq_and_dense_pos(oriData, r1, realPrecision, &dense_pos, 
&sz_sample_correct_freq, &mean_flush_freq); + if(mean_flush_freq > 0.5 || mean_flush_freq > sz_sample_correct_freq) use_mean = 1; + updateQuantizationInfo(quantization_intervals); + } + else{ + quantization_intervals = exe_params->intvCapacity; + } + + float mean = 0; + if(use_mean){ + // compute mean + double sum = 0.0; + size_t mean_count = 0; + for(size_t i=0; i 0) mean = sum / mean_count; + } + + double tmp_realPrecision = realPrecision; + + // use two prediction buffers for higher performance + float * unpredictable_data = result_unpredictable_data; + unsigned char * indicator = (unsigned char *) malloc(num_blocks * sizeof(unsigned char)); + memset(indicator, 0, num_blocks * sizeof(unsigned char)); + unsigned char * indicator_pos = indicator; + + int intvCapacity = quantization_intervals; //exe_params->intvCapacity; + int intvRadius = intvCapacity/2; //exe_params->intvRadius; + float noise = realPrecision * 0.5; + reg_params_pos = reg_params; + + memset(pred_buffer, 0, (block_size+1)*sizeof(float)); + // select + int sample_distance = sqrt(block_size) + 1; + if(use_mean){ + for(size_t i=0; iprecision[e]){ + coeff_type[e][coeff_index] = 0; + last_coeffcients[e] = cur_coeff; + coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; + } + } + else{ + coeff_type[e][coeff_index] = 0; + last_coeffcients[e] = cur_coeff; + coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; + } + reg_params_separte[e][i] = last_coeffcients[e]; + } + coeff_index ++; + } + // pred & quantization + int * blockwise_unpred_count = (int *) malloc(num_blocks * sizeof(int)); + int * blockwise_unpred_count_pos = blockwise_unpred_count; + reg_params_pos = reg_params; + indicator_pos = indicator; + if(use_mean){ + int intvCapacity_sz = intvCapacity - 2; + type = result_type; + for(size_t i=0; itmp_realPrecision){ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + pred = curData; + 
unpredictable_data[block_unpredictable_count ++] = curData; + } + index ++; + cur_data_pos ++; + } + reg_params_pos ++; + total_unpred += block_unpredictable_count; + unpredictable_data += block_unpredictable_count; + *blockwise_unpred_count_pos = block_unpredictable_count; + } + else{ + // use SZ + // SZ predication + unpredictable_count = 0; + float * cur_data_pos = pred_buffer + 1; + float curData; + float pred3D; + double itvNum, diff; + size_t index = 0; + for(size_t ii=0; iitmp_realPrecision){ + type[index] = 0; + *cur_data_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + *cur_data_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + index ++; + cur_data_pos ++; + } + total_unpred += unpredictable_count; + unpredictable_data += unpredictable_count; + *blockwise_unpred_count_pos = unpredictable_count; + }// end SZ + blockwise_unpred_count_pos ++; + type += block_size; + indicator_pos ++; + }// end i + } + else{ + int intvCapacity_sz = intvCapacity; + type = result_type; + for(size_t i=0; itmp_realPrecision){ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + index ++; + cur_data_pos ++; + } + reg_params_pos ++; + total_unpred += block_unpredictable_count; + unpredictable_data += block_unpredictable_count; + *blockwise_unpred_count_pos = block_unpredictable_count; + } + else{ + // use SZ + // SZ predication + unpredictable_count = 0; + float * cur_data_pos = pred_buffer + 1; + float curData; + float pred3D; + double itvNum, diff; + size_t index = 0; + for(size_t ii=0; iitmp_realPrecision){ + type[index] = 0; + *cur_data_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + *cur_data_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } 
+ index ++; + cur_data_pos ++; + } + total_unpred += unpredictable_count; + unpredictable_data += unpredictable_count; + *blockwise_unpred_count_pos = unpredictable_count; + }// end SZ + blockwise_unpred_count_pos ++; + type += block_size; + indicator_pos ++; + }// end i + } + free(pred_buffer); + int stateNum = 2*quantization_intervals; + HuffmanTree* huffmanTree = createHuffmanTree(stateNum); + + size_t nodeCount = 0; + init(huffmanTree, result_type, num_blocks*max_num_block_elements); + size_t i = 0; + for (i = 0; i < huffmanTree->stateNum; i++) + if (huffmanTree->code[i]) nodeCount++; + nodeCount = nodeCount*2-1; + + unsigned char *treeBytes; + unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes); + + unsigned int meta_data_offset = 3 + 1 + MetaDataByteLength; + // total size metadata # elements real precision intervals nodeCount huffman block index unpredicatable count mean unpred size elements + unsigned char * result = (unsigned char *) calloc(meta_data_offset + exe_params->SZ_SIZE_TYPE + sizeof(double) + sizeof(int) + sizeof(int) + 5*treeByteSize +4*num_blocks*sizeof(int) + num_blocks * sizeof(unsigned short) + num_blocks * sizeof(unsigned short) + num_blocks * sizeof(float) + total_unpred * sizeof(float) + num_elements * sizeof(int), 1); + unsigned char * result_pos = result; + initRandomAccessBytes(result_pos); + + result_pos += meta_data_offset; + + sizeToBytes(result_pos,num_elements); //SZ_SIZE_TYPE: 4 or 8 + result_pos += exe_params->SZ_SIZE_TYPE; + + intToBytes_bigEndian(result_pos, block_size); + result_pos += sizeof(int); + doubleToBytes(result_pos, realPrecision); + result_pos += sizeof(double); + intToBytes_bigEndian(result_pos, quantization_intervals); + result_pos += sizeof(int); + intToBytes_bigEndian(result_pos, treeByteSize); + result_pos += sizeof(int); + intToBytes_bigEndian(result_pos, nodeCount); + result_pos += sizeof(int); + memcpy(result_pos, treeBytes, treeByteSize); + result_pos += 
treeByteSize; + free(treeBytes); + + memcpy(result_pos, &use_mean, sizeof(unsigned char)); + result_pos += sizeof(unsigned char); + memcpy(result_pos, &mean, sizeof(float)); + result_pos += sizeof(float); + size_t indicator_size = convertIntArray2ByteArray_fast_1b_to_result(indicator, num_blocks, result_pos); + result_pos += indicator_size; + + //convert the lead/mid/resi to byte stream + if(reg_count > 0){ + for(int e=0; e<2; e++){ + int stateNum = 2*coeff_intvCapacity_sz; + HuffmanTree* huffmanTree = createHuffmanTree(stateNum); + size_t nodeCount = 0; + init(huffmanTree, coeff_type[e], reg_count); + size_t i = 0; + for (i = 0; i < huffmanTree->stateNum; i++) + if (huffmanTree->code[i]) nodeCount++; + nodeCount = nodeCount*2-1; + unsigned char *treeBytes; + unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes); + doubleToBytes(result_pos, precision[e]); + result_pos += sizeof(double); + intToBytes_bigEndian(result_pos, coeff_intvRadius); + result_pos += sizeof(int); + intToBytes_bigEndian(result_pos, treeByteSize); + result_pos += sizeof(int); + intToBytes_bigEndian(result_pos, nodeCount); + result_pos += sizeof(int); + memcpy(result_pos, treeBytes, treeByteSize); + result_pos += treeByteSize; + free(treeBytes); + size_t typeArray_size = 0; + encode(huffmanTree, coeff_type[e], reg_count, result_pos + sizeof(size_t), &typeArray_size); + sizeToBytes(result_pos, typeArray_size); + result_pos += sizeof(size_t) + typeArray_size; + intToBytes_bigEndian(result_pos, coeff_unpredictable_count[e]); + result_pos += sizeof(int); + memcpy(result_pos, coeff_unpred_data[e], coeff_unpredictable_count[e]*sizeof(float)); + result_pos += coeff_unpredictable_count[e]*sizeof(float); + SZ_ReleaseHuffman(huffmanTree); + } + } + free(coeff_result_type); + free(coeff_unpredictable_data); + + //record the number of unpredictable data and also store them + memcpy(result_pos, &total_unpred, sizeof(size_t)); + result_pos += sizeof(size_t); + // 
record blockwise unpred data + size_t compressed_blockwise_unpred_count_size; + unsigned char * compressed_bw_unpred_count = SZ_compress_args(SZ_INT32, blockwise_unpred_count, &compressed_blockwise_unpred_count_size, ABS, 0.5, 0, 0, 0, 0, 0, 0, num_blocks); + memcpy(result_pos, &compressed_blockwise_unpred_count_size, sizeof(size_t)); + result_pos += sizeof(size_t); + memcpy(result_pos, compressed_bw_unpred_count, compressed_blockwise_unpred_count_size); + result_pos += compressed_blockwise_unpred_count_size; + free(blockwise_unpred_count); + free(compressed_bw_unpred_count); + memcpy(result_pos, result_unpredictable_data, total_unpred * sizeof(float)); + result_pos += total_unpred * sizeof(float); + + free(reg_params); + free(indicator); + free(result_unpredictable_data); + // encode type array by block + type = result_type; + size_t total_type_array_size = 0; + unsigned char * type_array_buffer = (unsigned char *) malloc(num_blocks*max_num_block_elements*sizeof(int)); + unsigned short * type_array_block_size = (unsigned short *) malloc(num_blocks*sizeof(unsigned short)); + unsigned char * type_array_buffer_pos = type_array_buffer; + unsigned short * type_array_block_size_pos = type_array_block_size; + + for(size_t i=0; ioptQuantMode==1) + { + quantization_intervals = optimize_intervals_float_2D_with_freq_and_dense_pos(oriData, r1, r2, realPrecision, &dense_pos, &sz_sample_correct_freq, &mean_flush_freq); + if(mean_flush_freq > 0.5 || mean_flush_freq > sz_sample_correct_freq) use_mean = 1; + updateQuantizationInfo(quantization_intervals); + } + else{ + quantization_intervals = exe_params->intvCapacity; + } + + float mean = 0; + if(use_mean){ + // compute mean + double sum = 0.0; + size_t mean_count = 0; + for(size_t i=0; i 0) mean = sum / mean_count; + } + + double tmp_realPrecision = realPrecision; + + // use two prediction buffers for higher performance + float * unpredictable_data = result_unpredictable_data; + unsigned char * indicator = (unsigned char *) 
malloc(num_blocks * sizeof(unsigned char)); + memset(indicator, 0, num_blocks * sizeof(unsigned char)); + unsigned char * indicator_pos = indicator; + + int intvCapacity = quantization_intervals; //exe_params->intvCapacity; + int intvRadius = intvCapacity/2; //exe_params->intvRadius; + float noise = realPrecision * 0.81; + reg_params_pos = reg_params; + + memset(pred_buffer, 0, (block_size+1)*(block_size+1)*sizeof(float)); + int pred_buffer_block_size = block_size + 1; + int strip_dim0_offset = pred_buffer_block_size; + + // select + if(use_mean){ + for(size_t i=0; iprecision[e]){ + coeff_type[e][coeff_index] = 0; + last_coeffcients[e] = cur_coeff; + coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; + } + } + else{ + coeff_type[e][coeff_index] = 0; + last_coeffcients[e] = cur_coeff; + coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; + } + reg_params_separte[e][i] = last_coeffcients[e]; + } + coeff_index ++; + } + // pred & quantization + int * blockwise_unpred_count = (int *) malloc(num_blocks * sizeof(int)); + int * blockwise_unpred_count_pos = blockwise_unpred_count; + reg_params_pos = reg_params; + indicator_pos = indicator; + if(use_mean){ + int intvCapacity_sz = intvCapacity - 2; + type = result_type; + for(size_t i=0; itmp_realPrecision){ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + index ++; + cur_data_pos ++; + } + cur_data_pos ++; + } + reg_params_pos ++; + total_unpred += block_unpredictable_count; + unpredictable_data += block_unpredictable_count; + *blockwise_unpred_count_pos = block_unpredictable_count; + } + else{ + // use SZ + // SZ predication + unpredictable_count = 0; + float * cur_data_pos = pred_buffer + pred_buffer_block_size + 1; + float curData; + float pred3D; + double itvNum, diff; + size_t index = 0; + for(size_t ii=0; 
iitmp_realPrecision){ + type[index] = 0; + *cur_data_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + *cur_data_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + index ++; + cur_data_pos ++; + } + cur_data_pos ++; + } + total_unpred += unpredictable_count; + unpredictable_data += unpredictable_count; + *blockwise_unpred_count_pos = unpredictable_count; + }// end SZ + blockwise_unpred_count_pos ++; + type += block_size * block_size; + indicator_pos ++; + }// end j + }// end i + } + else{ + int intvCapacity_sz = intvCapacity; + type = result_type; + for(size_t i=0; itmp_realPrecision){ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + index ++; + cur_data_pos ++; + } + cur_data_pos ++; + } + reg_params_pos ++; + total_unpred += block_unpredictable_count; + unpredictable_data += block_unpredictable_count; + *blockwise_unpred_count_pos = block_unpredictable_count; + } + else{ + // use SZ + // SZ predication + unpredictable_count = 0; + float * cur_data_pos = pred_buffer + pred_buffer_block_size + 1; + float curData; + float pred3D; + double itvNum, diff; + size_t index = 0; + for(size_t ii=0; iitmp_realPrecision){ + type[index] = 0; + *cur_data_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + *cur_data_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + index ++; + cur_data_pos ++; + } + cur_data_pos ++; + } + total_unpred += unpredictable_count; + unpredictable_data += unpredictable_count; + *blockwise_unpred_count_pos = unpredictable_count; + }// end SZ + blockwise_unpred_count_pos ++; + type += block_size * block_size; + indicator_pos ++; + }// end j + }// end i + } + + free(pred_buffer); + int stateNum = 
2*quantization_intervals; + HuffmanTree* huffmanTree = createHuffmanTree(stateNum); + + size_t nodeCount = 0; + init(huffmanTree, result_type, num_blocks*max_num_block_elements); + size_t i = 0; + for (i = 0; i < huffmanTree->stateNum; i++) + if (huffmanTree->code[i]) nodeCount++; + nodeCount = nodeCount*2-1; + + unsigned char *treeBytes; + unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes); + + unsigned int meta_data_offset = 3 + 1 + MetaDataByteLength; + // total size metadata # elements real precision intervals nodeCount huffman block index unpredicatable count mean unpred size elements + unsigned char * result = (unsigned char *) calloc(meta_data_offset + exe_params->SZ_SIZE_TYPE + sizeof(double) + sizeof(int) + sizeof(int) + 5*treeByteSize + 4*num_blocks*sizeof(int) +num_blocks * sizeof(unsigned short) + num_blocks * sizeof(unsigned short) + num_blocks * sizeof(float) + total_unpred * sizeof(float) + num_elements * sizeof(int), 1); + unsigned char * result_pos = result; + initRandomAccessBytes(result_pos); + + result_pos += meta_data_offset; + + sizeToBytes(result_pos,num_elements); //SZ_SIZE_TYPE: 4 or 8 + result_pos += exe_params->SZ_SIZE_TYPE; + + intToBytes_bigEndian(result_pos, block_size); + result_pos += sizeof(int); + doubleToBytes(result_pos, realPrecision); + result_pos += sizeof(double); + intToBytes_bigEndian(result_pos, quantization_intervals); + result_pos += sizeof(int); + intToBytes_bigEndian(result_pos, treeByteSize); + result_pos += sizeof(int); + intToBytes_bigEndian(result_pos, nodeCount); + result_pos += sizeof(int); + memcpy(result_pos, treeBytes, treeByteSize); + result_pos += treeByteSize; + free(treeBytes); + + memcpy(result_pos, &use_mean, sizeof(unsigned char)); + result_pos += sizeof(unsigned char); + memcpy(result_pos, &mean, sizeof(float)); + result_pos += sizeof(float); + size_t indicator_size = convertIntArray2ByteArray_fast_1b_to_result(indicator, num_blocks, result_pos); + 
result_pos += indicator_size; + + //convert the lead/mid/resi to byte stream + if(reg_count > 0){ + for(int e=0; e<3; e++){ + int stateNum = 2*coeff_intvCapacity_sz; + HuffmanTree* huffmanTree = createHuffmanTree(stateNum); + size_t nodeCount = 0; + init(huffmanTree, coeff_type[e], reg_count); + size_t i = 0; + for (i = 0; i < huffmanTree->stateNum; i++) + if (huffmanTree->code[i]) nodeCount++; + nodeCount = nodeCount*2-1; + unsigned char *treeBytes; + unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes); + doubleToBytes(result_pos, precision[e]); + result_pos += sizeof(double); + intToBytes_bigEndian(result_pos, coeff_intvRadius); + result_pos += sizeof(int); + intToBytes_bigEndian(result_pos, treeByteSize); + result_pos += sizeof(int); + intToBytes_bigEndian(result_pos, nodeCount); + result_pos += sizeof(int); + memcpy(result_pos, treeBytes, treeByteSize); + result_pos += treeByteSize; + free(treeBytes); + size_t typeArray_size = 0; + encode(huffmanTree, coeff_type[e], reg_count, result_pos + sizeof(size_t), &typeArray_size); + sizeToBytes(result_pos, typeArray_size); + result_pos += sizeof(size_t) + typeArray_size; + intToBytes_bigEndian(result_pos, coeff_unpredictable_count[e]); + result_pos += sizeof(int); + memcpy(result_pos, coeff_unpred_data[e], coeff_unpredictable_count[e]*sizeof(float)); + result_pos += coeff_unpredictable_count[e]*sizeof(float); + SZ_ReleaseHuffman(huffmanTree); + } + } + free(coeff_result_type); + free(coeff_unpredictable_data); + + //record the number of unpredictable data and also store them + memcpy(result_pos, &total_unpred, sizeof(size_t)); + result_pos += sizeof(size_t); + // record blockwise unpred data + size_t compressed_blockwise_unpred_count_size; + unsigned char * compressed_bw_unpred_count = SZ_compress_args(SZ_INT32, blockwise_unpred_count, &compressed_blockwise_unpred_count_size, ABS, 0.5, 0, 0, 0, 0, 0, 0, num_blocks); + memcpy(result_pos, 
&compressed_blockwise_unpred_count_size, sizeof(size_t)); + result_pos += sizeof(size_t); + memcpy(result_pos, compressed_bw_unpred_count, compressed_blockwise_unpred_count_size); + result_pos += compressed_blockwise_unpred_count_size; + free(blockwise_unpred_count); + free(compressed_bw_unpred_count); + memcpy(result_pos, result_unpredictable_data, total_unpred * sizeof(float)); + result_pos += total_unpred * sizeof(float); + + free(reg_params); + free(indicator); + free(result_unpredictable_data); + // encode type array by block + type = result_type; + size_t total_type_array_size = 0; + unsigned char * type_array_buffer = (unsigned char *) malloc(num_blocks*max_num_block_elements*sizeof(int)); + unsigned short * type_array_block_size = (unsigned short *) malloc(num_blocks*sizeof(unsigned short)); + unsigned char * type_array_buffer_pos = type_array_buffer; + unsigned short * type_array_block_size_pos = type_array_block_size; + + for(size_t i=0; ioptQuantMode==1) + { + quantization_intervals = optimize_intervals_float_3D_with_freq_and_dense_pos(oriData, r1, r2, r3, realPrecision, &dense_pos, &sz_sample_correct_freq, &mean_flush_freq); + if(mean_flush_freq > 0.5 || mean_flush_freq > sz_sample_correct_freq) use_mean = 1; + updateQuantizationInfo(quantization_intervals); + } + else{ + quantization_intervals = exe_params->intvCapacity; + } + + float mean = 0; + if(use_mean){ + // compute mean + double sum = 0.0; + size_t mean_count = 0; + for(size_t i=0; i 0) mean = sum / mean_count; + } + + double tmp_realPrecision = realPrecision; + + // use two prediction buffers for higher performance + float * unpredictable_data = result_unpredictable_data; + unsigned char * indicator = (unsigned char *) malloc(num_blocks * sizeof(unsigned char)); + memset(indicator, 0, num_blocks * sizeof(unsigned char)); + unsigned char * indicator_pos = indicator; + + int intvCapacity = quantization_intervals; //exe_params->intvCapacity; + int intvRadius = intvCapacity/2; 
//exe_params->intvRadius; + float noise = realPrecision * 1.22; + reg_params_pos = reg_params; + + memset(pred_buffer, 0, (block_size+1)*(block_size+1)*(block_size+1)*sizeof(float)); + int pred_buffer_block_size = block_size + 1; + int strip_dim0_offset = pred_buffer_block_size * pred_buffer_block_size; + int strip_dim1_offset = pred_buffer_block_size; + + // select + if(use_mean){ + for(size_t i=0; iprecision[e]){ + coeff_type[e][coeff_index] = 0; + last_coeffcients[e] = cur_coeff; + coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; + } + } + else{ + coeff_type[e][coeff_index] = 0; + last_coeffcients[e] = cur_coeff; + coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; + } + reg_params_separte[e][i] = last_coeffcients[e]; + } + coeff_index ++; + } + // pred & quantization + int * blockwise_unpred_count = (int *) malloc(num_blocks * sizeof(int)); + int * blockwise_unpred_count_pos = blockwise_unpred_count; + reg_params_pos = reg_params; + indicator_pos = indicator; + if(use_mean){ + int intvCapacity_sz = intvCapacity - 2; + type = result_type; + for(size_t i=0; itmp_realPrecision){ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + index ++; + cur_data_pos ++; + } + cur_data_pos ++; + } + cur_data_pos += pred_buffer_block_size; + } + reg_params_pos ++; + total_unpred += block_unpredictable_count; + unpredictable_data += block_unpredictable_count; + *blockwise_unpred_count_pos = block_unpredictable_count; + } + else{ + // use SZ + // SZ predication + unpredictable_count = 0; + float * cur_data_pos = pred_buffer + pred_buffer_block_size*pred_buffer_block_size + pred_buffer_block_size + 1; + float curData; + float pred3D; + double itvNum, diff; + size_t index = 0; + for(size_t ii=0; iitmp_realPrecision){ + type[index] = 0; + *cur_data_pos = curData; + 
unpredictable_data[unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + *cur_data_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + index ++; + cur_data_pos ++; + } + cur_data_pos ++; + } + cur_data_pos += pred_buffer_block_size; + } + total_unpred += unpredictable_count; + unpredictable_data += unpredictable_count; + *blockwise_unpred_count_pos = unpredictable_count; + }// end SZ + blockwise_unpred_count_pos ++; + type += block_size * block_size * block_size; + } // end k + indicator_pos += num_z; + }// end j + }// end i + } + else{ + int intvCapacity_sz = intvCapacity - 2; + type = result_type; + for(size_t i=0; itmp_realPrecision){ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + index ++; + cur_data_pos ++; + } + cur_data_pos ++; + } + cur_data_pos += pred_buffer_block_size; + } + reg_params_pos ++; + total_unpred += block_unpredictable_count; + unpredictable_data += block_unpredictable_count; + *blockwise_unpred_count_pos = block_unpredictable_count; + } + else{ + // use SZ + // SZ predication + unpredictable_count = 0; + float * cur_data_pos = pred_buffer + pred_buffer_block_size*pred_buffer_block_size + pred_buffer_block_size + 1; + float curData; + float pred3D; + double itvNum, diff; + size_t index = 0; + for(size_t ii=0; iitmp_realPrecision){ + type[index] = 0; + *cur_data_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + *cur_data_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + index ++; + cur_data_pos ++; + } + cur_data_pos ++; + } + cur_data_pos += pred_buffer_block_size; + } + total_unpred += unpredictable_count; + unpredictable_data += unpredictable_count; + *blockwise_unpred_count_pos = unpredictable_count; + }// end SZ + 
blockwise_unpred_count_pos ++; + type += block_size * block_size * block_size; + } + indicator_pos += num_z; + } + } + } + + free(pred_buffer); + int stateNum = 2*quantization_intervals; + HuffmanTree* huffmanTree = createHuffmanTree(stateNum); + + size_t nodeCount = 0; + init(huffmanTree, result_type, num_blocks*max_num_block_elements); + size_t i = 0; + for (i = 0; i < huffmanTree->stateNum; i++) + if (huffmanTree->code[i]) nodeCount++; + nodeCount = nodeCount*2-1; + + unsigned char *treeBytes; + unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes); + + unsigned int meta_data_offset = 3 + 1 + MetaDataByteLength; + // total size metadata # elements real precision intervals nodeCount huffman block index unpredicatable count mean unpred size elements + unsigned char * result = (unsigned char *) calloc(meta_data_offset + exe_params->SZ_SIZE_TYPE + sizeof(double) + sizeof(int) + sizeof(int) + 5*treeByteSize + 4*num_blocks*sizeof(int)+num_blocks * sizeof(unsigned short) + num_blocks * sizeof(unsigned short) + num_blocks * sizeof(float) + total_unpred * sizeof(float) + num_elements * sizeof(int), 1); + unsigned char * result_pos = result; + initRandomAccessBytes(result_pos); + + result_pos += meta_data_offset; + + sizeToBytes(result_pos,num_elements); //SZ_SIZE_TYPE: 4 or 8 + result_pos += exe_params->SZ_SIZE_TYPE; + + intToBytes_bigEndian(result_pos, block_size); + result_pos += sizeof(int); + doubleToBytes(result_pos, realPrecision); + result_pos += sizeof(double); + intToBytes_bigEndian(result_pos, quantization_intervals); + result_pos += sizeof(int); + intToBytes_bigEndian(result_pos, treeByteSize); + result_pos += sizeof(int); + intToBytes_bigEndian(result_pos, nodeCount); + result_pos += sizeof(int); + memcpy(result_pos, treeBytes, treeByteSize); + result_pos += treeByteSize; + free(treeBytes); + + memcpy(result_pos, &use_mean, sizeof(unsigned char)); + result_pos += sizeof(unsigned char); + memcpy(result_pos, &mean, 
sizeof(float)); + result_pos += sizeof(float); + size_t indicator_size = convertIntArray2ByteArray_fast_1b_to_result(indicator, num_blocks, result_pos); + result_pos += indicator_size; + + //convert the lead/mid/resi to byte stream + if(reg_count > 0){ + for(int e=0; e<4; e++){ + int stateNum = 2*coeff_intvCapacity_sz; + HuffmanTree* huffmanTree = createHuffmanTree(stateNum); + size_t nodeCount = 0; + init(huffmanTree, coeff_type[e], reg_count); + size_t i = 0; + for (i = 0; i < huffmanTree->stateNum; i++) + if (huffmanTree->code[i]) nodeCount++; + nodeCount = nodeCount*2-1; + unsigned char *treeBytes; + unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes); + doubleToBytes(result_pos, precision[e]); + result_pos += sizeof(double); + intToBytes_bigEndian(result_pos, coeff_intvRadius); + result_pos += sizeof(int); + intToBytes_bigEndian(result_pos, treeByteSize); + result_pos += sizeof(int); + intToBytes_bigEndian(result_pos, nodeCount); + result_pos += sizeof(int); + memcpy(result_pos, treeBytes, treeByteSize); + result_pos += treeByteSize; + free(treeBytes); + size_t typeArray_size = 0; + encode(huffmanTree, coeff_type[e], reg_count, result_pos + sizeof(size_t), &typeArray_size); + sizeToBytes(result_pos, typeArray_size); + result_pos += sizeof(size_t) + typeArray_size; + intToBytes_bigEndian(result_pos, coeff_unpredictable_count[e]); + result_pos += sizeof(int); + memcpy(result_pos, coeff_unpred_data[e], coeff_unpredictable_count[e]*sizeof(float)); + result_pos += coeff_unpredictable_count[e]*sizeof(float); + SZ_ReleaseHuffman(huffmanTree); + } + } + free(coeff_result_type); + free(coeff_unpredictable_data); + + //record the number of unpredictable data and also store them + memcpy(result_pos, &total_unpred, sizeof(size_t)); + result_pos += sizeof(size_t); + // record blockwise unpred data + size_t compressed_blockwise_unpred_count_size; + unsigned char * compressed_bw_unpred_count = SZ_compress_args(SZ_INT32, 
blockwise_unpred_count, &compressed_blockwise_unpred_count_size, ABS, 0.5, 0, 0, 0, 0, 0, 0, num_blocks); + memcpy(result_pos, &compressed_blockwise_unpred_count_size, sizeof(size_t)); + result_pos += sizeof(size_t); + memcpy(result_pos, compressed_bw_unpred_count, compressed_blockwise_unpred_count_size); + result_pos += compressed_blockwise_unpred_count_size; + free(blockwise_unpred_count); + free(compressed_bw_unpred_count); + memcpy(result_pos, result_unpredictable_data, total_unpred * sizeof(float)); + result_pos += total_unpred * sizeof(float); + + free(reg_params); + free(indicator); + free(result_unpredictable_data); + // encode type array by block + type = result_type; + size_t total_type_array_size = 0; + unsigned char * type_array_buffer = (unsigned char *) malloc(num_blocks*max_num_block_elements*sizeof(int)); + unsigned short * type_array_block_size = (unsigned short *) malloc(num_blocks*sizeof(unsigned short)); + unsigned char * type_array_buffer_pos = type_array_buffer; + unsigned short * type_array_block_size_pos = type_array_block_size; + for(size_t i=0; i +#include +#include +#include +#include +#include "sz.h" +#include "CompressElement.h" +#include "DynamicByteArray.h" +#include "DynamicIntArray.h" +#include "TightDataPointStorageF.h" +#include "sz_float.h" +#include "sz_float_pwr.h" +#include "zlib.h" +#include "rw.h" +#include "utility.h" + +void compute_segment_precisions_float_1D(float *oriData, size_t dataLength, float* pwrErrBound, unsigned char* pwrErrBoundBytes, double globalPrecision) +{ + size_t i = 0, j = 0, k = 0; + float realPrecision = oriData[0]!=0?fabs(confparams_cpr->pw_relBoundRatio*oriData[0]):confparams_cpr->pw_relBoundRatio; + float approxPrecision; + unsigned char realPrecBytes[4]; + float curPrecision; + float curValue; + float sum = 0; + for(i=0;isegment_size==0&&i>0) + { + //get two first bytes of the realPrecision + if(confparams_cpr->pwr_type==SZ_PWR_AVG_TYPE) + { + realPrecision = sum/confparams_cpr->segment_size; + 
sum = 0; + } + realPrecision *= confparams_cpr->pw_relBoundRatio; + + if(confparams_cpr->errorBoundMode==ABS_AND_PW_REL||confparams_cpr->errorBoundMode==REL_AND_PW_REL) + realPrecision = realPrecisionerrorBoundMode==ABS_OR_PW_REL||confparams_cpr->errorBoundMode==REL_OR_PW_REL) + realPrecision = realPrecisionpwr_type) + { + case SZ_PWR_MIN_TYPE: + if(realPrecision>curPrecision) + realPrecision = curPrecision; + break; + case SZ_PWR_AVG_TYPE: + sum += curPrecision; + break; + case SZ_PWR_MAX_TYPE: + if(realPrecisionpwr_type==SZ_PWR_AVG_TYPE) + { + int size = dataLength%confparams_cpr->segment_size==0?confparams_cpr->segment_size:dataLength%confparams_cpr->segment_size; + realPrecision = sum/size; + } + if(confparams_cpr->errorBoundMode==ABS_AND_PW_REL||confparams_cpr->errorBoundMode==REL_AND_PW_REL) + realPrecision = realPrecisionerrorBoundMode==ABS_OR_PW_REL||confparams_cpr->errorBoundMode==REL_OR_PW_REL) + realPrecision = realPrecisionmaxRangeRadius*sizeof(int)); + memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(int)); + int totalSampleSize = dataLength/confparams_cpr->sampleDistance; + for(i=2;isegment_size==0) + realPrecision = pwrErrBound[j++]; + if(i%confparams_cpr->sampleDistance==0) + { + //pred_value = 2*oriData[i-1] - oriData[i-2]; + pred_value = oriData[i-1]; + pred_err = fabs(pred_value - oriData[i]); + radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2); + if(radiusIndex>=confparams_cpr->maxRangeRadius) + radiusIndex = confparams_cpr->maxRangeRadius - 1; + intervals[radiusIndex]++; + } + } + //compute the appropriate number + size_t targetCount = totalSampleSize*confparams_cpr->predThreshold; + size_t sum = 0; + for(i=0;imaxRangeRadius;i++) + { + sum += intervals[i]; + if(sum>targetCount) + break; + } + if(i>=confparams_cpr->maxRangeRadius) + i = confparams_cpr->maxRangeRadius-1; + + unsigned int accIntervals = 2*(i+1); + unsigned int powerOf2 = roundUpToPowerOf2(accIntervals); + + if(powerOf2<32) + powerOf2 = 32; + + free(intervals); 
+ //printf("accIntervals=%d, powerOf2=%d\n", accIntervals, powerOf2); + return powerOf2; +} + +void compute_segment_precisions_float_2D(float *oriData, float* pwrErrBound, +size_t r1, size_t r2, size_t R2, size_t edgeSize, unsigned char* pwrErrBoundBytes, float Min, float Max, double globalPrecision) +{ + size_t i = 0, j = 0, k = 0, p = 0, index = 0, J = 0; //I=-1,J=-1 if they are needed + float realPrecision; + float approxPrecision; + unsigned char realPrecBytes[4]; + float curValue, curAbsValue; + float* statAbsValues = (float*)malloc(R2*sizeof(float)); + + float max = fabs(Min)pwr_type == SZ_PWR_MIN_TYPE) + statAbsValues[i] = max; + else if(confparams_cpr->pwr_type == SZ_PWR_MAX_TYPE) + statAbsValues[i] = min; + else + statAbsValues[i] = 0; //for SZ_PWR_AVG_TYPE + } + for(i=0;i0) || (i%edgeSize==0&&j==0&&i>0)) + { + if(confparams_cpr->pwr_type==SZ_PWR_AVG_TYPE) + { + int a = edgeSize, b = edgeSize; + if(j==0) + { + if(r2%edgeSize==0) + b = edgeSize; + else + b = r2%edgeSize; + } + if(i==r1-1) + { + if(r1%edgeSize==0) + a = edgeSize; + else + a = r1%edgeSize; + } + realPrecision = confparams_cpr->pw_relBoundRatio*statAbsValues[J]/(a*b); + } + else + realPrecision = confparams_cpr->pw_relBoundRatio*statAbsValues[J]; + + if(confparams_cpr->errorBoundMode==ABS_AND_PW_REL||confparams_cpr->errorBoundMode==REL_AND_PW_REL) + realPrecision = realPrecisionerrorBoundMode==ABS_OR_PW_REL||confparams_cpr->errorBoundMode==REL_OR_PW_REL) + realPrecision = realPrecisionpwr_type == SZ_PWR_MIN_TYPE) + statAbsValues[J] = max; + else if(confparams_cpr->pwr_type == SZ_PWR_MAX_TYPE) + statAbsValues[J] = min; + else + statAbsValues[J] = 0; //for SZ_PWR_AVG_TYPE + } + if(j==0) + J = 0; + else if(j%edgeSize==0) + J++; + if(curValue!=0) + { + curAbsValue = fabs(curValue); + + switch(confparams_cpr->pwr_type) + { + case SZ_PWR_MIN_TYPE: + if(statAbsValues[J]>curAbsValue) + statAbsValues[J] = curAbsValue; + break; + case SZ_PWR_AVG_TYPE: + statAbsValues[J] += curAbsValue; + break; + case 
SZ_PWR_MAX_TYPE: + if(statAbsValues[J]pwr_type==SZ_PWR_AVG_TYPE) + { + int a = edgeSize, b = edgeSize; + if(r2%edgeSize==0) + b = edgeSize; + else + b = r2%edgeSize; + if(r1%edgeSize==0) + a = edgeSize; + else + a = r1%edgeSize; + realPrecision = confparams_cpr->pw_relBoundRatio*statAbsValues[J]/(a*b); + } + else + realPrecision = confparams_cpr->pw_relBoundRatio*statAbsValues[J]; + + if(confparams_cpr->errorBoundMode==ABS_AND_PW_REL||confparams_cpr->errorBoundMode==REL_AND_PW_REL) + realPrecision = realPrecisionerrorBoundMode==ABS_OR_PW_REL||confparams_cpr->errorBoundMode==REL_OR_PW_REL) + realPrecision = realPrecisionmaxRangeRadius*sizeof(int)); + memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(int)); + size_t totalSampleSize = (r1-1)*(r2-1)/confparams_cpr->sampleDistance; + size_t ir2; + for(i=1;isampleDistance==0) + { + realPrecision = pwrErrBound[I*R2+J]; + pred_value = oriData[index-1] + oriData[index-r2] - oriData[index-r2-1]; + pred_err = fabs(pred_value - oriData[index]); + radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2); + if(radiusIndex>=confparams_cpr->maxRangeRadius) + radiusIndex = confparams_cpr->maxRangeRadius - 1; + intervals[radiusIndex]++; + } + } + } + //compute the appropriate number + size_t targetCount = totalSampleSize*confparams_cpr->predThreshold; + size_t sum = 0; + for(i=0;imaxRangeRadius;i++) + { + sum += intervals[i]; + if(sum>targetCount) + break; + } + if(i>=confparams_cpr->maxRangeRadius) + i = confparams_cpr->maxRangeRadius-1; + unsigned int accIntervals = 2*(i+1); + unsigned int powerOf2 = roundUpToPowerOf2(accIntervals); + + if(powerOf2<32) + powerOf2 = 32; + + free(intervals); + //printf("confparams_cpr->maxRangeRadius = %d, accIntervals=%d, powerOf2=%d\n", confparams_cpr->maxRangeRadius, accIntervals, powerOf2); + return powerOf2; +} + +void compute_segment_precisions_float_3D(float *oriData, float* pwrErrBound, +size_t r1, size_t r2, size_t r3, size_t R2, size_t R3, size_t edgeSize, unsigned char* 
pwrErrBoundBytes, float Min, float Max, double globalPrecision) +{ + size_t i = 0, j = 0, k = 0, p = 0, q = 0, index = 0, J = 0, K = 0; //I=-1,J=-1 if they are needed + size_t r23 = r2*r3, ir, jr; + float realPrecision; + float approxPrecision; + unsigned char realPrecBytes[4]; + float curValue, curAbsValue; + + float** statAbsValues = create2DArray_float(R2, R3); + float max = fabs(Min)pwr_type == SZ_PWR_MIN_TYPE) + statAbsValues[i][j] = max; + else if(confparams_cpr->pwr_type == SZ_PWR_MAX_TYPE) + statAbsValues[i][j] = min; + else + statAbsValues[i][j] = 0; + } + for(i=0;i0) + { + realPrecision = confparams_cpr->pw_relBoundRatio*statAbsValues[J][K]; + floatToBytes(realPrecBytes, realPrecision); + memset(&realPrecBytes[2], 0, 2); + approxPrecision = bytesToFloat(realPrecBytes); + //put the realPrecision in float* pwrErBound + pwrErrBound[p++] = approxPrecision; + //put the two bytes in pwrErrBoundBytes + //printf("q=%d, i=%d, j=%d, k=%d\n",q,i,j,k); + pwrErrBoundBytes[q++] = realPrecBytes[0]; + pwrErrBoundBytes[q++] = realPrecBytes[1]; + if(confparams_cpr->pwr_type == SZ_PWR_MIN_TYPE) + statAbsValues[J][K] = max; + else if(confparams_cpr->pwr_type == SZ_PWR_MAX_TYPE) + statAbsValues[J][K] = min; + + } + for(j=0;j0) + { + realPrecision = confparams_cpr->pw_relBoundRatio*statAbsValues[J][K]; + floatToBytes(realPrecBytes, realPrecision); + memset(&realPrecBytes[2], 0, 2); + approxPrecision = bytesToFloat(realPrecBytes); + //put the realPrecision in float* pwrErBound + pwrErrBound[p++] = approxPrecision; + //put the two bytes in pwrErrBoundBytes + //printf("q=%d, i=%d, j=%d, k=%d\n",q,i,j,k); + pwrErrBoundBytes[q++] = realPrecBytes[0]; + pwrErrBoundBytes[q++] = realPrecBytes[1]; + if(confparams_cpr->pwr_type == SZ_PWR_MIN_TYPE) + statAbsValues[J][K] = max; + else if(confparams_cpr->pwr_type == SZ_PWR_MAX_TYPE) + statAbsValues[J][K] = min; + } + + if(j==0) + J = 0; + else if(j%edgeSize==0) + J++; + + for(k=0;k0) + { + realPrecision = 
confparams_cpr->pw_relBoundRatio*statAbsValues[J][K]; + floatToBytes(realPrecBytes, realPrecision); + memset(&realPrecBytes[2], 0, 2); + approxPrecision = bytesToFloat(realPrecBytes); + //put the realPrecision in float* pwrErBound + pwrErrBound[p++] = approxPrecision; + //put the two bytes in pwrErrBoundBytes + //printf("q=%d, i=%d, j=%d, k=%d\n",q,i,j,k); + pwrErrBoundBytes[q++] = realPrecBytes[0]; + pwrErrBoundBytes[q++] = realPrecBytes[1]; + + if(confparams_cpr->pwr_type == SZ_PWR_MIN_TYPE) + statAbsValues[J][K] = max; + else if(confparams_cpr->pwr_type == SZ_PWR_MAX_TYPE) + statAbsValues[J][K] = min; + } + + if(k==0) + K = 0; + else if(k%edgeSize==0) + K++; + + if(curValue!=0) + { + curAbsValue = fabs(curValue); + if(confparams_cpr->pwr_type == SZ_PWR_MIN_TYPE) + { + if(statAbsValues[J][K]>curAbsValue) + { + statAbsValues[J][K] = curAbsValue; + } + } + else if(confparams_cpr->pwr_type == SZ_PWR_MAX_TYPE) + { + if(statAbsValues[J][K]pw_relBoundRatio*statAbsValues[J][K]; + floatToBytes(realPrecBytes, realPrecision); + realPrecBytes[2] = realPrecBytes[3] = 0; + approxPrecision = bytesToFloat(realPrecBytes); + //put the realPrecision in float* pwrErBound + pwrErrBound[p++] = approxPrecision; + //put the two bytes in pwrErrBoundBytes + pwrErrBoundBytes[q++] = realPrecBytes[0]; + pwrErrBoundBytes[q++] = realPrecBytes[1]; + + free2DArray_float(statAbsValues, R2); +} + +unsigned int optimize_intervals_float_3D_pwr(float *oriData, size_t r1, size_t r2, size_t r3, size_t R2, size_t R3, size_t edgeSize, float* pwrErrBound) +{ + size_t i,j,k, ir,jr,index, I = 0,J=0,K=0; + float realPrecision = pwrErrBound[0]; + unsigned long radiusIndex; + size_t r23=r2*r3; + size_t R23 = R2*R3; + float pred_value = 0, pred_err; + int *intervals = (int*)malloc(confparams_cpr->maxRangeRadius*sizeof(int)); + memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(int)); + size_t totalSampleSize = (r1-1)*(r2-1)*(r3-1)/confparams_cpr->sampleDistance; + for(i=1;isampleDistance==0) + { + 
realPrecision = pwrErrBound[I*R23+J*R2+K]; + pred_value = oriData[index-1] + oriData[index-r3] + oriData[index-r23] + - oriData[index-1-r23] - oriData[index-r3-1] - oriData[index-r3-r23] + oriData[index-r3-r23-1]; + pred_err = fabs(pred_value - oriData[index]); + radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2); + if(radiusIndex>=confparams_cpr->maxRangeRadius) + radiusIndex = confparams_cpr->maxRangeRadius - 1; + intervals[radiusIndex]++; + } + } + } + } + //compute the appropriate number + size_t targetCount = totalSampleSize*confparams_cpr->predThreshold; + size_t sum = 0; + for(i=0;imaxRangeRadius;i++) + { + sum += intervals[i]; + if(sum>targetCount) + break; + } + if(i>=confparams_cpr->maxRangeRadius) + i = confparams_cpr->maxRangeRadius-1; + unsigned int accIntervals = 2*(i+1); + unsigned int powerOf2 = roundUpToPowerOf2(accIntervals); + + if(powerOf2<32) + powerOf2 = 32; + + free(intervals); + //printf("accIntervals=%d, powerOf2=%d\n", accIntervals, powerOf2); + return powerOf2; +} + +void SZ_compress_args_float_NoCkRngeNoGzip_1D_pwr(unsigned char** newByteData, float *oriData, double globalPrecision, +size_t dataLength, size_t *outSize, float min, float max) +{ + size_t pwrLength = dataLength%confparams_cpr->segment_size==0?dataLength/confparams_cpr->segment_size:dataLength/confparams_cpr->segment_size+1; + float* pwrErrBound = (float*)malloc(sizeof(float)*pwrLength); + size_t pwrErrBoundBytes_size = sizeof(unsigned char)*pwrLength*2; + unsigned char* pwrErrBoundBytes = (unsigned char*)malloc(pwrErrBoundBytes_size); + + compute_segment_precisions_float_1D(oriData, dataLength, pwrErrBound, pwrErrBoundBytes, globalPrecision); + + unsigned int quantization_intervals; + if(exe_params->optQuantMode==1) + { + quantization_intervals = optimize_intervals_float_1D_pwr(oriData, dataLength, pwrErrBound); + updateQuantizationInfo(quantization_intervals); + } + else + quantization_intervals = exe_params->intvCapacity; + size_t i = 0, j = 0; + int reqLength; + 
float realPrecision = pwrErrBound[j++]; + float medianValue = 0; + float radius = fabs(max)curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + listAdd_float(last3CmprsData, vce->data); + //printf("%.30G\n",last3CmprsData[0]); + + //add the second data + type[1] = 0; + addDBA_Data(resiBitLengthArray, (unsigned char)resiBitsLength); + compressSingleFloatValue(vce, spaceFillingValue[1], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + listAdd_float(last3CmprsData, vce->data); + //printf("%.30G\n",last3CmprsData[0]); + + int state; + double checkRadius; + float curData; + float pred; + double predAbsErr; + checkRadius = (exe_params->intvCapacity-1)*realPrecision; + double interval = 2*realPrecision; + int updateReqLength = 0; //a marker: 1 means already updated + + for(i=2;isegment_size==0) + { + realPrecision = pwrErrBound[j++]; + checkRadius = (exe_params->intvCapacity-1)*realPrecision; + interval = 2*realPrecision; + updateReqLength = 0; + } + //pred = 2*last3CmprsData[0] - last3CmprsData[1]; + pred = last3CmprsData[0]; + predAbsErr = fabs(curData - pred); + if(predAbsErr=pred) + { + type[i] = exe_params->intvRadius+state; + pred = pred + state*interval; + } + else //curDataintvRadius-state; + pred = pred - state*interval; + } + listAdd_float(last3CmprsData, pred); + continue; + } + + //unpredictable data processing + if(updateReqLength==0) + { + computeReqLength_float(realPrecision, radExpo, &reqLength, &medianValue); + reqBytesLength = reqLength/8; + resiBitsLength = reqLength%8; + updateReqLength = 1; + } + + type[i] = 0; + addDBA_Data(resiBitLengthArray, (unsigned char)resiBitsLength); + + 
compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + + listAdd_float(last3CmprsData, vce->data); + }//end of for + +// char* expSegmentsInBytes; +// int expSegmentsInBytes_size = convertESCToBytes(esc, &expSegmentsInBytes); + int exactDataNum = exactLeadNumArray->size; + + TightDataPointStorageF* tdps; + + new_TightDataPointStorageF2(&tdps, dataLength, exactDataNum, + type, exactMidByteArray->array, exactMidByteArray->size, + exactLeadNumArray->array, + resiBitArray->array, resiBitArray->size, + resiBitLengthArray->array, resiBitLengthArray->size, + realPrecision, medianValue, (char)reqLength, quantization_intervals, pwrErrBoundBytes, pwrErrBoundBytes_size, radExpo); + +//sdi:Debug +/* int sum =0; + for(i=0;itypeArray, tdps->typeArray_size, type_); +// printf("tdps->typeArray_size=%d\n", tdps->typeArray_size); + + //free memory + free_DBA(resiBitLengthArray); + free_DIA(exactLeadNumArray); + free_DIA(resiBitArray); + free(type); + + convertTDPStoFlatBytes_float(tdps, newByteData, outSize); + + int floatSize=sizeof(float); + if(*outSize>dataLength*floatSize) + { + size_t k = 0, i; + tdps->isLossless = 1; + size_t totalByteLength = 3 + exe_params->SZ_SIZE_TYPE + 1 + floatSize*dataLength; + *newByteData = (unsigned char*)malloc(totalByteLength); + + unsigned char dsLengthBytes[exe_params->SZ_SIZE_TYPE]; + intToBytes_bigEndian(dsLengthBytes, dataLength);//4 + for (i = 0; i < 3; i++)//3 + (*newByteData)[k++] = versionNumber[i]; + + if(exe_params->SZ_SIZE_TYPE==4) + { + (*newByteData)[k++] = 16; //=00010000 + } + else + { + (*newByteData)[k++] = 80; + } + for (i = 0; i < exe_params->SZ_SIZE_TYPE; i++)//4 or 8 + (*newByteData)[k++] = dsLengthBytes[i]; + + + if(sysEndianType==BIG_ENDIAN_SYSTEM) + 
memcpy((*newByteData)+4+exe_params->SZ_SIZE_TYPE, oriData, dataLength*floatSize); + else + { + unsigned char* p = (*newByteData)+4+exe_params->SZ_SIZE_TYPE; + for(i=0;isegment_size); + size_t R1 = 1+(r1-1)/blockEdgeSize; + size_t R2 = 1+(r2-1)/blockEdgeSize; + float* pwrErrBound = (float*)malloc(sizeof(float)*R1*R2); + size_t pwrErrBoundBytes_size = sizeof(unsigned char)*R1*R2*2; + unsigned char* pwrErrBoundBytes = (unsigned char*)malloc(pwrErrBoundBytes_size); + + compute_segment_precisions_float_2D(oriData, pwrErrBound, r1, r2, R2, blockEdgeSize, pwrErrBoundBytes, min, max, globalPrecision); + + unsigned int quantization_intervals; + if(exe_params->optQuantMode==1) + { + quantization_intervals = optimize_intervals_float_2D_pwr(oriData, r1, r2, R2, blockEdgeSize, pwrErrBound); + updateQuantizationInfo(quantization_intervals); + } + else + quantization_intervals = exe_params->intvCapacity; + //printf("quantization_intervals=%d\n",quantization_intervals); + + size_t i=0,j=0,I=0,J=0; + int reqLength; + float realPrecision = pwrErrBound[I*R2+J]; + float pred1D, pred2D; + float diff = 0.0; + double itvNum = 0; + float *P0, *P1; + + P0 = (float*)malloc(r2*sizeof(float)); + memset(P0, 0, r2*sizeof(float)); + P1 = (float*)malloc(r2*sizeof(float)); + memset(P1, 0, r2*sizeof(float)); + + float medianValue = 0; + float radius = fabs(max)curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[0] = vce->data; + + /* Process Row-0 data 1*/ + pred1D = P1[0]; + diff = spaceFillingValue[1] - pred1D; + + itvNum = fabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[1] = (int) (itvNum/2) + exe_params->intvRadius; + P1[1] = pred1D + 2 * (type[1] - exe_params->intvRadius) * realPrecision; + } + else + { + type[1] = 0; + + addDBA_Data(resiBitLengthArray, (unsigned char)resiBitsLength); + 
compressSingleFloatValue(vce, spaceFillingValue[1], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[1] = vce->data; + } + + /* Process Row-0 data 2 --> data r2-1 */ + for (j = 2; j < r2; j++) + { + if(j%blockEdgeSize==0) + { + J++; + realPrecision = pwrErrBound[I*R2+J]; + updateReqLength = 0; + } + + pred1D = 2*P1[j-1] - P1[j-2]; + diff = spaceFillingValue[j] - pred1D; + + itvNum = fabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[j] = (int) (itvNum/2) + exe_params->intvRadius; + P1[j] = pred1D + 2 * (type[j] - exe_params->intvRadius) * realPrecision; + } + else + { + if(updateReqLength==0) + { + computeReqLength_float(realPrecision, radExpo, &reqLength, &medianValue); + reqBytesLength = reqLength/8; + resiBitsLength = reqLength%8; + updateReqLength = 1; + } + + type[j] = 0; + + addDBA_Data(resiBitLengthArray, (unsigned char)resiBitsLength); + compressSingleFloatValue(vce, spaceFillingValue[j], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[j] = vce->data; + } + } + + /* Process Row-1 --> Row-r1-1 */ + size_t index; + for (i = 1; i < r1; i++) + { + /* Process row-i data 0 */ + index = i*r2; + J = 0; + if(i%blockEdgeSize==0) + I++; + realPrecision = pwrErrBound[I*R2+J]; //J==0 + updateReqLength = 0; + + pred1D = P1[0]; + diff = spaceFillingValue[index] - pred1D; + + itvNum = fabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + 
exe_params->intvRadius; + P0[0] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + } + else + { + if(updateReqLength==0) + { + computeReqLength_float(realPrecision, radExpo, &reqLength, &medianValue); + reqBytesLength = reqLength/8; + resiBitsLength = reqLength%8; + updateReqLength = 1; + } + + type[index] = 0; + + addDBA_Data(resiBitLengthArray, (unsigned char)resiBitsLength); + compressSingleFloatValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P0[0] = vce->data; + } + + /* Process row-i data 1 --> r2-1*/ + for (j = 1; j < r2; j++) + { + index = i*r2+j; + + if(j%blockEdgeSize==0) + { + J++; + realPrecision = pwrErrBound[I*R2+J]; + updateReqLength = 0; + } + pred2D = P0[j-1] + P1[j] - P1[j-1]; + + diff = spaceFillingValue[index] - pred2D; + + itvNum = fabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P0[j] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + } + else + { + if(updateReqLength==0) + { + computeReqLength_float(realPrecision, radExpo, &reqLength, &medianValue); + reqBytesLength = reqLength/8; + resiBitsLength = reqLength%8; + updateReqLength = 1; + } + + type[index] = 0; + + addDBA_Data(resiBitLengthArray, (unsigned char)resiBitsLength); + compressSingleFloatValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P0[j] = vce->data; + } + } + + float *Pt; + Pt = 
P1; + P1 = P0; + P0 = Pt; + } + + if(r2!=1) + free(P0); + free(P1); + int exactDataNum = exactLeadNumArray->size; + + TightDataPointStorageF* tdps; + + new_TightDataPointStorageF2(&tdps, dataLength, exactDataNum, + type, exactMidByteArray->array, exactMidByteArray->size, + exactLeadNumArray->array, + resiBitArray->array, resiBitArray->size, + resiBitLengthArray->array, resiBitLengthArray->size, + realPrecision, medianValue, (char)reqLength, quantization_intervals, pwrErrBoundBytes, pwrErrBoundBytes_size, radExpo); + + //free memory + free_DBA(resiBitLengthArray); + free_DIA(exactLeadNumArray); + free_DIA(resiBitArray); + free(type); + + convertTDPStoFlatBytes_float(tdps, newByteData, outSize); + + free(pwrErrBound); + + free(vce); + free(lce); + free_TightDataPointStorageF(tdps); + free(exactMidByteArray); +} + +void SZ_compress_args_float_NoCkRngeNoGzip_3D_pwr(unsigned char** newByteData, float *oriData, double globalPrecision, +size_t r1, size_t r2, size_t r3, size_t *outSize, float min, float max) +{ + size_t dataLength=r1*r2*r3; + + int blockEdgeSize = computeBlockEdgeSize_3D(confparams_cpr->segment_size); + size_t R1 = 1+(r1-1)/blockEdgeSize; + size_t R2 = 1+(r2-1)/blockEdgeSize; + size_t R3 = 1+(r3-1)/blockEdgeSize; + float* pwrErrBound = (float*)malloc(sizeof(float)*R1*R2*R3); + size_t pwrErrBoundBytes_size = sizeof(unsigned char)*R1*R2*R3*2; + unsigned char* pwrErrBoundBytes = (unsigned char*)malloc(pwrErrBoundBytes_size); + + compute_segment_precisions_float_3D(oriData, pwrErrBound, r1, r2, r3, R2, R3, blockEdgeSize, pwrErrBoundBytes, min, max, globalPrecision); + + unsigned int quantization_intervals; + if(exe_params->optQuantMode==1) + { + quantization_intervals = optimize_intervals_float_3D_pwr(oriData, r1, r2, r3, R2, R3, blockEdgeSize, pwrErrBound); + updateQuantizationInfo(quantization_intervals); + } + else + quantization_intervals = exe_params->intvCapacity; + size_t i=0,j=0,k=0, I = 0, J = 0, K = 0; + int reqLength; + float realPrecision = 
pwrErrBound[0]; + float pred1D, pred2D, pred3D; + float diff = 0.0; + double itvNum = 0; + float *P0, *P1; + + size_t r23 = r2*r3; + size_t R23 = R2*R3; + P0 = (float*)malloc(r23*sizeof(float)); + P1 = (float*)malloc(r23*sizeof(float)); + float radius = fabs(max)curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[0] = vce->data; + + /* Process Row-0 data 1*/ + pred1D = P1[0]; + diff = spaceFillingValue[1] - pred1D; + + itvNum = fabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[1] = (int) (itvNum/2) + exe_params->intvRadius; + P1[1] = pred1D + 2 * (type[1] - exe_params->intvRadius) * realPrecision; + } + else + { + if(updateReqLength==0) + { + computeReqLength_float(realPrecision, radExpo, &reqLength, &medianValue); + reqBytesLength = reqLength/8; + resiBitsLength = reqLength%8; + updateReqLength = 1; + } + + type[1] = 0; + + addDBA_Data(resiBitLengthArray, (unsigned char)resiBitsLength); + compressSingleFloatValue(vce, spaceFillingValue[1], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[1] = vce->data; + } + + /* Process Row-0 data 2 --> data r3-1 */ + for (j = 2; j < r3; j++) + { + if(j%blockEdgeSize==0) + { + J++; + realPrecision = pwrErrBound[J]; + updateReqLength = 0; + } + pred1D = 2*P1[j-1] - P1[j-2]; + diff = spaceFillingValue[j] - pred1D; + + itvNum = fabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[j] = (int) (itvNum/2) + exe_params->intvRadius; + P1[j] = pred1D + 2 * (type[j] - exe_params->intvRadius) * realPrecision; + } + else + { + if(updateReqLength==0) + 
{ + computeReqLength_float(realPrecision, radExpo, &reqLength, &medianValue); + reqBytesLength = reqLength/8; + resiBitsLength = reqLength%8; + updateReqLength = 1; + } + + type[j] = 0; + + addDBA_Data(resiBitLengthArray, (unsigned char)resiBitsLength); + compressSingleFloatValue(vce, spaceFillingValue[j], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[j] = vce->data; + } + } + + /* Process Row-1 --> Row-r2-1 */ + size_t index; + K = 0; + for (i = 1; i < r2; i++) + { + /* Process row-i data 0 */ + index = i*r3; + + J = 0; + if(i%blockEdgeSize==0) + I++; + realPrecision = pwrErrBound[I*R3+J]; //J==0 + updateReqLength = 0; + + pred1D = P1[index-r3]; + diff = spaceFillingValue[index] - pred1D; + + itvNum = fabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P1[index] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + } + else + { + if(updateReqLength==0) + { + computeReqLength_float(realPrecision, radExpo, &reqLength, &medianValue); + reqBytesLength = reqLength/8; + resiBitsLength = reqLength%8; + updateReqLength = 1; + } + + type[index] = 0; + + addDBA_Data(resiBitLengthArray, (unsigned char)resiBitsLength); + compressSingleFloatValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[index] = vce->data; + } + + /* Process row-i data 1 --> data r3-1*/ + for (j = 1; j < r3; j++) //note that this j refers to fastest dimension 
(lowest order) + { + index = i*r3+j; + if(j%blockEdgeSize==0) + { + J++; + realPrecision = pwrErrBound[I*R3+J]; + updateReqLength = 0; + } + + pred2D = P1[index-1] + P1[index-r3] - P1[index-r3-1]; + + diff = spaceFillingValue[index] - pred2D; + + itvNum = fabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P1[index] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + } + else + { + if(updateReqLength==0) + { + computeReqLength_float(realPrecision, radExpo, &reqLength, &medianValue); + reqBytesLength = reqLength/8; + resiBitsLength = reqLength%8; + updateReqLength = 1; + } + + type[index] = 0; + + addDBA_Data(resiBitLengthArray, (unsigned char)resiBitsLength); + compressSingleFloatValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P1[index] = vce->data; + } + } + } + + /////////////////////////// Process layer-1 --> layer-r1-1 /////////////////////////// + + for (k = 1; k < r1; k++) + { + /* Process Row-0 data 0*/ + index = k*r23; + I = 0; + J = 0; + if(k%blockEdgeSize==0) + K++; + realPrecision = pwrErrBound[K*R23]; //J==0 + updateReqLength = 0; + + pred1D = P1[0]; + diff = spaceFillingValue[index] - pred1D; + + itvNum = fabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P0[0] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + } + else + { + if(updateReqLength==0) + { + computeReqLength_float(realPrecision, radExpo, &reqLength, &medianValue); + reqBytesLength = reqLength/8; + resiBitsLength = reqLength%8; + 
updateReqLength = 1; + } + + type[index] = 0; + + addDBA_Data(resiBitLengthArray, (unsigned char)resiBitsLength); + compressSingleFloatValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P0[0] = vce->data; + } + + /* Process Row-0 data 1 --> data r3-1 */ + for (j = 1; j < r3; j++) + { + index = k*r23+j; + + if(j%blockEdgeSize==0) + { + J++; + realPrecision = pwrErrBound[K*R23+J]; + updateReqLength = 0; + } + pred2D = P0[j-1] + P1[j] - P1[j-1]; + diff = spaceFillingValue[index] - pred2D; + + itvNum = fabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P0[j] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; +/* if(type[index]==0) + printf("err:type[%d]=0, index4\n", index); */ + } + else + { + if(updateReqLength==0) + { + computeReqLength_float(realPrecision, radExpo, &reqLength, &medianValue); + reqBytesLength = reqLength/8; + resiBitsLength = reqLength%8; + updateReqLength = 1; + } + + type[index] = 0; + + addDBA_Data(resiBitLengthArray, (unsigned char)resiBitsLength); + compressSingleFloatValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P0[j] = vce->data; + } + } + + /* Process Row-1 --> Row-r2-1 */ + size_t index2D; + for (i = 1; i < r2; i++) + { + /* Process Row-i data 0 */ + index = k*r23 + i*r3; + J = 0; + if(i%blockEdgeSize==0) + I++; + realPrecision = pwrErrBound[K*R23+I*R3+J]; //J==0 + 
updateReqLength = 0; + + index2D = i*r3; + pred2D = P0[index2D-r3] + P1[index2D] - P1[index2D-r3]; + diff = spaceFillingValue[index] - pred2D; + + itvNum = fabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P0[index2D] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + } + else + { + if(updateReqLength==0) + { + computeReqLength_float(realPrecision, radExpo, &reqLength, &medianValue); + reqBytesLength = reqLength/8; + resiBitsLength = reqLength%8; + updateReqLength = 1; + } + + type[index] = 0; + + addDBA_Data(resiBitLengthArray, (unsigned char)resiBitsLength); + compressSingleFloatValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P0[index2D] = vce->data; + } + + /* Process Row-i data 1 --> data r3-1 */ + for (j = 1; j < r3; j++) + { + index = k*r23 + i*r3 + j; + if(j%blockEdgeSize==0) + { + J++; + realPrecision = pwrErrBound[K*R23+I*R3+J]; + updateReqLength = 0; + } + index2D = i*r3 + j; + pred3D = P0[index2D-1] + P0[index2D-r3]+ P1[index2D] - P0[index2D-r3-1] - P1[index2D-r3] - P1[index2D-1] + P1[index2D-r3-1]; + diff = spaceFillingValue[index] - pred3D; + + itvNum = fabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P0[index2D] = pred3D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + } + else + { + if(updateReqLength==0) + { + computeReqLength_float(realPrecision, radExpo, &reqLength, &medianValue); + reqBytesLength = reqLength/8; + resiBitsLength = reqLength%8; + updateReqLength = 1; + } + + type[index] = 0; + + 
addDBA_Data(resiBitLengthArray, (unsigned char)resiBitsLength); + compressSingleFloatValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + P0[index2D] = vce->data; + } + } + } + + float *Pt; + Pt = P1; + P1 = P0; + P0 = Pt; + } + if(r23!=1) + free(P0); + free(P1); + int exactDataNum = exactLeadNumArray->size; + + TightDataPointStorageF* tdps; + + new_TightDataPointStorageF2(&tdps, dataLength, exactDataNum, + type, exactMidByteArray->array, exactMidByteArray->size, + exactLeadNumArray->array, + resiBitArray->array, resiBitArray->size, + resiBitLengthArray->array, resiBitLengthArray->size, + realPrecision, medianValue, (char)reqLength, quantization_intervals, pwrErrBoundBytes, pwrErrBoundBytes_size, radExpo); + +//sdi:Debug +/* int sum =0; + for(i=0;idataSeriesLength; + int* standGroupID = (int*)malloc(dataLength*sizeof(int)); + + size_t i; + standGroupID[0] = groupID[0]+GROUP_COUNT; //plus an offset such that it would not be a negative number. 
+ char lastGroupIDValue = groupID[0], curGroupIDValue; + int offset = 2*(GROUP_COUNT + 2); + for(i=1; ipwrErrBoundBytes = out; //groupIDArray + tdps->pwrErrBoundBytes_size = outSize; + + free(standGroupID); +} + +TightDataPointStorageF* SZ_compress_float_1D_MDQ_pwrGroup(float* oriData, size_t dataLength, int errBoundMode, +double absErrBound, double relBoundRatio, double pwrErrRatio, float valueRangeSize, float medianValue_f) +{ + size_t i; + float *posGroups, *negGroups, *groups; + float pos_01_group = 0, neg_01_group = 0; //[0,1] and [-1,0] + int *posFlags, *negFlags, *flags; + int pos_01_flag = 0, neg_01_flag = 0; + createRangeGroups_float(&posGroups, &negGroups, &posFlags, &negFlags); + size_t nbBins = (size_t)(1/pwrErrRatio); + if(nbBins%2==1) + nbBins++; + exe_params->intvRadius = nbBins; + + int reqLength, status; + float medianValue = medianValue_f; + float realPrecision = (float)getRealPrecision_float(valueRangeSize, errBoundMode, absErrBound, relBoundRatio, &status); + if(realPrecision<0) + realPrecision = pwrErrRatio; + float realGroupPrecision; //precision (error) based on group ID + getPrecisionReqLength_float(realPrecision); + short radExpo = getExponent_float(valueRangeSize/2); + short lastGroupNum = 0, groupNum, grpNum = 0; + + double* groupErrorBounds = generateGroupErrBounds(errBoundMode, realPrecision, pwrErrRatio); + exe_params->intvRadius = generateGroupMaxIntervalCount(groupErrorBounds); + + computeReqLength_float(realPrecision, radExpo, &reqLength, &medianValue); + + int* type = (int*) malloc(dataLength*sizeof(int)); + char *groupID = (char*) malloc(dataLength*sizeof(char)); + char *gp = groupID; + + float* spaceFillingValue = oriData; + + DynamicIntArray *exactLeadNumArray; + new_DIA(&exactLeadNumArray, DynArrayInitLen); + + DynamicByteArray *exactMidByteArray; + new_DBA(&exactMidByteArray, DynArrayInitLen); + + DynamicIntArray *resiBitArray; + new_DIA(&resiBitArray, DynArrayInitLen); + + unsigned char preDataBytes[4]; + 
intToBytes_bigEndian(preDataBytes, 0); + + int reqBytesLength = reqLength/8; + int resiBitsLength = reqLength%8; + + FloatValueCompressElement *vce = (FloatValueCompressElement*)malloc(sizeof(FloatValueCompressElement)); + LossyCompressionElement *lce = (LossyCompressionElement*)malloc(sizeof(LossyCompressionElement)); + + int state; + float curData, decValue; + float pred; + float predAbsErr; + double interval = 0; + + //add the first data + type[0] = 0; + compressSingleFloatValue(vce, spaceFillingValue[0], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + + curData = spaceFillingValue[0]; + groupNum = computeGroupNum_float(vce->data); + + if(curData > 0 && groupNum >= 0) + { + groups = posGroups; + flags = posFlags; + grpNum = groupNum; + } + else if(curData < 0 && groupNum >= 0) + { + groups = negGroups; + flags = negFlags; + grpNum = groupNum; + } + else if(curData >= 0 && groupNum == -1) + { + groups = &pos_01_group; + flags = &pos_01_flag; + grpNum = 0; + } + else //curData < 0 && groupNum == -1 + { + groups = &neg_01_group; + flags = &neg_01_flag; + grpNum = 0; + } + + listAdd_float_group(groups, flags, groupNum, spaceFillingValue[0], vce->data, gp); + gp++; + + for(i=1;i 0 && groupNum >= 0) + { + groups = posGroups; + flags = posFlags; + grpNum = groupNum; + } + else if(curData < 0 && groupNum >= 0) + { + groups = negGroups; + flags = negFlags; + grpNum = groupNum; + } + else if(curData >= 0 && groupNum == -1) + { + groups = &pos_01_group; + flags = &pos_01_flag; + grpNum = 0; + } + else //curData < 0 && groupNum == -1 + { + groups = &neg_01_group; + flags = &neg_01_flag; + grpNum = 0; + } + + if(groupNum>=GROUP_COUNT) + { + type[i] = 0; + compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, 
reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + listAdd_float_group(groups, flags, lastGroupNum, curData, vce->data, gp); //set the group number to be last one in order to get the groupID array as smooth as possible. + } + else if(flags[grpNum]==0) //the dec value may not be in the same group + { + type[i] = 0; + compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + //decGroupNum = computeGroupNum_float(vce->data); + + //if(decGroupNum < groupNum) + // decValue = curData>0?pow(2, groupNum):-pow(2, groupNum); + //else if(decGroupNum > groupNum) + // decValue = curData>0?pow(2, groupNum+1):-pow(2, groupNum+1); + //else + // decValue = vce->data; + + decValue = vce->data; + listAdd_float_group(groups, flags, groupNum, curData, decValue, gp); + lastGroupNum = curData>0?groupNum + 2: -(groupNum+2); + } + else //if flags[groupNum]==1, the dec value must be in the same group + { + pred = groups[grpNum]; + predAbsErr = fabs(curData - pred); + realGroupPrecision = groupErrorBounds[grpNum]; //compute real error bound + interval = realGroupPrecision*2; + state = (predAbsErr/realGroupPrecision+1)/2; + if(curData>=pred) + { + type[i] = exe_params->intvRadius+state; + decValue = pred + state*interval; + } + else //curDataintvRadius-state; + decValue = pred - state*interval; + } + //decGroupNum = computeGroupNum_float(pred); + + if((decValue>0&&curData<0)||(decValue<0&&curData>=0)) + decValue = 0; + //else + //{ + // if(decGroupNum < groupNum) + // decValue = curData>0?pow(2, groupNum):-pow(2, groupNum); + // else 
if(decGroupNum > groupNum) + // decValue = curData>0?pow(2, groupNum+1):-pow(2, groupNum+1); + // else + // decValue = pred; + //} + + if(fabs(curData-decValue)>realGroupPrecision) + { + type[i] = 0; + compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + + decValue = vce->data; + } + + listAdd_float_group(groups, flags, groupNum, curData, decValue, gp); + lastGroupNum = curData>=0?groupNum + 2: -(groupNum+2); + } + gp++; + + } + + int exactDataNum = exactLeadNumArray->size; + + TightDataPointStorageF* tdps; + + //combineTypeAndGroupIDArray(nbBins, dataLength, &type, groupID); + + new_TightDataPointStorageF(&tdps, dataLength, exactDataNum, + type, exactMidByteArray->array, exactMidByteArray->size, + exactLeadNumArray->array, + resiBitArray->array, resiBitArray->size, + resiBitsLength, + realPrecision, medianValue, (char)reqLength, nbBins, NULL, 0, radExpo); + + compressGroupIDArray_float(groupID, tdps); + + free(posGroups); + free(negGroups); + free(posFlags); + free(negFlags); + free(groupID); + free(groupErrorBounds); + + free_DIA(exactLeadNumArray); + free_DIA(resiBitArray); + free(type); + free(vce); + free(lce); + free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps); + + return tdps; +} + +void SZ_compress_args_float_NoCkRngeNoGzip_1D_pwrgroup(unsigned char** newByteData, float *oriData, +size_t dataLength, double absErrBound, double relBoundRatio, double pwrErrRatio, float valueRangeSize, float medianValue_f, size_t *outSize) +{ + TightDataPointStorageF* tdps = SZ_compress_float_1D_MDQ_pwrGroup(oriData, dataLength, confparams_cpr->errorBoundMode, + absErrBound, relBoundRatio, pwrErrRatio, + valueRangeSize, medianValue_f); + + 
convertTDPStoFlatBytes_float(tdps, newByteData, outSize); + + if(*outSize>3 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 1 + sizeof(float)*dataLength) + SZ_compress_args_float_StoreOriData(oriData, dataLength, newByteData, outSize); + + free_TightDataPointStorageF(tdps); +} + +#include + +void SZ_compress_args_float_NoCkRngeNoGzip_1D_pwr_pre_log(unsigned char** newByteData, float *oriData, double pwrErrRatio, size_t dataLength, size_t *outSize, float min, float max){ + + float * log_data = (float *) malloc(dataLength * sizeof(float)); + + unsigned char * signs = (unsigned char *) malloc(dataLength); + memset(signs, 0, dataLength); + // preprocess + float max_abs_log_data; + if(min == 0) max_abs_log_data = fabs(log2(fabs(max))); + else if(max == 0) max_abs_log_data = fabs(log2(fabs(min))); + else max_abs_log_data = fabs(log2(fabs(min))) > fabs(log2(fabs(max))) ? fabs(log2(fabs(min))) : fabs(log2(fabs(max))); + float min_log_data = max_abs_log_data; + bool positive = true; + for(size_t i=0; i 0){ + log_data[i] = log2(log_data[i]); + if(log_data[i] > max_abs_log_data) max_abs_log_data = log_data[i]; + if(log_data[i] < min_log_data) min_log_data = log_data[i]; + } + } + + float valueRangeSize, medianValue_f; + computeRangeSize_float(log_data, dataLength, &valueRangeSize, &medianValue_f); + if(fabs(min_log_data) > max_abs_log_data) max_abs_log_data = fabs(min_log_data); + double realPrecision = log2(1.0 + pwrErrRatio) - max_abs_log_data * 1.2e-7; + for(size_t i=0; iminLogValue = min_log_data - 1.0001*realPrecision; + free(log_data); + if(!positive){ + unsigned char * comp_signs; + // compress signs + unsigned long signSize = sz_lossless_compress(ZSTD_COMPRESSOR, 3, signs, dataLength, &comp_signs); + tdps->pwrErrBoundBytes = comp_signs; + tdps->pwrErrBoundBytes_size = signSize; + } + else{ + tdps->pwrErrBoundBytes = NULL; + tdps->pwrErrBoundBytes_size = 0; + } + free(signs); + + convertTDPStoFlatBytes_float(tdps, newByteData, outSize); + if(*outSize>3 + 
MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 1 + sizeof(float)*dataLength) + SZ_compress_args_float_StoreOriData(oriData, dataLength, newByteData, outSize); + + free_TightDataPointStorageF(tdps); +} + +void SZ_compress_args_float_NoCkRngeNoGzip_2D_pwr_pre_log(unsigned char** newByteData, float *oriData, double pwrErrRatio, size_t r1, size_t r2, size_t *outSize, float min, float max){ + + size_t dataLength = r1 * r2; + float * log_data = (float *) malloc(dataLength * sizeof(float)); + + unsigned char * signs = (unsigned char *) malloc(dataLength); + memset(signs, 0, dataLength); + // preprocess + float max_abs_log_data; + if(min == 0) max_abs_log_data = fabs(log2(fabs(max))); + else if(max == 0) max_abs_log_data = fabs(log2(fabs(min))); + else max_abs_log_data = fabs(log2(fabs(min))) > fabs(log2(fabs(max))) ? fabs(log2(fabs(min))) : fabs(log2(fabs(max))); + float min_log_data = max_abs_log_data; + bool positive = true; + for(size_t i=0; i 0){ + log_data[i] = log2(log_data[i]); + if(log_data[i] > max_abs_log_data) max_abs_log_data = log_data[i]; + if(log_data[i] < min_log_data) min_log_data = log_data[i]; + } + } + + float valueRangeSize, medianValue_f; + computeRangeSize_float(log_data, dataLength, &valueRangeSize, &medianValue_f); + if(fabs(min_log_data) > max_abs_log_data) max_abs_log_data = fabs(min_log_data); + double realPrecision = log2(1.0 + pwrErrRatio) - max_abs_log_data * 1.2e-7; + for(size_t i=0; iminLogValue = min_log_data - 1.0001*realPrecision; + free(log_data); + if(!positive){ + unsigned char * comp_signs; + // compress signs + unsigned long signSize = sz_lossless_compress(ZSTD_COMPRESSOR, 3, signs, dataLength, &comp_signs); + tdps->pwrErrBoundBytes = comp_signs; + tdps->pwrErrBoundBytes_size = signSize; + } + else{ + tdps->pwrErrBoundBytes = NULL; + tdps->pwrErrBoundBytes_size = 0; + } + free(signs); + + convertTDPStoFlatBytes_float(tdps, newByteData, outSize); + if(*outSize>3 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 1 + 
sizeof(float)*dataLength) + SZ_compress_args_float_StoreOriData(oriData, dataLength, newByteData, outSize); + + free_TightDataPointStorageF(tdps); +} + +void SZ_compress_args_float_NoCkRngeNoGzip_3D_pwr_pre_log(unsigned char** newByteData, float *oriData, double pwrErrRatio, size_t r1, size_t r2, size_t r3, size_t *outSize, float min, float max){ + + size_t dataLength = r1 * r2 * r3; + float * log_data = (float *) malloc(dataLength * sizeof(float)); + + unsigned char * signs = (unsigned char *) malloc(dataLength); + memset(signs, 0, dataLength); + // preprocess + float max_abs_log_data; + if(min == 0) max_abs_log_data = fabs(log2(fabs(max))); + else if(max == 0) max_abs_log_data = fabs(log2(fabs(min))); + else max_abs_log_data = fabs(log2(fabs(min))) > fabs(log2(fabs(max))) ? fabs(log2(fabs(min))) : fabs(log2(fabs(max))); + float min_log_data = max_abs_log_data; + bool positive = true; + for(size_t i=0; i 0){ + log_data[i] = log2(log_data[i]); + if(log_data[i] > max_abs_log_data) max_abs_log_data = log_data[i]; + if(log_data[i] < min_log_data) min_log_data = log_data[i]; + } + } + + float valueRangeSize, medianValue_f; + computeRangeSize_float(log_data, dataLength, &valueRangeSize, &medianValue_f); + if(fabs(min_log_data) > max_abs_log_data) max_abs_log_data = fabs(min_log_data); + double realPrecision = log2(1.0 + pwrErrRatio) - max_abs_log_data * 1.2e-7; + for(size_t i=0; iminLogValue = min_log_data - 1.0001*realPrecision; + free(log_data); + if(!positive){ + unsigned char * comp_signs; + // compress signs + unsigned long signSize = sz_lossless_compress(ZSTD_COMPRESSOR, 3, signs, dataLength, &comp_signs); + tdps->pwrErrBoundBytes = comp_signs; + tdps->pwrErrBoundBytes_size = signSize; + } + else{ + tdps->pwrErrBoundBytes = NULL; + tdps->pwrErrBoundBytes_size = 0; + } + free(signs); + + convertTDPStoFlatBytes_float(tdps, newByteData, outSize); + if(*outSize>3 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 1 + sizeof(float)*dataLength) + 
SZ_compress_args_float_StoreOriData(oriData, dataLength, newByteData, outSize); + + free_TightDataPointStorageF(tdps); +} + + +void SZ_compress_args_float_NoCkRngeNoGzip_1D_pwr_pre_log_MSST19(unsigned char** newByteData, float *oriData, double pwrErrRatio, size_t dataLength, size_t *outSize, float valueRangeSize, float medianValue_f, + unsigned char* signs, bool* positive, float min, float max, float nearZero){ + float multiplier = pow((1+pwrErrRatio), -3.0001); + for(int i=0; iminLogValue = nearZero / ((1+pwrErrRatio)*(1+pwrErrRatio)); + if(!(*positive)){ + unsigned char * comp_signs; + // compress signs + unsigned long signSize = sz_lossless_compress(ZSTD_COMPRESSOR, 3, signs, dataLength, &comp_signs); + tdps->pwrErrBoundBytes = comp_signs; + tdps->pwrErrBoundBytes_size = signSize; + } + else{ + tdps->pwrErrBoundBytes = NULL; + tdps->pwrErrBoundBytes_size = 0; + } + free(signs); + + convertTDPStoFlatBytes_float(tdps, newByteData, outSize); + if(*outSize>3 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 1 + sizeof(float)*dataLength) + SZ_compress_args_float_StoreOriData(oriData, dataLength, newByteData, outSize); + + free_TightDataPointStorageF(tdps); +} + +void SZ_compress_args_float_NoCkRngeNoGzip_2D_pwr_pre_log_MSST19(unsigned char** newByteData, float *oriData, double pwrErrRatio, size_t r1, size_t r2, size_t *outSize, float valueRangeSize, + unsigned char* signs, bool* positive, float min, float max, float nearZero){ + + size_t dataLength = r1 * r2; + + float multiplier = pow((1+pwrErrRatio), -3.0001); + for(int i=0; iminLogValue = nearZero / ((1+pwrErrRatio)*(1+pwrErrRatio)); + + if(!*positive){ + unsigned char * comp_signs; + // compress signs + unsigned long signSize = sz_lossless_compress(confparams_cpr->losslessCompressor, confparams_cpr->gzipMode, signs, dataLength, &comp_signs); + tdps->pwrErrBoundBytes = comp_signs; + tdps->pwrErrBoundBytes_size = signSize; + } + else{ + tdps->pwrErrBoundBytes = NULL; + tdps->pwrErrBoundBytes_size = 0; + } + 
free(signs); + + convertTDPStoFlatBytes_float(tdps, newByteData, outSize); + if(*outSize>3 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 1 + sizeof(float)*dataLength) + SZ_compress_args_float_StoreOriData(oriData, dataLength, newByteData, outSize); + + free_TightDataPointStorageF(tdps); +} + +void SZ_compress_args_float_NoCkRngeNoGzip_3D_pwr_pre_log_MSST19(unsigned char** newByteData, float *oriData, double pwrErrRatio, size_t r1, size_t r2, size_t r3, size_t *outSize, float valueRangeSize, unsigned char* signs, bool* positive, float min, float max, float nearZero){ + + size_t dataLength = r1 * r2 * r3; + + float multiplier = pow((1+pwrErrRatio), -3.0001); + for(int i=0; iminLogValue = nearZero / ((1+pwrErrRatio)*(1+pwrErrRatio)); + + if(!*positive){ + unsigned char * comp_signs; + // compress signs + unsigned long signSize = sz_lossless_compress(confparams_cpr->losslessCompressor, confparams_cpr->gzipMode, signs, dataLength, &comp_signs); + tdps->pwrErrBoundBytes = comp_signs; + tdps->pwrErrBoundBytes_size = signSize; + } + else{ + tdps->pwrErrBoundBytes = NULL; + tdps->pwrErrBoundBytes_size = 0; + } + free(signs); + + + convertTDPStoFlatBytes_float(tdps, newByteData, outSize); + if(*outSize>3 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 1 + sizeof(float)*dataLength) + SZ_compress_args_float_StoreOriData(oriData, dataLength, newByteData, outSize); + + free_TightDataPointStorageF(tdps); +} diff --git a/deps/SZ/sz/src/sz_float_ts.c b/deps/SZ/sz/src/sz_float_ts.c new file mode 100644 index 0000000000000000000000000000000000000000..ea29245ec5b7ab383c47efd2cc4d48cc5e81128c --- /dev/null +++ b/deps/SZ/sz/src/sz_float_ts.c @@ -0,0 +1,207 @@ +/** + * @file sz_float.c + * @author Sheng Di and Dingwen Tao + * @date Aug, 2016 + * @brief SZ_Init, Compression and Decompression functions + * (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. 
+ */ + + +#include +#include +#include +#include +#include +#include "sz.h" +#include "CompressElement.h" +#include "DynamicByteArray.h" +#include "DynamicIntArray.h" +#include "TightDataPointStorageF.h" +#include "zlib.h" +#include "rw.h" +#include "sz_float_ts.h" + +unsigned int optimize_intervals_float_1D_ts(float *oriData, size_t dataLength, float* preData, double realPrecision) +{ + size_t i = 0, radiusIndex; + float pred_value = 0, pred_err; + size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t)); + memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t)); + size_t totalSampleSize = dataLength/confparams_cpr->sampleDistance; + for(i=2;isampleDistance==0) + { + pred_value = preData[i]; + pred_err = fabs(pred_value - oriData[i]); + radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2); + if(radiusIndex>=confparams_cpr->maxRangeRadius) + radiusIndex = confparams_cpr->maxRangeRadius - 1; + intervals[radiusIndex]++; + } + } + //compute the appropriate number + size_t targetCount = totalSampleSize*confparams_cpr->predThreshold; + size_t sum = 0; + for(i=0;imaxRangeRadius;i++) + { + sum += intervals[i]; + if(sum>targetCount) + break; + } + if(i>=confparams_cpr->maxRangeRadius) + i = confparams_cpr->maxRangeRadius-1; + + unsigned int accIntervals = 2*(i+1); + unsigned int powerOf2 = roundUpToPowerOf2(accIntervals); + + if(powerOf2<32) + powerOf2 = 32; + + free(intervals); + return powerOf2; +} + +TightDataPointStorageF* SZ_compress_float_1D_MDQ_ts(float *oriData, size_t dataLength, sz_multisteps* multisteps, +double realPrecision, float valueRangeSize, float medianValue_f) +{ + float* preStepData = (float*)(multisteps->hist_data); + + //store the decompressed data + //float* decData = (float*)malloc(sizeof(float)*dataLength); + //memset(decData, 0, sizeof(float)*dataLength); + float* decData = preStepData; + + unsigned int quantization_intervals; + if(exe_params->optQuantMode==1) + quantization_intervals = 
optimize_intervals_float_1D_ts(oriData, dataLength, preStepData, realPrecision); + else + quantization_intervals = exe_params->intvCapacity; + updateQuantizationInfo(quantization_intervals); + + size_t i; + int reqLength; + float medianValue = medianValue_f; + short radExpo = getExponent_float(valueRangeSize/2); + + computeReqLength_float(realPrecision, radExpo, &reqLength, &medianValue); + + int* type = (int*) malloc(dataLength*sizeof(int)); + + float* spaceFillingValue = oriData; // + + DynamicIntArray *exactLeadNumArray; + new_DIA(&exactLeadNumArray, DynArrayInitLen); + + DynamicByteArray *exactMidByteArray; + new_DBA(&exactMidByteArray, DynArrayInitLen); + + DynamicIntArray *resiBitArray; + new_DIA(&resiBitArray, DynArrayInitLen); + + unsigned char preDataBytes[4]; + intToBytes_bigEndian(preDataBytes, 0); + + int reqBytesLength = reqLength/8; + int resiBitsLength = reqLength%8; + + FloatValueCompressElement *vce = (FloatValueCompressElement*)malloc(sizeof(FloatValueCompressElement)); + LossyCompressionElement *lce = (LossyCompressionElement*)malloc(sizeof(LossyCompressionElement)); + + //add the first data + type[0] = 0; + compressSingleFloatValue(vce, spaceFillingValue[0], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + decData[0] = vce->data; + + //add the second data + type[1] = 0; + compressSingleFloatValue(vce, spaceFillingValue[1], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + decData[1] = vce->data; + + int state = 0; + double checkRadius = 0; + float curData = 0; + float pred = 0; + 
float predAbsErr = 0; + checkRadius = (exe_params->intvCapacity-1)*realPrecision; + double interval = 2*realPrecision; + + for(i=2;i=pred) + { + type[i] = exe_params->intvRadius+state; + pred = pred + state*interval; + } + else //curDataintvRadius-state; + pred = pred - state*interval; + } + + //double-check the prediction error in case of machine-epsilon impact + if(fabs(curData-pred)>realPrecision) + { + type[i] = 0; + compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + decData[i] = vce->data; + } + else + { + decData[i] = pred; + } + + continue; + } + + //unpredictable data processing + type[i] = 0; + compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + decData[i] = vce->data; + }//end of for + + size_t exactDataNum = exactLeadNumArray->size; + + TightDataPointStorageF* tdps; + + new_TightDataPointStorageF(&tdps, dataLength, exactDataNum, + type, exactMidByteArray->array, exactMidByteArray->size, + exactLeadNumArray->array, + resiBitArray->array, resiBitArray->size, + resiBitsLength, + realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0); + + //free memory + free_DIA(exactLeadNumArray); + free_DIA(resiBitArray); + free(type); + free(vce); + free(lce); + free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps); + + //memcpy(preStepData, decData, dataLength*sizeof(float)); //update the data + //free(decData); + + return tdps; +} + + diff --git 
a/deps/SZ/sz/src/sz_int16.c b/deps/SZ/sz/src/sz_int16.c new file mode 100644 index 0000000000000000000000000000000000000000..0d0c229930632d3091eab006a5cc5251fa53a16e --- /dev/null +++ b/deps/SZ/sz/src/sz_int16.c @@ -0,0 +1,1385 @@ +/** + * @file sz_int16.c + * @author Sheng Di + * @date Aug, 2017 + * @brief sz_int16, Compression and Decompression functions + * (C) 2017 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. + */ + + +#include +#include +#include +#include +#include +#include "sz.h" +#include "CompressElement.h" +#include "DynamicByteArray.h" +#include "DynamicIntArray.h" +#include "zlib.h" +#include "rw.h" +#include "TightDataPointStorageI.h" +#include "sz_int16.h" +#include "utility.h" + +unsigned int optimize_intervals_int16_1D(int16_t *oriData, size_t dataLength, double realPrecision) +{ + size_t i = 0, radiusIndex; + int64_t pred_value = 0, pred_err; + size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t)); + memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t)); + size_t totalSampleSize = dataLength/confparams_cpr->sampleDistance; + for(i=2;isampleDistance==0) + { + //pred_value = 2*oriData[i-1] - oriData[i-2]; + pred_value = oriData[i-1]; + pred_err = llabs(pred_value - oriData[i]); + radiusIndex = (uint64_t)((pred_err/realPrecision+1)/2); + if(radiusIndex>=confparams_cpr->maxRangeRadius) + radiusIndex = confparams_cpr->maxRangeRadius - 1; + intervals[radiusIndex]++; + } + } + //compute the appropriate number + size_t targetCount = totalSampleSize*confparams_cpr->predThreshold; + size_t sum = 0; + for(i=0;imaxRangeRadius;i++) + { + sum += intervals[i]; + if(sum>targetCount) + break; + } + if(i>=confparams_cpr->maxRangeRadius) + i = confparams_cpr->maxRangeRadius-1; + + unsigned int accIntervals = 2*(i+1); + unsigned int powerOf2 = roundUpToPowerOf2(accIntervals); + + if(powerOf2<32) + powerOf2 = 32; + + free(intervals); + 
//printf("accIntervals=%d, powerOf2=%d\n", accIntervals, powerOf2); + return powerOf2; +} + +unsigned int optimize_intervals_int16_2D(int16_t *oriData, size_t r1, size_t r2, double realPrecision) +{ + size_t i,j, index; + size_t radiusIndex; + int64_t pred_value = 0, pred_err; + size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t)); + memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t)); + size_t totalSampleSize = (r1-1)*(r2-1)/confparams_cpr->sampleDistance; + for(i=1;isampleDistance==0) + { + index = i*r2+j; + pred_value = oriData[index-1] + oriData[index-r2] - oriData[index-r2-1]; + pred_err = llabs(pred_value - oriData[index]); + radiusIndex = (uint64_t)((pred_err/realPrecision+1)/2); + if(radiusIndex>=confparams_cpr->maxRangeRadius) + radiusIndex = confparams_cpr->maxRangeRadius - 1; + intervals[radiusIndex]++; + } + } + } + //compute the appropriate number + size_t targetCount = totalSampleSize*confparams_cpr->predThreshold; + size_t sum = 0; + for(i=0;imaxRangeRadius;i++) + { + sum += intervals[i]; + if(sum>targetCount) + break; + } + if(i>=confparams_cpr->maxRangeRadius) + i = confparams_cpr->maxRangeRadius-1; + unsigned int accIntervals = 2*(i+1); + unsigned int powerOf2 = roundUpToPowerOf2(accIntervals); + + if(powerOf2<32) + powerOf2 = 32; + + free(intervals); + //printf("confparams_cpr->maxRangeRadius = %d, accIntervals=%d, powerOf2=%d\n", confparams_cpr->maxRangeRadius, accIntervals, powerOf2); + return powerOf2; +} + +unsigned int optimize_intervals_int16_3D(int16_t *oriData, size_t r1, size_t r2, size_t r3, double realPrecision) +{ + size_t i,j,k, index; + size_t radiusIndex; + size_t r23=r2*r3; + int64_t pred_value = 0, pred_err; + size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t)); + memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t)); + size_t totalSampleSize = (r1-1)*(r2-1)*(r3-1)/confparams_cpr->sampleDistance; + for(i=1;isampleDistance==0) + { + index = 
i*r23+j*r3+k; + pred_value = oriData[index-1] + oriData[index-r3] + oriData[index-r23] + - oriData[index-1-r23] - oriData[index-r3-1] - oriData[index-r3-r23] + oriData[index-r3-r23-1]; + pred_err = llabs(pred_value - oriData[index]); + radiusIndex = (pred_err/realPrecision+1)/2; + if(radiusIndex>=confparams_cpr->maxRangeRadius) + { + radiusIndex = confparams_cpr->maxRangeRadius - 1; + //printf("radiusIndex=%d\n", radiusIndex); + } + intervals[radiusIndex]++; + } + } + } + } + //compute the appropriate number + size_t targetCount = totalSampleSize*confparams_cpr->predThreshold; + size_t sum = 0; + for(i=0;imaxRangeRadius;i++) + { + sum += intervals[i]; + if(sum>targetCount) + break; + } + if(i>=confparams_cpr->maxRangeRadius) + i = confparams_cpr->maxRangeRadius-1; + unsigned int accIntervals = 2*(i+1); + unsigned int powerOf2 = roundUpToPowerOf2(accIntervals); + + if(powerOf2<32) + powerOf2 = 32; + + free(intervals); + //printf("targetCount=%d, sum=%d, totalSampleSize=%d, ratio=%f, accIntervals=%d, powerOf2=%d\n", targetCount, sum, totalSampleSize, (double)sum/(double)totalSampleSize, accIntervals, powerOf2); + return powerOf2; +} + + +unsigned int optimize_intervals_int16_4D(int16_t *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision) +{ + size_t i,j,k,l, index; + size_t radiusIndex; + size_t r234=r2*r3*r4; + size_t r34=r3*r4; + int64_t pred_value = 0, pred_err; + size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t)); + memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t)); + size_t totalSampleSize = (r1-1)*(r2-1)*(r3-1)*(r4-1)/confparams_cpr->sampleDistance; + for(i=1;isampleDistance==0) + { + index = i*r234+j*r34+k*r4+l; + pred_value = oriData[index-1] + oriData[index-r3] + oriData[index-r34] + - oriData[index-1-r34] - oriData[index-r4-1] - oriData[index-r4-r34] + oriData[index-r4-r34-1]; + pred_err = llabs(pred_value - oriData[index]); + radiusIndex = (uint64_t)((pred_err/realPrecision+1)/2); + 
if(radiusIndex>=confparams_cpr->maxRangeRadius) + radiusIndex = confparams_cpr->maxRangeRadius - 1; + intervals[radiusIndex]++; + } + } + } + } + } + //compute the appropriate number + size_t targetCount = totalSampleSize*confparams_cpr->predThreshold; + size_t sum = 0; + for(i=0;imaxRangeRadius;i++) + { + sum += intervals[i]; + if(sum>targetCount) + break; + } + if(i>=confparams_cpr->maxRangeRadius) + i = confparams_cpr->maxRangeRadius-1; + + unsigned int accIntervals = 2*(i+1); + unsigned int powerOf2 = roundUpToPowerOf2(accIntervals); + + if(powerOf2<32) + powerOf2 = 32; + + free(intervals); + return powerOf2; +} + +TightDataPointStorageI* SZ_compress_int16_1D_MDQ(int16_t *oriData, size_t dataLength, double realPrecision, int64_t valueRangeSize, int64_t minValue) +{ + unsigned char bytes[8] = {0,0,0,0,0,0,0,0}; + int byteSize = computeByteSizePerIntValue(valueRangeSize); + unsigned int quantization_intervals; + if(exe_params->optQuantMode==1) + quantization_intervals = optimize_intervals_int16_1D(oriData, dataLength, realPrecision); + else + quantization_intervals = exe_params->intvCapacity; + updateQuantizationInfo(quantization_intervals); + size_t i; + + int* type = (int*) malloc(dataLength*sizeof(int)); + + int16_t* spaceFillingValue = oriData; // + + DynamicByteArray *exactDataByteArray; + new_DBA(&exactDataByteArray, DynArrayInitLen); + + int64_t last3CmprsData[3] = {0,0,0}; + + //add the first data + type[0] = 0; + compressInt16Value(spaceFillingValue[0], minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + listAdd_int(last3CmprsData, spaceFillingValue[0]); + + type[1] = 0; + compressInt16Value(spaceFillingValue[1], minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + listAdd_int(last3CmprsData, spaceFillingValue[1]); + //printf("%.30G\n",last3CmprsData[0]); + + int state; + double checkRadius = (exe_params->intvCapacity-1)*realPrecision; + int64_t curData; + int64_t pred, predAbsErr; + double 
interval = 2*realPrecision; + + for(i=2;i=pred) + { + type[i] = exe_params->intvRadius+state; + pred = pred + state*interval; + } + else //curDataintvRadius-state; + pred = pred - state*interval; + } + if(pred>SZ_INT16_MAX) pred = SZ_INT16_MAX; + if(predsize / byteSize; + + TightDataPointStorageI* tdps; + + new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize, + type, exactDataByteArray->array, exactDataByteArray->size, + realPrecision, minValue, quantization_intervals, SZ_INT16); + +//sdi:Debug +/* int sum =0; + for(i=0;iarray has been released in free_TightDataPointStorageF(tdps); + + return tdps; +} + +void SZ_compress_args_int16_StoreOriData(int16_t* oriData, size_t dataLength, TightDataPointStorageI* tdps, +unsigned char** newByteData, size_t *outSize) +{ + int intSize=sizeof(int16_t); + size_t k = 0, i; + tdps->isLossless = 1; + size_t totalByteLength = 3 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 1 + intSize*dataLength; + *newByteData = (unsigned char*)malloc(totalByteLength); + + unsigned char dsLengthBytes[8]; + for (i = 0; i < 3; i++)//3 + (*newByteData)[k++] = versionNumber[i]; + + if(exe_params->SZ_SIZE_TYPE==4)//1 + (*newByteData)[k++] = 16; //00010000 + else + (*newByteData)[k++] = 80; //01010000: 01000000 indicates the SZ_SIZE_TYPE=8 + + convertSZParamsToBytes(confparams_cpr, &((*newByteData)[k])); + k = k + MetaDataByteLength; + + sizeToBytes(dsLengthBytes,dataLength); //SZ_SIZE_TYPE: 4 or 8 + for (i = 0; i < exe_params->SZ_SIZE_TYPE; i++) + (*newByteData)[k++] = dsLengthBytes[i]; + + if(sysEndianType==BIG_ENDIAN_SYSTEM) + memcpy((*newByteData)+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE, oriData, dataLength*intSize); + else + { + unsigned char* p = (*newByteData)+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE; + for(i=0;i dataLength*sizeof(int16_t)) + SZ_compress_args_int16_StoreOriData(oriData, dataLength+2, tdps, newByteData, outSize); + free_TightDataPointStorageI(tdps); +} + +TightDataPointStorageI* 
SZ_compress_int16_2D_MDQ(int16_t *oriData, size_t r1, size_t r2, double realPrecision, int64_t valueRangeSize, int64_t minValue) +{ + unsigned char bytes[8] = {0,0,0,0,0,0,0,0}; + int byteSize = computeByteSizePerIntValue(valueRangeSize); + + unsigned int quantization_intervals; + if(exe_params->optQuantMode==1) + { + quantization_intervals = optimize_intervals_int16_2D(oriData, r1, r2, realPrecision); + updateQuantizationInfo(quantization_intervals); + } + else + quantization_intervals = exe_params->intvCapacity; + size_t i,j; + int64_t pred1D, pred2D, curValue, tmp; + int diff = 0.0; + double itvNum = 0; + int16_t *P0, *P1; + + size_t dataLength = r1*r2; + + P0 = (int16_t*)malloc(r2*sizeof(int16_t)); + memset(P0, 0, r2*sizeof(int16_t)); + P1 = (int16_t*)malloc(r2*sizeof(int16_t)); + memset(P1, 0, r2*sizeof(int16_t)); + + int* type = (int*) malloc(dataLength*sizeof(int)); + //type[dataLength]=0; + + int16_t* spaceFillingValue = oriData; // + + DynamicByteArray *exactDataByteArray; + new_DBA(&exactDataByteArray, DynArrayInitLen); + + type[0] = 0; + curValue = P1[0] = spaceFillingValue[0]; + compressInt16Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + + /* Process Row-0 data 1*/ + pred1D = P1[0]; + diff = spaceFillingValue[1] - pred1D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[1] = (int) (itvNum/2) + exe_params->intvRadius; + tmp = pred1D + 2 * (type[1] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_INT16_MIN&&tmp data r2-1 */ + for (j = 2; j < r2; j++) + { + pred1D = 2*P1[j-1] - P1[j-2]; + diff = spaceFillingValue[j] - pred1D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[j] = (int) (itvNum/2) + exe_params->intvRadius; + tmp = pred1D + 2 * (type[j] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_INT16_MIN&&tmp Row-r1-1 
*/ + size_t index; + for (i = 1; i < r1; i++) + { + /* Process row-i data 0 */ + index = i*r2; + pred1D = P1[0]; + diff = spaceFillingValue[index] - pred1D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + tmp = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_INT16_MIN&&tmp r2-1*/ + for (j = 1; j < r2; j++) + { + index = i*r2+j; + pred2D = P0[j-1] + P1[j] - P1[j-1]; + + diff = spaceFillingValue[index] - pred2D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + tmp = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_INT16_MIN&&tmpsize; + + TightDataPointStorageI* tdps; + + new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize, + type, exactDataByteArray->array, exactDataByteArray->size, + realPrecision, minValue, quantization_intervals, SZ_INT16); + + //free memory + free(type); + free(exactDataByteArray); //exactDataByteArray->array has been released in free_TightDataPointStorageF(tdps); + + return tdps; +} + +/** + * + * Note: @r1 is high dimension + * @r2 is low dimension + * */ +void SZ_compress_args_int16_NoCkRngeNoGzip_2D(unsigned char** newByteData, int16_t *oriData, size_t r1, size_t r2, double realPrecision, size_t *outSize, +int64_t valueRangeSize, int16_t minValue) +{ + TightDataPointStorageI* tdps = SZ_compress_int16_2D_MDQ(oriData, r1, r2, realPrecision, valueRangeSize, minValue); + + convertTDPStoFlatBytes_int(tdps, newByteData, outSize); + + size_t dataLength = r1*r2; + if(*outSize>dataLength*sizeof(int16_t)) + SZ_compress_args_int16_StoreOriData(oriData, dataLength, tdps, newByteData, outSize); + + free_TightDataPointStorageI(tdps); +} + +TightDataPointStorageI* SZ_compress_int16_3D_MDQ(int16_t 
*oriData, size_t r1, size_t r2, size_t r3, double realPrecision, int64_t valueRangeSize, int64_t minValue) +{ + unsigned char bytes[8] = {0,0,0,0,0,0,0,0}; + int byteSize = computeByteSizePerIntValue(valueRangeSize); + + unsigned int quantization_intervals; + if(exe_params->optQuantMode==1) + { + quantization_intervals = optimize_intervals_int16_3D(oriData, r1, r2, r3, realPrecision); + updateQuantizationInfo(quantization_intervals); + } + else + quantization_intervals = exe_params->intvCapacity; + size_t i,j,k; + int64_t pred1D, pred2D, pred3D, curValue, tmp; + int diff = 0.0; + double itvNum = 0; + int16_t *P0, *P1; + + size_t dataLength = r1*r2*r3; + + size_t r23 = r2*r3; + P0 = (int16_t*)malloc(r23*sizeof(int16_t)); + P1 = (int16_t*)malloc(r23*sizeof(int16_t)); + + int* type = (int*) malloc(dataLength*sizeof(int)); + + int16_t* spaceFillingValue = oriData; // + + DynamicByteArray *exactDataByteArray; + new_DBA(&exactDataByteArray, DynArrayInitLen); + + type[0] = 0; + P1[0] = spaceFillingValue[0]; + compressInt16Value(spaceFillingValue[0], minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + + /* Process Row-0 data 1*/ + pred1D = P1[0]; + diff = spaceFillingValue[1] - pred1D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[1] = (int) (itvNum/2) + exe_params->intvRadius; + tmp = pred1D + 2 * (type[1] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_INT16_MIN&&tmp data r3-1 */ + for (j = 2; j < r3; j++) + { + pred1D = 2*P1[j-1] - P1[j-2]; + diff = spaceFillingValue[j] - pred1D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[j] = (int) (itvNum/2) + exe_params->intvRadius; + tmp = pred1D + 2 * (type[j] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_INT16_MIN&&tmp Row-r2-1 */ + size_t index; + for (i = 1; i < r2; i++) + { + /* Process row-i data 0 */ 
+ index = i*r3; + pred1D = P1[index-r3]; + diff = spaceFillingValue[index] - pred1D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + tmp = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_INT16_MIN&&tmp data r3-1*/ + for (j = 1; j < r3; j++) + { + index = i*r3+j; + pred2D = P1[index-1] + P1[index-r3] - P1[index-r3-1]; + + diff = spaceFillingValue[index] - pred2D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + tmp = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_INT16_MIN&&tmp layer-r1-1 /////////////////////////// + + for (k = 1; k < r1; k++) + { + /* Process Row-0 data 0*/ + index = k*r23; + pred1D = P1[0]; + diff = spaceFillingValue[index] - pred1D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + tmp = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_INT16_MIN&&tmp data r3-1 */ + for (j = 1; j < r3; j++) + { + //index = k*r2*r3+j; + index ++; + pred2D = P0[j-1] + P1[j] - P1[j-1]; + diff = spaceFillingValue[index] - pred2D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + tmp = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_INT16_MIN&&tmp Row-r2-1 */ + size_t index2D; + for (i = 1; i < r2; i++) + { + /* Process Row-i data 0 */ + index = k*r23 + i*r3; + index2D = i*r3; + pred2D = P0[index2D-r3] + P1[index2D] - P1[index2D-r3]; + diff = spaceFillingValue[index] - pred2D; + + itvNum = 
llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + tmp = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_INT16_MIN&&tmp data r3-1 */ + for (j = 1; j < r3; j++) + { +// if(k==63&&i==43&&j==27) +// printf("i=%d\n", i); + //index = k*r2*r3 + i*r3 + j; + index ++; + index2D = i*r3 + j; + pred3D = P0[index2D-1] + P0[index2D-r3]+ P1[index2D] - P0[index2D-r3-1] - P1[index2D-r3] - P1[index2D-1] + P1[index2D-r3-1]; + diff = spaceFillingValue[index] - pred3D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + tmp = pred3D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_INT16_MIN&&tmpsize; + + TightDataPointStorageI* tdps; + + new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize, + type, exactDataByteArray->array, exactDataByteArray->size, + realPrecision, minValue, quantization_intervals, SZ_INT16); + + //free memory + free(type); + free(exactDataByteArray); //exactDataByteArray->array has been released in free_TightDataPointStorageF(tdps); + + return tdps; +} + + +void SZ_compress_args_int16_NoCkRngeNoGzip_3D(unsigned char** newByteData, int16_t *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t *outSize, +int64_t valueRangeSize, int64_t minValue) +{ + TightDataPointStorageI* tdps = SZ_compress_int16_3D_MDQ(oriData, r1, r2, r3, realPrecision, valueRangeSize, minValue); + + convertTDPStoFlatBytes_int(tdps, newByteData, outSize); + + size_t dataLength = r1*r2*r3; + if(*outSize>dataLength*sizeof(int16_t)) + SZ_compress_args_int16_StoreOriData(oriData, dataLength, tdps, newByteData, outSize); + + free_TightDataPointStorageI(tdps); +} + + +TightDataPointStorageI* SZ_compress_int16_4D_MDQ(int16_t *oriData, size_t r1, size_t r2, 
size_t r3, size_t r4, double realPrecision, int64_t valueRangeSize, int64_t minValue) +{ + unsigned char bytes[8] = {0,0,0,0,0,0,0,0}; + int byteSize = computeByteSizePerIntValue(valueRangeSize); + + unsigned int quantization_intervals; + if(exe_params->optQuantMode==1) + { + quantization_intervals = optimize_intervals_int16_4D(oriData, r1, r2, r3, r4, realPrecision); + updateQuantizationInfo(quantization_intervals); + } + else + quantization_intervals = exe_params->intvCapacity; + size_t i,j,k; + int64_t pred1D, pred2D, pred3D, curValue, tmp; + int diff = 0.0; + double itvNum = 0; + int16_t *P0, *P1; + + size_t dataLength = r1*r2*r3*r4; + + size_t r234 = r2*r3*r4; + size_t r34 = r3*r4; + + P0 = (int16_t*)malloc(r34*sizeof(int16_t)); + P1 = (int16_t*)malloc(r34*sizeof(int16_t)); + + int* type = (int*) malloc(dataLength*sizeof(int)); + + int16_t* spaceFillingValue = oriData; // + + DynamicByteArray *exactDataByteArray; + new_DBA(&exactDataByteArray, DynArrayInitLen); + + size_t l; + for (l = 0; l < r1; l++) + { + + /////////////////////////// Process layer-0 /////////////////////////// + /* Process Row-0 data 0*/ + size_t index = l*r234; + size_t index2D = 0; + + type[index] = 0; + curValue = P1[index2D] = spaceFillingValue[index]; + compressInt16Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + + /* Process Row-0 data 1*/ + index = l*r234+1; + index2D = 1; + + pred1D = P1[index2D-1]; + diff = curValue - pred1D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + tmp = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_INT16_MIN&&tmp data r4-1 */ + for (j = 2; j < r4; j++) + { + index = l*r234+j; + index2D = j; + + pred1D = 2*P1[index2D-1] - P1[index2D-2]; + diff = spaceFillingValue[index] - pred1D; + + itvNum = llabs(diff)/realPrecision + 1; + + if 
(itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + tmp = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_INT16_MIN&&tmp Row-r3-1 */ + for (i = 1; i < r3; i++) + { + /* Process row-i data 0 */ + index = l*r234+i*r4; + index2D = i*r4; + + pred1D = P1[index2D-r4]; + diff = spaceFillingValue[index] - pred1D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + tmp = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_INT16_MIN&&tmp data r4-1*/ + for (j = 1; j < r4; j++) + { + index = l*r234+i*r4+j; + index2D = i*r4+j; + + pred2D = P1[index2D-1] + P1[index2D-r4] - P1[index2D-r4-1]; + + diff = spaceFillingValue[index] - pred2D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + tmp = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_INT16_MIN&&tmp layer-r2-1 /////////////////////////// + + for (k = 1; k < r2; k++) + { + /* Process Row-0 data 0*/ + index = l*r234+k*r34; + index2D = 0; + + pred1D = P1[index2D]; + diff = spaceFillingValue[index] - pred1D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + tmp = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_INT16_MIN&&tmp data r4-1 */ + for (j = 1; j < r4; j++) + { + index = l*r234+k*r34+j; + index2D = j; + + pred2D = P0[index2D-1] + P1[index2D] - P1[index2D-1]; + diff = spaceFillingValue[index] - pred2D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) 
itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + tmp = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_INT16_MIN&&tmp Row-r3-1 */ + for (i = 1; i < r3; i++) + { + /* Process Row-i data 0 */ + index = l*r234+k*r34+i*r4; + index2D = i*r4; + + pred2D = P0[index2D-r4] + P1[index2D] - P1[index2D-r4]; + diff = spaceFillingValue[index] - pred2D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + tmp = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_INT16_MIN&&tmp data r4-1 */ + for (j = 1; j < r4; j++) + { + index = l*r234+k*r34+i*r4+j; + index2D = i*r4+j; + + pred3D = P0[index2D-1] + P0[index2D-r4]+ P1[index2D] - P0[index2D-r4-1] - P1[index2D-r4] - P1[index2D-1] + P1[index2D-r4-1]; + diff = spaceFillingValue[index] - pred3D; + + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + tmp = pred3D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_INT16_MIN&&tmpsize; + + TightDataPointStorageI* tdps; + + new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize, + type, exactDataByteArray->array, exactDataByteArray->size, + realPrecision, minValue, quantization_intervals, SZ_INT16); + + //free memory + free(type); + free(exactDataByteArray); //exactDataByteArray->array has been released in free_TightDataPointStorageF(tdps); + + return tdps; +} + +void SZ_compress_args_int16_NoCkRngeNoGzip_4D(unsigned char** newByteData, int16_t *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision, +size_t *outSize, int64_t valueRangeSize, int64_t minValue) +{ + TightDataPointStorageI* tdps = SZ_compress_int16_4D_MDQ(oriData, r1, r2, r3, r4, realPrecision, valueRangeSize, 
minValue); + + convertTDPStoFlatBytes_int(tdps, newByteData, outSize); + + size_t dataLength = r1*r2*r3*r4; + if(*outSize>dataLength*sizeof(int16_t)) + SZ_compress_args_int16_StoreOriData(oriData, dataLength, tdps, newByteData, outSize); + + free_TightDataPointStorageI(tdps); +} + +void SZ_compress_args_int16_withinRange(unsigned char** newByteData, int16_t *oriData, size_t dataLength, size_t *outSize) +{ + TightDataPointStorageI* tdps = (TightDataPointStorageI*) malloc(sizeof(TightDataPointStorageI)); + tdps->typeArray = NULL; + + tdps->allSameData = 1; + tdps->dataSeriesLength = dataLength; + tdps->exactDataBytes = (unsigned char*)malloc(sizeof(unsigned char)*2); + tdps->isLossless = 0; + //tdps->exactByteSize = 4; + tdps->exactDataNum = 1; + tdps->exactDataBytes_size = 2; + tdps->dataTypeSize = convertDataTypeSize(sizeof(int16_t)); + + int16_t value = oriData[0]; + int16ToBytes_bigEndian(tdps->exactDataBytes, value); + + size_t tmpOutSize; + convertTDPStoFlatBytes_int(tdps, newByteData, &tmpOutSize); + + *outSize = tmpOutSize;//3+1+sizeof(int16_t)+SZ_SIZE_TYPE; //8==3+1+4(int16_size) + free_TightDataPointStorageI(tdps); +} + +int SZ_compress_args_int16_wRngeNoGzip(unsigned char** newByteData, int16_t *oriData, +size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize, +int errBoundMode, double absErr_Bound, double relBoundRatio) +{ + int status = SZ_SCES; + size_t dataLength = computeDataLength(r5,r4,r3,r2,r1); + int64_t valueRangeSize = 0; + + int16_t minValue = computeRangeSize_int(oriData, SZ_INT16, dataLength, &valueRangeSize); + double realPrecision = getRealPrecision_int(valueRangeSize, errBoundMode, absErr_Bound, relBoundRatio, &status); + + if(valueRangeSize <= realPrecision) + { + SZ_compress_args_int16_withinRange(newByteData, oriData, dataLength, outSize); + } + else + { +// SZ_compress_args_int16_NoCkRngeNoGzip_2D(newByteData, oriData, r2, r1, realPrecision, outSize); + if(r5==0&&r4==0&&r3==0&&r2==0) + { + 
SZ_compress_args_int16_NoCkRngeNoGzip_1D(newByteData, oriData, r1, realPrecision, outSize, valueRangeSize, minValue); + } + else if(r5==0&&r4==0&&r3==0) + { + SZ_compress_args_int16_NoCkRngeNoGzip_2D(newByteData, oriData, r2, r1, realPrecision, outSize, valueRangeSize, minValue); + } + else if(r5==0&&r4==0) + { + SZ_compress_args_int16_NoCkRngeNoGzip_3D(newByteData, oriData, r3, r2, r1, realPrecision, outSize, valueRangeSize, minValue); + } + else if(r5==0) + { + SZ_compress_args_int16_NoCkRngeNoGzip_3D(newByteData, oriData, r4*r3, r2, r1, realPrecision, outSize, valueRangeSize, minValue); + } + } + return status; +} + +int SZ_compress_args_int16(unsigned char** newByteData, int16_t *oriData, +size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize, +int errBoundMode, double absErr_Bound, double relBoundRatio) +{ + confparams_cpr->errorBoundMode = errBoundMode; + + if(errBoundMode>=PW_REL) + { + printf("Error: Current SZ version doesn't support integer data compression with point-wise relative error bound being based on pwrType=AVG\n"); + exit(0); + return SZ_NSCS; + } + int status = SZ_SCES; + size_t dataLength = computeDataLength(r5,r4,r3,r2,r1); + int64_t valueRangeSize = 0; + + int16_t minValue = (int16_t)computeRangeSize_int(oriData, SZ_INT16, dataLength, &valueRangeSize); + double realPrecision = 0; + + if(confparams_cpr->errorBoundMode==PSNR) + { + confparams_cpr->errorBoundMode = ABS; + realPrecision = confparams_cpr->absErrBound = computeABSErrBoundFromPSNR(confparams_cpr->psnr, (double)confparams_cpr->predThreshold, (double)valueRangeSize); + //printf("realPrecision=%lf\n", realPrecision); + } + else + realPrecision = getRealPrecision_int(valueRangeSize, errBoundMode, absErr_Bound, relBoundRatio, &status); + + if(valueRangeSize <= realPrecision) + { + SZ_compress_args_int16_withinRange(newByteData, oriData, dataLength, outSize); + } + else + { + size_t tmpOutSize = 0; + unsigned char* tmpByteData; + if (r2==0) + { + 
SZ_compress_args_int16_NoCkRngeNoGzip_1D(&tmpByteData, oriData, r1, realPrecision, &tmpOutSize, valueRangeSize, minValue); + } + else + if (r3==0) + { + SZ_compress_args_int16_NoCkRngeNoGzip_2D(&tmpByteData, oriData, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, minValue); + } + else + if (r4==0) + { + SZ_compress_args_int16_NoCkRngeNoGzip_3D(&tmpByteData, oriData, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, minValue); + } + else + if (r5==0) + { + SZ_compress_args_int16_NoCkRngeNoGzip_4D(&tmpByteData, oriData, r4, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, minValue); + } + else + { + printf("Error: doesn't support 5 dimensions for now.\n"); + status = SZ_DERR; //dimension error + } + //Call Gzip to do the further compression. + if(confparams_cpr->szMode==SZ_BEST_SPEED) + { + *outSize = tmpOutSize; + *newByteData = tmpByteData; + } + else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION || confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION) + { + *outSize = sz_lossless_compress(confparams_cpr->losslessCompressor, confparams_cpr->gzipMode, tmpByteData, tmpOutSize, newByteData); + free(tmpByteData); + } + else + { + printf("Error: Wrong setting of confparams_cpr->szMode in the int16_t compression.\n"); + status = SZ_MERR; //mode error + } + } + + return status; +} diff --git a/deps/SZ/sz/src/sz_int32.c b/deps/SZ/sz/src/sz_int32.c new file mode 100644 index 0000000000000000000000000000000000000000..7b559c94a8461c0eed1fa4014d5fd15def2b2f3a --- /dev/null +++ b/deps/SZ/sz/src/sz_int32.c @@ -0,0 +1,1269 @@ +/** + * @file sz_int32.c + * @author Sheng Di + * @date Aug, 2017 + * @brief sz_int32, Compression and Decompression functions + * (C) 2017 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. 
+ */ + + +#include +#include +#include +#include +#include +#include "sz.h" +#include "CompressElement.h" +#include "DynamicByteArray.h" +#include "DynamicIntArray.h" +#include "zlib.h" +#include "rw.h" +#include "TightDataPointStorageI.h" +#include "sz_int32.h" +#include "utility.h" + +unsigned int optimize_intervals_int32_1D(int32_t *oriData, size_t dataLength, double realPrecision) +{ + size_t i = 0, radiusIndex; + int64_t pred_value = 0, pred_err; + size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t)); + memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t)); + size_t totalSampleSize = dataLength/confparams_cpr->sampleDistance; + for(i=2;isampleDistance==0) + { + //pred_value = 2*oriData[i-1] - oriData[i-2]; + pred_value = oriData[i-1]; + pred_err = llabs(pred_value - oriData[i]); + radiusIndex = (uint64_t)((pred_err/realPrecision+1)/2); + if(radiusIndex>=confparams_cpr->maxRangeRadius) + radiusIndex = confparams_cpr->maxRangeRadius - 1; + intervals[radiusIndex]++; + } + } + //compute the appropriate number + size_t targetCount = totalSampleSize*confparams_cpr->predThreshold; + size_t sum = 0; + for(i=0;imaxRangeRadius;i++) + { + sum += intervals[i]; + if(sum>targetCount) + break; + } + if(i>=confparams_cpr->maxRangeRadius) + i = confparams_cpr->maxRangeRadius-1; + + unsigned int accIntervals = 2*(i+1); + unsigned int powerOf2 = roundUpToPowerOf2(accIntervals); + + if(powerOf2<32) + powerOf2 = 32; + + free(intervals); + //printf("accIntervals=%d, powerOf2=%d\n", accIntervals, powerOf2); + return powerOf2; +} + +unsigned int optimize_intervals_int32_2D(int32_t *oriData, size_t r1, size_t r2, double realPrecision) +{ + size_t i,j, index; + size_t radiusIndex; + int64_t pred_value = 0, pred_err; + size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t)); + memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t)); + size_t totalSampleSize = r1*r2/confparams_cpr->sampleDistance; + 
for(i=1;isampleDistance==0) + { + index = i*r2+j; + pred_value = oriData[index-1] + oriData[index-r2] - oriData[index-r2-1]; + pred_err = llabs(pred_value - oriData[index]); + radiusIndex = (uint64_t)((pred_err/realPrecision+1)/2); + if(radiusIndex>=confparams_cpr->maxRangeRadius) + radiusIndex = confparams_cpr->maxRangeRadius - 1; + intervals[radiusIndex]++; + } + } + } + //compute the appropriate number + size_t targetCount = totalSampleSize*confparams_cpr->predThreshold; + size_t sum = 0; + for(i=0;imaxRangeRadius;i++) + { + sum += intervals[i]; + if(sum>targetCount) + break; + } + if(i>=confparams_cpr->maxRangeRadius) + i = confparams_cpr->maxRangeRadius-1; + unsigned int accIntervals = 2*(i+1); + unsigned int powerOf2 = roundUpToPowerOf2(accIntervals); + + if(powerOf2<32) + powerOf2 = 32; + + free(intervals); + //printf("confparams_cpr->maxRangeRadius = %d, accIntervals=%d, powerOf2=%d\n", confparams_cpr->maxRangeRadius, accIntervals, powerOf2); + return powerOf2; +} + +unsigned int optimize_intervals_int32_3D(int32_t *oriData, size_t r1, size_t r2, size_t r3, double realPrecision) +{ + size_t i,j,k, index; + size_t radiusIndex; + size_t r23=r2*r3; + int64_t pred_value = 0, pred_err; + size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t)); + memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t)); + size_t totalSampleSize = (r1-1)*(r2-1)*(r3-1)/confparams_cpr->sampleDistance; + for(i=1;isampleDistance==0) + { + index = i*r23+j*r3+k; + pred_value = oriData[index-1] + oriData[index-r3] + oriData[index-r23] + - oriData[index-1-r23] - oriData[index-r3-1] - oriData[index-r3-r23] + oriData[index-r3-r23-1]; + pred_err = llabs(pred_value - oriData[index]); + radiusIndex = (pred_err/realPrecision+1)/2; + if(radiusIndex>=confparams_cpr->maxRangeRadius) + { + radiusIndex = confparams_cpr->maxRangeRadius - 1; + //printf("radiusIndex=%d\n", radiusIndex); + } + intervals[radiusIndex]++; + } + } + } + } + //compute the appropriate 
number + size_t targetCount = totalSampleSize*confparams_cpr->predThreshold; + size_t sum = 0; + for(i=0;imaxRangeRadius;i++) + { + sum += intervals[i]; + if(sum>targetCount) + break; + } + if(i>=confparams_cpr->maxRangeRadius) + i = confparams_cpr->maxRangeRadius-1; + unsigned int accIntervals = 2*(i+1); + unsigned int powerOf2 = roundUpToPowerOf2(accIntervals); + + if(powerOf2<32) + powerOf2 = 32; + + free(intervals); + //printf("targetCount=%d, sum=%d, totalSampleSize=%d, ratio=%f, accIntervals=%d, powerOf2=%d\n", targetCount, sum, totalSampleSize, (double)sum/(double)totalSampleSize, accIntervals, powerOf2); + return powerOf2; +} + + +unsigned int optimize_intervals_int32_4D(int32_t *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision) +{ + size_t i,j,k,l, index; + size_t radiusIndex; + size_t r234=r2*r3*r4; + size_t r34=r3*r4; + int64_t pred_value = 0, pred_err; + size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t)); + memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t)); + size_t totalSampleSize = (r1-1)*(r2-1)*(r3-1)*(r4-1)/confparams_cpr->sampleDistance; + for(i=1;isampleDistance==0) + { + index = i*r234+j*r34+k*r4+l; + pred_value = oriData[index-1] + oriData[index-r3] + oriData[index-r34] + - oriData[index-1-r34] - oriData[index-r4-1] - oriData[index-r4-r34] + oriData[index-r4-r34-1]; + pred_err = llabs(pred_value - oriData[index]); + radiusIndex = (uint64_t)((pred_err/realPrecision+1)/2); + if(radiusIndex>=confparams_cpr->maxRangeRadius) + radiusIndex = confparams_cpr->maxRangeRadius - 1; + intervals[radiusIndex]++; + } + } + } + } + } + //compute the appropriate number + size_t targetCount = totalSampleSize*confparams_cpr->predThreshold; + size_t sum = 0; + for(i=0;imaxRangeRadius;i++) + { + sum += intervals[i]; + if(sum>targetCount) + break; + } + if(i>=confparams_cpr->maxRangeRadius) + i = confparams_cpr->maxRangeRadius-1; + + unsigned int accIntervals = 2*(i+1); + unsigned int powerOf2 
= roundUpToPowerOf2(accIntervals); + + if(powerOf2<32) + powerOf2 = 32; + + free(intervals); + return powerOf2; +} + +TightDataPointStorageI* SZ_compress_int32_1D_MDQ(int32_t *oriData, size_t dataLength, double realPrecision, int64_t valueRangeSize, int64_t minValue) +{ + unsigned char bytes[8] = {0,0,0,0,0,0,0,0}; + int byteSize = computeByteSizePerIntValue(valueRangeSize); + unsigned int quantization_intervals; + if(exe_params->optQuantMode==1) + quantization_intervals = optimize_intervals_int32_1D(oriData, dataLength, realPrecision); + else + quantization_intervals = exe_params->intvCapacity; + updateQuantizationInfo(quantization_intervals); + size_t i; + + int* type = (int*) malloc(dataLength*sizeof(int)); + + int32_t* spaceFillingValue = oriData; // + + DynamicByteArray *exactDataByteArray; + new_DBA(&exactDataByteArray, DynArrayInitLen); + + int64_t last3CmprsData[3] = {0,0,0}; + + //add the first data + type[0] = 0; + compressInt32Value(spaceFillingValue[0], minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + listAdd_int(last3CmprsData, spaceFillingValue[0]); + + type[1] = 0; + compressInt32Value(spaceFillingValue[1], minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + listAdd_int(last3CmprsData, spaceFillingValue[1]); + //printf("%.30G\n",last3CmprsData[0]); + + int state; + double checkRadius = (exe_params->intvCapacity-1)*realPrecision; + int64_t curData; + int32_t pred, predAbsErr; + double interval = 2*realPrecision; + + for(i=2;i=pred) + { + type[i] = exe_params->intvRadius+state; + pred = pred + state*interval; + } + else //curDataintvRadius-state; + pred = pred - state*interval; + } +/* if(type[i]==0) + printf("err:type[%d]=0\n", i);*/ + listAdd_int(last3CmprsData, pred); + continue; + } + + //unpredictable data processing + type[i] = 0; + compressInt32Value(curData, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + listAdd_int(last3CmprsData, 
curData); + }//end of for + + size_t exactDataNum = exactDataByteArray->size / byteSize; + + TightDataPointStorageI* tdps; + + new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize, + type, exactDataByteArray->array, exactDataByteArray->size, + realPrecision, minValue, quantization_intervals, SZ_INT32); + +//sdi:Debug +/* int sum =0; + for(i=0;iarray has been released in free_TightDataPointStorageF(tdps); + + return tdps; +} + +void SZ_compress_args_int32_StoreOriData(int32_t* oriData, size_t dataLength, TightDataPointStorageI* tdps, +unsigned char** newByteData, size_t *outSize) +{ + int intSize=sizeof(int32_t); + size_t k = 0, i; + tdps->isLossless = 1; + size_t totalByteLength = 3 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 1 + intSize*dataLength; + *newByteData = (unsigned char*)malloc(totalByteLength); + + unsigned char dsLengthBytes[8]; + for (i = 0; i < 3; i++)//3 + (*newByteData)[k++] = versionNumber[i]; + + if(exe_params->SZ_SIZE_TYPE==4)//1 + (*newByteData)[k++] = 16; //00010000 + else + (*newByteData)[k++] = 80; //01010000: 01000000 indicates the SZ_SIZE_TYPE=8 + + convertSZParamsToBytes(confparams_cpr, &((*newByteData)[k])); + k = k + MetaDataByteLength; + + sizeToBytes(dsLengthBytes,dataLength); //SZ_SIZE_TYPE: 4 or 8 + for (i = 0; i < exe_params->SZ_SIZE_TYPE; i++) + (*newByteData)[k++] = dsLengthBytes[i]; + + if(sysEndianType==BIG_ENDIAN_SYSTEM) + memcpy((*newByteData)+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE, oriData, dataLength*intSize); + else + { + unsigned char* p = (*newByteData)+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE; + for(i=0;i dataLength*sizeof(int32_t)) + SZ_compress_args_int32_StoreOriData(oriData, dataLength+2, tdps, newByteData, outSize); + free_TightDataPointStorageI(tdps); +} + +TightDataPointStorageI* SZ_compress_int32_2D_MDQ(int32_t *oriData, size_t r1, size_t r2, double realPrecision, int64_t valueRangeSize, int64_t minValue) +{ + unsigned char bytes[8] = {0,0,0,0,0,0,0,0}; + int byteSize = 
computeByteSizePerIntValue(valueRangeSize); + + unsigned int quantization_intervals; + if(exe_params->optQuantMode==1) + { + quantization_intervals = optimize_intervals_int32_2D(oriData, r1, r2, realPrecision); + updateQuantizationInfo(quantization_intervals); + } + else + quantization_intervals = exe_params->intvCapacity; + size_t i,j; + int32_t pred1D, pred2D, curValue; + int32_t diff = 0.0; + double itvNum = 0; + int32_t *P0, *P1; + + size_t dataLength = r1*r2; + + P0 = (int32_t*)malloc(r2*sizeof(int32_t)); + memset(P0, 0, r2*sizeof(int32_t)); + P1 = (int32_t*)malloc(r2*sizeof(int32_t)); + memset(P1, 0, r2*sizeof(int32_t)); + + int* type = (int*) malloc(dataLength*sizeof(int)); + //type[dataLength]=0; + + int32_t* spaceFillingValue = oriData; // + + DynamicByteArray *exactDataByteArray; + new_DBA(&exactDataByteArray, DynArrayInitLen); + + type[0] = 0; + curValue = P1[0] = spaceFillingValue[0]; + compressInt32Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + + /* Process Row-0 data 1*/ + pred1D = P1[0]; + diff = spaceFillingValue[1] - pred1D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[1] = (int) (itvNum/2) + exe_params->intvRadius; + P1[1] = pred1D + 2 * (type[1] - exe_params->intvRadius) * realPrecision; + } + else + { + type[1] = 0; + curValue = P1[1] = spaceFillingValue[1]; + compressInt32Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + + /* Process Row-0 data 2 --> data r2-1 */ + for (j = 2; j < r2; j++) + { + pred1D = 2*P1[j-1] - P1[j-2]; + diff = spaceFillingValue[j] - pred1D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[j] = (int) (itvNum/2) + exe_params->intvRadius; + P1[j] = pred1D + 2 * (type[j] - exe_params->intvRadius) * realPrecision; + } + else + { + type[j] = 0; + 
curValue = P1[j] = spaceFillingValue[j]; + compressInt32Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + } + + /* Process Row-1 --> Row-r1-1 */ + size_t index; + for (i = 1; i < r1; i++) + { + /* Process row-i data 0 */ + index = i*r2; + pred1D = P1[0]; + diff = spaceFillingValue[index] - pred1D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P0[0] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + } + else + { + type[index] = 0; + curValue = P0[0] = spaceFillingValue[index]; + compressInt32Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + + /* Process row-i data 1 --> r2-1*/ + for (j = 1; j < r2; j++) + { + index = i*r2+j; + pred2D = P0[j-1] + P1[j] - P1[j-1]; + + diff = spaceFillingValue[index] - pred2D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P0[j] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + } + else + { + type[index] = 0; + curValue = P0[j] = spaceFillingValue[index]; + compressInt32Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + } + + int32_t *Pt; + Pt = P1; + P1 = P0; + P0 = Pt; + } + + if(r2!=1) + free(P0); + free(P1); + + size_t exactDataNum = exactDataByteArray->size; + + TightDataPointStorageI* tdps; + + new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize, + type, exactDataByteArray->array, exactDataByteArray->size, + realPrecision, minValue, quantization_intervals, SZ_INT32); + + //free memory + free(type); + free(exactDataByteArray); //exactDataByteArray->array has been released in free_TightDataPointStorageF(tdps); + + return tdps; +} + +/** 
+ * + * Note: @r1 is high dimension + * @r2 is low dimension + * */ +void SZ_compress_args_int32_NoCkRngeNoGzip_2D(unsigned char** newByteData, int32_t *oriData, size_t r1, size_t r2, double realPrecision, size_t *outSize, +int64_t valueRangeSize, int32_t minValue) +{ + TightDataPointStorageI* tdps = SZ_compress_int32_2D_MDQ(oriData, r1, r2, realPrecision, valueRangeSize, minValue); + + convertTDPStoFlatBytes_int(tdps, newByteData, outSize); + + size_t dataLength = r1*r2; + if(*outSize>dataLength*sizeof(int32_t)) + SZ_compress_args_int32_StoreOriData(oriData, dataLength, tdps, newByteData, outSize); + + free_TightDataPointStorageI(tdps); +} + +TightDataPointStorageI* SZ_compress_int32_3D_MDQ(int32_t *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, int64_t valueRangeSize, int64_t minValue) +{ + unsigned char bytes[8] = {0,0,0,0,0,0,0,0}; + int byteSize = computeByteSizePerIntValue(valueRangeSize); + + unsigned int quantization_intervals; + if(exe_params->optQuantMode==1) + { + quantization_intervals = optimize_intervals_int32_3D(oriData, r1, r2, r3, realPrecision); + updateQuantizationInfo(quantization_intervals); + } + else + quantization_intervals = exe_params->intvCapacity; + size_t i,j,k; + int32_t pred1D, pred2D, pred3D, curValue; + int32_t diff = 0.0; + double itvNum = 0; + int32_t *P0, *P1; + + size_t dataLength = r1*r2*r3; + + size_t r23 = r2*r3; + P0 = (int32_t*)malloc(r23*sizeof(int32_t)); + P1 = (int32_t*)malloc(r23*sizeof(int32_t)); + + int* type = (int*) malloc(dataLength*sizeof(int)); + + int32_t* spaceFillingValue = oriData; // + + DynamicByteArray *exactDataByteArray; + new_DBA(&exactDataByteArray, DynArrayInitLen); + + type[0] = 0; + P1[0] = spaceFillingValue[0]; + compressInt32Value(spaceFillingValue[0], minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + + /* Process Row-0 data 1*/ + pred1D = P1[0]; + diff = spaceFillingValue[1] - pred1D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < 
exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[1] = (int) (itvNum/2) + exe_params->intvRadius; + P1[1] = pred1D + 2 * (type[1] - exe_params->intvRadius) * realPrecision; + } + else + { + type[1] = 0; + curValue = P1[1] = spaceFillingValue[1]; + compressInt32Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + + /* Process Row-0 data 2 --> data r3-1 */ + for (j = 2; j < r3; j++) + { + pred1D = 2*P1[j-1] - P1[j-2]; + diff = spaceFillingValue[j] - pred1D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[j] = (int) (itvNum/2) + exe_params->intvRadius; + P1[j] = pred1D + 2 * (type[j] - exe_params->intvRadius) * realPrecision; + } + else + { + type[j] = 0; + curValue = P1[j] = spaceFillingValue[j]; + compressInt32Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + } + + /* Process Row-1 --> Row-r2-1 */ + size_t index; + for (i = 1; i < r2; i++) + { + /* Process row-i data 0 */ + index = i*r3; + pred1D = P1[index-r3]; + diff = spaceFillingValue[index] - pred1D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P1[index] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + } + else + { + type[index] = 0; + curValue = P1[index] = spaceFillingValue[index]; + compressInt32Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + + /* Process row-i data 1 --> data r3-1*/ + for (j = 1; j < r3; j++) + { + index = i*r3+j; + pred2D = P1[index-1] + P1[index-r3] - P1[index-r3-1]; + + diff = spaceFillingValue[index] - pred2D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) 
(itvNum/2) + exe_params->intvRadius; + P1[index] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + } + else + { + type[index] = 0; + curValue = P1[index] = spaceFillingValue[index]; + compressInt32Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + } + } + + + /////////////////////////// Process layer-1 --> layer-r1-1 /////////////////////////// + + for (k = 1; k < r1; k++) + { + /* Process Row-0 data 0*/ + index = k*r23; + pred1D = P1[0]; + diff = spaceFillingValue[index] - pred1D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P0[0] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + } + else + { + type[index] = 0; + curValue = P0[0] = spaceFillingValue[index]; + compressInt32Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + + + /* Process Row-0 data 1 --> data r3-1 */ + for (j = 1; j < r3; j++) + { + //index = k*r2*r3+j; + index ++; + pred2D = P0[j-1] + P1[j] - P1[j-1]; + diff = spaceFillingValue[index] - pred2D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P0[j] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; +/* if(type[index]==0) + printf("err:type[%d]=0, index4\n", index); */ + } + else + { + type[index] = 0; + curValue = P0[j] = spaceFillingValue[index]; + compressInt32Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + } + + /* Process Row-1 --> Row-r2-1 */ + size_t index2D; + for (i = 1; i < r2; i++) + { + /* Process Row-i data 0 */ + index = k*r23 + i*r3; + index2D = i*r3; + pred2D = P0[index2D-r3] + P1[index2D] - P1[index2D-r3]; + diff = 
spaceFillingValue[index] - pred2D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P0[index2D] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + } + else + { + type[index] = 0; + curValue = P0[index2D] = spaceFillingValue[index]; + compressInt32Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + + /* Process Row-i data 1 --> data r3-1 */ + for (j = 1; j < r3; j++) + { +// if(k==63&&i==43&&j==27) +// printf("i=%d\n", i); + //index = k*r2*r3 + i*r3 + j; + index ++; + index2D = i*r3 + j; + pred3D = P0[index2D-1] + P0[index2D-r3]+ P1[index2D] - P0[index2D-r3-1] - P1[index2D-r3] - P1[index2D-1] + P1[index2D-r3-1]; + diff = spaceFillingValue[index] - pred3D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P0[index2D] = pred3D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + } + else + { + type[index] = 0; + curValue = P0[index2D] = spaceFillingValue[index]; + compressInt32Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + } + } + + int32_t *Pt; + Pt = P1; + P1 = P0; + P0 = Pt; + } + if(r23!=1) + free(P0); + free(P1); + + size_t exactDataNum = exactDataByteArray->size; + + TightDataPointStorageI* tdps; + + new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize, + type, exactDataByteArray->array, exactDataByteArray->size, + realPrecision, minValue, quantization_intervals, SZ_INT32); + + //free memory + free(type); + free(exactDataByteArray); //exactDataByteArray->array has been released in free_TightDataPointStorageF(tdps); + + return tdps; +} + + +void SZ_compress_args_int32_NoCkRngeNoGzip_3D(unsigned char** newByteData, int32_t *oriData, size_t 
r1, size_t r2, size_t r3, double realPrecision, size_t *outSize, +int64_t valueRangeSize, int64_t minValue) +{ + TightDataPointStorageI* tdps = SZ_compress_int32_3D_MDQ(oriData, r1, r2, r3, realPrecision, valueRangeSize, minValue); + + convertTDPStoFlatBytes_int(tdps, newByteData, outSize); + + size_t dataLength = r1*r2*r3; + if(*outSize>dataLength*sizeof(int32_t)) + SZ_compress_args_int32_StoreOriData(oriData, dataLength, tdps, newByteData, outSize); + + free_TightDataPointStorageI(tdps); +} + + +TightDataPointStorageI* SZ_compress_int32_4D_MDQ(int32_t *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision, int64_t valueRangeSize, int64_t minValue) +{ + unsigned char bytes[8] = {0,0,0,0,0,0,0,0}; + int byteSize = computeByteSizePerIntValue(valueRangeSize); + + unsigned int quantization_intervals; + if(exe_params->optQuantMode==1) + { + quantization_intervals = optimize_intervals_int32_4D(oriData, r1, r2, r3, r4, realPrecision); + updateQuantizationInfo(quantization_intervals); + } + else + quantization_intervals = exe_params->intvCapacity; + size_t i,j,k; + int32_t pred1D, pred2D, pred3D, curValue; + int32_t diff = 0.0; + double itvNum = 0; + int32_t *P0, *P1; + + size_t dataLength = r1*r2*r3*r4; + + size_t r234 = r2*r3*r4; + size_t r34 = r3*r4; + + P0 = (int32_t*)malloc(r34*sizeof(int32_t)); + P1 = (int32_t*)malloc(r34*sizeof(int32_t)); + + int* type = (int*) malloc(dataLength*sizeof(int)); + + int32_t* spaceFillingValue = oriData; // + + DynamicByteArray *exactDataByteArray; + new_DBA(&exactDataByteArray, DynArrayInitLen); + + size_t l; + for (l = 0; l < r1; l++) + { + + /////////////////////////// Process layer-0 /////////////////////////// + /* Process Row-0 data 0*/ + size_t index = l*r234; + size_t index2D = 0; + + type[index] = 0; + curValue = P1[index2D] = spaceFillingValue[index]; + compressInt32Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + + /* Process Row-0 data 1*/ + index = 
l*r234+1; + index2D = 1; + + pred1D = P1[index2D-1]; + diff = curValue - pred1D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P1[index2D] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + } + else + { + type[index] = 0; + + curValue = P1[index2D] = spaceFillingValue[0]; + compressInt32Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + + /* Process Row-0 data 2 --> data r4-1 */ + for (j = 2; j < r4; j++) + { + index = l*r234+j; + index2D = j; + + pred1D = 2*P1[index2D-1] - P1[index2D-2]; + diff = spaceFillingValue[index] - pred1D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P1[index2D] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + } + else + { + type[index] = 0; + + curValue = P1[index2D] = spaceFillingValue[0]; + compressInt32Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + } + + /* Process Row-1 --> Row-r3-1 */ + for (i = 1; i < r3; i++) + { + /* Process row-i data 0 */ + index = l*r234+i*r4; + index2D = i*r4; + + pred1D = P1[index2D-r4]; + diff = spaceFillingValue[index] - pred1D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P1[index2D] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + } + else + { + type[index] = 0; + + curValue = P1[index2D] = spaceFillingValue[0]; + compressInt32Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + + /* Process row-i data 1 --> data r4-1*/ + for (j = 1; j < r4; j++) + { + index = 
l*r234+i*r4+j; + index2D = i*r4+j; + + pred2D = P1[index2D-1] + P1[index2D-r4] - P1[index2D-r4-1]; + + diff = spaceFillingValue[index] - pred2D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P1[index2D] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + } + else + { + type[index] = 0; + + curValue = P1[index2D] = spaceFillingValue[0]; + compressInt32Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + } + } + + + /////////////////////////// Process layer-1 --> layer-r2-1 /////////////////////////// + + for (k = 1; k < r2; k++) + { + /* Process Row-0 data 0*/ + index = l*r234+k*r34; + index2D = 0; + + pred1D = P1[index2D]; + diff = spaceFillingValue[index] - pred1D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P0[index2D] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + } + else + { + type[index] = 0; + + curValue = P0[index2D] = spaceFillingValue[0]; + compressInt32Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + + /* Process Row-0 data 1 --> data r4-1 */ + for (j = 1; j < r4; j++) + { + index = l*r234+k*r34+j; + index2D = j; + + pred2D = P0[index2D-1] + P1[index2D] - P1[index2D-1]; + diff = spaceFillingValue[index] - pred2D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P0[index2D] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + } + else + { + type[index] = 0; + + curValue = P0[index2D] = spaceFillingValue[0]; + compressInt32Value(curValue, minValue, byteSize, bytes); + 
memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + } + + /* Process Row-1 --> Row-r3-1 */ + for (i = 1; i < r3; i++) + { + /* Process Row-i data 0 */ + index = l*r234+k*r34+i*r4; + index2D = i*r4; + + pred2D = P0[index2D-r4] + P1[index2D] - P1[index2D-r4]; + diff = spaceFillingValue[index] - pred2D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P0[index2D] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + } + else + { + type[index] = 0; + + curValue = P0[index2D] = spaceFillingValue[0]; + compressInt32Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + + /* Process Row-i data 1 --> data r4-1 */ + for (j = 1; j < r4; j++) + { + index = l*r234+k*r34+i*r4+j; + index2D = i*r4+j; + + pred3D = P0[index2D-1] + P0[index2D-r4]+ P1[index2D] - P0[index2D-r4-1] - P1[index2D-r4] - P1[index2D-1] + P1[index2D-r4-1]; + diff = spaceFillingValue[index] - pred3D; + + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P0[index2D] = pred3D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + } + else + { + type[index] = 0; + + curValue = P0[index2D] = spaceFillingValue[0]; + compressInt32Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + } + } + + int32_t *Pt; + Pt = P1; + P1 = P0; + P0 = Pt; + } + } + + free(P0); + free(P1); + + size_t exactDataNum = exactDataByteArray->size; + + TightDataPointStorageI* tdps; + + new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize, + type, exactDataByteArray->array, exactDataByteArray->size, + realPrecision, minValue, quantization_intervals, SZ_INT32); + + //free memory + free(type); + free(exactDataByteArray); 
//exactDataByteArray->array has been released in free_TightDataPointStorageF(tdps); + + return tdps; +} + +void SZ_compress_args_int32_NoCkRngeNoGzip_4D(unsigned char** newByteData, int32_t *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision, +size_t *outSize, int64_t valueRangeSize, int64_t minValue) +{ + TightDataPointStorageI* tdps = SZ_compress_int32_4D_MDQ(oriData, r1, r2, r3, r4, realPrecision, valueRangeSize, minValue); + + convertTDPStoFlatBytes_int(tdps, newByteData, outSize); + + size_t dataLength = r1*r2*r3*r4; + if(*outSize>dataLength*sizeof(int32_t)) + SZ_compress_args_int32_StoreOriData(oriData, dataLength, tdps, newByteData, outSize); + + free_TightDataPointStorageI(tdps); +} + +void SZ_compress_args_int32_withinRange(unsigned char** newByteData, int32_t *oriData, size_t dataLength, size_t *outSize) +{ + TightDataPointStorageI* tdps = (TightDataPointStorageI*) malloc(sizeof(TightDataPointStorageI)); + tdps->typeArray = NULL; + + tdps->allSameData = 1; + tdps->dataSeriesLength = dataLength; + tdps->exactDataBytes = (unsigned char*)malloc(sizeof(unsigned char)*4); + tdps->isLossless = 0; + //tdps->exactByteSize = 4; + tdps->exactDataNum = 1; + tdps->exactDataBytes_size = 4; + tdps->dataTypeSize = convertDataTypeSize(sizeof(int32_t)); + + int32_t value = oriData[0]; + int32ToBytes_bigEndian(tdps->exactDataBytes, value); + + size_t tmpOutSize; + convertTDPStoFlatBytes_int(tdps, newByteData, &tmpOutSize); + + *outSize = tmpOutSize;//3+1+sizeof(int32_t)+SZ_SIZE_TYPE; //8==3+1+4(int32_size) + free_TightDataPointStorageI(tdps); +} + +int SZ_compress_args_int32_wRngeNoGzip(unsigned char** newByteData, int32_t *oriData, +size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize, +int errBoundMode, double absErr_Bound, double relBoundRatio) +{ + int status = SZ_SCES; + size_t dataLength = computeDataLength(r5,r4,r3,r2,r1); + int64_t valueRangeSize = 0; + + int32_t minValue = computeRangeSize_int(oriData, SZ_INT32, dataLength, 
&valueRangeSize); + double realPrecision = getRealPrecision_int(valueRangeSize, errBoundMode, absErr_Bound, relBoundRatio, &status); + + if(valueRangeSize <= realPrecision) + { + SZ_compress_args_int32_withinRange(newByteData, oriData, dataLength, outSize); + } + else + { +// SZ_compress_args_int32_NoCkRngeNoGzip_2D(newByteData, oriData, r2, r1, realPrecision, outSize); + if(r5==0&&r4==0&&r3==0&&r2==0) + { + SZ_compress_args_int32_NoCkRngeNoGzip_1D(newByteData, oriData, r1, realPrecision, outSize, valueRangeSize, minValue); + } + else if(r5==0&&r4==0&&r3==0) + { + SZ_compress_args_int32_NoCkRngeNoGzip_2D(newByteData, oriData, r2, r1, realPrecision, outSize, valueRangeSize, minValue); + } + else if(r5==0&&r4==0) + { + SZ_compress_args_int32_NoCkRngeNoGzip_3D(newByteData, oriData, r3, r2, r1, realPrecision, outSize, valueRangeSize, minValue); + } + else if(r5==0) + { + SZ_compress_args_int32_NoCkRngeNoGzip_3D(newByteData, oriData, r4*r3, r2, r1, realPrecision, outSize, valueRangeSize, minValue); + } + } + return status; +} + +int SZ_compress_args_int32(unsigned char** newByteData, int32_t *oriData, +size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize, +int errBoundMode, double absErr_Bound, double relBoundRatio) +{ + confparams_cpr->errorBoundMode = errBoundMode; + + if(errBoundMode>=PW_REL) + { + printf("Error: Current SZ version doesn't support integer data compression with point-wise relative error bound being based on pwrType=AVG\n"); + exit(0); + return SZ_NSCS; + } + int status = SZ_SCES; + size_t dataLength = computeDataLength(r5,r4,r3,r2,r1); + int64_t valueRangeSize = 0; + + int32_t minValue = (int32_t)computeRangeSize_int(oriData, SZ_INT32, dataLength, &valueRangeSize); + double realPrecision = 0; + + if(confparams_cpr->errorBoundMode==PSNR) + { + confparams_cpr->errorBoundMode = ABS; + realPrecision = confparams_cpr->absErrBound = computeABSErrBoundFromPSNR(confparams_cpr->psnr, (double)confparams_cpr->predThreshold, 
(double)valueRangeSize); + //printf("realPrecision=%lf\n", realPrecision); + } + else + realPrecision = getRealPrecision_int(valueRangeSize, errBoundMode, absErr_Bound, relBoundRatio, &status); + + if(valueRangeSize <= realPrecision) + { + SZ_compress_args_int32_withinRange(newByteData, oriData, dataLength, outSize); + } + else + { + size_t tmpOutSize = 0; + unsigned char* tmpByteData; + if (r2==0) + { + SZ_compress_args_int32_NoCkRngeNoGzip_1D(&tmpByteData, oriData, r1, realPrecision, &tmpOutSize, valueRangeSize, minValue); + } + else + if (r3==0) + { + SZ_compress_args_int32_NoCkRngeNoGzip_2D(&tmpByteData, oriData, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, minValue); + } + else + if (r4==0) + { + SZ_compress_args_int32_NoCkRngeNoGzip_3D(&tmpByteData, oriData, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, minValue); + } + else + if (r5==0) + { + SZ_compress_args_int32_NoCkRngeNoGzip_4D(&tmpByteData, oriData, r4, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, minValue); + } + else + { + printf("Error: doesn't support 5 dimensions for now.\n"); + status = SZ_DERR; //dimension error + } + //Call Gzip to do the further compression. 
+ if(confparams_cpr->szMode==SZ_BEST_SPEED) + { + *outSize = tmpOutSize; + *newByteData = tmpByteData; + } + else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION || confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION) + { + *outSize = sz_lossless_compress(confparams_cpr->losslessCompressor, confparams_cpr->gzipMode, tmpByteData, tmpOutSize, newByteData); + free(tmpByteData); + } + else + { + printf("Error: Wrong setting of confparams_cpr->szMode in the int32_t compression.\n"); + status = SZ_MERR; //mode error + } + } + + return status; +} diff --git a/deps/SZ/sz/src/sz_int64.c b/deps/SZ/sz/src/sz_int64.c new file mode 100644 index 0000000000000000000000000000000000000000..065fb16e49dcd2e68a546610a85d9b3f17c44154 --- /dev/null +++ b/deps/SZ/sz/src/sz_int64.c @@ -0,0 +1,1269 @@ +/** + * @file sz_int64.c + * @author Sheng Di + * @date Aug, 2017 + * @brief sz_int64, Compression and Decompression functions + * (C) 2017 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. 
+ */ + + +#include +#include +#include +#include +#include +#include "sz.h" +#include "CompressElement.h" +#include "DynamicByteArray.h" +#include "DynamicIntArray.h" +#include "zlib.h" +#include "rw.h" +#include "TightDataPointStorageI.h" +#include "sz_int64.h" +#include "utility.h" + +unsigned int optimize_intervals_int64_1D(int64_t *oriData, size_t dataLength, double realPrecision) +{ + size_t i = 0, radiusIndex; + int64_t pred_value = 0, pred_err; + size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t)); + memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t)); + size_t totalSampleSize = dataLength/confparams_cpr->sampleDistance; + for(i=2;isampleDistance==0) + { + //pred_value = 2*oriData[i-1] - oriData[i-2]; + pred_value = oriData[i-1]; + pred_err = llabs(pred_value - oriData[i]); + radiusIndex = (uint64_t)((pred_err/realPrecision+1)/2); + if(radiusIndex>=confparams_cpr->maxRangeRadius) + radiusIndex = confparams_cpr->maxRangeRadius - 1; + intervals[radiusIndex]++; + } + } + //compute the appropriate number + size_t targetCount = totalSampleSize*confparams_cpr->predThreshold; + size_t sum = 0; + for(i=0;imaxRangeRadius;i++) + { + sum += intervals[i]; + if(sum>targetCount) + break; + } + if(i>=confparams_cpr->maxRangeRadius) + i = confparams_cpr->maxRangeRadius-1; + + unsigned int accIntervals = 2*(i+1); + unsigned int powerOf2 = roundUpToPowerOf2(accIntervals); + + if(powerOf2<32) + powerOf2 = 32; + + free(intervals); + //printf("accIntervals=%d, powerOf2=%d\n", accIntervals, powerOf2); + return powerOf2; +} + +unsigned int optimize_intervals_int64_2D(int64_t *oriData, size_t r1, size_t r2, double realPrecision) +{ + size_t i,j, index; + size_t radiusIndex; + int64_t pred_value = 0, pred_err; + size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t)); + memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t)); + size_t totalSampleSize = (r1-1)*(r2-1)/confparams_cpr->sampleDistance; 
+ for(i=1;isampleDistance==0) + { + index = i*r2+j; + pred_value = oriData[index-1] + oriData[index-r2] - oriData[index-r2-1]; + pred_err = llabs(pred_value - oriData[index]); + radiusIndex = (uint64_t)((pred_err/realPrecision+1)/2); + if(radiusIndex>=confparams_cpr->maxRangeRadius) + radiusIndex = confparams_cpr->maxRangeRadius - 1; + intervals[radiusIndex]++; + } + } + } + //compute the appropriate number + size_t targetCount = totalSampleSize*confparams_cpr->predThreshold; + size_t sum = 0; + for(i=0;imaxRangeRadius;i++) + { + sum += intervals[i]; + if(sum>targetCount) + break; + } + if(i>=confparams_cpr->maxRangeRadius) + i = confparams_cpr->maxRangeRadius-1; + unsigned int accIntervals = 2*(i+1); + unsigned int powerOf2 = roundUpToPowerOf2(accIntervals); + + if(powerOf2<32) + powerOf2 = 32; + + free(intervals); + //printf("confparams_cpr->maxRangeRadius = %d, accIntervals=%d, powerOf2=%d\n", confparams_cpr->maxRangeRadius, accIntervals, powerOf2); + return powerOf2; +} + +unsigned int optimize_intervals_int64_3D(int64_t *oriData, size_t r1, size_t r2, size_t r3, double realPrecision) +{ + size_t i,j,k, index; + size_t radiusIndex; + size_t r23=r2*r3; + int64_t pred_value = 0, pred_err; + size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t)); + memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t)); + size_t totalSampleSize = (r1-1)*(r2-1)*(r3-1)/confparams_cpr->sampleDistance; + for(i=1;isampleDistance==0) + { + index = i*r23+j*r3+k; + pred_value = oriData[index-1] + oriData[index-r3] + oriData[index-r23] + - oriData[index-1-r23] - oriData[index-r3-1] - oriData[index-r3-r23] + oriData[index-r3-r23-1]; + pred_err = llabs(pred_value - oriData[index]); + radiusIndex = (pred_err/realPrecision+1)/2; + if(radiusIndex>=confparams_cpr->maxRangeRadius) + { + radiusIndex = confparams_cpr->maxRangeRadius - 1; + //printf("radiusIndex=%d\n", radiusIndex); + } + intervals[radiusIndex]++; + } + } + } + } + //compute the appropriate 
number + size_t targetCount = totalSampleSize*confparams_cpr->predThreshold; + size_t sum = 0; + for(i=0;imaxRangeRadius;i++) + { + sum += intervals[i]; + if(sum>targetCount) + break; + } + if(i>=confparams_cpr->maxRangeRadius) + i = confparams_cpr->maxRangeRadius-1; + unsigned int accIntervals = 2*(i+1); + unsigned int powerOf2 = roundUpToPowerOf2(accIntervals); + + if(powerOf2<32) + powerOf2 = 32; + + free(intervals); + //printf("targetCount=%d, sum=%d, totalSampleSize=%d, ratio=%f, accIntervals=%d, powerOf2=%d\n", targetCount, sum, totalSampleSize, (double)sum/(double)totalSampleSize, accIntervals, powerOf2); + return powerOf2; +} + + +unsigned int optimize_intervals_int64_4D(int64_t *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision) +{ + size_t i,j,k,l, index; + size_t radiusIndex; + size_t r234=r2*r3*r4; + size_t r34=r3*r4; + int64_t pred_value = 0, pred_err; + size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t)); + memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t)); + size_t totalSampleSize = (r1-1)*(r2-1)*(r3-1)*(r4-1)/confparams_cpr->sampleDistance; + for(i=1;isampleDistance==0) + { + index = i*r234+j*r34+k*r4+l; + pred_value = oriData[index-1] + oriData[index-r3] + oriData[index-r34] + - oriData[index-1-r34] - oriData[index-r4-1] - oriData[index-r4-r34] + oriData[index-r4-r34-1]; + pred_err = llabs(pred_value - oriData[index]); + radiusIndex = (uint64_t)((pred_err/realPrecision+1)/2); + if(radiusIndex>=confparams_cpr->maxRangeRadius) + radiusIndex = confparams_cpr->maxRangeRadius - 1; + intervals[radiusIndex]++; + } + } + } + } + } + //compute the appropriate number + size_t targetCount = totalSampleSize*confparams_cpr->predThreshold; + size_t sum = 0; + for(i=0;imaxRangeRadius;i++) + { + sum += intervals[i]; + if(sum>targetCount) + break; + } + if(i>=confparams_cpr->maxRangeRadius) + i = confparams_cpr->maxRangeRadius-1; + + unsigned int accIntervals = 2*(i+1); + unsigned int powerOf2 
= roundUpToPowerOf2(accIntervals); + + if(powerOf2<32) + powerOf2 = 32; + + free(intervals); + return powerOf2; +} + +TightDataPointStorageI* SZ_compress_int64_1D_MDQ(int64_t *oriData, size_t dataLength, double realPrecision, int64_t valueRangeSize, int64_t minValue) +{ + unsigned char bytes[8] = {0,0,0,0,0,0,0,0}; + int byteSize = computeByteSizePerIntValue(valueRangeSize); + unsigned int quantization_intervals; + if(exe_params->optQuantMode==1) + quantization_intervals = optimize_intervals_int64_1D(oriData, dataLength, realPrecision); + else + quantization_intervals = exe_params->intvCapacity; + updateQuantizationInfo(quantization_intervals); + size_t i; + + int* type = (int*) malloc(dataLength*sizeof(int)); + + int64_t* spaceFillingValue = oriData; // + + DynamicByteArray *exactDataByteArray; + new_DBA(&exactDataByteArray, DynArrayInitLen); + + int64_t last3CmprsData[3] = {0,0,0}; + + //add the first data + type[0] = 0; + compressInt64Value(spaceFillingValue[0], minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + listAdd_int(last3CmprsData, spaceFillingValue[0]); + + type[1] = 0; + compressInt64Value(spaceFillingValue[1], minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + listAdd_int(last3CmprsData, spaceFillingValue[1]); + //printf("%.30G\n",last3CmprsData[0]); + + int state; + double checkRadius = (exe_params->intvCapacity-1)*realPrecision; + int64_t curData; + int64_t pred; + int64_t predAbsErr; + double interval = 2*realPrecision; + + for(i=2;i=pred) + { + type[i] = exe_params->intvRadius+state; + pred = pred + state*interval; + } + else //curDataintvRadius-state; + pred = pred - state*interval; + } +/* if(type[i]==0) + printf("err:type[%d]=0\n", i);*/ + listAdd_int(last3CmprsData, pred); + continue; + } + + //unpredictable data processing + type[i] = 0; + compressInt64Value(curData, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + 
listAdd_int(last3CmprsData, curData); + }//end of for + + size_t exactDataNum = exactDataByteArray->size / byteSize; + + TightDataPointStorageI* tdps; + + new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize, + type, exactDataByteArray->array, exactDataByteArray->size, + realPrecision, minValue, quantization_intervals, SZ_INT64); + +//sdi:Debug +/* int sum =0; + for(i=0;iarray has been released in free_TightDataPointStorageF(tdps); + + return tdps; +} + +void SZ_compress_args_int64_StoreOriData(int64_t* oriData, size_t dataLength, TightDataPointStorageI* tdps, +unsigned char** newByteData, size_t *outSize) +{ + int intSize=sizeof(int64_t); + size_t k = 0, i; + tdps->isLossless = 1; + size_t totalByteLength = 3 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 1 + intSize*dataLength; + *newByteData = (unsigned char*)malloc(totalByteLength); + + unsigned char dsLengthBytes[8]; + for (i = 0; i < 3; i++)//3 + (*newByteData)[k++] = versionNumber[i]; + + if(exe_params->SZ_SIZE_TYPE==4)//1 + (*newByteData)[k++] = 16; //00010000 + else + (*newByteData)[k++] = 80; //01010000: 01000000 indicates the SZ_SIZE_TYPE=8 + + convertSZParamsToBytes(confparams_cpr, &((*newByteData)[k])); + k = k + MetaDataByteLength; + + sizeToBytes(dsLengthBytes,dataLength); //SZ_SIZE_TYPE: 4 or 8 + for (i = 0; i < exe_params->SZ_SIZE_TYPE; i++) + (*newByteData)[k++] = dsLengthBytes[i]; + + if(sysEndianType==BIG_ENDIAN_SYSTEM) + memcpy((*newByteData)+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE, oriData, dataLength*intSize); + else + { + unsigned char* p = (*newByteData)+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE; + for(i=0;i dataLength*sizeof(int64_t)) + SZ_compress_args_int64_StoreOriData(oriData, dataLength+2, tdps, newByteData, outSize); + free_TightDataPointStorageI(tdps); +} + +TightDataPointStorageI* SZ_compress_int64_2D_MDQ(int64_t *oriData, size_t r1, size_t r2, double realPrecision, int64_t valueRangeSize, int64_t minValue) +{ + unsigned char bytes[8] = 
{0,0,0,0,0,0,0,0}; + int byteSize = computeByteSizePerIntValue(valueRangeSize); + + unsigned int quantization_intervals; + if(exe_params->optQuantMode==1) + { + quantization_intervals = optimize_intervals_int64_2D(oriData, r1, r2, realPrecision); + updateQuantizationInfo(quantization_intervals); + } + else + quantization_intervals = exe_params->intvCapacity; + size_t i,j; + int64_t pred1D, pred2D, curValue; + int64_t diff = 0.0; + double itvNum = 0; + int64_t *P0, *P1; + + size_t dataLength = r1*r2; + + P0 = (int64_t*)malloc(r2*sizeof(int64_t)); + memset(P0, 0, r2*sizeof(int64_t)); + P1 = (int64_t*)malloc(r2*sizeof(int64_t)); + memset(P1, 0, r2*sizeof(int64_t)); + + int* type = (int*) malloc(dataLength*sizeof(int)); + //type[dataLength]=0; + + int64_t* spaceFillingValue = oriData; // + + DynamicByteArray *exactDataByteArray; + new_DBA(&exactDataByteArray, DynArrayInitLen); + + type[0] = 0; + curValue = P1[0] = spaceFillingValue[0]; + compressInt64Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + + /* Process Row-0 data 1*/ + pred1D = P1[0]; + diff = spaceFillingValue[1] - pred1D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[1] = (int) (itvNum/2) + exe_params->intvRadius; + P1[1] = pred1D + 2 * (type[1] - exe_params->intvRadius) * realPrecision; + } + else + { + type[1] = 0; + curValue = P1[1] = spaceFillingValue[1]; + compressInt64Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + + /* Process Row-0 data 2 --> data r2-1 */ + for (j = 2; j < r2; j++) + { + pred1D = 2*P1[j-1] - P1[j-2]; + diff = spaceFillingValue[j] - pred1D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[j] = (int) (itvNum/2) + exe_params->intvRadius; + P1[j] = pred1D + 2 * (type[j] - exe_params->intvRadius) * realPrecision; + 
} + else + { + type[j] = 0; + curValue = P1[j] = spaceFillingValue[j]; + compressInt64Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + } + + /* Process Row-1 --> Row-r1-1 */ + size_t index; + for (i = 1; i < r1; i++) + { + /* Process row-i data 0 */ + index = i*r2; + pred1D = P1[0]; + diff = spaceFillingValue[index] - pred1D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P0[0] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + } + else + { + type[index] = 0; + curValue = P0[0] = spaceFillingValue[index]; + compressInt64Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + + /* Process row-i data 1 --> r2-1*/ + for (j = 1; j < r2; j++) + { + index = i*r2+j; + pred2D = P0[j-1] + P1[j] - P1[j-1]; + + diff = spaceFillingValue[index] - pred2D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P0[j] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + } + else + { + type[index] = 0; + curValue = P0[j] = spaceFillingValue[index]; + compressInt64Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + } + + int64_t *Pt; + Pt = P1; + P1 = P0; + P0 = Pt; + } + + if(r2!=1) + free(P0); + free(P1); + + size_t exactDataNum = exactDataByteArray->size; + + TightDataPointStorageI* tdps; + + new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize, + type, exactDataByteArray->array, exactDataByteArray->size, + realPrecision, minValue, quantization_intervals, SZ_INT64); + + //free memory + free(type); + free(exactDataByteArray); //exactDataByteArray->array has been released in 
free_TightDataPointStorageF(tdps); + + return tdps; +} + +/** + * + * Note: @r1 is high dimension + * @r2 is low dimension + * */ +void SZ_compress_args_int64_NoCkRngeNoGzip_2D(unsigned char** newByteData, int64_t *oriData, size_t r1, size_t r2, double realPrecision, size_t *outSize, +int64_t valueRangeSize, int64_t minValue) +{ + TightDataPointStorageI* tdps = SZ_compress_int64_2D_MDQ(oriData, r1, r2, realPrecision, valueRangeSize, minValue); + + convertTDPStoFlatBytes_int(tdps, newByteData, outSize); + + size_t dataLength = r1*r2; + if(*outSize>dataLength*sizeof(int64_t)) + SZ_compress_args_int64_StoreOriData(oriData, dataLength, tdps, newByteData, outSize); + + free_TightDataPointStorageI(tdps); +} + +TightDataPointStorageI* SZ_compress_int64_3D_MDQ(int64_t *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, int64_t valueRangeSize, int64_t minValue) +{ + unsigned char bytes[8] = {0,0,0,0,0,0,0,0}; + int byteSize = computeByteSizePerIntValue(valueRangeSize); + + unsigned int quantization_intervals; + if(exe_params->optQuantMode==1) + { + quantization_intervals = optimize_intervals_int64_3D(oriData, r1, r2, r3, realPrecision); + updateQuantizationInfo(quantization_intervals); + } + else + quantization_intervals = exe_params->intvCapacity; + size_t i,j,k; + int64_t pred1D, pred2D, pred3D, curValue; + int64_t diff = 0.0; + double itvNum = 0; + int64_t *P0, *P1; + + size_t dataLength = r1*r2*r3; + + size_t r23 = r2*r3; + P0 = (int64_t*)malloc(r23*sizeof(int64_t)); + P1 = (int64_t*)malloc(r23*sizeof(int64_t)); + + int* type = (int*) malloc(dataLength*sizeof(int)); + + int64_t* spaceFillingValue = oriData; // + + DynamicByteArray *exactDataByteArray; + new_DBA(&exactDataByteArray, DynArrayInitLen); + + type[0] = 0; + P1[0] = spaceFillingValue[0]; + compressInt64Value(spaceFillingValue[0], minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + + /* Process Row-0 data 1*/ + pred1D = P1[0]; + diff = spaceFillingValue[1] - pred1D; 
+ + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[1] = (int) (itvNum/2) + exe_params->intvRadius; + P1[1] = pred1D + 2 * (type[1] - exe_params->intvRadius) * realPrecision; + } + else + { + type[1] = 0; + curValue = P1[1] = spaceFillingValue[1]; + compressInt64Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + + /* Process Row-0 data 2 --> data r3-1 */ + for (j = 2; j < r3; j++) + { + pred1D = 2*P1[j-1] - P1[j-2]; + diff = spaceFillingValue[j] - pred1D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[j] = (int) (itvNum/2) + exe_params->intvRadius; + P1[j] = pred1D + 2 * (type[j] - exe_params->intvRadius) * realPrecision; + } + else + { + type[j] = 0; + curValue = P1[j] = spaceFillingValue[j]; + compressInt64Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + } + + /* Process Row-1 --> Row-r2-1 */ + size_t index; + for (i = 1; i < r2; i++) + { + /* Process row-i data 0 */ + index = i*r3; + pred1D = P1[index-r3]; + diff = spaceFillingValue[index] - pred1D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P1[index] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + } + else + { + type[index] = 0; + curValue = P1[index] = spaceFillingValue[index]; + compressInt64Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + + /* Process row-i data 1 --> data r3-1*/ + for (j = 1; j < r3; j++) + { + index = i*r3+j; + pred2D = P1[index-1] + P1[index-r3] - P1[index-r3-1]; + + diff = spaceFillingValue[index] - pred2D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + 
if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P1[index] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + } + else + { + type[index] = 0; + curValue = P1[index] = spaceFillingValue[index]; + compressInt64Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + } + } + + + /////////////////////////// Process layer-1 --> layer-r1-1 /////////////////////////// + + for (k = 1; k < r1; k++) + { + /* Process Row-0 data 0*/ + index = k*r23; + pred1D = P1[0]; + diff = spaceFillingValue[index] - pred1D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P0[0] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + } + else + { + type[index] = 0; + curValue = P0[0] = spaceFillingValue[index]; + compressInt64Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + + + /* Process Row-0 data 1 --> data r3-1 */ + for (j = 1; j < r3; j++) + { + //index = k*r2*r3+j; + index ++; + pred2D = P0[j-1] + P1[j] - P1[j-1]; + diff = spaceFillingValue[index] - pred2D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P0[j] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; +/* if(type[index]==0) + printf("err:type[%d]=0, index4\n", index); */ + } + else + { + type[index] = 0; + curValue = P0[j] = spaceFillingValue[index]; + compressInt64Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + } + + /* Process Row-1 --> Row-r2-1 */ + size_t index2D; + for (i = 1; i < r2; i++) + { + /* Process Row-i data 0 */ + index = k*r23 + i*r3; + index2D = i*r3; + pred2D = P0[index2D-r3] + 
P1[index2D] - P1[index2D-r3]; + diff = spaceFillingValue[index] - pred2D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P0[index2D] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + } + else + { + type[index] = 0; + curValue = P0[index2D] = spaceFillingValue[index]; + compressInt64Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + + /* Process Row-i data 1 --> data r3-1 */ + for (j = 1; j < r3; j++) + { +// if(k==63&&i==43&&j==27) +// printf("i=%d\n", i); + //index = k*r2*r3 + i*r3 + j; + index ++; + index2D = i*r3 + j; + pred3D = P0[index2D-1] + P0[index2D-r3]+ P1[index2D] - P0[index2D-r3-1] - P1[index2D-r3] - P1[index2D-1] + P1[index2D-r3-1]; + diff = spaceFillingValue[index] - pred3D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P0[index2D] = pred3D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + } + else + { + type[index] = 0; + curValue = P0[index2D] = spaceFillingValue[index]; + compressInt64Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + } + } + + int64_t *Pt; + Pt = P1; + P1 = P0; + P0 = Pt; + } + if(r23!=1) + free(P0); + free(P1); + + size_t exactDataNum = exactDataByteArray->size; + + TightDataPointStorageI* tdps; + + new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize, + type, exactDataByteArray->array, exactDataByteArray->size, + realPrecision, minValue, quantization_intervals, SZ_INT64); + + //free memory + free(type); + free(exactDataByteArray); //exactDataByteArray->array has been released in free_TightDataPointStorageF(tdps); + + return tdps; +} + + +void SZ_compress_args_int64_NoCkRngeNoGzip_3D(unsigned 
char** newByteData, int64_t *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t *outSize, +int64_t valueRangeSize, int64_t minValue) +{ + TightDataPointStorageI* tdps = SZ_compress_int64_3D_MDQ(oriData, r1, r2, r3, realPrecision, valueRangeSize, minValue); + + convertTDPStoFlatBytes_int(tdps, newByteData, outSize); + + size_t dataLength = r1*r2*r3; + if(*outSize>dataLength*sizeof(int64_t)) + SZ_compress_args_int64_StoreOriData(oriData, dataLength, tdps, newByteData, outSize); + + free_TightDataPointStorageI(tdps); +} + + +TightDataPointStorageI* SZ_compress_int64_4D_MDQ(int64_t *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision, int64_t valueRangeSize, int64_t minValue) +{ + unsigned char bytes[8] = {0,0,0,0,0,0,0,0}; + int byteSize = computeByteSizePerIntValue(valueRangeSize); + + unsigned int quantization_intervals; + if(exe_params->optQuantMode==1) + { + quantization_intervals = optimize_intervals_int64_4D(oriData, r1, r2, r3, r4, realPrecision); + updateQuantizationInfo(quantization_intervals); + } + else + quantization_intervals = exe_params->intvCapacity; + size_t i,j,k; + int64_t pred1D, pred2D, pred3D, curValue; + int64_t diff = 0.0; + double itvNum = 0; + int64_t *P0, *P1; + + size_t dataLength = r1*r2*r3*r4; + + size_t r234 = r2*r3*r4; + size_t r34 = r3*r4; + + P0 = (int64_t*)malloc(r34*sizeof(int64_t)); + P1 = (int64_t*)malloc(r34*sizeof(int64_t)); + + int* type = (int*) malloc(dataLength*sizeof(int)); + + int64_t* spaceFillingValue = oriData; // + + DynamicByteArray *exactDataByteArray; + new_DBA(&exactDataByteArray, DynArrayInitLen); + + size_t l; + for (l = 0; l < r1; l++) + { + + /////////////////////////// Process layer-0 /////////////////////////// + /* Process Row-0 data 0*/ + size_t index = l*r234; + size_t index2D = 0; + + type[index] = 0; + curValue = P1[index2D] = spaceFillingValue[index]; + compressInt64Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, 
byteSize); + + /* Process Row-0 data 1*/ + index = l*r234+1; + index2D = 1; + + pred1D = P1[index2D-1]; + diff = curValue - pred1D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P1[index2D] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + } + else + { + type[index] = 0; + + curValue = P1[index2D] = spaceFillingValue[0]; + compressInt64Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + + /* Process Row-0 data 2 --> data r4-1 */ + for (j = 2; j < r4; j++) + { + index = l*r234+j; + index2D = j; + + pred1D = 2*P1[index2D-1] - P1[index2D-2]; + diff = spaceFillingValue[index] - pred1D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P1[index2D] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + } + else + { + type[index] = 0; + + curValue = P1[index2D] = spaceFillingValue[0]; + compressInt64Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + } + + /* Process Row-1 --> Row-r3-1 */ + for (i = 1; i < r3; i++) + { + /* Process row-i data 0 */ + index = l*r234+i*r4; + index2D = i*r4; + + pred1D = P1[index2D-r4]; + diff = spaceFillingValue[index] - pred1D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P1[index2D] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + } + else + { + type[index] = 0; + + curValue = P1[index2D] = spaceFillingValue[0]; + compressInt64Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + + /* Process row-i data 1 --> 
data r4-1*/ + for (j = 1; j < r4; j++) + { + index = l*r234+i*r4+j; + index2D = i*r4+j; + + pred2D = P1[index2D-1] + P1[index2D-r4] - P1[index2D-r4-1]; + + diff = spaceFillingValue[index] - pred2D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P1[index2D] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + } + else + { + type[index] = 0; + + curValue = P1[index2D] = spaceFillingValue[0]; + compressInt64Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + } + } + + + /////////////////////////// Process layer-1 --> layer-r2-1 /////////////////////////// + + for (k = 1; k < r2; k++) + { + /* Process Row-0 data 0*/ + index = l*r234+k*r34; + index2D = 0; + + pred1D = P1[index2D]; + diff = spaceFillingValue[index] - pred1D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P0[index2D] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + } + else + { + type[index] = 0; + + curValue = P0[index2D] = spaceFillingValue[0]; + compressInt64Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + + /* Process Row-0 data 1 --> data r4-1 */ + for (j = 1; j < r4; j++) + { + index = l*r234+k*r34+j; + index2D = j; + + pred2D = P0[index2D-1] + P1[index2D] - P1[index2D-1]; + diff = spaceFillingValue[index] - pred2D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P0[index2D] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + } + else + { + type[index] = 0; + + curValue = P0[index2D] = spaceFillingValue[0]; + 
compressInt64Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + } + + /* Process Row-1 --> Row-r3-1 */ + for (i = 1; i < r3; i++) + { + /* Process Row-i data 0 */ + index = l*r234+k*r34+i*r4; + index2D = i*r4; + + pred2D = P0[index2D-r4] + P1[index2D] - P1[index2D-r4]; + diff = spaceFillingValue[index] - pred2D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P0[index2D] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + } + else + { + type[index] = 0; + + curValue = P0[index2D] = spaceFillingValue[0]; + compressInt64Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + + /* Process Row-i data 1 --> data r4-1 */ + for (j = 1; j < r4; j++) + { + index = l*r234+k*r34+i*r4+j; + index2D = i*r4+j; + + pred3D = P0[index2D-1] + P0[index2D-r4]+ P1[index2D] - P0[index2D-r4-1] - P1[index2D-r4] - P1[index2D-1] + P1[index2D-r4-1]; + diff = spaceFillingValue[index] - pred3D; + + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P0[index2D] = pred3D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + } + else + { + type[index] = 0; + + curValue = P0[index2D] = spaceFillingValue[0]; + compressInt64Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + } + } + + int64_t *Pt; + Pt = P1; + P1 = P0; + P0 = Pt; + } + } + + free(P0); + free(P1); + + size_t exactDataNum = exactDataByteArray->size; + + TightDataPointStorageI* tdps; + + new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize, + type, exactDataByteArray->array, exactDataByteArray->size, + realPrecision, minValue, quantization_intervals, SZ_INT64); + + //free 
memory + free(type); + free(exactDataByteArray); //exactDataByteArray->array has been released in free_TightDataPointStorageF(tdps); + + return tdps; +} + +void SZ_compress_args_int64_NoCkRngeNoGzip_4D(unsigned char** newByteData, int64_t *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision, +size_t *outSize, int64_t valueRangeSize, int64_t minValue) +{ + TightDataPointStorageI* tdps = SZ_compress_int64_4D_MDQ(oriData, r1, r2, r3, r4, realPrecision, valueRangeSize, minValue); + + convertTDPStoFlatBytes_int(tdps, newByteData, outSize); + + size_t dataLength = r1*r2*r3*r4; + if(*outSize>dataLength*sizeof(int64_t)) + SZ_compress_args_int64_StoreOriData(oriData, dataLength, tdps, newByteData, outSize); + + free_TightDataPointStorageI(tdps); +} + +void SZ_compress_args_int64_withinRange(unsigned char** newByteData, int64_t *oriData, size_t dataLength, size_t *outSize) +{ + TightDataPointStorageI* tdps = (TightDataPointStorageI*) malloc(sizeof(TightDataPointStorageI)); + tdps->typeArray = NULL; + + tdps->allSameData = 1; + tdps->dataSeriesLength = dataLength; + tdps->exactDataBytes = (unsigned char*)malloc(sizeof(unsigned char)*8); + tdps->isLossless = 0; + //tdps->exactByteSize = 4; + tdps->exactDataNum = 1; + tdps->exactDataBytes_size = 8; + + int64_t value = oriData[0]; + int64ToBytes_bigEndian(tdps->exactDataBytes, value); + + size_t tmpOutSize; + convertTDPStoFlatBytes_int(tdps, newByteData, &tmpOutSize); + + *outSize = tmpOutSize;//3+1+sizeof(int64_t)+SZ_SIZE_TYPE; //8==3+1+4(int64_size) + free_TightDataPointStorageI(tdps); +} + +int SZ_compress_args_int64_wRngeNoGzip(unsigned char** newByteData, int64_t *oriData, +size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize, +int errBoundMode, double absErr_Bound, double relBoundRatio) +{ + int status = SZ_SCES; + size_t dataLength = computeDataLength(r5,r4,r3,r2,r1); + int64_t valueRangeSize = 0; + + int64_t minValue = computeRangeSize_int(oriData, SZ_INT64, dataLength, 
&valueRangeSize); + double realPrecision = getRealPrecision_int(valueRangeSize, errBoundMode, absErr_Bound, relBoundRatio, &status); + + if(valueRangeSize <= realPrecision) + { + SZ_compress_args_int64_withinRange(newByteData, oriData, dataLength, outSize); + } + else + { +// SZ_compress_args_int64_NoCkRngeNoGzip_2D(newByteData, oriData, r2, r1, realPrecision, outSize); + if(r5==0&&r4==0&&r3==0&&r2==0) + { + SZ_compress_args_int64_NoCkRngeNoGzip_1D(newByteData, oriData, r1, realPrecision, outSize, valueRangeSize, minValue); + } + else if(r5==0&&r4==0&&r3==0) + { + SZ_compress_args_int64_NoCkRngeNoGzip_2D(newByteData, oriData, r2, r1, realPrecision, outSize, valueRangeSize, minValue); + } + else if(r5==0&&r4==0) + { + SZ_compress_args_int64_NoCkRngeNoGzip_3D(newByteData, oriData, r3, r2, r1, realPrecision, outSize, valueRangeSize, minValue); + } + else if(r5==0) + { + SZ_compress_args_int64_NoCkRngeNoGzip_3D(newByteData, oriData, r4*r3, r2, r1, realPrecision, outSize, valueRangeSize, minValue); + } + } + return status; +} + +int SZ_compress_args_int64(unsigned char** newByteData, int64_t *oriData, +size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize, +int errBoundMode, double absErr_Bound, double relBoundRatio) +{ + confparams_cpr->errorBoundMode = errBoundMode; + + if(errBoundMode>=PW_REL) + { + printf("Error: Current SZ version doesn't support integer data compression with point-wise relative error bound being based on pwrType=AVG\n"); + exit(0); + return SZ_NSCS; + } + int status = SZ_SCES; + size_t dataLength = computeDataLength(r5,r4,r3,r2,r1); + int64_t valueRangeSize = 0; + + int64_t minValue = (int64_t)computeRangeSize_int(oriData, SZ_INT64, dataLength, &valueRangeSize); + double realPrecision = 0; + + if(confparams_cpr->errorBoundMode==PSNR) + { + confparams_cpr->errorBoundMode = ABS; + realPrecision = confparams_cpr->absErrBound = computeABSErrBoundFromPSNR(confparams_cpr->psnr, (double)confparams_cpr->predThreshold, 
(double)valueRangeSize); + //printf("realPrecision=%lf\n", realPrecision); + } + else + realPrecision = getRealPrecision_int(valueRangeSize, errBoundMode, absErr_Bound, relBoundRatio, &status); + + if(valueRangeSize <= realPrecision) + { + SZ_compress_args_int64_withinRange(newByteData, oriData, dataLength, outSize); + } + else + { + size_t tmpOutSize = 0; + unsigned char* tmpByteData; + if (r2==0) + { + SZ_compress_args_int64_NoCkRngeNoGzip_1D(&tmpByteData, oriData, r1, realPrecision, &tmpOutSize, valueRangeSize, minValue); + } + else + if (r3==0) + { + SZ_compress_args_int64_NoCkRngeNoGzip_2D(&tmpByteData, oriData, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, minValue); + } + else + if (r4==0) + { + SZ_compress_args_int64_NoCkRngeNoGzip_3D(&tmpByteData, oriData, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, minValue); + } + else + if (r5==0) + { + SZ_compress_args_int64_NoCkRngeNoGzip_4D(&tmpByteData, oriData, r4, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, minValue); + } + else + { + printf("Error: doesn't support 5 dimensions for now.\n"); + status = SZ_DERR; //dimension error + } + //Call Gzip to do the further compression. 
+ if(confparams_cpr->szMode==SZ_BEST_SPEED) + { + *outSize = tmpOutSize; + *newByteData = tmpByteData; + } + else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION || confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION) + { + *outSize = sz_lossless_compress(confparams_cpr->losslessCompressor, confparams_cpr->gzipMode, tmpByteData, tmpOutSize, newByteData); + free(tmpByteData); + } + else + { + printf("Error: Wrong setting of confparams_cpr->szMode in the int64_t compression.\n"); + status = SZ_MERR; //mode error + } + } + + return status; +} diff --git a/deps/SZ/sz/src/sz_int8.c b/deps/SZ/sz/src/sz_int8.c new file mode 100644 index 0000000000000000000000000000000000000000..83febd0de64b14be3915a9fa81e5fdb907345fe6 --- /dev/null +++ b/deps/SZ/sz/src/sz_int8.c @@ -0,0 +1,1385 @@ +/** + * @file sz_int8.c + * @author Sheng Di + * @date Aug, 2017 + * @brief sz_int8, Compression and Decompression functions + * (C) 2017 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. 
+ */ + + +#include +#include +#include +#include +#include +#include "sz.h" +#include "CompressElement.h" +#include "DynamicByteArray.h" +#include "DynamicIntArray.h" +#include "zlib.h" +#include "rw.h" +#include "TightDataPointStorageI.h" +#include "sz_int8.h" +#include "utility.h" + +unsigned int optimize_intervals_int8_1D(int8_t *oriData, size_t dataLength, double realPrecision) +{ + size_t i = 0, radiusIndex; + int64_t pred_value = 0, pred_err; + size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t)); + memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t)); + size_t totalSampleSize = dataLength/confparams_cpr->sampleDistance; + for(i=2;isampleDistance==0) + { + //pred_value = 2*oriData[i-1] - oriData[i-2]; + pred_value = oriData[i-1]; + pred_err = llabs(pred_value - oriData[i]); + radiusIndex = (uint64_t)((pred_err/realPrecision+1)/2); + if(radiusIndex>=confparams_cpr->maxRangeRadius) + radiusIndex = confparams_cpr->maxRangeRadius - 1; + intervals[radiusIndex]++; + } + } + //compute the appropriate number + size_t targetCount = totalSampleSize*confparams_cpr->predThreshold; + size_t sum = 0; + for(i=0;imaxRangeRadius;i++) + { + sum += intervals[i]; + if(sum>targetCount) + break; + } + if(i>=confparams_cpr->maxRangeRadius) + i = confparams_cpr->maxRangeRadius-1; + + unsigned int accIntervals = 2*(i+1); + unsigned int powerOf2 = roundUpToPowerOf2(accIntervals); + + if(powerOf2<32) + powerOf2 = 32; + + free(intervals); + //printf("accIntervals=%d, powerOf2=%d\n", accIntervals, powerOf2); + return powerOf2; +} + +unsigned int optimize_intervals_int8_2D(int8_t *oriData, size_t r1, size_t r2, double realPrecision) +{ + size_t i,j, index; + size_t radiusIndex; + int64_t pred_value = 0, pred_err; + size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t)); + memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t)); + size_t totalSampleSize = (r1-1)*(r2-1)/confparams_cpr->sampleDistance; + 
for(i=1;isampleDistance==0) + { + index = i*r2+j; + pred_value = oriData[index-1] + oriData[index-r2] - oriData[index-r2-1]; + pred_err = llabs(pred_value - oriData[index]); + radiusIndex = (uint64_t)((pred_err/realPrecision+1)/2); + if(radiusIndex>=confparams_cpr->maxRangeRadius) + radiusIndex = confparams_cpr->maxRangeRadius - 1; + intervals[radiusIndex]++; + } + } + } + //compute the appropriate number + size_t targetCount = totalSampleSize*confparams_cpr->predThreshold; + size_t sum = 0; + for(i=0;imaxRangeRadius;i++) + { + sum += intervals[i]; + if(sum>targetCount) + break; + } + if(i>=confparams_cpr->maxRangeRadius) + i = confparams_cpr->maxRangeRadius-1; + unsigned int accIntervals = 2*(i+1); + unsigned int powerOf2 = roundUpToPowerOf2(accIntervals); + + if(powerOf2<32) + powerOf2 = 32; + + free(intervals); + //printf("confparams_cpr->maxRangeRadius = %d, accIntervals=%d, powerOf2=%d\n", confparams_cpr->maxRangeRadius, accIntervals, powerOf2); + return powerOf2; +} + +unsigned int optimize_intervals_int8_3D(int8_t *oriData, size_t r1, size_t r2, size_t r3, double realPrecision) +{ + size_t i,j,k, index; + size_t radiusIndex; + size_t r23=r2*r3; + int64_t pred_value = 0, pred_err; + size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t)); + memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t)); + size_t totalSampleSize = (r1-1)*(r2-1)*(r3-1)/confparams_cpr->sampleDistance; + for(i=1;isampleDistance==0) + { + index = i*r23+j*r3+k; + pred_value = oriData[index-1] + oriData[index-r3] + oriData[index-r23] + - oriData[index-1-r23] - oriData[index-r3-1] - oriData[index-r3-r23] + oriData[index-r3-r23-1]; + pred_err = llabs(pred_value - oriData[index]); + radiusIndex = (pred_err/realPrecision+1)/2; + if(radiusIndex>=confparams_cpr->maxRangeRadius) + { + radiusIndex = confparams_cpr->maxRangeRadius - 1; + //printf("radiusIndex=%d\n", radiusIndex); + } + intervals[radiusIndex]++; + } + } + } + } + //compute the appropriate 
number + size_t targetCount = totalSampleSize*confparams_cpr->predThreshold; + size_t sum = 0; + for(i=0;imaxRangeRadius;i++) + { + sum += intervals[i]; + if(sum>targetCount) + break; + } + if(i>=confparams_cpr->maxRangeRadius) + i = confparams_cpr->maxRangeRadius-1; + unsigned int accIntervals = 2*(i+1); + unsigned int powerOf2 = roundUpToPowerOf2(accIntervals); + + if(powerOf2<32) + powerOf2 = 32; + + free(intervals); + //printf("targetCount=%d, sum=%d, totalSampleSize=%d, ratio=%f, accIntervals=%d, powerOf2=%d\n", targetCount, sum, totalSampleSize, (double)sum/(double)totalSampleSize, accIntervals, powerOf2); + return powerOf2; +} + + +unsigned int optimize_intervals_int8_4D(int8_t *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision) +{ + size_t i,j,k,l, index; + size_t radiusIndex; + size_t r234=r2*r3*r4; + size_t r34=r3*r4; + int64_t pred_value = 0, pred_err; + size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t)); + memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t)); + size_t totalSampleSize = (r1-1)*(r2-1)*(r3-1)*(r4-1)/confparams_cpr->sampleDistance; + for(i=1;isampleDistance==0) + { + index = i*r234+j*r34+k*r4+l; + pred_value = oriData[index-1] + oriData[index-r3] + oriData[index-r34] + - oriData[index-1-r34] - oriData[index-r4-1] - oriData[index-r4-r34] + oriData[index-r4-r34-1]; + pred_err = llabs(pred_value - oriData[index]); + radiusIndex = (uint64_t)((pred_err/realPrecision+1)/2); + if(radiusIndex>=confparams_cpr->maxRangeRadius) + radiusIndex = confparams_cpr->maxRangeRadius - 1; + intervals[radiusIndex]++; + } + } + } + } + } + //compute the appropriate number + size_t targetCount = totalSampleSize*confparams_cpr->predThreshold; + size_t sum = 0; + for(i=0;imaxRangeRadius;i++) + { + sum += intervals[i]; + if(sum>targetCount) + break; + } + if(i>=confparams_cpr->maxRangeRadius) + i = confparams_cpr->maxRangeRadius-1; + + unsigned int accIntervals = 2*(i+1); + unsigned int powerOf2 = 
roundUpToPowerOf2(accIntervals); + + if(powerOf2<32) + powerOf2 = 32; + + free(intervals); + return powerOf2; +} + +TightDataPointStorageI* SZ_compress_int8_1D_MDQ(int8_t *oriData, size_t dataLength, double realPrecision, int64_t valueRangeSize, int64_t minValue) +{ + unsigned char bytes[8] = {0,0,0,0,0,0,0,0}; + int byteSize = computeByteSizePerIntValue(valueRangeSize); + unsigned int quantization_intervals; + if(exe_params->optQuantMode==1) + quantization_intervals = optimize_intervals_int8_1D(oriData, dataLength, realPrecision); + else + quantization_intervals = exe_params->intvCapacity; + updateQuantizationInfo(quantization_intervals); + size_t i; + + int* type = (int*) malloc(dataLength*sizeof(int)); + + int8_t* spaceFillingValue = oriData; // + + DynamicByteArray *exactDataByteArray; + new_DBA(&exactDataByteArray, DynArrayInitLen); + + int64_t last3CmprsData[3] = {0,0,0}; + + //add the first data + type[0] = 0; + compressInt8Value(spaceFillingValue[0], minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + listAdd_int(last3CmprsData, spaceFillingValue[0]); + + type[1] = 0; + compressInt8Value(spaceFillingValue[1], minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + listAdd_int(last3CmprsData, spaceFillingValue[1]); + //printf("%.30G\n",last3CmprsData[0]); + + int state; + double checkRadius = (exe_params->intvCapacity-1)*realPrecision; + int64_t curData; + int64_t pred, predAbsErr; + double interval = 2*realPrecision; + + for(i=2;i=pred) + { + type[i] = exe_params->intvRadius+state; + pred = pred + state*interval; + } + else //curDataintvRadius-state; + pred = pred - state*interval; + } + if(pred>SZ_INT8_MAX) pred = SZ_INT8_MAX; + if(predsize / byteSize; + + TightDataPointStorageI* tdps; + + new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize, + type, exactDataByteArray->array, exactDataByteArray->size, + realPrecision, minValue, quantization_intervals, SZ_INT8); + +//sdi:Debug 
+/* int sum =0; + for(i=0;iarray has been released in free_TightDataPointStorageF(tdps); + + return tdps; +} + +void SZ_compress_args_int8_StoreOriData(int8_t* oriData, size_t dataLength, TightDataPointStorageI* tdps, +unsigned char** newByteData, size_t *outSize) +{ + int intSize=sizeof(int8_t); + size_t k = 0, i; + tdps->isLossless = 1; + size_t totalByteLength = 3 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 1 + intSize*dataLength; + *newByteData = (unsigned char*)malloc(totalByteLength); + + unsigned char dsLengthBytes[8]; + for (i = 0; i < 3; i++)//3 + (*newByteData)[k++] = versionNumber[i]; + + if(exe_params->SZ_SIZE_TYPE==4)//1 + (*newByteData)[k++] = 16; //00010000 + else + (*newByteData)[k++] = 80; //01010000: 01000000 indicates the SZ_SIZE_TYPE=8 + + convertSZParamsToBytes(confparams_cpr, &((*newByteData)[k])); + k = k + MetaDataByteLength; + + sizeToBytes(dsLengthBytes,dataLength); //SZ_SIZE_TYPE: 4 or 8 + for (i = 0; i < exe_params->SZ_SIZE_TYPE; i++) + (*newByteData)[k++] = dsLengthBytes[i]; + + if(sysEndianType==BIG_ENDIAN_SYSTEM) + memcpy((*newByteData)+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE, oriData, dataLength*intSize); + else + { + unsigned char* p = (*newByteData)+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE; + for(i=0;i dataLength*sizeof(int8_t)) + SZ_compress_args_int8_StoreOriData(oriData, dataLength+2, tdps, newByteData, outSize); + free_TightDataPointStorageI(tdps); +} + +TightDataPointStorageI* SZ_compress_int8_2D_MDQ(int8_t *oriData, size_t r1, size_t r2, double realPrecision, int64_t valueRangeSize, int64_t minValue) +{ + unsigned char bytes[8] = {0,0,0,0,0,0,0,0}; + int byteSize = computeByteSizePerIntValue(valueRangeSize); + + unsigned int quantization_intervals; + if(exe_params->optQuantMode==1) + { + quantization_intervals = optimize_intervals_int8_2D(oriData, r1, r2, realPrecision); + updateQuantizationInfo(quantization_intervals); + } + else + quantization_intervals = exe_params->intvCapacity; + size_t i,j; + int64_t 
pred1D, pred2D, curValue, tmp; + int diff = 0.0; + double itvNum = 0; + int8_t *P0, *P1; + + size_t dataLength = r1*r2; + + P0 = (int8_t*)malloc(r2*sizeof(int8_t)); + memset(P0, 0, r2*sizeof(int8_t)); + P1 = (int8_t*)malloc(r2*sizeof(int8_t)); + memset(P1, 0, r2*sizeof(int8_t)); + + int* type = (int*) malloc(dataLength*sizeof(int)); + //type[dataLength]=0; + + int8_t* spaceFillingValue = oriData; // + + DynamicByteArray *exactDataByteArray; + new_DBA(&exactDataByteArray, DynArrayInitLen); + + type[0] = 0; + curValue = P1[0] = spaceFillingValue[0]; + compressInt8Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + + /* Process Row-0 data 1*/ + pred1D = P1[0]; + diff = spaceFillingValue[1] - pred1D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[1] = (int) (itvNum/2) + exe_params->intvRadius; + tmp = pred1D + 2 * (type[1] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_INT8_MIN&&tmp data r2-1 */ + for (j = 2; j < r2; j++) + { + pred1D = 2*P1[j-1] - P1[j-2]; + diff = spaceFillingValue[j] - pred1D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[j] = (int) (itvNum/2) + exe_params->intvRadius; + tmp = pred1D + 2 * (type[j] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_INT8_MIN&&tmp Row-r1-1 */ + size_t index; + for (i = 1; i < r1; i++) + { + /* Process row-i data 0 */ + index = i*r2; + pred1D = P1[0]; + diff = spaceFillingValue[index] - pred1D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + tmp = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_INT8_MIN&&tmp r2-1*/ + for (j = 1; j < r2; j++) + { + index = i*r2+j; + pred2D = P0[j-1] + P1[j] - P1[j-1]; + + diff = 
spaceFillingValue[index] - pred2D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + tmp = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_INT8_MIN&&tmpsize; + + TightDataPointStorageI* tdps; + + new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize, + type, exactDataByteArray->array, exactDataByteArray->size, + realPrecision, minValue, quantization_intervals, SZ_INT8); + + //free memory + free(type); + free(exactDataByteArray); //exactDataByteArray->array has been released in free_TightDataPointStorageF(tdps); + + return tdps; +} + +/** + * + * Note: @r1 is high dimension + * @r2 is low dimension + * */ +void SZ_compress_args_int8_NoCkRngeNoGzip_2D(unsigned char** newByteData, int8_t *oriData, size_t r1, size_t r2, double realPrecision, size_t *outSize, +int64_t valueRangeSize, int8_t minValue) +{ + TightDataPointStorageI* tdps = SZ_compress_int8_2D_MDQ(oriData, r1, r2, realPrecision, valueRangeSize, minValue); + + convertTDPStoFlatBytes_int(tdps, newByteData, outSize); + + size_t dataLength = r1*r2; + if(*outSize>dataLength*sizeof(int8_t)) + SZ_compress_args_int8_StoreOriData(oriData, dataLength, tdps, newByteData, outSize); + + free_TightDataPointStorageI(tdps); +} + +TightDataPointStorageI* SZ_compress_int8_3D_MDQ(int8_t *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, int64_t valueRangeSize, int64_t minValue) +{ + unsigned char bytes[8] = {0,0,0,0,0,0,0,0}; + int byteSize = computeByteSizePerIntValue(valueRangeSize); + + unsigned int quantization_intervals; + if(exe_params->optQuantMode==1) + { + quantization_intervals = optimize_intervals_int8_3D(oriData, r1, r2, r3, realPrecision); + updateQuantizationInfo(quantization_intervals); + } + else + quantization_intervals = exe_params->intvCapacity; + size_t i,j,k; + int64_t pred1D, pred2D, pred3D, curValue, tmp; + 
int diff = 0.0; + double itvNum = 0; + int8_t *P0, *P1; + + size_t dataLength = r1*r2*r3; + + size_t r23 = r2*r3; + P0 = (int8_t*)malloc(r23*sizeof(int8_t)); + P1 = (int8_t*)malloc(r23*sizeof(int8_t)); + + int* type = (int*) malloc(dataLength*sizeof(int)); + + int8_t* spaceFillingValue = oriData; // + + DynamicByteArray *exactDataByteArray; + new_DBA(&exactDataByteArray, DynArrayInitLen); + + type[0] = 0; + P1[0] = spaceFillingValue[0]; + compressInt8Value(spaceFillingValue[0], minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + + /* Process Row-0 data 1*/ + pred1D = P1[0]; + diff = spaceFillingValue[1] - pred1D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[1] = (int) (itvNum/2) + exe_params->intvRadius; + tmp = pred1D + 2 * (type[1] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_INT8_MIN&&tmp data r3-1 */ + for (j = 2; j < r3; j++) + { + pred1D = 2*P1[j-1] - P1[j-2]; + diff = spaceFillingValue[j] - pred1D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[j] = (int) (itvNum/2) + exe_params->intvRadius; + tmp = pred1D + 2 * (type[j] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_INT8_MIN&&tmp Row-r2-1 */ + size_t index; + for (i = 1; i < r2; i++) + { + /* Process row-i data 0 */ + index = i*r3; + pred1D = P1[index-r3]; + diff = spaceFillingValue[index] - pred1D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + tmp = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_INT8_MIN&&tmp data r3-1*/ + for (j = 1; j < r3; j++) + { + index = i*r3+j; + pred2D = P1[index-1] + P1[index-r3] - P1[index-r3-1]; + + diff = spaceFillingValue[index] - pred2D; + + itvNum = llabs(diff)/realPrecision 
+ 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + tmp = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_INT8_MIN&&tmp layer-r1-1 /////////////////////////// + + for (k = 1; k < r1; k++) + { + /* Process Row-0 data 0*/ + index = k*r23; + pred1D = P1[0]; + diff = spaceFillingValue[index] - pred1D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + tmp = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_INT8_MIN&&tmp data r3-1 */ + for (j = 1; j < r3; j++) + { + //index = k*r2*r3+j; + index ++; + pred2D = P0[j-1] + P1[j] - P1[j-1]; + diff = spaceFillingValue[index] - pred2D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + tmp = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_INT8_MIN&&tmp Row-r2-1 */ + size_t index2D; + for (i = 1; i < r2; i++) + { + /* Process Row-i data 0 */ + index = k*r23 + i*r3; + index2D = i*r3; + pred2D = P0[index2D-r3] + P1[index2D] - P1[index2D-r3]; + diff = spaceFillingValue[index] - pred2D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + tmp = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_INT8_MIN&&tmp data r3-1 */ + for (j = 1; j < r3; j++) + { +// if(k==63&&i==43&&j==27) +// printf("i=%d\n", i); + //index = k*r2*r3 + i*r3 + j; + index ++; + index2D = i*r3 + j; + pred3D = P0[index2D-1] + P0[index2D-r3]+ P1[index2D] - P0[index2D-r3-1] - P1[index2D-r3] - P1[index2D-1] + P1[index2D-r3-1]; + diff = 
spaceFillingValue[index] - pred3D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + tmp = pred3D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_INT8_MIN&&tmpsize; + + TightDataPointStorageI* tdps; + + new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize, + type, exactDataByteArray->array, exactDataByteArray->size, + realPrecision, minValue, quantization_intervals, SZ_INT8); + + //free memory + free(type); + free(exactDataByteArray); //exactDataByteArray->array has been released in free_TightDataPointStorageF(tdps); + + return tdps; +} + + +void SZ_compress_args_int8_NoCkRngeNoGzip_3D(unsigned char** newByteData, int8_t *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t *outSize, +int64_t valueRangeSize, int64_t minValue) +{ + TightDataPointStorageI* tdps = SZ_compress_int8_3D_MDQ(oriData, r1, r2, r3, realPrecision, valueRangeSize, minValue); + + convertTDPStoFlatBytes_int(tdps, newByteData, outSize); + + size_t dataLength = r1*r2*r3; + if(*outSize>dataLength*sizeof(int8_t)) + SZ_compress_args_int8_StoreOriData(oriData, dataLength, tdps, newByteData, outSize); + + free_TightDataPointStorageI(tdps); +} + + +TightDataPointStorageI* SZ_compress_int8_4D_MDQ(int8_t *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision, int64_t valueRangeSize, int64_t minValue) +{ + unsigned char bytes[8] = {0,0,0,0,0,0,0,0}; + int byteSize = computeByteSizePerIntValue(valueRangeSize); + + unsigned int quantization_intervals; + if(exe_params->optQuantMode==1) + { + quantization_intervals = optimize_intervals_int8_4D(oriData, r1, r2, r3, r4, realPrecision); + updateQuantizationInfo(quantization_intervals); + } + else + quantization_intervals = exe_params->intvCapacity; + size_t i,j,k; + int64_t pred1D, pred2D, pred3D, curValue, tmp; + int diff = 0.0; + double itvNum = 
0; + int8_t *P0, *P1; + + size_t dataLength = r1*r2*r3*r4; + + size_t r234 = r2*r3*r4; + size_t r34 = r3*r4; + + P0 = (int8_t*)malloc(r34*sizeof(int8_t)); + P1 = (int8_t*)malloc(r34*sizeof(int8_t)); + + int* type = (int*) malloc(dataLength*sizeof(int)); + + int8_t* spaceFillingValue = oriData; // + + DynamicByteArray *exactDataByteArray; + new_DBA(&exactDataByteArray, DynArrayInitLen); + + size_t l; + for (l = 0; l < r1; l++) + { + + /////////////////////////// Process layer-0 /////////////////////////// + /* Process Row-0 data 0*/ + size_t index = l*r234; + size_t index2D = 0; + + type[index] = 0; + curValue = P1[index2D] = spaceFillingValue[index]; + compressInt8Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + + /* Process Row-0 data 1*/ + index = l*r234+1; + index2D = 1; + + pred1D = P1[index2D-1]; + diff = curValue - pred1D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + tmp = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_INT8_MIN&&tmp data r4-1 */ + for (j = 2; j < r4; j++) + { + index = l*r234+j; + index2D = j; + + pred1D = 2*P1[index2D-1] - P1[index2D-2]; + diff = spaceFillingValue[index] - pred1D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + tmp = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_INT8_MIN&&tmp Row-r3-1 */ + for (i = 1; i < r3; i++) + { + /* Process row-i data 0 */ + index = l*r234+i*r4; + index2D = i*r4; + + pred1D = P1[index2D-r4]; + diff = spaceFillingValue[index] - pred1D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + 
exe_params->intvRadius; + tmp = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_INT8_MIN&&tmp data r4-1*/ + for (j = 1; j < r4; j++) + { + index = l*r234+i*r4+j; + index2D = i*r4+j; + + pred2D = P1[index2D-1] + P1[index2D-r4] - P1[index2D-r4-1]; + + diff = spaceFillingValue[index] - pred2D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + tmp = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_INT8_MIN&&tmp layer-r2-1 /////////////////////////// + + for (k = 1; k < r2; k++) + { + /* Process Row-0 data 0*/ + index = l*r234+k*r34; + index2D = 0; + + pred1D = P1[index2D]; + diff = spaceFillingValue[index] - pred1D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + tmp = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_INT8_MIN&&tmp data r4-1 */ + for (j = 1; j < r4; j++) + { + index = l*r234+k*r34+j; + index2D = j; + + pred2D = P0[index2D-1] + P1[index2D] - P1[index2D-1]; + diff = spaceFillingValue[index] - pred2D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + tmp = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_INT8_MIN&&tmp Row-r3-1 */ + for (i = 1; i < r3; i++) + { + /* Process Row-i data 0 */ + index = l*r234+k*r34+i*r4; + index2D = i*r4; + + pred2D = P0[index2D-r4] + P1[index2D] - P1[index2D-r4]; + diff = spaceFillingValue[index] - pred2D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + 
exe_params->intvRadius; + tmp = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_INT8_MIN&&tmp data r4-1 */ + for (j = 1; j < r4; j++) + { + index = l*r234+k*r34+i*r4+j; + index2D = i*r4+j; + + pred3D = P0[index2D-1] + P0[index2D-r4]+ P1[index2D] - P0[index2D-r4-1] - P1[index2D-r4] - P1[index2D-1] + P1[index2D-r4-1]; + diff = spaceFillingValue[index] - pred3D; + + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + tmp = pred3D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_INT8_MIN&&tmpsize; + + TightDataPointStorageI* tdps; + + new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize, + type, exactDataByteArray->array, exactDataByteArray->size, + realPrecision, minValue, quantization_intervals, SZ_INT8); + + //free memory + free(type); + free(exactDataByteArray); //exactDataByteArray->array has been released in free_TightDataPointStorageF(tdps); + + return tdps; +} + +void SZ_compress_args_int8_NoCkRngeNoGzip_4D(unsigned char** newByteData, int8_t *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision, +size_t *outSize, int64_t valueRangeSize, int64_t minValue) +{ + TightDataPointStorageI* tdps = SZ_compress_int8_4D_MDQ(oriData, r1, r2, r3, r4, realPrecision, valueRangeSize, minValue); + + convertTDPStoFlatBytes_int(tdps, newByteData, outSize); + + size_t dataLength = r1*r2*r3*r4; + if(*outSize>dataLength*sizeof(int8_t)) + SZ_compress_args_int8_StoreOriData(oriData, dataLength, tdps, newByteData, outSize); + + free_TightDataPointStorageI(tdps); +} + +void SZ_compress_args_int8_withinRange(unsigned char** newByteData, int8_t *oriData, size_t dataLength, size_t *outSize) +{ + TightDataPointStorageI* tdps = (TightDataPointStorageI*) malloc(sizeof(TightDataPointStorageI)); + tdps->typeArray = NULL; + + tdps->allSameData = 1; + 
tdps->dataSeriesLength = dataLength; + tdps->exactDataBytes = (unsigned char*)malloc(sizeof(unsigned char)); + tdps->isLossless = 0; + //tdps->exactByteSize = 4; + tdps->exactDataNum = 1; + tdps->exactDataBytes_size = 1; + + int8_t value = oriData[0]; + //intToBytes_bigEndian(tdps->exactDataBytes, value); + memcpy(tdps->exactDataBytes, &value, 1); + + size_t tmpOutSize; + convertTDPStoFlatBytes_int(tdps, newByteData, &tmpOutSize); + + *outSize = tmpOutSize;//3+1+sizeof(int8_t)+SZ_SIZE_TYPE; //8==3+1+4(int8_size) + free_TightDataPointStorageI(tdps); +} + +int SZ_compress_args_int8_wRngeNoGzip(unsigned char** newByteData, int8_t *oriData, +size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize, +int errBoundMode, double absErr_Bound, double relBoundRatio) +{ + int status = SZ_SCES; + size_t dataLength = computeDataLength(r5,r4,r3,r2,r1); + int64_t valueRangeSize = 0; + + int8_t minValue = computeRangeSize_int(oriData, SZ_INT8, dataLength, &valueRangeSize); + double realPrecision = getRealPrecision_int(valueRangeSize, errBoundMode, absErr_Bound, relBoundRatio, &status); + + if(valueRangeSize <= realPrecision) + { + SZ_compress_args_int8_withinRange(newByteData, oriData, dataLength, outSize); + } + else + { +// SZ_compress_args_int8_NoCkRngeNoGzip_2D(newByteData, oriData, r2, r1, realPrecision, outSize); + if(r5==0&&r4==0&&r3==0&&r2==0) + { + SZ_compress_args_int8_NoCkRngeNoGzip_1D(newByteData, oriData, r1, realPrecision, outSize, valueRangeSize, minValue); + } + else if(r5==0&&r4==0&&r3==0) + { + SZ_compress_args_int8_NoCkRngeNoGzip_2D(newByteData, oriData, r2, r1, realPrecision, outSize, valueRangeSize, minValue); + } + else if(r5==0&&r4==0) + { + SZ_compress_args_int8_NoCkRngeNoGzip_3D(newByteData, oriData, r3, r2, r1, realPrecision, outSize, valueRangeSize, minValue); + } + else if(r5==0) + { + SZ_compress_args_int8_NoCkRngeNoGzip_3D(newByteData, oriData, r4*r3, r2, r1, realPrecision, outSize, valueRangeSize, minValue); + } + } + return status; +} + 
+int SZ_compress_args_int8(unsigned char** newByteData, int8_t *oriData, +size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize, +int errBoundMode, double absErr_Bound, double relBoundRatio) +{ + confparams_cpr->errorBoundMode = errBoundMode; + + if(errBoundMode>=PW_REL) + { + printf("Error: Current SZ version doesn't support integer data compression with point-wise relative error bound being based on pwrType=AVG\n"); + exit(0); + return SZ_NSCS; + } + int status = SZ_SCES; + size_t dataLength = computeDataLength(r5,r4,r3,r2,r1); + int64_t valueRangeSize = 0; + + int8_t minValue = (int8_t)computeRangeSize_int(oriData, SZ_INT8, dataLength, &valueRangeSize); + double realPrecision = 0; + + if(confparams_cpr->errorBoundMode==PSNR) + { + confparams_cpr->errorBoundMode = ABS; + realPrecision = confparams_cpr->absErrBound = computeABSErrBoundFromPSNR(confparams_cpr->psnr, (double)confparams_cpr->predThreshold, (double)valueRangeSize); + //printf("realPrecision=%lf\n", realPrecision); + } + else + realPrecision = getRealPrecision_int(valueRangeSize, errBoundMode, absErr_Bound, relBoundRatio, &status); + + if(valueRangeSize <= realPrecision) + { + SZ_compress_args_int8_withinRange(newByteData, oriData, dataLength, outSize); + } + else + { + size_t tmpOutSize = 0; + unsigned char* tmpByteData; + if (r2==0) + { + SZ_compress_args_int8_NoCkRngeNoGzip_1D(&tmpByteData, oriData, r1, realPrecision, &tmpOutSize, valueRangeSize, minValue); + } + else + if (r3==0) + { + SZ_compress_args_int8_NoCkRngeNoGzip_2D(&tmpByteData, oriData, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, minValue); + } + else + if (r4==0) + { + SZ_compress_args_int8_NoCkRngeNoGzip_3D(&tmpByteData, oriData, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, minValue); + } + else + if (r5==0) + { + SZ_compress_args_int8_NoCkRngeNoGzip_4D(&tmpByteData, oriData, r4, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, minValue); + } + else + { + printf("Error: doesn't support 5 
dimensions for now.\n"); + status = SZ_DERR; //dimension error + } + //Call Gzip to do the further compression. + if(confparams_cpr->szMode==SZ_BEST_SPEED) + { + *outSize = tmpOutSize; + *newByteData = tmpByteData; + } + else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION || confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION) + { + *outSize = sz_lossless_compress(confparams_cpr->losslessCompressor, confparams_cpr->gzipMode, tmpByteData, tmpOutSize, newByteData); + free(tmpByteData); + } + else + { + printf("Error: Wrong setting of confparams_cpr->szMode in the int8_t compression.\n"); + status = SZ_MERR; //mode error + } + } + + return status; +} diff --git a/deps/SZ/sz/src/sz_interface.F90 b/deps/SZ/sz/src/sz_interface.F90 new file mode 100644 index 0000000000000000000000000000000000000000..4a3f2a04390b4bba2ed7f5b56f76fbe9e2e686f3 --- /dev/null +++ b/deps/SZ/sz/src/sz_interface.F90 @@ -0,0 +1,1207 @@ +! @file sz_interface.F90 +! @author Sheng Di (disheng222@gmail.com or sdi1@anl.gov) +! @date June, 2016 +! @ Mathematics and Computer Science (MCS) +! @ Argonne National Laboratory, Lemont, USA. +! 
@brief The key Fortran binding file to connect C language and Fortran (Fortran part) + + +MODULE SZ + use :: ISO_C_BINDING + INTERFACE SZ_Compress + MODULE PROCEDURE SZ_Compress_d1_Fortran_REAL_K4 + MODULE PROCEDURE SZ_Compress_d2_Fortran_REAL_K4 + MODULE PROCEDURE SZ_Compress_d3_Fortran_REAL_K4 + MODULE PROCEDURE SZ_Compress_d4_Fortran_REAL_K4 + MODULE PROCEDURE SZ_Compress_d5_Fortran_REAL_K4 + MODULE PROCEDURE SZ_Compress_d1_Fortran_REAL_K4_ARGS + MODULE PROCEDURE SZ_Compress_d2_Fortran_REAL_K4_ARGS + MODULE PROCEDURE SZ_Compress_d3_Fortran_REAL_K4_ARGS + MODULE PROCEDURE SZ_Compress_d4_Fortran_REAL_K4_ARGS + MODULE PROCEDURE SZ_Compress_d5_Fortran_REAL_K4_ARGS + MODULE PROCEDURE SZ_Compress_d1_Fortran_REAL_K8 + MODULE PROCEDURE SZ_Compress_d2_Fortran_REAL_K8 + MODULE PROCEDURE SZ_Compress_d3_Fortran_REAL_K8 + MODULE PROCEDURE SZ_Compress_d4_Fortran_REAL_K8 + MODULE PROCEDURE SZ_Compress_d5_Fortran_REAL_K8 + MODULE PROCEDURE SZ_Compress_d1_Fortran_REAL_K8_ARGS + MODULE PROCEDURE SZ_Compress_d2_Fortran_REAL_K8_ARGS + MODULE PROCEDURE SZ_Compress_d3_Fortran_REAL_K8_ARGS + MODULE PROCEDURE SZ_Compress_d4_Fortran_REAL_K8_ARGS + MODULE PROCEDURE SZ_Compress_d5_Fortran_REAL_K8_ARGS + MODULE PROCEDURE SZ_Compress_d1_Fortran_REAL_K4_Rev + MODULE PROCEDURE SZ_Compress_d2_Fortran_REAL_K4_Rev + MODULE PROCEDURE SZ_Compress_d3_Fortran_REAL_K4_Rev + MODULE PROCEDURE SZ_Compress_d4_Fortran_REAL_K4_Rev + MODULE PROCEDURE SZ_Compress_d5_Fortran_REAL_K4_Rev + MODULE PROCEDURE SZ_Compress_d1_Fortran_REAL_K4_ARGS_Rev + MODULE PROCEDURE SZ_Compress_d2_Fortran_REAL_K4_ARGS_Rev + MODULE PROCEDURE SZ_Compress_d3_Fortran_REAL_K4_ARGS_Rev + MODULE PROCEDURE SZ_Compress_d4_Fortran_REAL_K4_ARGS_Rev + MODULE PROCEDURE SZ_Compress_d5_Fortran_REAL_K4_ARGS_Rev + MODULE PROCEDURE SZ_Compress_d1_Fortran_REAL_K8_Rev + MODULE PROCEDURE SZ_Compress_d2_Fortran_REAL_K8_Rev + MODULE PROCEDURE SZ_Compress_d3_Fortran_REAL_K8_Rev + MODULE PROCEDURE SZ_Compress_d4_Fortran_REAL_K8_Rev + MODULE PROCEDURE 
SZ_Compress_d5_Fortran_REAL_K8_Rev + MODULE PROCEDURE SZ_Compress_d1_Fortran_REAL_K8_ARGS_Rev + MODULE PROCEDURE SZ_Compress_d2_Fortran_REAL_K8_ARGS_Rev + MODULE PROCEDURE SZ_Compress_d3_Fortran_REAL_K8_ARGS_Rev + MODULE PROCEDURE SZ_Compress_d4_Fortran_REAL_K8_ARGS_Rev + MODULE PROCEDURE SZ_Compress_d5_Fortran_REAL_K8_ARGS_Rev + END INTERFACE SZ_Compress + + INTERFACE SZ_Decompress + MODULE PROCEDURE SZ_Decompress_d1_Fortran_REAL_K4 + MODULE PROCEDURE SZ_Decompress_d2_Fortran_REAL_K4 + MODULE PROCEDURE SZ_Decompress_d3_Fortran_REAL_K4 + MODULE PROCEDURE SZ_Decompress_d4_Fortran_REAL_K4 + MODULE PROCEDURE SZ_Decompress_d5_Fortran_REAL_K4 + MODULE PROCEDURE SZ_Decompress_d1_Fortran_REAL_K8 + MODULE PROCEDURE SZ_Decompress_d2_Fortran_REAL_K8 + MODULE PROCEDURE SZ_Decompress_d3_Fortran_REAL_K8 + MODULE PROCEDURE SZ_Decompress_d4_Fortran_REAL_K8 + MODULE PROCEDURE SZ_Decompress_d5_Fortran_REAL_K8 + END INTERFACE SZ_Decompress + + INTERFACE SZ_BatchAddVar + MODULE PROCEDURE SZ_BatchAddVar_d1_Fortran_REAL_K4 + MODULE PROCEDURE SZ_BatchAddVar_d2_Fortran_REAL_K4 + MODULE PROCEDURE SZ_BatchAddVar_d3_Fortran_REAL_K4 + MODULE PROCEDURE SZ_BatchAddVar_d4_Fortran_REAL_K4 + MODULE PROCEDURE SZ_BatchAddVar_d5_Fortran_REAL_K4 + MODULE PROCEDURE SZ_BatchAddVar_d1_Fortran_REAL_K8 + MODULE PROCEDURE SZ_BatchAddVar_d2_Fortran_REAL_K8 + MODULE PROCEDURE SZ_BatchAddVar_d3_Fortran_REAL_K8 + MODULE PROCEDURE SZ_BatchAddVar_d4_Fortran_REAL_K8 + MODULE PROCEDURE SZ_BatchAddVar_d5_Fortran_REAL_K8 + END INTERFACE SZ_BatchAddVar + + INTERFACE SZ_GetVarData + MODULE PROCEDURE SZ_GetVarData_d1_Fortran_REAL_K4 + MODULE PROCEDURE SZ_GetVarData_d2_Fortran_REAL_K4 + MODULE PROCEDURE SZ_GetVarData_d3_Fortran_REAL_K4 + MODULE PROCEDURE SZ_GetVarData_d4_Fortran_REAL_K4 + MODULE PROCEDURE SZ_GetVarData_d5_Fortran_REAL_K4 + MODULE PROCEDURE SZ_GetVarData_d1_Fortran_REAL_K8 + MODULE PROCEDURE SZ_GetVarData_d2_Fortran_REAL_K8 + MODULE PROCEDURE SZ_GetVarData_d3_Fortran_REAL_K8 + MODULE PROCEDURE 
SZ_GetVarData_d4_Fortran_REAL_K8 + MODULE PROCEDURE SZ_GetVarData_d5_Fortran_REAL_K8 + END INTERFACE SZ_GetVarData + + CONTAINS + +!Init and Finalize + + SUBROUTINE SZ_Init(config_File,ierr) + implicit none + CHARACTER(len=32) :: config_File + INTEGER :: l,ierr + CALL SZ_Init_c(config_File,len(trim(config_File)),ierr) + END SUBROUTINE SZ_Init + + SUBROUTINE SZ_Finalize() + CALL SZ_Finalize_c() + END SUBROUTINE SZ_Finalize + + SUBROUTINE SZ_FREE_VARSET(mode) + implicit none + INTEGER :: mode !0,1,2, or 3 + + CALL SZ_Freevarset_c(mode) + END SUBROUTINE SZ_FREE_VARSET + +!batch-mode functions + + SUBROUTINE SZ_BatchDelVar(varName, ierr) + implicit none + CHARACTER(len=*) :: varName + INTEGER :: ierr + CALL SZ_BatchDelVar_c(varName, len(trim(varName)), ierr) + END SUBROUTINE SZ_BatchDelVar + + SUBROUTINE SZ_Batch_Compress(Bytes, OutSize) + implicit none + INTEGER(kind=1), DIMENSION(:), allocatable :: Bytes + INTEGER(kind=C_SIZE_T) :: OutSize + INTEGER(kind=C_SIZE_T) :: alloSize + + CALL compute_total_batch_size_c(alloSize) + allocate(Bytes(alloSize)) !allocate the largest possible memory + + CALL SZ_Batch_Compress_c(Bytes, OutSize) + + END SUBROUTINE SZ_Batch_Compress + + SUBROUTINE SZ_Batch_Decompress(Bytes, OutSize, ierr) + implicit none + INTEGER(kind=1), DIMENSION(:), allocatable :: Bytes + INTEGER(kind=C_SIZE_T) :: OutSize + INTEGER :: ierr + + CALL SZ_Batch_Decompress_c(Bytes, OutSize, ierr) + END SUBROUTINE SZ_Batch_Decompress + +!Compress functions that extract the dimension sizes and call C translation interface (single-precision) + + SUBROUTINE SZ_Compress_d1_Fortran_REAL_K4(VAR, Bytes, OutSize) + implicit none + REAL(KIND=4), DIMENSION(:) :: VAR + INTEGER(kind=C_SIZE_T) :: OutSize, R1 + INTEGER(kind=1), DIMENSION(:), allocatable :: Bytes + R1 = SIZE(VAR,1) + + allocate(Bytes(8*R1)) !allocate the largest possible memory + + CALL SZ_Compress_d1_Float(VAR, Bytes, OutSize, R1) + + END SUBROUTINE SZ_Compress_d1_Fortran_REAL_K4 + + SUBROUTINE 
SZ_Compress_d1_Fortran_REAL_K4_ARGS(VAR, Bytes, OutSize, ErrBoundMode, AbsErrBound, RelBoundRatio) + implicit none + REAL(KIND=4), DIMENSION(:) :: VAR + INTEGER(kind=C_SIZE_T) :: OutSize, R1 + INTEGER(kind=4) :: ErrBoundMode + REAL(kind=4) :: AbsErrBound, RelBoundRatio + INTEGER(kind=1), DIMENSION(:), allocatable :: Bytes + R1 = SIZE(VAR,1) + + allocate(Bytes(8*R1)) !allocate the largest possible memory + + CALL SZ_Compress_d1_Float_Args(VAR, Bytes, OutSize, ErrBoundMode, AbsErrBound, RelBoundRatio, R1) + + END SUBROUTINE SZ_Compress_d1_Fortran_REAL_K4_ARGS + + SUBROUTINE SZ_Compress_d2_Fortran_REAL_K4(VAR, Bytes, OutSize) + implicit none + REAL(KIND=4), DIMENSION(:,:) :: VAR + INTEGER(kind=C_SIZE_T) :: OutSize, R1, R2, R + INTEGER(kind=1), DIMENSION(:), allocatable :: Bytes + R1 = SIZE(VAR,1) + R2 = SIZE(VAR,2) + + R = R1*R2 + + allocate(Bytes(8*R)) !allocate the largest possible memory + + CALL SZ_Compress_d2_Float(VAR, Bytes, OutSize, R1, R2) + + END SUBROUTINE SZ_Compress_d2_Fortran_REAL_K4 + + SUBROUTINE SZ_Compress_d2_Fortran_REAL_K4_ARGS(VAR, Bytes, OutSize, ErrBoundMode, AbsErrBound, RelBoundRatio) + implicit none + REAL(KIND=4), DIMENSION(:,:) :: VAR + INTEGER(kind=4) :: ErrBoundMode + INTEGER(kind=C_SIZE_T) :: OutSize, R1, R2, R + REAL(kind=4) :: AbsErrBound, RelBoundRatio + INTEGER(kind=1), DIMENSION(:), allocatable :: Bytes + R1 = SIZE(VAR,1) + R2 = SIZE(VAR,2) + + R = R1*R2 + + allocate(Bytes(8*R)) !allocate the largest possible memory + + CALL SZ_Compress_d2_Float_Args(VAR, Bytes, OutSize, ErrBoundMode, AbsErrBound, RelBoundRatio, R1, R2) + END SUBROUTINE SZ_Compress_d2_Fortran_REAL_K4_ARGS + + SUBROUTINE SZ_Compress_d3_Fortran_REAL_K4(VAR, Bytes, OutSize) + implicit none + REAL(KIND=4), DIMENSION(:,:,:) :: VAR + INTEGER(kind=C_SIZE_T) :: OutSize, R1, R2, R3, R + INTEGER(kind=1), DIMENSION(:), allocatable :: Bytes + + R1 = SIZE(VAR,1) + R2 = SIZE(VAR,2) + R3 = SIZE(VAR,3) + R = R1*R2*R3 + allocate(Bytes(8*R)) !allocate the largest possible memory + + 
CALL SZ_Compress_d3_Float(VAR, Bytes, OutSize, R1, R2, R3) + + END SUBROUTINE SZ_Compress_d3_Fortran_REAL_K4 + + SUBROUTINE SZ_Compress_d3_Fortran_REAL_K4_ARGS(VAR, Bytes, OutSize, ErrBoundMode, AbsErrBound, RelBoundRatio) + implicit none + REAL(KIND=4), DIMENSION(:,:,:) :: VAR + INTEGER(kind=4) :: ErrBoundMode + INTEGER(kind=C_SIZE_T) :: OutSize, R1, R2, R3, R + REAL(kind=4) :: AbsErrBound, RelBoundRatio + INTEGER(kind=1), DIMENSION(:), allocatable :: Bytes + + R1 = SIZE(VAR,1) + R2 = SIZE(VAR,2) + R3 = SIZE(VAR,3) + R = R1*R2*R3 + allocate(Bytes(8*R)) !allocate the largest possible memory + + CALL SZ_Compress_d3_Float_Args(VAR, Bytes, OutSize, ErrBoundMode, AbsErrBound, RelBoundRatio, R1, R2, R3) + + END SUBROUTINE SZ_Compress_d3_Fortran_REAL_K4_ARGS + + SUBROUTINE SZ_Compress_d4_Fortran_REAL_K4(VAR, Bytes, OutSize) + implicit none + REAL(KIND=4), DIMENSION(:,:,:,:) :: VAR + INTEGER(kind=C_SIZE_T) :: OutSize, R1, R2, R3, R4, R + INTEGER(kind=1), DIMENSION(:), allocatable :: Bytes + + R1 = SIZE(VAR,1) + R2 = SIZE(VAR,2) + R3 = SIZE(VAR,3) + R4 = SIZE(VAR,4) + + R = R1*R2*R3*R4 + allocate(Bytes(8*R)) !allocate the largest possible memory + + CALL SZ_Compress_d4_Float(VAR, Bytes, OutSize, R1, R2, R3, R4) + + END SUBROUTINE SZ_Compress_d4_Fortran_REAL_K4 + + SUBROUTINE SZ_Compress_d4_Fortran_REAL_K4_ARGS(VAR, Bytes, OutSize, ErrBoundMode, AbsErrBound, RelBoundRatio) + implicit none + REAL(KIND=4), DIMENSION(:,:,:,:) :: VAR + INTEGER(kind=4) :: ErrBoundMode + INTEGER(kind=C_SIZE_T) :: OutSize, R1, R2, R3, R4, R + REAL(kind=4) :: AbsErrBound, RelBoundRatio + INTEGER(kind=1), DIMENSION(:), allocatable :: Bytes + + R1 = SIZE(VAR,1) + R2 = SIZE(VAR,2) + R3 = SIZE(VAR,3) + R4 = SIZE(VAR,4) + R = R1*R2*R3*R4 + allocate(Bytes(8*R)) !allocate the largest possible memory + + CALL SZ_Compress_d4_Float_Args(VAR, Bytes, OutSize, ErrBoundMode, AbsErrBound, RelBoundRatio, R1, R2, R3, R4) + + END SUBROUTINE SZ_Compress_d4_Fortran_REAL_K4_ARGS + + SUBROUTINE 
SZ_Compress_d5_Fortran_REAL_K4(VAR, Bytes, OutSize) + implicit none + REAL(KIND=4), DIMENSION(:,:,:,:,:) :: VAR + INTEGER(kind=C_SIZE_T) :: OutSize, R1, R2, R3, R4, R5, R + INTEGER(kind=1), DIMENSION(:), allocatable :: Bytes + + R1 = SIZE(VAR,1) + R2 = SIZE(VAR,2) + R3 = SIZE(VAR,3) + R4 = SIZE(VAR,4) + R5 = SIZE(VAR,5) + R = R1*R2*R3*R4*R5 + allocate(Bytes(8*R)) !allocate the largest possible memory + + CALL SZ_Compress_d5_Float(VAR, Bytes, OutSize, R1, R2, R3, R4, R5) + + END SUBROUTINE SZ_Compress_d5_Fortran_REAL_K4 + + SUBROUTINE SZ_Compress_d5_Fortran_REAL_K4_ARGS(VAR, Bytes, OutSize, ErrBoundMode, AbsErrBound, RelBoundRatio) + implicit none + REAL(KIND=4), DIMENSION(:,:,:,:,:) :: VAR + INTEGER(kind=4) :: ErrBoundMode + INTEGER(kind=C_SIZE_T) :: OutSize, R1, R2, R3, R4, R5, R + REAL(kind=4) :: AbsErrBound, RelBoundRatio + INTEGER(kind=1), DIMENSION(:), allocatable :: Bytes + + R1 = SIZE(VAR,1) + R2 = SIZE(VAR,2) + R3 = SIZE(VAR,3) + R4 = SIZE(VAR,4) + R5 = SIZE(VAR,5) + R = R1*R2*R3*R4*R5 + allocate(Bytes(8*R)) !allocate the largest possible memory + + CALL SZ_Compress_d5_Float_Args(VAR, Bytes, OutSize, ErrBoundMode, AbsErrBound, RelBoundRatio, R1, R2, R3, R4, R5) + + END SUBROUTINE SZ_Compress_d5_Fortran_REAL_K4_ARGS + +!Compress functions that extract the dimension sizes and call C translation interfaces (double-precision) + + SUBROUTINE SZ_Compress_d1_Fortran_REAL_K8(VAR, Bytes, OutSize) + implicit none + REAL(KIND=8), DIMENSION(:) :: VAR + INTEGER(kind=C_SIZE_T) :: OutSize, R1 + INTEGER(kind=1), DIMENSION(:), allocatable :: Bytes + R1 = SIZE(VAR,1) + + allocate(Bytes(8*R1)) + + CALL SZ_Compress_d1_Double(VAR, Bytes, OutSize, R1) + END SUBROUTINE SZ_Compress_d1_Fortran_REAL_K8 + + SUBROUTINE SZ_Compress_d1_Fortran_REAL_K8_ARGS(VAR, Bytes, OutSize, ErrBoundMode, AbsErrBound, RelBoundRatio) + implicit none + REAL(KIND=8), DIMENSION(:) :: VAR + INTEGER(kind=4) :: ErrBoundMode + INTEGER(kind=C_SIZE_T) :: OutSize, R1 + REAL(kind=8) :: AbsErrBound, RelBoundRatio 
+ INTEGER(kind=1), DIMENSION(:), allocatable :: Bytes + R1 = SIZE(VAR,1) + + allocate(Bytes(8*R1)) + CALL SZ_Compress_d1_Double_Args(VAR, Bytes, OutSize, ErrBoundMode, AbsErrBound, RelBoundRatio, R1) + END SUBROUTINE SZ_Compress_d1_Fortran_REAL_K8_ARGS + + SUBROUTINE SZ_Compress_d2_Fortran_REAL_K8(VAR, Bytes, OutSize) + implicit none + REAL(KIND=8), DIMENSION(:,:) :: VAR + INTEGER(kind=C_SIZE_T) :: OutSize, R1, R2, R + INTEGER(kind=1), DIMENSION(:), allocatable :: Bytes + R1 = SIZE(VAR,1) + R2 = SIZE(VAR,2) + + R = R1*R2 + allocate(Bytes(8*R)) !allocate the largest possible memory + + CALL SZ_Compress_d2_Double(VAR, Bytes, OutSize, R1, R2) + END SUBROUTINE SZ_Compress_d2_Fortran_REAL_K8 + + SUBROUTINE SZ_Compress_d2_Fortran_REAL_K8_ARGS(VAR, Bytes, OutSize, ErrBoundMode, AbsErrBound, RelBoundRatio) + implicit none + REAL(KIND=8), DIMENSION(:,:) :: VAR + INTEGER(kind=4) :: ErrBoundMode + INTEGER(kind=C_SIZE_T) :: OutSize, R1, R2, R + REAL(kind=8) :: AbsErrBound, RelBoundRatio + INTEGER(kind=1), DIMENSION(:), allocatable :: Bytes + R1 = SIZE(VAR,1) + R2 = SIZE(VAR,2) + + R = R1*R2 + allocate(Bytes(8*R)) !allocate the largest possible memory + + CALL SZ_Compress_d2_Double_Args(VAR, Bytes, OutSize, ErrBoundMode, AbsErrBound, RelBoundRatio, R1, R2) + END SUBROUTINE SZ_Compress_d2_Fortran_REAL_K8_ARGS + + SUBROUTINE SZ_Compress_d3_Fortran_REAL_K8(VAR, Bytes, OutSize) + implicit none + REAL(KIND=8), DIMENSION(:,:,:) :: VAR + INTEGER(kind=C_SIZE_T) :: OutSize, R1, R2, R3, R + INTEGER(kind=1), DIMENSION(:), allocatable :: Bytes + + R1 = SIZE(VAR,1) + R2 = SIZE(VAR,2) + R3 = SIZE(VAR,3) + R = R1*R2*R3 + + allocate(Bytes(8*R)) !allocate the largest possible memory + + CALL SZ_Compress_d3_Double(VAR, Bytes, OutSize, R1, R2, R3) + + END SUBROUTINE SZ_Compress_d3_Fortran_REAL_K8 + + SUBROUTINE SZ_Compress_d3_Fortran_REAL_K8_ARGS(VAR, Bytes, OutSize, ErrBoundMode, AbsErrBound, RelBoundRatio) + implicit none + REAL(KIND=8), DIMENSION(:,:,:) :: VAR + INTEGER(kind=4) :: ErrBoundMode 
+ INTEGER(kind=C_SIZE_T) :: OutSize, R1, R2, R3, R + REAL(kind=8) :: AbsErrBound, RelBoundRatio + INTEGER(kind=1), DIMENSION(:), allocatable :: Bytes + + R1 = SIZE(VAR,1) + R2 = SIZE(VAR,2) + R3 = SIZE(VAR,3) + R = R1*R2*R3 + allocate(Bytes(8*R)) !allocate the largest possible memory + + CALL SZ_Compress_d3_Double_Args(VAR, Bytes, OutSize, ErrBoundMode, AbsErrBound, RelBoundRatio, R1, R2, R3) + + END SUBROUTINE SZ_Compress_d3_Fortran_REAL_K8_ARGS + + SUBROUTINE SZ_Compress_d4_Fortran_REAL_K8(VAR, Bytes, OutSize) + implicit none + REAL(KIND=8), DIMENSION(:,:,:,:) :: VAR + INTEGER(kind=C_SIZE_T) :: OutSize, R1, R2, R3, R4, R + INTEGER(kind=1), DIMENSION(:), allocatable :: Bytes + + R1 = SIZE(VAR,1) + R2 = SIZE(VAR,2) + R3 = SIZE(VAR,3) + R4 = SIZE(VAR,4) + + R = R1*R2*R3*R4 + allocate(Bytes(8*R)) !allocate the largest possible memory + + CALL SZ_Compress_d4_Double(VAR, Bytes, OutSize, R1, R2, R3, R4) + + END SUBROUTINE SZ_Compress_d4_Fortran_REAL_K8 + + SUBROUTINE SZ_Compress_d4_Fortran_REAL_K8_ARGS(VAR, Bytes, OutSize, ErrBoundMode, AbsErrBound, RelBoundRatio) + implicit none + REAL(KIND=8), DIMENSION(:,:,:,:) :: VAR + INTEGER(kind=C_SIZE_T) :: ErrBoundMode + INTEGER(kind=C_SIZE_T) :: OutSize, R1, R2, R3, R4, R + REAL(kind=8) :: AbsErrBound, RelBoundRatio + INTEGER(kind=1), DIMENSION(:), allocatable :: Bytes + + R1 = SIZE(VAR,1) + R2 = SIZE(VAR,2) + R3 = SIZE(VAR,3) + R4 = SIZE(VAR,4) + R = R1*R2*R3*R4 + allocate(Bytes(8*R)) !allocate the largest possible memory + + CALL SZ_Compress_d4_Double_Args(VAR, Bytes, OutSize, ErrBoundMode, AbsErrBound, RelBoundRatio, R1, R2, R3, R4) + + END SUBROUTINE SZ_Compress_d4_Fortran_REAL_K8_ARGS + + SUBROUTINE SZ_Compress_d5_Fortran_REAL_K8(VAR, Bytes, OutSize) + implicit none + REAL(KIND=8), DIMENSION(:,:,:,:,:) :: VAR + INTEGER(kind=C_SIZE_T) :: OutSize, R1, R2, R3, R4, R5, R + INTEGER(kind=1), DIMENSION(:), allocatable :: Bytes + + R1 = SIZE(VAR,1) + R2 = SIZE(VAR,2) + R3 = SIZE(VAR,3) + R4 = SIZE(VAR,4) + R5 = SIZE(VAR,5) + R = 
R1*R2*R3*R4*R5 + allocate(Bytes(8*R)) !allocate the largest possible memory + + CALL SZ_Compress_d5_Double(VAR, Bytes, OutSize, R1, R2, R3, R4, R5) + + END SUBROUTINE SZ_Compress_d5_Fortran_REAL_K8 + + SUBROUTINE SZ_Compress_d5_Fortran_REAL_K8_ARGS(VAR, Bytes, OutSize, ErrBoundMode, AbsErrBound, RelBoundRatio) + implicit none + REAL(KIND=8), DIMENSION(:,:,:,:,:) :: VAR + INTEGER(kind=4) :: ErrBoundMode + INTEGER(kind=C_SIZE_T) :: OutSize, R1, R2, R3, R4, R5, R + REAL(kind=8) :: AbsErrBound, RelBoundRatio + INTEGER(kind=1), DIMENSION(:), allocatable :: Bytes + + R1 = SIZE(VAR,1) + R2 = SIZE(VAR,2) + R3 = SIZE(VAR,3) + R4 = SIZE(VAR,4) + R5 = SIZE(VAR,5) + R = R1*R2*R3*R4*R5 + allocate(Bytes(8*R)) !allocate the largest possible memory + + CALL SZ_Compress_d5_Double_Args(VAR, Bytes, OutSize, ErrBoundMode, AbsErrBound, RelBoundRatio, R1, R2, R3, R4, R5) + + END SUBROUTINE SZ_Compress_d5_Fortran_REAL_K8_ARGS + +!Comrpession functions with reserved value + + SUBROUTINE SZ_Compress_d1_Fortran_REAL_K4_Rev(VAR, ReValue, Bytes, OutSize) + implicit none + REAL(KIND=4), DIMENSION(:) :: VAR + REAL(KIND=4) :: ReValue + INTEGER(kind=C_SIZE_T) :: OutSize, R1 + INTEGER(kind=1), DIMENSION(:), allocatable :: Bytes + R1 = SIZE(VAR,1) + + allocate(Bytes(8*R1)) + + CALL SZ_Compress_d1_Float_Rev(VAR, ReValue, Bytes, OutSize, R1) + END SUBROUTINE SZ_Compress_d1_Fortran_REAL_K4_Rev + + SUBROUTINE SZ_Compress_d1_Fortran_REAL_K4_ARGS_Rev(VAR, ReValue, Bytes, OutSize, ErrBoundMode, AbsErrBound, RelBoundRatio) + implicit none + REAL(KIND=4), DIMENSION(:) :: VAR + REAL(KIND=4) :: ReValue + INTEGER(kind=4) :: ErrBoundMode + INTEGER(kind=C_SIZE_T) :: OutSize, R1 + REAL(kind=4) :: AbsErrBound, RelBoundRatio + INTEGER(kind=1), DIMENSION(:), allocatable :: Bytes + R1 = SIZE(VAR,1) + + allocate(Bytes(8*R1)) + + CALL SZ_Compress_d1_Float_Rev_Args(VAR, ReValue, Bytes, OutSize, ErrBoundMode, AbsErrBound, RelBoundRatio, R1) + END SUBROUTINE SZ_Compress_d1_Fortran_REAL_K4_ARGS_Rev + + SUBROUTINE 
SZ_Compress_d2_Fortran_REAL_K4_Rev(VAR, ReValue, Bytes, OutSize) + implicit none + REAL(KIND=4), DIMENSION(:,:) :: VAR + REAL(KIND=4) :: ReValue + INTEGER(kind=C_SIZE_T) :: OutSize, R1, R2, R + INTEGER(kind=1), DIMENSION(:), allocatable :: Bytes + R1 = SIZE(VAR,1) + R2 = SIZE(VAR,2) + + R = R1*R2 + allocate(Bytes(8*R)) !allocate the largest possible memory + + CALL SZ_Compress_d2_Float_Rev(VAR, ReValue, Bytes, OutSize, R1, R2) + END SUBROUTINE SZ_Compress_d2_Fortran_REAL_K4_Rev + + SUBROUTINE SZ_Compress_d2_Fortran_REAL_K4_ARGS_Rev(VAR, ReValue, Bytes, OutSize, ErrBoundMode, AbsErrBound, RelBoundRatio) + implicit none + REAL(KIND=4), DIMENSION(:,:) :: VAR + REAL(KIND=4) :: ReValue + INTEGER(kind=4) :: ErrBoundMode + INTEGER(kind=C_SIZE_T) :: OutSize, R1, R2, R + REAL(kind=4) :: AbsErrBound, RelBoundRatio + INTEGER(kind=1), DIMENSION(:), allocatable :: Bytes + R1 = SIZE(VAR,1) + R2 = SIZE(VAR,2) + + R = R1*R2 + allocate(Bytes(8*R)) !allocate the largest possible memory + + CALL SZ_Compress_d2_Float_Rev_Args(VAR, ReValue, Bytes, OutSize, ErrBoundMode, AbsErrBound, RelBoundRatio, R1, R2) + END SUBROUTINE SZ_Compress_d2_Fortran_REAL_K4_ARGS_Rev + + SUBROUTINE SZ_Compress_d3_Fortran_REAL_K4_Rev(VAR, ReValue, Bytes, OutSize) + implicit none + REAL(KIND=4), DIMENSION(:,:,:) :: VAR + REAL(KIND=4) :: ReValue + INTEGER(kind=C_SIZE_T) :: OutSize, R1, R2, R3, R + INTEGER(kind=1), DIMENSION(:), allocatable :: Bytes + INTEGER(kind=1), DIMENSION(:), allocatable :: temp + + R1 = SIZE(VAR,1) + R2 = SIZE(VAR,2) + R3 = SIZE(VAR,3) + R = R1*R2*R3 + allocate(Bytes(8*R)) !allocate the largest possible memory + + CALL SZ_Compress_d3_Float_Rev(VAR, ReValue, Bytes, OutSize, R1, R2, R3) + + END SUBROUTINE SZ_Compress_d3_Fortran_REAL_K4_Rev + + SUBROUTINE SZ_Compress_d3_Fortran_REAL_K4_ARGS_Rev(VAR, ReValue, Bytes, OutSize, ErrBoundMode, AbsErrBound, RelBoundRatio) + implicit none + REAL(KIND=4), DIMENSION(:,:,:) :: VAR + REAL(KIND=4) :: ReValue + INTEGER(kind=4) :: ErrBoundMode + 
INTEGER(kind=C_SIZE_T) :: OutSize, R1, R2, R3, R + REAL(kind=4) :: AbsErrBound, RelBoundRatio + INTEGER(kind=1), DIMENSION(:), allocatable :: Bytes + + R1 = SIZE(VAR,1) + R2 = SIZE(VAR,2) + R3 = SIZE(VAR,3) + R = R1*R2*R3 + allocate(Bytes(8*R)) !allocate the largest possible memory + + CALL SZ_Compress_d3_Float_Rev_Args(VAR, ReValue, Bytes, OutSize, ErrBoundMode, AbsErrBound, RelBoundRatio, R1, R2, R3) + + END SUBROUTINE SZ_Compress_d3_Fortran_REAL_K4_ARGS_Rev + + SUBROUTINE SZ_Compress_d4_Fortran_REAL_K4_Rev(VAR, ReValue, Bytes, OutSize) + implicit none + REAL(KIND=4), DIMENSION(:,:,:,:) :: VAR + REAL(KIND=4) :: ReValue + INTEGER(kind=C_SIZE_T) :: OutSize, R1, R2, R3, R4, R + INTEGER(kind=1), DIMENSION(:), allocatable :: Bytes + + R1 = SIZE(VAR,1) + R2 = SIZE(VAR,2) + R3 = SIZE(VAR,3) + R4 = SIZE(VAR,4) + + R = R1*R2*R3*R4 + allocate(Bytes(8*R)) !allocate the largest possible memory + + CALL SZ_Compress_d4_Float_Rev(VAR, ReValue, Bytes, OutSize, R1, R2, R3, R4) + + END SUBROUTINE SZ_Compress_d4_Fortran_REAL_K4_Rev + + SUBROUTINE SZ_Compress_d4_Fortran_REAL_K4_ARGS_Rev(VAR, ReValue, Bytes, OutSize, ErrBoundMode, AbsErrBound, RelBoundRatio) + implicit none + REAL(KIND=4), DIMENSION(:,:,:,:) :: VAR + REAL(KIND=4) :: ReValue + INTEGER(kind=4) :: ErrBoundMode + INTEGER(kind=C_SIZE_T) :: OutSize, R1, R2, R3, R4, R + REAL(kind=4) :: AbsErrBound, RelBoundRatio + INTEGER(kind=1), DIMENSION(:), allocatable :: Bytes + + R1 = SIZE(VAR,1) + R2 = SIZE(VAR,2) + R3 = SIZE(VAR,3) + R4 = SIZE(VAR,4) + R = R1*R2*R3*R4 + allocate(Bytes(8*R)) !allocate the largest possible memory + + CALL SZ_Compress_d4_Float_Rev_Args(VAR, ReValue, Bytes, OutSize, ErrBoundMode, AbsErrBound, RelBoundRatio, R1, R2, R3, R4) + + END SUBROUTINE SZ_Compress_d4_Fortran_REAL_K4_ARGS_Rev + + SUBROUTINE SZ_Compress_d5_Fortran_REAL_K4_Rev(VAR, ReValue, Bytes, OutSize) + implicit none + REAL(KIND=4), DIMENSION(:,:,:,:,:) :: VAR + REAL(KIND=4) :: ReValue + INTEGER(kind=C_SIZE_T) :: OutSize, R1, R2, R3, R4, R5, R + 
INTEGER(kind=1), DIMENSION(:), allocatable :: Bytes + + R1 = SIZE(VAR,1) + R2 = SIZE(VAR,2) + R3 = SIZE(VAR,3) + R4 = SIZE(VAR,4) + R5 = SIZE(VAR,5) + R = R1*R2*R3*R4*R5 + allocate(Bytes(8*R)) !allocate the largest possible memory + + CALL SZ_Compress_d5_Float_Rev(VAR, ReValue, Bytes, OutSize, R1, R2, R3, R4, R5) + + END SUBROUTINE SZ_Compress_d5_Fortran_REAL_K4_Rev + + SUBROUTINE SZ_Compress_d5_Fortran_REAL_K4_ARGS_Rev(VAR, ReValue, Bytes, OutSize, ErrBoundMode, AbsErrBound, RelBoundRatio) + implicit none + REAL(KIND=4), DIMENSION(:,:,:,:,:) :: VAR + REAL(KIND=4) :: ReValue + INTEGER(kind=4) :: ErrBoundMode + INTEGER(kind=C_SIZE_T) :: OutSize, R1, R2, R3, R4, R5, R + REAL(kind=4) :: AbsErrBound, RelBoundRatio + INTEGER(kind=1), DIMENSION(:), allocatable :: Bytes + + R1 = SIZE(VAR,1) + R2 = SIZE(VAR,2) + R3 = SIZE(VAR,3) + R4 = SIZE(VAR,4) + R5 = SIZE(VAR,5) + R = R1*R2*R3*R4*R5 + allocate(Bytes(8*R)) !allocate the largest possible memory + + CALL SZ_Compress_d5_Float_Rev_Args(VAR, ReValue, Bytes, OutSize, ErrBoundMode, AbsErrBound, RelBoundRatio, R1, R2, R3, R4, R5) + + END SUBROUTINE SZ_Compress_d5_Fortran_REAL_K4_ARGS_Rev + +!Compress functions that extract the dimension sizes and call C translation interfaces (double-precision) + + SUBROUTINE SZ_Compress_d1_Fortran_REAL_K8_Rev(VAR, ReValue, Bytes, OutSize) + implicit none + REAL(KIND=8), DIMENSION(:) :: VAR + REAL(KIND=8) :: ReValue + INTEGER(kind=C_SIZE_T) :: OutSize, R1 + INTEGER(kind=1), DIMENSION(:), allocatable :: Bytes + R1 = SIZE(VAR,1) + + allocate(Bytes(8*R1)) + + CALL SZ_Compress_d1_Double_Rev(VAR, ReValue, Bytes, OutSize, R1) + END SUBROUTINE SZ_Compress_d1_Fortran_REAL_K8_Rev + + SUBROUTINE SZ_Compress_d1_Fortran_REAL_K8_ARGS_Rev(VAR, ReValue, Bytes, OutSize, ErrBoundMode, AbsErrBound, RelBoundRatio) + implicit none + REAL(KIND=8), DIMENSION(:) :: VAR + REAL(KIND=8) :: ReValue + INTEGER(kind=4) :: ErrBoundMode + INTEGER(kind=C_SIZE_T) :: OutSize, R1 + REAL(kind=8) :: AbsErrBound, RelBoundRatio + 
INTEGER(kind=1), DIMENSION(:), allocatable :: Bytes + R1 = SIZE(VAR,1) + + allocate(Bytes(8*R1)) + + CALL SZ_Compress_d1_Double_Rev_Args(VAR, ReValue, Bytes, OutSize, ErrBoundMode, AbsErrBound, RelBoundRatio, R1) + END SUBROUTINE SZ_Compress_d1_Fortran_REAL_K8_ARGS_Rev + + SUBROUTINE SZ_Compress_d2_Fortran_REAL_K8_Rev(VAR, ReValue, Bytes, OutSize) + implicit none + REAL(KIND=8), DIMENSION(:,:) :: VAR + REAL(KIND=8) :: ReValue + INTEGER(kind=C_SIZE_T) :: OutSize, R1, R2, R + INTEGER(kind=1), DIMENSION(:), allocatable :: Bytes + R1 = SIZE(VAR,1) + R2 = SIZE(VAR,2) + + R = R1*R2 + allocate(Bytes(8*R)) !allocate the largest possible memory + + CALL SZ_Compress_d2_Double_Rev(VAR, ReValue, Bytes, OutSize, R1, R2) + END SUBROUTINE SZ_Compress_d2_Fortran_REAL_K8_Rev + + SUBROUTINE SZ_Compress_d2_Fortran_REAL_K8_ARGS_Rev(VAR, ReValue, Bytes, OutSize, ErrBoundMode, AbsErrBound, RelBoundRatio) + implicit none + REAL(KIND=8), DIMENSION(:,:) :: VAR + REAL(KIND=8) :: ReValue + INTEGER(kind=4) :: ErrBoundMode + INTEGER(kind=C_SIZE_T) :: OutSize, R1, R2, R + REAL(kind=8) :: AbsErrBound, RelBoundRatio + INTEGER(kind=1), DIMENSION(:), allocatable :: Bytes + R1 = SIZE(VAR,1) + R2 = SIZE(VAR,2) + R = R1*R2 + allocate(Bytes(8*R)) !allocate the largest possible memory + + CALL SZ_Compress_d2_Double_Rev_Args(VAR, ReValue, Bytes, OutSize, ErrBoundMode, AbsErrBound, RelBoundRatio, R1, R2) + + END SUBROUTINE SZ_Compress_d2_Fortran_REAL_K8_ARGS_Rev + + SUBROUTINE SZ_Compress_d3_Fortran_REAL_K8_Rev(VAR, ReValue, Bytes, OutSize) + implicit none + REAL(KIND=8), DIMENSION(:,:,:) :: VAR + REAL(KIND=8) :: ReValue + INTEGER(kind=C_SIZE_T) :: OutSize, R1, R2, R3, R + INTEGER(kind=1), DIMENSION(:), allocatable :: Bytes + + R1 = SIZE(VAR,1) + R2 = SIZE(VAR,2) + R3 = SIZE(VAR,3) + R = R1*R2*R3 + allocate(Bytes(8*R)) !allocate the largest possible memory + + CALL SZ_Compress_d3_Double_Rev(VAR, ReValue, Bytes, OutSize, R1, R2, R3) + + END SUBROUTINE SZ_Compress_d3_Fortran_REAL_K8_Rev + + SUBROUTINE 
SZ_Compress_d3_Fortran_REAL_K8_ARGS_Rev(VAR, ReValue, Bytes, OutSize, ErrBoundMode, AbsErrBound, RelBoundRatio) + implicit none + REAL(KIND=8), DIMENSION(:,:,:) :: VAR + REAL(KIND=8) :: ReValue + INTEGER(kind=4) :: ErrBoundMode + INTEGER(kind=C_SIZE_T) :: OutSize, R1, R2, R3, R + REAL(kind=8) :: AbsErrBound, RelBoundRatio + INTEGER(kind=1), DIMENSION(:), allocatable :: Bytes + + R1 = SIZE(VAR,1) + R2 = SIZE(VAR,2) + R3 = SIZE(VAR,3) + R = R1*R2*R3 + allocate(Bytes(8*R)) !allocate the largest possible memory + + CALL SZ_Compress_d3_Double_Rev_Args(VAR, ReValue, Bytes, OutSize, ErrBoundMode, AbsErrBound, RelBoundRatio, R1, R2, R3) + + END SUBROUTINE SZ_Compress_d3_Fortran_REAL_K8_ARGS_Rev + + SUBROUTINE SZ_Compress_d4_Fortran_REAL_K8_Rev(VAR, ReValue, Bytes, OutSize) + implicit none + REAL(KIND=8), DIMENSION(:,:,:,:) :: VAR + REAL(KIND=8) :: ReValue + INTEGER(kind=C_SIZE_T) :: OutSize, R1, R2, R3, R4, R + INTEGER(kind=1), DIMENSION(:), allocatable :: Bytes + + R1 = SIZE(VAR,1) + R2 = SIZE(VAR,2) + R3 = SIZE(VAR,3) + R4 = SIZE(VAR,4) + + R = R1*R2*R3*R4 + allocate(Bytes(8*R)) !allocate the largest possible memory + + CALL SZ_Compress_d4_Double_Rev(VAR, ReValue, Bytes, OutSize, R1, R2, R3, R4) + + END SUBROUTINE SZ_Compress_d4_Fortran_REAL_K8_Rev + + SUBROUTINE SZ_Compress_d4_Fortran_REAL_K8_ARGS_Rev(VAR, ReValue, Bytes, OutSize, ErrBoundMode, AbsErrBound, RelBoundRatio) + implicit none + REAL(KIND=8), DIMENSION(:,:,:,:) :: VAR + REAL(KIND=8) :: ReValue + INTEGER(kind=4) :: ErrBoundMode + INTEGER(kind=C_SIZE_T) :: OutSize, R1, R2, R3, R4, R + REAL(kind=8) :: AbsErrBound, RelBoundRatio + INTEGER(kind=1), DIMENSION(:), allocatable :: Bytes + + R1 = SIZE(VAR,1) + R2 = SIZE(VAR,2) + R3 = SIZE(VAR,3) + R4 = SIZE(VAR,4) + R = R1*R2*R3*R4 + allocate(Bytes(8*R)) !allocate the largest possible memory + + CALL SZ_Compress_d4_Double_Rev_Args(VAR, ReValue, Bytes, OutSize, ErrBoundMode, AbsErrBound, RelBoundRatio, R1, R2, R3, R4) + + END SUBROUTINE 
SZ_Compress_d4_Fortran_REAL_K8_ARGS_Rev + + SUBROUTINE SZ_Compress_d5_Fortran_REAL_K8_Rev(VAR, ReValue, Bytes, OutSize) + implicit none + REAL(KIND=8), DIMENSION(:,:,:,:,:) :: VAR + REAL(KIND=8) :: ReValue + INTEGER(kind=C_SIZE_T) :: OutSize, R1, R2, R3, R4, R5, R + INTEGER(kind=1), DIMENSION(:), allocatable :: Bytes + + R1 = SIZE(VAR,1) + R2 = SIZE(VAR,2) + R3 = SIZE(VAR,3) + R4 = SIZE(VAR,4) + R5 = SIZE(VAR,5) + R = R1*R2*R3*R4*R5 + allocate(Bytes(8*R)) !allocate the largest possible memory + + CALL SZ_Compress_d5_Double_Rev(VAR, ReValue, Bytes, OutSize, R1, R2, R3, R4, R5) + + END SUBROUTINE SZ_Compress_d5_Fortran_REAL_K8_Rev + + SUBROUTINE SZ_Compress_d5_Fortran_REAL_K8_ARGS_Rev(VAR, ReValue, Bytes, OutSize, ErrBoundMode, AbsErrBound, RelBoundRatio) + implicit none + REAL(KIND=8), DIMENSION(:,:,:,:,:) :: VAR + REAL(KIND=8) :: ReValue + INTEGER(kind=4) :: ErrBoundMode + INTEGER(kind=C_SIZE_T) :: OutSize, R1, R2, R3, R4, R5, R + REAL(kind=8) :: AbsErrBound, RelBoundRatio + INTEGER(kind=1), DIMENSION(:), allocatable :: Bytes + + R1 = SIZE(VAR,1) + R2 = SIZE(VAR,2) + R3 = SIZE(VAR,3) + R4 = SIZE(VAR,4) + R5 = SIZE(VAR,5) + R = R1*R2*R3*R4*R5 + allocate(Bytes(8*R)) !allocate the largest possible memory + + CALL SZ_Compress_d5_Double_Rev_Args(VAR, ReValue, Bytes, OutSize, ErrBoundMode, AbsErrBound, RelBoundRatio, R1, R2, R3, R4, R5) + + END SUBROUTINE SZ_Compress_d5_Fortran_REAL_K8_ARGS_Rev + +!Decompress functions + + SUBROUTINE SZ_Decompress_d1_Fortran_REAL_K4(Bytes, VAR, R1) + implicit none + INTEGER(kind=1), DIMENSION(:) :: Bytes + REAL(KIND=4), DIMENSION(:), allocatable :: VAR + INTEGER(kind=C_SIZE_T) :: R1, BLength + BLength = SIZE(Bytes) + allocate(VAR(R1)) + + CALL SZ_Decompress_d1_Float(Bytes, BLength, VAR, R1) + END SUBROUTINE SZ_Decompress_d1_Fortran_REAL_K4 + + SUBROUTINE SZ_Decompress_d2_Fortran_REAL_K4(Bytes, VAR, R1, R2) + implicit none + INTEGER(kind=1), DIMENSION(:) :: Bytes + REAL(KIND=4), DIMENSION(:,:), allocatable :: VAR + INTEGER(kind=C_SIZE_T) 
:: R1, R2, BLength + BLength = SIZE(Bytes) + allocate(VAR(R1,R2)) + + CALL SZ_Decompress_d2_Float(Bytes, BLength, VAR, R1, R2) + END SUBROUTINE SZ_Decompress_d2_Fortran_REAL_K4 + + SUBROUTINE SZ_Decompress_d3_Fortran_REAL_K4(Bytes, VAR, R1, R2, R3) + implicit none + INTEGER(kind=1), DIMENSION(:) :: Bytes + REAL(KIND=4), DIMENSION(:,:,:), allocatable :: VAR + INTEGER(kind=C_SIZE_T) :: R1, R2, R3, BLength + BLength = SIZE(Bytes) + allocate(VAR(R1,R2,R3)) + + CALL SZ_Decompress_d3_Float(Bytes, BLength, VAR, R1, R2, R3) + END SUBROUTINE SZ_Decompress_d3_Fortran_REAL_K4 + + SUBROUTINE SZ_Decompress_d4_Fortran_REAL_K4(Bytes, VAR, R1, R2, R3, R4) + implicit none + INTEGER(kind=1), DIMENSION(:) :: Bytes + REAL(KIND=4), DIMENSION(:,:,:,:), allocatable :: VAR + INTEGER(kind=C_SIZE_T) :: R1, R2, R3, R4, BLength + BLength = SIZE(Bytes) + allocate(VAR(R1,R2,R3,R4)) + + CALL SZ_Decompress_d4_Float(Bytes, BLength, VAR, R1, R2, R3, R4) + END SUBROUTINE SZ_Decompress_d4_Fortran_REAL_K4 + + SUBROUTINE SZ_Decompress_d5_Fortran_REAL_K4(Bytes, VAR, R1, R2, R3, R4, R5) + implicit none + INTEGER(kind=1), DIMENSION(:) :: Bytes + REAL(KIND=4), DIMENSION(:,:,:,:,:), allocatable :: VAR + INTEGER(kind=C_SIZE_T) :: R1, R2, R3, R4, R5, BLength + BLength = SIZE(Bytes) + allocate(VAR(R1,R2,R3,R4,R5)) + + CALL SZ_Decompress_d5_Float(Bytes, BLength, VAR, R1, R2, R3, R4, R5) + END SUBROUTINE SZ_Decompress_d5_Fortran_REAL_K4 + + SUBROUTINE SZ_Decompress_d1_Fortran_REAL_K8(Bytes, VAR, R1) + implicit none + INTEGER(kind=1), DIMENSION(:) :: Bytes + REAL(KIND=8), DIMENSION(:), allocatable :: VAR + INTEGER(kind=C_SIZE_T) :: R1, BLength + BLength = SIZE(Bytes) + allocate(VAR(R1)) + + CALL SZ_Decompress_d1_Double(Bytes, BLength, VAR, R1) + END SUBROUTINE SZ_Decompress_d1_Fortran_REAL_K8 + + SUBROUTINE SZ_Decompress_d2_Fortran_REAL_K8(Bytes, VAR, R1, R2) + implicit none + INTEGER(kind=1), DIMENSION(:) :: Bytes + REAL(KIND=8), DIMENSION(:,:), allocatable :: VAR + INTEGER(kind=C_SIZE_T) :: R1, R2, BLength + 
BLength = SIZE(Bytes) + allocate(VAR(R1,R2)) + + CALL SZ_Decompress_d2_Double(Bytes, BLength, VAR, R1, R2) + END SUBROUTINE SZ_Decompress_d2_Fortran_REAL_K8 + + SUBROUTINE SZ_Decompress_d3_Fortran_REAL_K8(Bytes, VAR, R1, R2, R3) + implicit none + INTEGER(kind=1), DIMENSION(:) :: Bytes + REAL(KIND=8), DIMENSION(:,:,:), allocatable :: VAR + INTEGER(kind=C_SIZE_T) :: R1, R2, R3, BLength + BLength = SIZE(Bytes) + allocate(VAR(R1,R2,R3)) + + CALL SZ_Decompress_d3_Double(Bytes, BLength, VAR, R1, R2, R3) + END SUBROUTINE SZ_Decompress_d3_Fortran_REAL_K8 + + SUBROUTINE SZ_Decompress_d4_Fortran_REAL_K8(Bytes, VAR, R1, R2, R3, R4) + implicit none + INTEGER(kind=1), DIMENSION(:) :: Bytes + REAL(KIND=8), DIMENSION(:,:,:,:), allocatable :: VAR + INTEGER(kind=C_SIZE_T) :: R1, R2, R3, R4, BLength + BLength = SIZE(Bytes) + allocate(VAR(R1,R2,R3,R4)) + + CALL SZ_Decompress_d4_Double(Bytes, BLength, VAR, R1, R2, R3, R4) + END SUBROUTINE SZ_Decompress_d4_Fortran_REAL_K8 + + SUBROUTINE SZ_Decompress_d5_Fortran_REAL_K8(Bytes, VAR, R1, R2, R3, R4, R5) + implicit none + INTEGER(kind=1), DIMENSION(:) :: Bytes + REAL(KIND=8), DIMENSION(:,:,:,:,:), allocatable :: VAR + INTEGER(kind=C_SIZE_T) :: R1, R2, R3, R4, R5, BLength + BLength = SIZE(Bytes, 1) + allocate(VAR(R1,R2,R3,R4,R5)) + + CALL SZ_Decompress_d5_Double(Bytes, BLength, VAR, R1, R2, R3, R4, R5) + END SUBROUTINE SZ_Decompress_d5_Fortran_REAL_K8 + +!--------batch add float + + SUBROUTINE SZ_BatchAddVar_d1_Fortran_REAL_K4(varID, varName, VAR, ErrBoundMode, AbsErrBound, RelBoundRatio) + implicit none + INTEGER(kind=4) :: varID + CHARACTER(len=*) :: varName + REAL(KIND=4), DIMENSION(:) :: VAR + INTEGER(kind=4) :: ErrBoundMode + INTEGER(kind=C_SIZE_T) :: R1 + REAL(kind=4) :: AbsErrBound, RelBoundRatio + R1 = SIZE(VAR,1) + + CALL SZ_batchAddVar_d1_Float(varID, varName, len(trim(varName)), VAR, ErrBoundMode, AbsErrBound, RelBoundRatio, R1) + END SUBROUTINE SZ_BatchAddVar_d1_Fortran_REAL_K4 + + SUBROUTINE 
SZ_BatchAddVar_d2_Fortran_REAL_K4(varID, varName, VAR, ErrBoundMode, AbsErrBound, RelBoundRatio) + implicit none + INTEGER(kind=4) :: varID + CHARACTER(len=*) :: varName + REAL(KIND=4), DIMENSION(:,:) :: VAR + INTEGER(kind=4) :: ErrBoundMode + INTEGER(kind=C_SIZE_T) :: R1, R2 + REAL(kind=4) :: AbsErrBound, RelBoundRatio + R1 = SIZE(VAR,1) + R2 = SIZE(VAR,2) + + CALL SZ_batchAddVar_d2_Float(varID, varName, len(trim(varName)), VAR, ErrBoundMode, AbsErrBound, RelBoundRatio, R1, R2) + END SUBROUTINE SZ_BatchAddVar_d2_Fortran_REAL_K4 + + SUBROUTINE SZ_BatchAddVar_d3_Fortran_REAL_K4(varID, varName, VAR, ErrBoundMode, AbsErrBound, RelBoundRatio) + implicit none + INTEGER(kind=4) :: varID + CHARACTER(len=*) :: varName + REAL(KIND=4), DIMENSION(:,:,:) :: VAR + INTEGER(kind=4) :: ErrBoundMode + INTEGER(kind=C_SIZE_T) :: R1, R2, R3 + REAL(kind=4) :: AbsErrBound, RelBoundRatio + R1 = SIZE(VAR,1) + R2 = SIZE(VAR,2) + R3 = SIZE(VAR,3) + + CALL SZ_batchAddVar_d3_Float(varID, varName, len(trim(varName)), VAR, ErrBoundMode, AbsErrBound, RelBoundRatio, R1, R2, R3) + END SUBROUTINE SZ_BatchAddVar_d3_Fortran_REAL_K4 + + SUBROUTINE SZ_BatchAddVar_d4_Fortran_REAL_K4(varID, varName, VAR, ErrBoundMode, AbsErrBound, RelBoundRatio) + implicit none + INTEGER(kind=4) :: varID + CHARACTER(len=*) :: varName + REAL(KIND=4), DIMENSION(:,:,:,:) :: VAR + INTEGER(kind=4) :: ErrBoundMode + INTEGER(kind=C_SIZE_T) :: R1, R2, R3, R4 + REAL(kind=4) :: AbsErrBound, RelBoundRatio + R1 = SIZE(VAR,1) + R2 = SIZE(VAR,2) + R3 = SIZE(VAR,3) + R4 = SIZE(VAR,4) + + CALL SZ_batchAddVar_d4_Float(varID, varName, len(trim(varName)), VAR, ErrBoundMode, AbsErrBound, RelBoundRatio, R1, R2, R3, R4) + END SUBROUTINE SZ_BatchAddVar_d4_Fortran_REAL_K4 + + SUBROUTINE SZ_BatchAddVar_d5_Fortran_REAL_K4(varID, varName, VAR, ErrBoundMode, AbsErrBound, RelBoundRatio) + implicit none + INTEGER(kind=4) :: varID + CHARACTER(len=*) :: varName + REAL(KIND=4), DIMENSION(:,:,:,:,:) :: VAR + INTEGER(kind=4) :: ErrBoundMode + 
INTEGER(kind=C_SIZE_T) :: R1, R2, R3, R4, R5 + REAL(kind=4) :: AbsErrBound, RelBoundRatio + R1 = SIZE(VAR,1) + R2 = SIZE(VAR,2) + R3 = SIZE(VAR,3) + R4 = SIZE(VAR,4) + R5 = SIZE(VAR,5) + + CALL SZ_batchAddVar_d5_Float(varID, varName, len(trim(varName)), VAR, ErrBoundMode, AbsErrBound, RelBoundRatio, R1, R2, & + R3, R4, R5) + END SUBROUTINE SZ_BatchAddVar_d5_Fortran_REAL_K4 + +!------batch add double + SUBROUTINE SZ_BatchAddVar_d1_Fortran_REAL_K8(varID, varName, VAR, ErrBoundMode, AbsErrBound, RelBoundRatio) + implicit none + INTEGER(kind=4) :: varID + CHARACTER(len=*) :: varName + REAL(KIND=8), DIMENSION(:) :: VAR + INTEGER(kind=4) :: ErrBoundMode + INTEGER(kind=C_SIZE_T) :: R1 + REAL(kind=8) :: AbsErrBound, RelBoundRatio + R1 = SIZE(VAR,1) + + CALL SZ_batchAddVar_d1_Double(varID, varName, len(trim(varName)), VAR, ErrBoundMode, AbsErrBound, RelBoundRatio, R1) + END SUBROUTINE SZ_BatchAddVar_d1_Fortran_REAL_K8 + + SUBROUTINE SZ_BatchAddVar_d2_Fortran_REAL_K8(varID, varName, VAR, ErrBoundMode, AbsErrBound, RelBoundRatio) + implicit none + INTEGER(kind=4) :: varID + CHARACTER(len=*) :: varName + REAL(KIND=8), DIMENSION(:,:) :: VAR + INTEGER(kind=4) :: ErrBoundMode + INTEGER(kind=C_SIZE_T) :: R1, R2 + REAL(kind=8) :: AbsErrBound, RelBoundRatio + R1 = SIZE(VAR,1) + R2 = SIZE(VAR,2) + + CALL SZ_batchAddVar_d2_Double(varID, varName, len(trim(varName)), VAR, ErrBoundMode, AbsErrBound, RelBoundRatio, R1, R2) + END SUBROUTINE SZ_BatchAddVar_d2_Fortran_REAL_K8 + + SUBROUTINE SZ_BatchAddVar_d3_Fortran_REAL_K8(varID, varName, VAR, ErrBoundMode, AbsErrBound, RelBoundRatio) + implicit none + INTEGER(kind=4) :: varID + CHARACTER(len=*) :: varName + REAL(KIND=8), DIMENSION(:,:,:) :: VAR + INTEGER(kind=4) :: ErrBoundMode + INTEGER(kind=C_SIZE_T) :: R1, R2, R3 + REAL(kind=8) :: AbsErrBound, RelBoundRatio + R1 = SIZE(VAR,1) + R2 = SIZE(VAR,2) + R3 = SIZE(VAR,3) + + CALL SZ_batchAddVar_d3_Double(varID, varName, len(trim(varName)), VAR, ErrBoundMode, AbsErrBound, RelBoundRatio, R1, R2, 
R3) + END SUBROUTINE SZ_BatchAddVar_d3_Fortran_REAL_K8 + + SUBROUTINE SZ_BatchAddVar_d4_Fortran_REAL_K8(varID, varName, VAR, ErrBoundMode, AbsErrBound, RelBoundRatio) + implicit none + INTEGER(kind=4) :: varID + CHARACTER(len=*) :: varName + REAL(KIND=8), DIMENSION(:,:,:,:) :: VAR + INTEGER(kind=4) :: ErrBoundMode + INTEGER(kind=C_SIZE_T) :: R1, R2, R3, R4 + REAL(kind=8) :: AbsErrBound, RelBoundRatio + R1 = SIZE(VAR,1) + R2 = SIZE(VAR,2) + R3 = SIZE(VAR,3) + R4 = SIZE(VAR,4) + + CALL SZ_batchAddVar_d4_Double(varID, varName, len(trim(varName)), VAR, ErrBoundMode, AbsErrBound, RelBoundRatio, R1, R2, R3, R4) + END SUBROUTINE SZ_BatchAddVar_d4_Fortran_REAL_K8 + + SUBROUTINE SZ_BatchAddVar_d5_Fortran_REAL_K8(varID, varName, VAR, ErrBoundMode, AbsErrBound, RelBoundRatio) + implicit none + INTEGER(kind=4) :: varID + CHARACTER(len=*) :: varName + REAL(KIND=8), DIMENSION(:,:,:,:,:) :: VAR + INTEGER(kind=4) :: ErrBoundMode + INTEGER(kind=C_SIZE_T) :: R1, R2, R3, R4, R5 + REAL(kind=8) :: AbsErrBound, RelBoundRatio + R1 = SIZE(VAR,1) + R2 = SIZE(VAR,2) + R3 = SIZE(VAR,3) + R4 = SIZE(VAR,4) + R5 = SIZE(VAR,5) + + CALL SZ_batchAddVar_d5_Double(varID, varName, len(trim(varName)), VAR, ErrBoundMode, AbsErrBound, RelBoundRatio, & + R1, R2, R3, R4, R5) + END SUBROUTINE SZ_BatchAddVar_d5_Fortran_REAL_K8 + + SUBROUTINE SZ_GetVarDim(varName, DIMEN, R1, R2, R3, R4, R5) + implicit none + CHARACTER(len=*), INTENT(IN) :: varName + INTEGER(kind=4), INTENT(OUT) :: DIMEN + INTEGER(kind=C_SIZE_T), INTENT(OUT) :: R1, R2, R3, R4, R5 + + CALL SZ_GetVarDim_c(varName, len(trim(varName)), DIMEN, R1, R2, R3, R4, R5) + + END SUBROUTINE SZ_GetVarDim + + SUBROUTINE SZ_GetVarData_d1_Fortran_REAL_K4(varName, VAR) + implicit none + CHARACTER(len=*), INTENT(IN) :: varName + REAL(KIND=4), DIMENSION(:), allocatable :: VAR + + CALL SZ_getVarData_Float(varName, len(trim(varName)), VAR) + + END SUBROUTINE SZ_GetVarData_d1_Fortran_REAL_K4 + + SUBROUTINE SZ_GetVarData_d2_Fortran_REAL_K4(varName, VAR) + implicit 
none + CHARACTER(len=*), INTENT(IN) :: varName + REAL(KIND=4), DIMENSION(:,:), allocatable :: VAR + + CALL SZ_getVarData_Float(varName, len(trim(varName)), VAR) + + END SUBROUTINE SZ_GetVarData_d2_Fortran_REAL_K4 + + SUBROUTINE SZ_GetVarData_d3_Fortran_REAL_K4(varName, VAR) + implicit none + CHARACTER(len=*), INTENT(IN) :: varName + REAL(KIND=4), DIMENSION(:,:,:), allocatable :: VAR + + CALL SZ_getVarData_Float(varName, len(trim(varName)), VAR) + + END SUBROUTINE SZ_GetVarData_d3_Fortran_REAL_K4 + + SUBROUTINE SZ_GetVarData_d4_Fortran_REAL_K4(varName, VAR) + implicit none + CHARACTER(len=*), INTENT(IN) :: varName + REAL(KIND=4), DIMENSION(:,:,:,:), allocatable :: VAR + + CALL SZ_getVarData_Float(varName, len(trim(varName)), VAR) + + END SUBROUTINE SZ_GetVarData_d4_Fortran_REAL_K4 + + SUBROUTINE SZ_GetVarData_d5_Fortran_REAL_K4(varName, VAR) + implicit none + CHARACTER(len=*), INTENT(IN) :: varName + REAL(KIND=4), DIMENSION(:,:,:,:,:), allocatable :: VAR + + CALL SZ_getVarData_Float(varName, len(trim(varName)), VAR) + + END SUBROUTINE SZ_GetVarData_d5_Fortran_REAL_K4 + + SUBROUTINE SZ_GetVarData_d1_Fortran_REAL_K8(varName, VAR) + implicit none + CHARACTER(len=*), INTENT(IN) :: varName + REAL(KIND=8), DIMENSION(:), allocatable :: VAR + + CALL SZ_getVarData_Double(varName, len(trim(varName)), VAR) + + END SUBROUTINE SZ_GetVarData_d1_Fortran_REAL_K8 + + SUBROUTINE SZ_GetVarData_d2_Fortran_REAL_K8(varName, VAR) + implicit none + CHARACTER(len=*), INTENT(IN) :: varName + REAL(KIND=8), DIMENSION(:,:), allocatable :: VAR + + CALL SZ_getVarData_Double(varName, len(trim(varName)), VAR) + + END SUBROUTINE SZ_GetVarData_d2_Fortran_REAL_K8 + + SUBROUTINE SZ_GetVarData_d3_Fortran_REAL_K8(varName, VAR) + implicit none + CHARACTER(len=*), INTENT(IN) :: varName + REAL(KIND=8), DIMENSION(:,:,:), allocatable :: VAR + + CALL SZ_getVarData_Double(varName, len(trim(varName)), VAR) + + END SUBROUTINE SZ_GetVarData_d3_Fortran_REAL_K8 + + SUBROUTINE 
SZ_GetVarData_d4_Fortran_REAL_K8(varName, VAR) + implicit none + CHARACTER(len=*), INTENT(IN) :: varName + REAL(KIND=8), DIMENSION(:,:,:,:), allocatable :: VAR + + CALL SZ_getVarData_Double(varName, len(trim(varName)), VAR) + + END SUBROUTINE SZ_GetVarData_d4_Fortran_REAL_K8 + + SUBROUTINE SZ_GetVarData_d5_Fortran_REAL_K8(varName, VAR) + implicit none + CHARACTER(len=*), INTENT(IN) :: varName + REAL(KIND=8), DIMENSION(:,:,:,:,:), allocatable :: VAR + + CALL SZ_getVarData_Double(varName, len(trim(varName)), VAR) + + END SUBROUTINE SZ_GetVarData_d5_Fortran_REAL_K8 + +END MODULE SZ diff --git a/deps/SZ/sz/src/sz_omp.c b/deps/SZ/sz/src/sz_omp.c new file mode 100644 index 0000000000000000000000000000000000000000..362b6bf6955103d6f422012982ebd3b5f1512800 --- /dev/null +++ b/deps/SZ/sz/src/sz_omp.c @@ -0,0 +1,986 @@ +/** + * @file sz_omp.c + * @author Xin Liang + * @date July, 2017 + * @brief the implementation of openMP version + * (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. 
+ */ + +#include "sz_omp.h" +#include +#include + +double sz_wtime(){ +#ifdef _OPENMP + return omp_get_wtime(); +#else + struct timespec ts; + clock_gettime(CLOCK_MONOTONIC, &ts); + + return (double)ts.tv_sec + (double)ts.tv_nsec / 1000000000.0; +#endif +} + +int sz_get_max_threads(){ +#ifdef _OPENMP + return omp_get_max_threads(); +#else + return 1; +#endif +} + +int sz_get_thread_num(){ +#ifdef _OPENMP + return omp_get_thread_num(); +#else + return 0; +#endif +} + +void sz_set_num_threads(int nthreads){ +#ifdef _OPENMP + omp_set_num_threads(nthreads); +#endif +} + +unsigned char * SZ_compress_float_1D_MDQ_openmp(float *oriData, size_t r1, double realPrecision, size_t * comp_size){ + return NULL; +} +unsigned char * SZ_compress_float_2D_MDQ_openmp(float *oriData, size_t r1, size_t r2, double realPrecision, size_t * comp_size){ + return NULL; +} + +unsigned char * SZ_compress_float_3D_MDQ_openmp(float *oriData, size_t r1, size_t r2, size_t r3, float realPrecision, size_t * comp_size){ + + float elapsed_time = 0.0; + + elapsed_time = -sz_wtime(); + unsigned int quantization_intervals; + if(exe_params->optQuantMode==1) + { + // quantization_intervals = optimize_intervals_float_3D(oriData, r1, realPrecision); + quantization_intervals = optimize_intervals_float_3D_opt(oriData, r1, r2, r3, realPrecision); + //quantization_intervals = 32768; + printf("3D number of bins: %d\nerror bound %.20f\n", quantization_intervals, realPrecision); + // exit(0); + updateQuantizationInfo(quantization_intervals); + } + else{ + quantization_intervals = exe_params->intvCapacity; + } + elapsed_time += sz_wtime(); + printf("opt interval time: %.4f\n", elapsed_time); + + elapsed_time = -sz_wtime(); + int thread_num = sz_get_max_threads(); + int thread_order = (int)log2(thread_num); + size_t num_x = 0, num_y = 0, num_z = 0; + { + int block_thread_order = thread_order / 3; + switch(thread_order % 3){ + case 0:{ + num_x = 1 << block_thread_order; + num_y = 1 << block_thread_order; + num_z = 1 
<< block_thread_order; + break; + } + case 1:{ + num_x = 1 << (block_thread_order + 1); + num_y = 1 << block_thread_order; + num_z = 1 << block_thread_order; + break; + } + case 2:{ + num_x = 1 << (block_thread_order + 1); + num_y = 1 << (block_thread_order + 1); + num_z = 1 << block_thread_order; + break; + } + } + thread_num = num_x * num_y * num_z; + } + sz_set_num_threads(thread_num); + // calculate block dims + printf("number of blocks: %zu %zu %zu\n", num_x, num_y, num_z); + + size_t split_index_x, split_index_y, split_index_z; + size_t early_blockcount_x, early_blockcount_y, early_blockcount_z; + size_t late_blockcount_x, late_blockcount_y, late_blockcount_z; + SZ_COMPUTE_BLOCKCOUNT(r1, num_x, split_index_x, early_blockcount_x, late_blockcount_x); + SZ_COMPUTE_BLOCKCOUNT(r2, num_y, split_index_y, early_blockcount_y, late_blockcount_y); + SZ_COMPUTE_BLOCKCOUNT(r3, num_z, split_index_z, early_blockcount_z, late_blockcount_z); + + size_t max_num_block_elements = early_blockcount_x * early_blockcount_y * early_blockcount_z; + size_t num_blocks = num_x * num_y * num_z; + size_t num_elements = r1 * r2 * r3; + // printf("max_num_block_elements %d num_blocks %d\n", max_num_block_elements, num_blocks); + + size_t dim0_offset = r2 * r3; + size_t dim1_offset = r3; + + // printf("malloc blockinfo array start\n"); + // fflush(stdout); + + size_t buffer_size = early_blockcount_y * early_blockcount_z * sizeof(float); + int * result_type = (int *) malloc(num_elements * sizeof(int)); + size_t unpred_data_max_size = max_num_block_elements; + float * result_unpredictable_data = (float *) malloc(unpred_data_max_size * sizeof(float) * num_blocks); + unsigned int * unpredictable_count = (unsigned int *) malloc(num_blocks * sizeof(unsigned int)); + float * mean = malloc(num_blocks * sizeof(float)); + float * buffer0, * buffer1; + buffer0 = (float *) malloc(buffer_size * thread_num); + buffer1 = (float *) malloc(buffer_size * thread_num); + unsigned char * result = (unsigned char 
*) malloc(num_elements * (sizeof(int) + sizeof(float))); + size_t * unpred_offset = (size_t *) malloc(num_blocks * sizeof(size_t)); + unsigned char * encoding_buffer = (unsigned char *) malloc(max_num_block_elements * sizeof(int) * num_blocks); + size_t * block_offset = (size_t *) malloc(num_blocks * sizeof(size_t)); + size_t *freq = (size_t *)malloc(thread_num*quantization_intervals*4*sizeof(size_t)); + memset(freq, 0, thread_num*quantization_intervals*4*sizeof(size_t)); + + size_t stateNum = quantization_intervals*2; + HuffmanTree* huffmanTree = createHuffmanTree(stateNum); + + int num_yz = num_y * num_z; + #pragma omp parallel for + for(int t=0; tcode[i]) nodeCount++; + nodeCount = nodeCount*2-1; + unsigned char *treeBytes; + unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes); + + unsigned int meta_data_offset = 3 + 1 + MetaDataByteLength; + size_t total_unpred = 0; + for(int i=0; iintvRadius = (int)((tdps->intervals - 1)/ 2); + + unsigned int tree_size = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(unsigned int); + size_t huffman_nodes = bytesToInt_bigEndian(comp_data_pos); + huffmanTree->allNodes = huffman_nodes; + // printf("Reconstruct huffman tree with node count %ld\n", nodeCount); + // fflush(stdout); + node root = reconstruct_HuffTree_from_bytes_anyStates(huffmanTree, comp_data_pos+4, huffmanTree->allNodes); + + comp_data_pos += 4 + tree_size; + unsigned int * unpred_count = (unsigned int *) comp_data_pos; + comp_data_pos += num_blocks * sizeof(unsigned int); + float * mean_pos = (float *) comp_data_pos; + comp_data_pos += num_blocks * sizeof(float); + float * result_unpredictable_data = (float *) comp_data_pos; + size_t total_unpred = 0; + for(int i=0; ioptQuantMode==1) + { + // quantization_intervals = optimize_intervals_float_3D(oriData, r1, realPrecision); + quantization_intervals = optimize_intervals_double_3D_opt(oriData, r1, r2, r3, realPrecision); + //quantization_intervals = 
32768; + printf("3D number of bins: %d\nerror bound %.20f\n", quantization_intervals, realPrecision); + // exit(0); + updateQuantizationInfo(quantization_intervals); + } + else{ + quantization_intervals = exe_params->intvCapacity; + } + elapsed_time += sz_wtime(); + printf("opt interval time: %.4f\n", elapsed_time); + + elapsed_time = -sz_wtime(); + int thread_num = sz_get_max_threads(); + int thread_order = (int)log2(thread_num); + size_t num_x = 0, num_y = 0, num_z = 0; + { + int block_thread_order = thread_order / 3; + switch(thread_order % 3){ + case 0:{ + num_x = 1 << block_thread_order; + num_y = 1 << block_thread_order; + num_z = 1 << block_thread_order; + break; + } + case 1:{ + num_x = 1 << (block_thread_order + 1); + num_y = 1 << block_thread_order; + num_z = 1 << block_thread_order; + break; + } + case 2:{ + num_x = 1 << (block_thread_order + 1); + num_y = 1 << (block_thread_order + 1); + num_z = 1 << block_thread_order; + break; + } + } + thread_num = num_x * num_y * num_z; + } + sz_set_num_threads(thread_num); + // calculate block dims + printf("number of blocks: %zu %zu %zu\n", num_x, num_y, num_z); + + size_t split_index_x, split_index_y, split_index_z; + size_t early_blockcount_x, early_blockcount_y, early_blockcount_z; + size_t late_blockcount_x, late_blockcount_y, late_blockcount_z; + SZ_COMPUTE_BLOCKCOUNT(r1, num_x, split_index_x, early_blockcount_x, late_blockcount_x); + SZ_COMPUTE_BLOCKCOUNT(r2, num_y, split_index_y, early_blockcount_y, late_blockcount_y); + SZ_COMPUTE_BLOCKCOUNT(r3, num_z, split_index_z, early_blockcount_z, late_blockcount_z); + + size_t max_num_block_elements = early_blockcount_x * early_blockcount_y * early_blockcount_z; + size_t num_blocks = num_x * num_y * num_z; + size_t num_elements = r1 * r2 * r3; + // printf("max_num_block_elements %d num_blocks %d\n", max_num_block_elements, num_blocks); + + size_t dim0_offset = r2 * r3; + size_t dim1_offset = r3; + + // printf("malloc blockinfo array start\n"); + // fflush(stdout); + 
+ size_t buffer_size = early_blockcount_y * early_blockcount_z * sizeof(double); + int * result_type = (int *) malloc(num_elements * sizeof(int)); + size_t unpred_data_max_size = max_num_block_elements; + double * result_unpredictable_data = (double *) malloc(unpred_data_max_size * sizeof(double) * num_blocks); + unsigned int * unpredictable_count = (unsigned int *) malloc(num_blocks * sizeof(unsigned int)); + double * mean = malloc(num_blocks * sizeof(double)); + double * buffer0, * buffer1; + buffer0 = (double *) malloc(buffer_size * thread_num); + buffer1 = (double *) malloc(buffer_size * thread_num); + unsigned char * result = (unsigned char *) malloc(num_elements * (sizeof(int) + sizeof(double))); + size_t * unpred_offset = (size_t *) malloc(num_blocks * sizeof(size_t)); + unsigned char * encoding_buffer = (unsigned char *) malloc(max_num_block_elements * sizeof(int) * num_blocks); + size_t * block_offset = (size_t *) malloc(num_blocks * sizeof(size_t)); + size_t *freq = (size_t *)malloc(thread_num*quantization_intervals*4*sizeof(size_t)); + memset(freq, 0, thread_num*quantization_intervals*4*sizeof(size_t)); + + size_t stateNum = quantization_intervals*2; + HuffmanTree* huffmanTree = createHuffmanTree(stateNum); + + int num_yz = num_y * num_z; + #pragma omp parallel for + for(int t=0; tcode[i]) nodeCount++; + nodeCount = nodeCount*2-1; + unsigned char *treeBytes; + unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes); + + unsigned int meta_data_offset = 3 + 1 + MetaDataByteLength; + size_t total_unpred = 0; + for(int i=0; iintvRadius = (int)((tdps->intervals - 1)/ 2); + + unsigned int tree_size = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(unsigned int); + size_t huffman_nodes = bytesToInt_bigEndian(comp_data_pos); + huffmanTree->allNodes = huffman_nodes; + // printf("Reconstruct huffman tree with node count %ld\n", nodeCount); + // fflush(stdout); + node root = 
reconstruct_HuffTree_from_bytes_anyStates(huffmanTree, comp_data_pos+4, huffmanTree->allNodes); + + comp_data_pos += 4 + tree_size; + unsigned int * unpred_count = (unsigned int *) comp_data_pos; + comp_data_pos += num_blocks * sizeof(unsigned int); + double * mean_pos = (double *) comp_data_pos; + comp_data_pos += num_blocks * sizeof(double); + double * result_unpredictable_data = (double *) comp_data_pos; + size_t total_unpred = 0; + for(int i=0; iallNodes*sizeof(size_t)); + // memset(freq, 0, thread_num*huffmanTree->allNodes*sizeof(size_t)); + size_t block_size = (length - 1)/ thread_num + 1; + size_t block_residue = length - (thread_num - 1) * block_size; + #pragma omp parallel for + for(int t=0; tallNodes; + if(id < thread_num - 1){ + for(size_t i=0; iallNodes; + for(int t=1; tallNodes; i++){ + freq[i] += freq_pos[i]; + } + freq_pos += huffmanTree->allNodes; + } + + for (i = 0; i < huffmanTree->allNodes; i++) + if (freq[i]) + qinsert(huffmanTree, new_node(huffmanTree, freq[i], i, 0, 0)); + + while (huffmanTree->qend > 2) + qinsert(huffmanTree, new_node(huffmanTree, 0, 0, qremove(huffmanTree), qremove(huffmanTree))); + + build_code(huffmanTree, huffmanTree->qq[1], 0, 0, 0); + // free(freq); +} + + + diff --git a/deps/SZ/sz/src/sz_stats.c b/deps/SZ/sz/src/sz_stats.c new file mode 100644 index 0000000000000000000000000000000000000000..dbd91c04eccf2e67ccabb8097d892c8c8a1c6880 --- /dev/null +++ b/deps/SZ/sz/src/sz_stats.c @@ -0,0 +1,60 @@ +#include + +sz_stats sz_stat; + +void writeBlockInfo(int use_mean, size_t blockSize, size_t regressionBlocks, size_t totalBlocks) +{ + sz_stat.use_mean = use_mean; + sz_stat.blockSize = blockSize; + sz_stat.lorenzoBlocks = totalBlocks - regressionBlocks; + sz_stat.regressionBlocks = regressionBlocks; + sz_stat.totalBlocks = totalBlocks; + sz_stat.lorenzoPercent = 1.0f*sz_stat.lorenzoBlocks/(float)totalBlocks; + sz_stat.regressionPercent = 1.0f*regressionBlocks/(float)totalBlocks; +} + +void writeHuffmanInfo(size_t 
huffmanTreeSize, size_t huffmanCodingSize, size_t totalDataSize, int huffmanNodeCount) +{ + sz_stat.huffmanTreeSize = huffmanTreeSize; + sz_stat.huffmanCodingSize = huffmanCodingSize; + sz_stat.huffmanCompressionRatio = 1.0f*totalDataSize/(huffmanTreeSize+huffmanCodingSize); + sz_stat.huffmanNodeCount = huffmanNodeCount; +} + +void writeZstdCompressionRatio(float zstdCompressionRatio) +{ + sz_stat.zstdCompressionRatio = zstdCompressionRatio; +} + + +void writeUnpredictDataCounts(size_t unpredictCount, size_t totalNumElements) +{ + sz_stat.unpredictCount = unpredictCount; + sz_stat.unpredictPercent = 1.0f*unpredictCount/totalNumElements; +} + +void printSZStats() +{ + printf("===============stats about sz================\n"); + if(sz_stat.use_mean) + printf("use_mean: YES\n"); + else + printf("use_mean: NO\n"); + + printf("blockSize %zu\n", sz_stat.blockSize); + printf("lorenzoPercent %f\n", sz_stat.lorenzoPercent); + printf("regressionPercent %f\n", sz_stat.regressionPercent); + printf("lorenzoBlocks %zu\n", sz_stat.lorenzoBlocks); + printf("regressionBlocks %zu\n", sz_stat.regressionBlocks); + printf("totalBlocks %zu\n", sz_stat.totalBlocks); + + printf("huffmanTreeSize %zu\n", sz_stat.huffmanTreeSize); + printf("huffmanCodingSize %zu\n", sz_stat.huffmanCodingSize); + printf("huffmanCompressionRatio %f\n", sz_stat.huffmanCompressionRatio); + printf("huffmanNodeCount %d\n", sz_stat.huffmanNodeCount); + + //printf("zstdCompressionRatio %f\n", sz_stat.zstdCompressionRatio); + + printf("unpredictCount %zu\n", sz_stat.unpredictCount); + printf("unpredictPercent %f\n", sz_stat.unpredictPercent); +} diff --git a/deps/SZ/sz/src/sz_uint16.c b/deps/SZ/sz/src/sz_uint16.c new file mode 100644 index 0000000000000000000000000000000000000000..ae557da356f70f274299dd036a0051bba0346cb9 --- /dev/null +++ b/deps/SZ/sz/src/sz_uint16.c @@ -0,0 +1,1385 @@ +/** + * @file sz_uint16.c + * @author Sheng Di + * @date Aug, 2017 + * @brief sz_uint16, Compression and Decompression functions + * 
(C) 2017 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. + */ + + +#include +#include +#include +#include +#include +#include "sz.h" +#include "CompressElement.h" +#include "DynamicByteArray.h" +#include "DynamicIntArray.h" +#include "zlib.h" +#include "rw.h" +#include "TightDataPointStorageI.h" +#include "sz_uint16.h" +#include "utility.h" + +unsigned int optimize_intervals_uint16_1D(uint16_t *oriData, size_t dataLength, double realPrecision) +{ + size_t i = 0, radiusIndex; + int64_t pred_value = 0, pred_err; + size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t)); + memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t)); + size_t totalSampleSize = dataLength/confparams_cpr->sampleDistance; + for(i=2;isampleDistance==0) + { + //pred_value = 2*oriData[i-1] - oriData[i-2]; + pred_value = oriData[i-1]; + pred_err = llabs(pred_value - oriData[i]); + radiusIndex = (uint64_t)((pred_err/realPrecision+1)/2); + if(radiusIndex>=confparams_cpr->maxRangeRadius) + radiusIndex = confparams_cpr->maxRangeRadius - 1; + intervals[radiusIndex]++; + } + } + //compute the appropriate number + size_t targetCount = totalSampleSize*confparams_cpr->predThreshold; + size_t sum = 0; + for(i=0;imaxRangeRadius;i++) + { + sum += intervals[i]; + if(sum>targetCount) + break; + } + if(i>=confparams_cpr->maxRangeRadius) + i = confparams_cpr->maxRangeRadius-1; + + unsigned int accIntervals = 2*(i+1); + unsigned int powerOf2 = roundUpToPowerOf2(accIntervals); + + if(powerOf2<32) + powerOf2 = 32; + + free(intervals); + //printf("accIntervals=%d, powerOf2=%d\n", accIntervals, powerOf2); + return powerOf2; +} + +unsigned int optimize_intervals_uint16_2D(uint16_t *oriData, size_t r1, size_t r2, double realPrecision) +{ + size_t i,j, index; + size_t radiusIndex; + int64_t pred_value = 0, pred_err; + size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t)); + 
memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t)); + size_t totalSampleSize = (r1-1)*(r2-1)/confparams_cpr->sampleDistance; + for(i=1;isampleDistance==0) + { + index = i*r2+j; + pred_value = oriData[index-1] + oriData[index-r2] - oriData[index-r2-1]; + pred_err = llabs(pred_value - oriData[index]); + radiusIndex = (uint64_t)((pred_err/realPrecision+1)/2); + if(radiusIndex>=confparams_cpr->maxRangeRadius) + radiusIndex = confparams_cpr->maxRangeRadius - 1; + intervals[radiusIndex]++; + } + } + } + //compute the appropriate number + size_t targetCount = totalSampleSize*confparams_cpr->predThreshold; + size_t sum = 0; + for(i=0;imaxRangeRadius;i++) + { + sum += intervals[i]; + if(sum>targetCount) + break; + } + if(i>=confparams_cpr->maxRangeRadius) + i = confparams_cpr->maxRangeRadius-1; + unsigned int accIntervals = 2*(i+1); + unsigned int powerOf2 = roundUpToPowerOf2(accIntervals); + + if(powerOf2<32) + powerOf2 = 32; + + free(intervals); + //printf("confparams_cpr->maxRangeRadius = %d, accIntervals=%d, powerOf2=%d\n", confparams_cpr->maxRangeRadius, accIntervals, powerOf2); + return powerOf2; +} + +unsigned int optimize_intervals_uint16_3D(uint16_t *oriData, size_t r1, size_t r2, size_t r3, double realPrecision) +{ + size_t i,j,k, index; + size_t radiusIndex; + size_t r23=r2*r3; + int64_t pred_value = 0, pred_err; + size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t)); + memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t)); + size_t totalSampleSize = (r1-1)*(r2-1)*(r3-1)/confparams_cpr->sampleDistance; + for(i=1;isampleDistance==0) + { + index = i*r23+j*r3+k; + pred_value = oriData[index-1] + oriData[index-r3] + oriData[index-r23] + - oriData[index-1-r23] - oriData[index-r3-1] - oriData[index-r3-r23] + oriData[index-r3-r23-1]; + pred_err = llabs(pred_value - oriData[index]); + radiusIndex = (uint64_t)((pred_err/realPrecision+1)/2); + if(radiusIndex>=confparams_cpr->maxRangeRadius) + { + radiusIndex = 
confparams_cpr->maxRangeRadius - 1; + //printf("radiusIndex=%d\n", radiusIndex); + } + intervals[radiusIndex]++; + } + } + } + } + //compute the appropriate number + size_t targetCount = totalSampleSize*confparams_cpr->predThreshold; + size_t sum = 0; + for(i=0;imaxRangeRadius;i++) + { + sum += intervals[i]; + if(sum>targetCount) + break; + } + if(i>=confparams_cpr->maxRangeRadius) + i = confparams_cpr->maxRangeRadius-1; + unsigned int accIntervals = 2*(i+1); + unsigned int powerOf2 = roundUpToPowerOf2(accIntervals); + + if(powerOf2<32) + powerOf2 = 32; + + free(intervals); + //printf("targetCount=%d, sum=%d, totalSampleSize=%d, ratio=%f, accIntervals=%d, powerOf2=%d\n", targetCount, sum, totalSampleSize, (double)sum/(double)totalSampleSize, accIntervals, powerOf2); + return powerOf2; +} + + +unsigned int optimize_intervals_uint16_4D(uint16_t *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision) +{ + size_t i,j,k,l, index; + size_t radiusIndex; + size_t r234=r2*r3*r4; + size_t r34=r3*r4; + int64_t pred_value = 0, pred_err; + size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t)); + memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t)); + size_t totalSampleSize = (r1-1)*(r2-1)*(r3-1)*(r4-1)/confparams_cpr->sampleDistance; + for(i=1;isampleDistance==0) + { + index = i*r234+j*r34+k*r4+l; + pred_value = oriData[index-1] + oriData[index-r3] + oriData[index-r34] + - oriData[index-1-r34] - oriData[index-r4-1] - oriData[index-r4-r34] + oriData[index-r4-r34-1]; + pred_err = llabs(pred_value - oriData[index]); + radiusIndex = (uint64_t)((pred_err/realPrecision+1)/2); + if(radiusIndex>=confparams_cpr->maxRangeRadius) + radiusIndex = confparams_cpr->maxRangeRadius - 1; + intervals[radiusIndex]++; + } + } + } + } + } + //compute the appropriate number + size_t targetCount = totalSampleSize*confparams_cpr->predThreshold; + size_t sum = 0; + for(i=0;imaxRangeRadius;i++) + { + sum += intervals[i]; + 
if(sum>targetCount) + break; + } + if(i>=confparams_cpr->maxRangeRadius) + i = confparams_cpr->maxRangeRadius-1; + + unsigned int accIntervals = 2*(i+1); + unsigned int powerOf2 = roundUpToPowerOf2(accIntervals); + + if(powerOf2<32) + powerOf2 = 32; + + free(intervals); + return powerOf2; +} + +TightDataPointStorageI* SZ_compress_uint16_1D_MDQ(uint16_t *oriData, size_t dataLength, double realPrecision, int64_t valueRangeSize, int64_t minValue) +{ + unsigned char bytes[8] = {0,0,0,0,0,0,0,0}; + int byteSize = computeByteSizePerIntValue(valueRangeSize); + unsigned int quantization_intervals; + if(exe_params->optQuantMode==1) + quantization_intervals = optimize_intervals_uint16_1D(oriData, dataLength, realPrecision); + else + quantization_intervals = exe_params->intvCapacity; + updateQuantizationInfo(quantization_intervals); + size_t i; + + int* type = (int*) malloc(dataLength*sizeof(int)); + + uint16_t* spaceFillingValue = oriData; // + + DynamicByteArray *exactDataByteArray; + new_DBA(&exactDataByteArray, DynArrayInitLen); + + int64_t last3CmprsData[3] = {0,0,0}; + + //add the first data + type[0] = 0; + compressUInt16Value(spaceFillingValue[0], minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + listAdd_int(last3CmprsData, spaceFillingValue[0]); + + type[1] = 0; + compressUInt16Value(spaceFillingValue[1], minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + listAdd_int(last3CmprsData, spaceFillingValue[1]); + //printf("%.30G\n",last3CmprsData[0]); + + int state; + double checkRadius = (exe_params->intvCapacity-1)*realPrecision; + int64_t curData; + int64_t pred, predAbsErr; + double interval = 2*realPrecision; + + for(i=2;i=pred) + { + type[i] = exe_params->intvRadius+state; + pred = pred + state*interval; + } + else //curDataintvRadius-state; + pred = pred - state*interval; + } + if(pred>SZ_UINT16_MAX) pred = SZ_UINT16_MAX; + if(predsize / byteSize; + + TightDataPointStorageI* tdps; + + 
new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize, + type, exactDataByteArray->array, exactDataByteArray->size, + realPrecision, minValue, quantization_intervals, SZ_UINT16); + +//sdi:Debug +/* int sum =0; + for(i=0;iarray has been released in free_TightDataPointStorageF(tdps); + + return tdps; +} + +void SZ_compress_args_uint16_StoreOriData(uint16_t* oriData, size_t dataLength, TightDataPointStorageI* tdps, +unsigned char** newByteData, size_t *outSize) +{ + int intSize=sizeof(uint16_t); + size_t k = 0, i; + tdps->isLossless = 1; + size_t totalByteLength = 3 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 1 + intSize*dataLength; + *newByteData = (unsigned char*)malloc(totalByteLength); + + unsigned char dsLengthBytes[8]; + for (i = 0; i < 3; i++)//3 + (*newByteData)[k++] = versionNumber[i]; + + if(exe_params->SZ_SIZE_TYPE==4)//1 + (*newByteData)[k++] = 16; //00010000 + else + (*newByteData)[k++] = 80; //01010000: 01000000 indicates the SZ_SIZE_TYPE=8 + + convertSZParamsToBytes(confparams_cpr, &((*newByteData)[k])); + k = k + MetaDataByteLength; + + sizeToBytes(dsLengthBytes,dataLength); //SZ_SIZE_TYPE: 4 or 8 + for (i = 0; i < exe_params->SZ_SIZE_TYPE; i++) + (*newByteData)[k++] = dsLengthBytes[i]; + + if(sysEndianType==BIG_ENDIAN_SYSTEM) + memcpy((*newByteData)+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE, oriData, dataLength*intSize); + else + { + unsigned char* p = (*newByteData)+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE; + for(i=0;i dataLength*sizeof(uint16_t)) + SZ_compress_args_uint16_StoreOriData(oriData, dataLength+2, tdps, newByteData, outSize); + free_TightDataPointStorageI(tdps); +} + +TightDataPointStorageI* SZ_compress_uint16_2D_MDQ(uint16_t *oriData, size_t r1, size_t r2, double realPrecision, int64_t valueRangeSize, int64_t minValue) +{ + unsigned char bytes[8] = {0,0,0,0,0,0,0,0}; + int byteSize = computeByteSizePerIntValue(valueRangeSize); + + unsigned int quantization_intervals; + if(exe_params->optQuantMode==1) + { + 
quantization_intervals = optimize_intervals_uint16_2D(oriData, r1, r2, realPrecision); + updateQuantizationInfo(quantization_intervals); + } + else + quantization_intervals = exe_params->intvCapacity; + size_t i,j; + int64_t pred1D, pred2D, curValue, tmp; + int diff = 0.0; + double itvNum = 0; + uint16_t *P0, *P1; + + size_t dataLength = r1*r2; + + P0 = (uint16_t*)malloc(r2*sizeof(uint16_t)); + memset(P0, 0, r2*sizeof(uint16_t)); + P1 = (uint16_t*)malloc(r2*sizeof(uint16_t)); + memset(P1, 0, r2*sizeof(uint16_t)); + + int* type = (int*) malloc(dataLength*sizeof(int)); + //type[dataLength]=0; + + uint16_t* spaceFillingValue = oriData; // + + DynamicByteArray *exactDataByteArray; + new_DBA(&exactDataByteArray, DynArrayInitLen); + + type[0] = 0; + curValue = P1[0] = spaceFillingValue[0]; + compressUInt16Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + + /* Process Row-0 data 1*/ + pred1D = P1[0]; + diff = spaceFillingValue[1] - pred1D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[1] = (int) (itvNum/2) + exe_params->intvRadius; + tmp = pred1D + 2 * (type[1] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_UINT16_MIN&&tmp data r2-1 */ + for (j = 2; j < r2; j++) + { + pred1D = 2*P1[j-1] - P1[j-2]; + diff = spaceFillingValue[j] - pred1D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[j] = (int) (itvNum/2) + exe_params->intvRadius; + tmp = pred1D + 2 * (type[j] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_UINT16_MIN&&tmp Row-r1-1 */ + size_t index; + for (i = 1; i < r1; i++) + { + /* Process row-i data 0 */ + index = i*r2; + pred1D = P1[0]; + diff = spaceFillingValue[index] - pred1D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) 
(itvNum/2) + exe_params->intvRadius; + tmp = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_UINT16_MIN&&tmp r2-1*/ + for (j = 1; j < r2; j++) + { + index = i*r2+j; + pred2D = P0[j-1] + P1[j] - P1[j-1]; + + diff = spaceFillingValue[index] - pred2D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + tmp = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_UINT16_MIN&&tmpsize; + + TightDataPointStorageI* tdps; + + new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize, + type, exactDataByteArray->array, exactDataByteArray->size, + realPrecision, minValue, quantization_intervals, SZ_UINT16); + + //free memory + free(type); + free(exactDataByteArray); //exactDataByteArray->array has been released in free_TightDataPointStorageF(tdps); + + return tdps; +} + +/** + * + * Note: @r1 is high dimension + * @r2 is low dimension + * */ +void SZ_compress_args_uint16_NoCkRngeNoGzip_2D(unsigned char** newByteData, uint16_t *oriData, size_t r1, size_t r2, double realPrecision, size_t *outSize, +int64_t valueRangeSize, uint16_t minValue) +{ + TightDataPointStorageI* tdps = SZ_compress_uint16_2D_MDQ(oriData, r1, r2, realPrecision, valueRangeSize, minValue); + + convertTDPStoFlatBytes_int(tdps, newByteData, outSize); + + size_t dataLength = r1*r2; + if(*outSize>dataLength*sizeof(uint16_t)) + SZ_compress_args_uint16_StoreOriData(oriData, dataLength, tdps, newByteData, outSize); + + free_TightDataPointStorageI(tdps); +} + +TightDataPointStorageI* SZ_compress_uint16_3D_MDQ(uint16_t *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, int64_t valueRangeSize, int64_t minValue) +{ + unsigned char bytes[8] = {0,0,0,0,0,0,0,0}; + int byteSize = computeByteSizePerIntValue(valueRangeSize); + + unsigned int quantization_intervals; + if(exe_params->optQuantMode==1) + { + 
quantization_intervals = optimize_intervals_uint16_3D(oriData, r1, r2, r3, realPrecision); + updateQuantizationInfo(quantization_intervals); + } + else + quantization_intervals = exe_params->intvCapacity; + size_t i,j,k; + int64_t pred1D, pred2D, pred3D, curValue, tmp; + int diff = 0.0; + double itvNum = 0; + uint16_t *P0, *P1; + + size_t dataLength = r1*r2*r3; + + size_t r23 = r2*r3; + P0 = (uint16_t*)malloc(r23*sizeof(uint16_t)); + P1 = (uint16_t*)malloc(r23*sizeof(uint16_t)); + + int* type = (int*) malloc(dataLength*sizeof(int)); + + uint16_t* spaceFillingValue = oriData; // + + DynamicByteArray *exactDataByteArray; + new_DBA(&exactDataByteArray, DynArrayInitLen); + + type[0] = 0; + P1[0] = spaceFillingValue[0]; + compressUInt16Value(spaceFillingValue[0], minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + + /* Process Row-0 data 1*/ + pred1D = P1[0]; + diff = spaceFillingValue[1] - pred1D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[1] = (int) (itvNum/2) + exe_params->intvRadius; + tmp = pred1D + 2 * (type[1] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_UINT16_MIN&&tmp data r3-1 */ + for (j = 2; j < r3; j++) + { + pred1D = 2*P1[j-1] - P1[j-2]; + diff = spaceFillingValue[j] - pred1D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[j] = (int) (itvNum/2) + exe_params->intvRadius; + tmp = pred1D + 2 * (type[j] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_UINT16_MIN&&tmp Row-r2-1 */ + size_t index; + for (i = 1; i < r2; i++) + { + /* Process row-i data 0 */ + index = i*r3; + pred1D = P1[index-r3]; + diff = spaceFillingValue[index] - pred1D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + tmp = pred1D + 2 
* (type[index] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_UINT16_MIN&&tmp data r3-1*/ + for (j = 1; j < r3; j++) + { + index = i*r3+j; + pred2D = P1[index-1] + P1[index-r3] - P1[index-r3-1]; + + diff = spaceFillingValue[index] - pred2D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + tmp = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_UINT16_MIN&&tmp layer-r1-1 /////////////////////////// + + for (k = 1; k < r1; k++) + { + /* Process Row-0 data 0*/ + index = k*r23; + pred1D = P1[0]; + diff = spaceFillingValue[index] - pred1D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + tmp = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_UINT16_MIN&&tmp data r3-1 */ + for (j = 1; j < r3; j++) + { + //index = k*r2*r3+j; + index ++; + pred2D = P0[j-1] + P1[j] - P1[j-1]; + diff = spaceFillingValue[index] - pred2D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + tmp = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_UINT16_MIN&&tmp Row-r2-1 */ + size_t index2D; + for (i = 1; i < r2; i++) + { + /* Process Row-i data 0 */ + index = k*r23 + i*r3; + index2D = i*r3; + pred2D = P0[index2D-r3] + P1[index2D] - P1[index2D-r3]; + diff = spaceFillingValue[index] - pred2D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + tmp = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_UINT16_MIN&&tmp data 
r3-1 */ + for (j = 1; j < r3; j++) + { +// if(k==63&&i==43&&j==27) +// printf("i=%d\n", i); + //index = k*r2*r3 + i*r3 + j; + index ++; + index2D = i*r3 + j; + pred3D = P0[index2D-1] + P0[index2D-r3]+ P1[index2D] - P0[index2D-r3-1] - P1[index2D-r3] - P1[index2D-1] + P1[index2D-r3-1]; + diff = spaceFillingValue[index] - pred3D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + tmp = pred3D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_UINT16_MIN&&tmpsize; + + TightDataPointStorageI* tdps; + + new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize, + type, exactDataByteArray->array, exactDataByteArray->size, + realPrecision, minValue, quantization_intervals, SZ_UINT16); + + //free memory + free(type); + free(exactDataByteArray); //exactDataByteArray->array has been released in free_TightDataPointStorageF(tdps); + + return tdps; +} + + +void SZ_compress_args_uint16_NoCkRngeNoGzip_3D(unsigned char** newByteData, uint16_t *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t *outSize, +int64_t valueRangeSize, int64_t minValue) +{ + TightDataPointStorageI* tdps = SZ_compress_uint16_3D_MDQ(oriData, r1, r2, r3, realPrecision, valueRangeSize, minValue); + + convertTDPStoFlatBytes_int(tdps, newByteData, outSize); + + size_t dataLength = r1*r2*r3; + if(*outSize>dataLength*sizeof(uint16_t)) + SZ_compress_args_uint16_StoreOriData(oriData, dataLength, tdps, newByteData, outSize); + + free_TightDataPointStorageI(tdps); +} + + +TightDataPointStorageI* SZ_compress_uint16_4D_MDQ(uint16_t *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision, int64_t valueRangeSize, int64_t minValue) +{ + unsigned char bytes[8] = {0,0,0,0,0,0,0,0}; + int byteSize = computeByteSizePerIntValue(valueRangeSize); + + unsigned int quantization_intervals; + if(exe_params->optQuantMode==1) + 
{ + quantization_intervals = optimize_intervals_uint16_4D(oriData, r1, r2, r3, r4, realPrecision); + updateQuantizationInfo(quantization_intervals); + } + else + quantization_intervals = exe_params->intvCapacity; + size_t i,j,k; + int64_t pred1D, pred2D, pred3D, curValue, tmp; + int diff = 0.0; + double itvNum = 0; + uint16_t *P0, *P1; + + size_t dataLength = r1*r2*r3*r4; + + size_t r234 = r2*r3*r4; + size_t r34 = r3*r4; + + P0 = (uint16_t*)malloc(r34*sizeof(uint16_t)); + P1 = (uint16_t*)malloc(r34*sizeof(uint16_t)); + + int* type = (int*) malloc(dataLength*sizeof(int)); + + uint16_t* spaceFillingValue = oriData; // + + DynamicByteArray *exactDataByteArray; + new_DBA(&exactDataByteArray, DynArrayInitLen); + + size_t l; + for (l = 0; l < r1; l++) + { + + /////////////////////////// Process layer-0 /////////////////////////// + /* Process Row-0 data 0*/ + size_t index = l*r234; + size_t index2D = 0; + + type[index] = 0; + curValue = P1[index2D] = spaceFillingValue[index]; + compressUInt16Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + + /* Process Row-0 data 1*/ + index = l*r234+1; + index2D = 1; + + pred1D = P1[index2D-1]; + diff = curValue - pred1D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + tmp = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_UINT16_MIN&&tmp data r4-1 */ + for (j = 2; j < r4; j++) + { + index = l*r234+j; + index2D = j; + + pred1D = 2*P1[index2D-1] - P1[index2D-2]; + diff = spaceFillingValue[index] - pred1D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + tmp = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_UINT16_MIN&&tmp Row-r3-1 */ + for (i = 1; 
i < r3; i++) + { + /* Process row-i data 0 */ + index = l*r234+i*r4; + index2D = i*r4; + + pred1D = P1[index2D-r4]; + diff = spaceFillingValue[index] - pred1D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + tmp = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_UINT16_MIN&&tmp data r4-1*/ + for (j = 1; j < r4; j++) + { + index = l*r234+i*r4+j; + index2D = i*r4+j; + + pred2D = P1[index2D-1] + P1[index2D-r4] - P1[index2D-r4-1]; + + diff = spaceFillingValue[index] - pred2D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + tmp = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_UINT16_MIN&&tmp layer-r2-1 /////////////////////////// + + for (k = 1; k < r2; k++) + { + /* Process Row-0 data 0*/ + index = l*r234+k*r34; + index2D = 0; + + pred1D = P1[index2D]; + diff = spaceFillingValue[index] - pred1D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + tmp = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_UINT16_MIN&&tmp data r4-1 */ + for (j = 1; j < r4; j++) + { + index = l*r234+k*r34+j; + index2D = j; + + pred2D = P0[index2D-1] + P1[index2D] - P1[index2D-1]; + diff = spaceFillingValue[index] - pred2D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + tmp = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_UINT16_MIN&&tmp Row-r3-1 */ + for (i = 1; i < r3; i++) + { + /* Process Row-i data 0 */ + 
index = l*r234+k*r34+i*r4; + index2D = i*r4; + + pred2D = P0[index2D-r4] + P1[index2D] - P1[index2D-r4]; + diff = spaceFillingValue[index] - pred2D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + tmp = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_UINT16_MIN&&tmp data r4-1 */ + for (j = 1; j < r4; j++) + { + index = l*r234+k*r34+i*r4+j; + index2D = i*r4+j; + + pred3D = P0[index2D-1] + P0[index2D-r4]+ P1[index2D] - P0[index2D-r4-1] - P1[index2D-r4] - P1[index2D-1] + P1[index2D-r4-1]; + diff = spaceFillingValue[index] - pred3D; + + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + tmp = pred3D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_UINT16_MIN&&tmpsize; + + TightDataPointStorageI* tdps; + + new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize, + type, exactDataByteArray->array, exactDataByteArray->size, + realPrecision, minValue, quantization_intervals, SZ_UINT16); + + //free memory + free(type); + free(exactDataByteArray); //exactDataByteArray->array has been released in free_TightDataPointStorageF(tdps); + + return tdps; +} + +void SZ_compress_args_uint16_NoCkRngeNoGzip_4D(unsigned char** newByteData, uint16_t *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision, +size_t *outSize, int64_t valueRangeSize, int64_t minValue) +{ + TightDataPointStorageI* tdps = SZ_compress_uint16_4D_MDQ(oriData, r1, r2, r3, r4, realPrecision, valueRangeSize, minValue); + + convertTDPStoFlatBytes_int(tdps, newByteData, outSize); + + size_t dataLength = r1*r2*r3*r4; + if(*outSize>dataLength*sizeof(uint16_t)) + SZ_compress_args_uint16_StoreOriData(oriData, dataLength, tdps, newByteData, outSize); + + 
free_TightDataPointStorageI(tdps); +} + +void SZ_compress_args_uint16_withinRange(unsigned char** newByteData, uint16_t *oriData, size_t dataLength, size_t *outSize) +{ + TightDataPointStorageI* tdps = (TightDataPointStorageI*) malloc(sizeof(TightDataPointStorageI)); + tdps->typeArray = NULL; + + tdps->allSameData = 1; + tdps->dataSeriesLength = dataLength; + tdps->exactDataBytes = (unsigned char*)malloc(sizeof(unsigned char)*2); + tdps->isLossless = 0; + //tdps->exactByteSize = 4; + tdps->exactDataNum = 1; + tdps->exactDataBytes_size = 2; + tdps->dataTypeSize = convertDataTypeSize(sizeof(uint16_t)); + + uint16_t value = oriData[0]; + int16ToBytes_bigEndian(tdps->exactDataBytes, value); + + size_t tmpOutSize; + convertTDPStoFlatBytes_int(tdps, newByteData, &tmpOutSize); + + *outSize = tmpOutSize;//3+1+sizeof(uint16_t)+SZ_SIZE_TYPE; //8==3+1+4(uint16_size) + free_TightDataPointStorageI(tdps); +} + +int SZ_compress_args_uint16_wRngeNoGzip(unsigned char** newByteData, uint16_t *oriData, +size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize, +int errBoundMode, double absErr_Bound, double relBoundRatio) +{ + int status = SZ_SCES; + size_t dataLength = computeDataLength(r5,r4,r3,r2,r1); + int64_t valueRangeSize = 0; + + uint16_t minValue = computeRangeSize_int(oriData, SZ_UINT16, dataLength, &valueRangeSize); + double realPrecision = getRealPrecision_int(valueRangeSize, errBoundMode, absErr_Bound, relBoundRatio, &status); + + if(valueRangeSize <= realPrecision) + { + SZ_compress_args_uint16_withinRange(newByteData, oriData, dataLength, outSize); + } + else + { +// SZ_compress_args_uint16_NoCkRngeNoGzip_2D(newByteData, oriData, r2, r1, realPrecision, outSize); + if(r5==0&&r4==0&&r3==0&&r2==0) + { + SZ_compress_args_uint16_NoCkRngeNoGzip_1D(newByteData, oriData, r1, realPrecision, outSize, valueRangeSize, minValue); + } + else if(r5==0&&r4==0&&r3==0) + { + SZ_compress_args_uint16_NoCkRngeNoGzip_2D(newByteData, oriData, r2, r1, realPrecision, outSize, 
valueRangeSize, minValue); + } + else if(r5==0&&r4==0) + { + SZ_compress_args_uint16_NoCkRngeNoGzip_3D(newByteData, oriData, r3, r2, r1, realPrecision, outSize, valueRangeSize, minValue); + } + else if(r5==0) + { + SZ_compress_args_uint16_NoCkRngeNoGzip_3D(newByteData, oriData, r4*r3, r2, r1, realPrecision, outSize, valueRangeSize, minValue); + } + } + return status; +} + +int SZ_compress_args_uint16(unsigned char** newByteData, uint16_t *oriData, +size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize, +int errBoundMode, double absErr_Bound, double relBoundRatio) +{ + confparams_cpr->errorBoundMode = errBoundMode; + + if(errBoundMode>=PW_REL) + { + printf("Error: Current SZ version doesn't support integer data compression with point-wise relative error bound being based on pwrType=AVG\n"); + exit(0); + return SZ_NSCS; + } + int status = SZ_SCES; + size_t dataLength = computeDataLength(r5,r4,r3,r2,r1); + int64_t valueRangeSize = 0; + + uint16_t minValue = (uint16_t)computeRangeSize_int(oriData, SZ_UINT16, dataLength, &valueRangeSize); + double realPrecision = 0; + + if(confparams_cpr->errorBoundMode==PSNR) + { + confparams_cpr->errorBoundMode = ABS; + realPrecision = confparams_cpr->absErrBound = computeABSErrBoundFromPSNR(confparams_cpr->psnr, (double)confparams_cpr->predThreshold, (double)valueRangeSize); + //printf("realPrecision=%lf\n", realPrecision); + } + else + realPrecision = getRealPrecision_int(valueRangeSize, errBoundMode, absErr_Bound, relBoundRatio, &status); + + if(valueRangeSize <= realPrecision) + { + SZ_compress_args_uint16_withinRange(newByteData, oriData, dataLength, outSize); + } + else + { + size_t tmpOutSize = 0; + unsigned char* tmpByteData; + if (r2==0) + { + SZ_compress_args_uint16_NoCkRngeNoGzip_1D(&tmpByteData, oriData, r1, realPrecision, &tmpOutSize, valueRangeSize, minValue); + } + else + if (r3==0) + { + SZ_compress_args_uint16_NoCkRngeNoGzip_2D(&tmpByteData, oriData, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, 
minValue); + } + else + if (r4==0) + { + SZ_compress_args_uint16_NoCkRngeNoGzip_3D(&tmpByteData, oriData, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, minValue); + } + else + if (r5==0) + { + SZ_compress_args_uint16_NoCkRngeNoGzip_4D(&tmpByteData, oriData, r4, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, minValue); + } + else + { + printf("Error: doesn't support 5 dimensions for now.\n"); + status = SZ_DERR; //dimension error + } + //Call Gzip to do the further compression. + if(confparams_cpr->szMode==SZ_BEST_SPEED) + { + *outSize = tmpOutSize; + *newByteData = tmpByteData; + } + else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION || confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION) + { + *outSize = sz_lossless_compress(confparams_cpr->losslessCompressor, confparams_cpr->gzipMode, tmpByteData, tmpOutSize, newByteData); + free(tmpByteData); + } + else + { + printf("Error: Wrong setting of confparams_cpr->szMode in the uint16_t compression.\n"); + status = SZ_MERR; //mode error + } + } + + return status; +} diff --git a/deps/SZ/sz/src/sz_uint32.c b/deps/SZ/sz/src/sz_uint32.c new file mode 100644 index 0000000000000000000000000000000000000000..6f27510f258fc43388b310808e82ad8c50d4b772 --- /dev/null +++ b/deps/SZ/sz/src/sz_uint32.c @@ -0,0 +1,1268 @@ +/** + * @file sz_uint32.c + * @author Sheng Di + * @date Aug, 2017 + * @brief sz_uint32, Compression and Decompression functions + * (C) 2017 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. 
+ */ + + +#include +#include +#include +#include +#include +#include "sz.h" +#include "CompressElement.h" +#include "DynamicByteArray.h" +#include "DynamicIntArray.h" +#include "zlib.h" +#include "rw.h" +#include "TightDataPointStorageI.h" +#include "sz_uint32.h" +#include "utility.h" + +unsigned int optimize_intervals_uint32_1D(uint32_t *oriData, size_t dataLength, double realPrecision) +{ + size_t i = 0, radiusIndex; + int64_t pred_value = 0, pred_err; + size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t)); + memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t)); + size_t totalSampleSize = dataLength/confparams_cpr->sampleDistance; + for(i=2;isampleDistance==0) + { + //pred_value = 2*oriData[i-1] - oriData[i-2]; + pred_value = oriData[i-1]; + pred_err = llabs(pred_value - oriData[i]); + radiusIndex = (uint64_t)((pred_err/realPrecision+1)/2); + if(radiusIndex>=confparams_cpr->maxRangeRadius) + radiusIndex = confparams_cpr->maxRangeRadius - 1; + intervals[radiusIndex]++; + } + } + //compute the appropriate number + size_t targetCount = totalSampleSize*confparams_cpr->predThreshold; + size_t sum = 0; + for(i=0;imaxRangeRadius;i++) + { + sum += intervals[i]; + if(sum>targetCount) + break; + } + if(i>=confparams_cpr->maxRangeRadius) + i = confparams_cpr->maxRangeRadius-1; + + unsigned int accIntervals = 2*(i+1); + unsigned int powerOf2 = roundUpToPowerOf2(accIntervals); + + if(powerOf2<32) + powerOf2 = 32; + + free(intervals); + //printf("accIntervals=%d, powerOf2=%d\n", accIntervals, powerOf2); + return powerOf2; +} + +unsigned int optimize_intervals_uint32_2D(uint32_t *oriData, size_t r1, size_t r2, double realPrecision) +{ + size_t i,j, index; + size_t radiusIndex; + int64_t pred_value = 0, pred_err; + size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t)); + memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t)); + size_t totalSampleSize = r1*r2/confparams_cpr->sampleDistance; + 
for(i=1;isampleDistance==0) + { + index = i*r2+j; + pred_value = oriData[index-1] + oriData[index-r2] - oriData[index-r2-1]; + pred_err = llabs(pred_value - oriData[index]); + radiusIndex = (uint64_t)((pred_err/realPrecision+1)/2); + if(radiusIndex>=confparams_cpr->maxRangeRadius) + radiusIndex = confparams_cpr->maxRangeRadius - 1; + intervals[radiusIndex]++; + } + } + } + //compute the appropriate number + size_t targetCount = totalSampleSize*confparams_cpr->predThreshold; + size_t sum = 0; + for(i=0;imaxRangeRadius;i++) + { + sum += intervals[i]; + if(sum>targetCount) + break; + } + if(i>=confparams_cpr->maxRangeRadius) + i = confparams_cpr->maxRangeRadius-1; + unsigned int accIntervals = 2*(i+1); + unsigned int powerOf2 = roundUpToPowerOf2(accIntervals); + + if(powerOf2<32) + powerOf2 = 32; + + free(intervals); + //printf("confparams_cpr->maxRangeRadius = %d, accIntervals=%d, powerOf2=%d\n", confparams_cpr->maxRangeRadius, accIntervals, powerOf2); + return powerOf2; +} + +unsigned int optimize_intervals_uint32_3D(uint32_t *oriData, size_t r1, size_t r2, size_t r3, double realPrecision) +{ + size_t i,j,k, index; + size_t radiusIndex; + size_t r23=r2*r3; + int64_t pred_value = 0, pred_err; + size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t)); + memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t)); + size_t totalSampleSize = (r1-1)*(r2-1)*(r3-1)/confparams_cpr->sampleDistance; + for(i=1;isampleDistance==0) + { + index = i*r23+j*r3+k; + pred_value = oriData[index-1] + oriData[index-r3] + oriData[index-r23] + - oriData[index-1-r23] - oriData[index-r3-1] - oriData[index-r3-r23] + oriData[index-r3-r23-1]; + pred_err = llabs(pred_value - oriData[index]); + radiusIndex = (pred_err/realPrecision+1)/2; + if(radiusIndex>=confparams_cpr->maxRangeRadius) + { + radiusIndex = confparams_cpr->maxRangeRadius - 1; + //printf("radiusIndex=%d\n", radiusIndex); + } + intervals[radiusIndex]++; + } + } + } + } + //compute the appropriate 
number + size_t targetCount = totalSampleSize*confparams_cpr->predThreshold; + size_t sum = 0; + for(i=0;imaxRangeRadius;i++) + { + sum += intervals[i]; + if(sum>targetCount) + break; + } + if(i>=confparams_cpr->maxRangeRadius) + i = confparams_cpr->maxRangeRadius-1; + unsigned int accIntervals = 2*(i+1); + unsigned int powerOf2 = roundUpToPowerOf2(accIntervals); + + if(powerOf2<32) + powerOf2 = 32; + + free(intervals); + //printf("targetCount=%d, sum=%d, totalSampleSize=%d, ratio=%f, accIntervals=%d, powerOf2=%d\n", targetCount, sum, totalSampleSize, (double)sum/(double)totalSampleSize, accIntervals, powerOf2); + return powerOf2; +} + + +unsigned int optimize_intervals_uint32_4D(uint32_t *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision) +{ + size_t i,j,k,l, index; + size_t radiusIndex; + size_t r234=r2*r3*r4; + size_t r34=r3*r4; + int64_t pred_value = 0, pred_err; + size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t)); + memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t)); + size_t totalSampleSize = (r1-1)*(r2-1)*(r3-1)*(r4-1)/confparams_cpr->sampleDistance; + for(i=1;isampleDistance==0) + { + index = i*r234+j*r34+k*r4+l; + pred_value = oriData[index-1] + oriData[index-r3] + oriData[index-r34] + - oriData[index-1-r34] - oriData[index-r4-1] - oriData[index-r4-r34] + oriData[index-r4-r34-1]; + pred_err = llabs(pred_value - oriData[index]); + radiusIndex = (uint64_t)((pred_err/realPrecision+1)/2); + if(radiusIndex>=confparams_cpr->maxRangeRadius) + radiusIndex = confparams_cpr->maxRangeRadius - 1; + intervals[radiusIndex]++; + } + } + } + } + } + //compute the appropriate number + size_t targetCount = totalSampleSize*confparams_cpr->predThreshold; + size_t sum = 0; + for(i=0;imaxRangeRadius;i++) + { + sum += intervals[i]; + if(sum>targetCount) + break; + } + if(i>=confparams_cpr->maxRangeRadius) + i = confparams_cpr->maxRangeRadius-1; + + unsigned int accIntervals = 2*(i+1); + unsigned int 
powerOf2 = roundUpToPowerOf2(accIntervals); + + if(powerOf2<32) + powerOf2 = 32; + + free(intervals); + return powerOf2; +} + +TightDataPointStorageI* SZ_compress_uint32_1D_MDQ(uint32_t *oriData, size_t dataLength, double realPrecision, int64_t valueRangeSize, int64_t minValue) +{ + unsigned char bytes[8] = {0,0,0,0,0,0,0,0}; + int byteSize = computeByteSizePerIntValue(valueRangeSize); + unsigned int quantization_intervals; + if(exe_params->optQuantMode==1) + quantization_intervals = optimize_intervals_uint32_1D(oriData, dataLength, realPrecision); + else + quantization_intervals = exe_params->intvCapacity; + updateQuantizationInfo(quantization_intervals); + size_t i; + + int* type = (int*) malloc(dataLength*sizeof(int)); + + uint32_t* spaceFillingValue = oriData; // + + DynamicByteArray *exactDataByteArray; + new_DBA(&exactDataByteArray, DynArrayInitLen); + + int64_t last3CmprsData[3] = {0,0,0}; + + //add the first data + type[0] = 0; + compressUInt32Value(spaceFillingValue[0], minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + listAdd_int(last3CmprsData, spaceFillingValue[0]); + + type[1] = 0; + compressUInt32Value(spaceFillingValue[1], minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + listAdd_int(last3CmprsData, spaceFillingValue[1]); + //printf("%.30G\n",last3CmprsData[0]); + + int state; + double checkRadius = (exe_params->intvCapacity-1)*realPrecision; + int64_t curData; + uint32_t pred, predAbsErr; + double interval = 2*realPrecision; + + for(i=2;i=pred) + { + type[i] = exe_params->intvRadius+state; + pred = pred + state*interval; + } + else //curDataintvRadius-state; + pred = pred - state*interval; + } +/* if(type[i]==0) + printf("err:type[%d]=0\n", i);*/ + listAdd_int(last3CmprsData, pred); + continue; + } + + //unpredictable data processing + type[i] = 0; + compressUInt32Value(curData, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + 
listAdd_int(last3CmprsData, curData); + }//end of for + + size_t exactDataNum = exactDataByteArray->size / byteSize; + + TightDataPointStorageI* tdps; + + new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize, + type, exactDataByteArray->array, exactDataByteArray->size, + realPrecision, minValue, quantization_intervals, SZ_UINT32); + +//sdi:Debug +/* int sum =0; + for(i=0;iarray has been released in free_TightDataPointStorageF(tdps); + + return tdps; +} + +void SZ_compress_args_uint32_StoreOriData(uint32_t* oriData, size_t dataLength, TightDataPointStorageI* tdps, +unsigned char** newByteData, size_t *outSize) +{ + int intSize=sizeof(uint32_t); + size_t k = 0, i; + tdps->isLossless = 1; + size_t totalByteLength = 3 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 1 + intSize*dataLength; + *newByteData = (unsigned char*)malloc(totalByteLength); + + unsigned char dsLengthBytes[8]; + for (i = 0; i < 3; i++)//3 + (*newByteData)[k++] = versionNumber[i]; + + if(exe_params->SZ_SIZE_TYPE==4)//1 + (*newByteData)[k++] = 16; //00010000 + else + (*newByteData)[k++] = 80; //01010000: 01000000 indicates the SZ_SIZE_TYPE=8 + + convertSZParamsToBytes(confparams_cpr, &((*newByteData)[k])); + k = k + MetaDataByteLength; + + sizeToBytes(dsLengthBytes,dataLength); //SZ_SIZE_TYPE: 4 or 8 + for (i = 0; i < exe_params->SZ_SIZE_TYPE; i++) + (*newByteData)[k++] = dsLengthBytes[i]; + + if(sysEndianType==BIG_ENDIAN_SYSTEM) + memcpy((*newByteData)+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE, oriData, dataLength*intSize); + else + { + unsigned char* p = (*newByteData)+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE; + for(i=0;i dataLength*sizeof(uint32_t)) + SZ_compress_args_uint32_StoreOriData(oriData, dataLength+2, tdps, newByteData, outSize); + free_TightDataPointStorageI(tdps); +} + +TightDataPointStorageI* SZ_compress_uint32_2D_MDQ(uint32_t *oriData, size_t r1, size_t r2, double realPrecision, int64_t valueRangeSize, int64_t minValue) +{ + unsigned char bytes[8] = 
{0,0,0,0,0,0,0,0}; + int byteSize = computeByteSizePerIntValue(valueRangeSize); + + unsigned int quantization_intervals; + if(exe_params->optQuantMode==1) + { + quantization_intervals = optimize_intervals_uint32_2D(oriData, r1, r2, realPrecision); + updateQuantizationInfo(quantization_intervals); + } + else + quantization_intervals = exe_params->intvCapacity; + size_t i,j; + int64_t pred1D, pred2D, curValue; + int64_t diff = 0.0; + double itvNum = 0; + uint32_t *P0, *P1; + + size_t dataLength = r1*r2; + + P0 = (uint32_t*)malloc(r2*sizeof(uint32_t)); + memset(P0, 0, r2*sizeof(uint32_t)); + P1 = (uint32_t*)malloc(r2*sizeof(uint32_t)); + memset(P1, 0, r2*sizeof(uint32_t)); + + int* type = (int*) malloc(dataLength*sizeof(int)); + //type[dataLength]=0; + + uint32_t* spaceFillingValue = oriData; // + + DynamicByteArray *exactDataByteArray; + new_DBA(&exactDataByteArray, DynArrayInitLen); + + type[0] = 0; + curValue = P1[0] = spaceFillingValue[0]; + compressUInt32Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + + /* Process Row-0 data 1*/ + pred1D = P1[0]; + diff = spaceFillingValue[1] - pred1D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[1] = (int) (itvNum/2) + exe_params->intvRadius; + P1[1] = pred1D + 2 * (type[1] - exe_params->intvRadius) * realPrecision; + } + else + { + type[1] = 0; + curValue = P1[1] = spaceFillingValue[1]; + compressUInt32Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + + /* Process Row-0 data 2 --> data r2-1 */ + for (j = 2; j < r2; j++) + { + pred1D = 2*P1[j-1] - P1[j-2]; + diff = spaceFillingValue[j] - pred1D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[j] = (int) (itvNum/2) + exe_params->intvRadius; + P1[j] = pred1D + 2 * (type[j] - exe_params->intvRadius) * 
realPrecision; + } + else + { + type[j] = 0; + curValue = P1[j] = spaceFillingValue[j]; + compressUInt32Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + } + + /* Process Row-1 --> Row-r1-1 */ + size_t index; + for (i = 1; i < r1; i++) + { + /* Process row-i data 0 */ + index = i*r2; + pred1D = P1[0]; + diff = spaceFillingValue[index] - pred1D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P0[0] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + } + else + { + type[index] = 0; + curValue = P0[0] = spaceFillingValue[index]; + compressUInt32Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + + /* Process row-i data 1 --> r2-1*/ + for (j = 1; j < r2; j++) + { + index = i*r2+j; + pred2D = P0[j-1] + P1[j] - P1[j-1]; + + diff = spaceFillingValue[index] - pred2D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P0[j] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + } + else + { + type[index] = 0; + curValue = P0[j] = spaceFillingValue[index]; + compressUInt32Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + } + + uint32_t *Pt; + Pt = P1; + P1 = P0; + P0 = Pt; + } + + if(r2!=1) + free(P0); + free(P1); + + size_t exactDataNum = exactDataByteArray->size; + + TightDataPointStorageI* tdps; + + new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize, + type, exactDataByteArray->array, exactDataByteArray->size, + realPrecision, minValue, quantization_intervals, SZ_UINT32); + + //free memory + free(type); + free(exactDataByteArray); //exactDataByteArray->array has been released in 
free_TightDataPointStorageF(tdps); + + return tdps; +} + +/** + * + * Note: @r1 is high dimension + * @r2 is low dimension + * */ +void SZ_compress_args_uint32_NoCkRngeNoGzip_2D(unsigned char** newByteData, uint32_t *oriData, size_t r1, size_t r2, double realPrecision, size_t *outSize, +int64_t valueRangeSize, uint32_t minValue) +{ + TightDataPointStorageI* tdps = SZ_compress_uint32_2D_MDQ(oriData, r1, r2, realPrecision, valueRangeSize, minValue); + + convertTDPStoFlatBytes_int(tdps, newByteData, outSize); + + size_t dataLength = r1*r2; + if(*outSize>dataLength*sizeof(uint32_t)) + SZ_compress_args_uint32_StoreOriData(oriData, dataLength, tdps, newByteData, outSize); + + free_TightDataPointStorageI(tdps); +} + +TightDataPointStorageI* SZ_compress_uint32_3D_MDQ(uint32_t *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, int64_t valueRangeSize, int64_t minValue) +{ + unsigned char bytes[8] = {0,0,0,0,0,0,0,0}; + int byteSize = computeByteSizePerIntValue(valueRangeSize); + + unsigned int quantization_intervals; + if(exe_params->optQuantMode==1) + { + quantization_intervals = optimize_intervals_uint32_3D(oriData, r1, r2, r3, realPrecision); + updateQuantizationInfo(quantization_intervals); + } + else + quantization_intervals = exe_params->intvCapacity; + size_t i,j,k; + int64_t pred1D, pred2D, pred3D, curValue; + int64_t diff = 0.0; + double itvNum = 0; + uint32_t *P0, *P1; + + size_t dataLength = r1*r2*r3; + + size_t r23 = r2*r3; + P0 = (uint32_t*)malloc(r23*sizeof(uint32_t)); + P1 = (uint32_t*)malloc(r23*sizeof(uint32_t)); + + int* type = (int*) malloc(dataLength*sizeof(int)); + + uint32_t* spaceFillingValue = oriData; // + + DynamicByteArray *exactDataByteArray; + new_DBA(&exactDataByteArray, DynArrayInitLen); + + type[0] = 0; + P1[0] = spaceFillingValue[0]; + compressUInt32Value(spaceFillingValue[0], minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + + /* Process Row-0 data 1*/ + pred1D = P1[0]; + diff = 
spaceFillingValue[1] - pred1D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[1] = (int) (itvNum/2) + exe_params->intvRadius; + P1[1] = pred1D + 2 * (type[1] - exe_params->intvRadius) * realPrecision; + } + else + { + type[1] = 0; + curValue = P1[1] = spaceFillingValue[1]; + compressUInt32Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + + /* Process Row-0 data 2 --> data r3-1 */ + for (j = 2; j < r3; j++) + { + pred1D = 2*P1[j-1] - P1[j-2]; + diff = spaceFillingValue[j] - pred1D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[j] = (int) (itvNum/2) + exe_params->intvRadius; + P1[j] = pred1D + 2 * (type[j] - exe_params->intvRadius) * realPrecision; + } + else + { + type[j] = 0; + curValue = P1[j] = spaceFillingValue[j]; + compressUInt32Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + } + + /* Process Row-1 --> Row-r2-1 */ + size_t index; + for (i = 1; i < r2; i++) + { + /* Process row-i data 0 */ + index = i*r3; + pred1D = P1[index-r3]; + diff = spaceFillingValue[index] - pred1D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P1[index] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + } + else + { + type[index] = 0; + curValue = P1[index] = spaceFillingValue[index]; + compressUInt32Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + + /* Process row-i data 1 --> data r3-1*/ + for (j = 1; j < r3; j++) + { + index = i*r3+j; + pred2D = P1[index-1] + P1[index-r3] - P1[index-r3-1]; + + diff = spaceFillingValue[index] - pred2D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum 
< exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P1[index] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + } + else + { + type[index] = 0; + curValue = P1[index] = spaceFillingValue[index]; + compressUInt32Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + } + } + + + /////////////////////////// Process layer-1 --> layer-r1-1 /////////////////////////// + + for (k = 1; k < r1; k++) + { + /* Process Row-0 data 0*/ + index = k*r23; + pred1D = P1[0]; + diff = spaceFillingValue[index] - pred1D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P0[0] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + } + else + { + type[index] = 0; + curValue = P0[0] = spaceFillingValue[index]; + compressUInt32Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + + + /* Process Row-0 data 1 --> data r3-1 */ + for (j = 1; j < r3; j++) + { + //index = k*r2*r3+j; + index ++; + pred2D = P0[j-1] + P1[j] - P1[j-1]; + diff = spaceFillingValue[index] - pred2D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P0[j] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; +/* if(type[index]==0) + printf("err:type[%d]=0, index4\n", index); */ + } + else + { + type[index] = 0; + curValue = P0[j] = spaceFillingValue[index]; + compressUInt32Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + } + + /* Process Row-1 --> Row-r2-1 */ + size_t index2D; + for (i = 1; i < r2; i++) + { + /* Process Row-i data 0 */ + index = k*r23 + i*r3; + index2D = 
i*r3; + pred2D = P0[index2D-r3] + P1[index2D] - P1[index2D-r3]; + diff = spaceFillingValue[index] - pred2D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P0[index2D] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + } + else + { + type[index] = 0; + curValue = P0[index2D] = spaceFillingValue[index]; + compressUInt32Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + + /* Process Row-i data 1 --> data r3-1 */ + for (j = 1; j < r3; j++) + { +// if(k==63&&i==43&&j==27) +// printf("i=%d\n", i); + //index = k*r2*r3 + i*r3 + j; + index ++; + index2D = i*r3 + j; + pred3D = P0[index2D-1] + P0[index2D-r3]+ P1[index2D] - P0[index2D-r3-1] - P1[index2D-r3] - P1[index2D-1] + P1[index2D-r3-1]; + diff = spaceFillingValue[index] - pred3D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P0[index2D] = pred3D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + } + else + { + type[index] = 0; + curValue = P0[index2D] = spaceFillingValue[index]; + compressUInt32Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + } + } + + uint32_t *Pt; + Pt = P1; + P1 = P0; + P0 = Pt; + } + if(r23!=1) + free(P0); + free(P1); + + size_t exactDataNum = exactDataByteArray->size; + + TightDataPointStorageI* tdps; + + new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize, + type, exactDataByteArray->array, exactDataByteArray->size, + realPrecision, minValue, quantization_intervals, SZ_UINT32); + + //free memory + free(type); + free(exactDataByteArray); //exactDataByteArray->array has been released in free_TightDataPointStorageF(tdps); + + return tdps; +} + + +void 
SZ_compress_args_uint32_NoCkRngeNoGzip_3D(unsigned char** newByteData, uint32_t *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t *outSize, +int64_t valueRangeSize, int64_t minValue) +{ + TightDataPointStorageI* tdps = SZ_compress_uint32_3D_MDQ(oriData, r1, r2, r3, realPrecision, valueRangeSize, minValue); + + convertTDPStoFlatBytes_int(tdps, newByteData, outSize); + + size_t dataLength = r1*r2*r3; + if(*outSize>dataLength*sizeof(uint32_t)) + SZ_compress_args_uint32_StoreOriData(oriData, dataLength, tdps, newByteData, outSize); + + free_TightDataPointStorageI(tdps); +} + + +TightDataPointStorageI* SZ_compress_uint32_4D_MDQ(uint32_t *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision, int64_t valueRangeSize, int64_t minValue) +{ + unsigned char bytes[8] = {0,0,0,0,0,0,0,0}; + int byteSize = computeByteSizePerIntValue(valueRangeSize); + + unsigned int quantization_intervals; + if(exe_params->optQuantMode==1) + { + quantization_intervals = optimize_intervals_uint32_4D(oriData, r1, r2, r3, r4, realPrecision); + updateQuantizationInfo(quantization_intervals); + } + else + quantization_intervals = exe_params->intvCapacity; + size_t i,j,k; + int64_t pred1D, pred2D, pred3D, curValue; + int64_t diff = 0.0; + double itvNum = 0; + uint32_t *P0, *P1; + + size_t dataLength = r1*r2*r3*r4; + + size_t r234 = r2*r3*r4; + size_t r34 = r3*r4; + + P0 = (uint32_t*)malloc(r34*sizeof(uint32_t)); + P1 = (uint32_t*)malloc(r34*sizeof(uint32_t)); + + int* type = (int*) malloc(dataLength*sizeof(int)); + + uint32_t* spaceFillingValue = oriData; // + + DynamicByteArray *exactDataByteArray; + new_DBA(&exactDataByteArray, DynArrayInitLen); + + size_t l; + for (l = 0; l < r1; l++) + { + + /////////////////////////// Process layer-0 /////////////////////////// + /* Process Row-0 data 0*/ + size_t index = l*r234; + size_t index2D = 0; + + type[index] = 0; + curValue = P1[index2D] = spaceFillingValue[index]; + compressUInt32Value(curValue, minValue, 
byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + + /* Process Row-0 data 1*/ + index = l*r234+1; + index2D = 1; + + pred1D = P1[index2D-1]; + diff = curValue - pred1D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P1[index2D] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + } + else + { + type[index] = 0; + + curValue = P1[index2D] = spaceFillingValue[0]; + compressUInt32Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + + /* Process Row-0 data 2 --> data r4-1 */ + for (j = 2; j < r4; j++) + { + index = l*r234+j; + index2D = j; + + pred1D = 2*P1[index2D-1] - P1[index2D-2]; + diff = spaceFillingValue[index] - pred1D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P1[index2D] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + } + else + { + type[index] = 0; + + curValue = P1[index2D] = spaceFillingValue[0]; + compressUInt32Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + } + + /* Process Row-1 --> Row-r3-1 */ + for (i = 1; i < r3; i++) + { + /* Process row-i data 0 */ + index = l*r234+i*r4; + index2D = i*r4; + + pred1D = P1[index2D-r4]; + diff = spaceFillingValue[index] - pred1D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P1[index2D] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + } + else + { + type[index] = 0; + + curValue = P1[index2D] = spaceFillingValue[0]; + compressUInt32Value(curValue, minValue, byteSize, bytes); + 
memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + + /* Process row-i data 1 --> data r4-1*/ + for (j = 1; j < r4; j++) + { + index = l*r234+i*r4+j; + index2D = i*r4+j; + + pred2D = P1[index2D-1] + P1[index2D-r4] - P1[index2D-r4-1]; + + diff = spaceFillingValue[index] - pred2D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P1[index2D] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + } + else + { + type[index] = 0; + + curValue = P1[index2D] = spaceFillingValue[0]; + compressUInt32Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + } + } + + + /////////////////////////// Process layer-1 --> layer-r2-1 /////////////////////////// + + for (k = 1; k < r2; k++) + { + /* Process Row-0 data 0*/ + index = l*r234+k*r34; + index2D = 0; + + pred1D = P1[index2D]; + diff = spaceFillingValue[index] - pred1D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P0[index2D] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + } + else + { + type[index] = 0; + + curValue = P0[index2D] = spaceFillingValue[0]; + compressUInt32Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + + /* Process Row-0 data 1 --> data r4-1 */ + for (j = 1; j < r4; j++) + { + index = l*r234+k*r34+j; + index2D = j; + + pred2D = P0[index2D-1] + P1[index2D] - P1[index2D-1]; + diff = spaceFillingValue[index] - pred2D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P0[index2D] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + } + 
else + { + type[index] = 0; + + curValue = P0[index2D] = spaceFillingValue[0]; + compressUInt32Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + } + + /* Process Row-1 --> Row-r3-1 */ + for (i = 1; i < r3; i++) + { + /* Process Row-i data 0 */ + index = l*r234+k*r34+i*r4; + index2D = i*r4; + + pred2D = P0[index2D-r4] + P1[index2D] - P1[index2D-r4]; + diff = spaceFillingValue[index] - pred2D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P0[index2D] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + } + else + { + type[index] = 0; + + curValue = P0[index2D] = spaceFillingValue[0]; + compressUInt32Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + + /* Process Row-i data 1 --> data r4-1 */ + for (j = 1; j < r4; j++) + { + index = l*r234+k*r34+i*r4+j; + index2D = i*r4+j; + + pred3D = P0[index2D-1] + P0[index2D-r4]+ P1[index2D] - P0[index2D-r4-1] - P1[index2D-r4] - P1[index2D-1] + P1[index2D-r4-1]; + diff = spaceFillingValue[index] - pred3D; + + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P0[index2D] = pred3D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + } + else + { + type[index] = 0; + + curValue = P0[index2D] = spaceFillingValue[0]; + compressUInt32Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + } + } + + uint32_t *Pt; + Pt = P1; + P1 = P0; + P0 = Pt; + } + } + + free(P0); + free(P1); + + size_t exactDataNum = exactDataByteArray->size; + + TightDataPointStorageI* tdps; + + new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize, + type, exactDataByteArray->array, 
exactDataByteArray->size, + realPrecision, minValue, quantization_intervals, SZ_UINT32); + + //free memory + free(type); + free(exactDataByteArray); //exactDataByteArray->array has been released in free_TightDataPointStorageF(tdps); + + return tdps; +} + +void SZ_compress_args_uint32_NoCkRngeNoGzip_4D(unsigned char** newByteData, uint32_t *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision, +size_t *outSize, int64_t valueRangeSize, int64_t minValue) +{ + TightDataPointStorageI* tdps = SZ_compress_uint32_4D_MDQ(oriData, r1, r2, r3, r4, realPrecision, valueRangeSize, minValue); + + convertTDPStoFlatBytes_int(tdps, newByteData, outSize); + + size_t dataLength = r1*r2*r3*r4; + if(*outSize>dataLength*sizeof(uint32_t)) + SZ_compress_args_uint32_StoreOriData(oriData, dataLength, tdps, newByteData, outSize); + + free_TightDataPointStorageI(tdps); +} + +void SZ_compress_args_uint32_withinRange(unsigned char** newByteData, uint32_t *oriData, size_t dataLength, size_t *outSize) +{ + TightDataPointStorageI* tdps = (TightDataPointStorageI*) malloc(sizeof(TightDataPointStorageI)); + tdps->typeArray = NULL; + + tdps->allSameData = 1; + tdps->dataSeriesLength = dataLength; + tdps->exactDataBytes = (unsigned char*)malloc(sizeof(unsigned char)*4); + tdps->isLossless = 0; + //tdps->exactByteSize = 4; + tdps->exactDataNum = 1; + tdps->exactDataBytes_size = 4; + + uint32_t value = oriData[0]; + int32ToBytes_bigEndian(tdps->exactDataBytes, value); + + size_t tmpOutSize; + convertTDPStoFlatBytes_int(tdps, newByteData, &tmpOutSize); + + *outSize = tmpOutSize;//3+1+sizeof(uint32_t)+SZ_SIZE_TYPE; //8==3+1+4(uint32_size) + free_TightDataPointStorageI(tdps); +} + +int SZ_compress_args_uint32_wRngeNoGzip(unsigned char** newByteData, uint32_t *oriData, +size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize, +int errBoundMode, double absErr_Bound, double relBoundRatio) +{ + int status = SZ_SCES; + size_t dataLength = computeDataLength(r5,r4,r3,r2,r1); + 
int64_t valueRangeSize = 0; + + uint32_t minValue = computeRangeSize_int(oriData, SZ_UINT32, dataLength, &valueRangeSize); + double realPrecision = getRealPrecision_int(valueRangeSize, errBoundMode, absErr_Bound, relBoundRatio, &status); + + if(valueRangeSize <= realPrecision) + { + SZ_compress_args_uint32_withinRange(newByteData, oriData, dataLength, outSize); + } + else + { +// SZ_compress_args_uint32_NoCkRngeNoGzip_2D(newByteData, oriData, r2, r1, realPrecision, outSize); + if(r5==0&&r4==0&&r3==0&&r2==0) + { + SZ_compress_args_uint32_NoCkRngeNoGzip_1D(newByteData, oriData, r1, realPrecision, outSize, valueRangeSize, minValue); + } + else if(r5==0&&r4==0&&r3==0) + { + SZ_compress_args_uint32_NoCkRngeNoGzip_2D(newByteData, oriData, r2, r1, realPrecision, outSize, valueRangeSize, minValue); + } + else if(r5==0&&r4==0) + { + SZ_compress_args_uint32_NoCkRngeNoGzip_3D(newByteData, oriData, r3, r2, r1, realPrecision, outSize, valueRangeSize, minValue); + } + else if(r5==0) + { + SZ_compress_args_uint32_NoCkRngeNoGzip_3D(newByteData, oriData, r4*r3, r2, r1, realPrecision, outSize, valueRangeSize, minValue); + } + } + return status; +} + +int SZ_compress_args_uint32(unsigned char** newByteData, uint32_t *oriData, +size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize, +int errBoundMode, double absErr_Bound, double relBoundRatio) +{ + confparams_cpr->errorBoundMode = errBoundMode; + + if(errBoundMode>=PW_REL) + { + printf("Error: Current SZ version doesn't support integer data compression with point-wise relative error bound being based on pwrType=AVG\n"); + exit(0); + return SZ_NSCS; + } + int status = SZ_SCES; + size_t dataLength = computeDataLength(r5,r4,r3,r2,r1); + int64_t valueRangeSize = 0; + + uint32_t minValue = (uint32_t)computeRangeSize_int(oriData, SZ_UINT32, dataLength, &valueRangeSize); + double realPrecision = 0; + + if(confparams_cpr->errorBoundMode==PSNR) + { + confparams_cpr->errorBoundMode = ABS; + realPrecision = 
confparams_cpr->absErrBound = computeABSErrBoundFromPSNR(confparams_cpr->psnr, (double)confparams_cpr->predThreshold, (double)valueRangeSize); + //printf("realPrecision=%lf\n", realPrecision); + } + else + realPrecision = getRealPrecision_int(valueRangeSize, errBoundMode, absErr_Bound, relBoundRatio, &status); + + if(valueRangeSize <= realPrecision) + { + SZ_compress_args_uint32_withinRange(newByteData, oriData, dataLength, outSize); + } + else + { + size_t tmpOutSize = 0; + unsigned char* tmpByteData; + if (r2==0) + { + SZ_compress_args_uint32_NoCkRngeNoGzip_1D(&tmpByteData, oriData, r1, realPrecision, &tmpOutSize, valueRangeSize, minValue); + } + else + if (r3==0) + { + SZ_compress_args_uint32_NoCkRngeNoGzip_2D(&tmpByteData, oriData, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, minValue); + } + else + if (r4==0) + { + SZ_compress_args_uint32_NoCkRngeNoGzip_3D(&tmpByteData, oriData, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, minValue); + } + else + if (r5==0) + { + SZ_compress_args_uint32_NoCkRngeNoGzip_4D(&tmpByteData, oriData, r4, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, minValue); + } + else + { + printf("Error: doesn't support 5 dimensions for now.\n"); + status = SZ_DERR; //dimension error + } + //Call Gzip to do the further compression. 
+ if(confparams_cpr->szMode==SZ_BEST_SPEED) + { + *outSize = tmpOutSize; + *newByteData = tmpByteData; + } + else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION || confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION) + { + *outSize = sz_lossless_compress(confparams_cpr->losslessCompressor, confparams_cpr->gzipMode, tmpByteData, tmpOutSize, newByteData); + free(tmpByteData); + } + else + { + printf("Error: Wrong setting of confparams_cpr->szMode in the uint32_t compression.\n"); + status = SZ_MERR; //mode error + } + } + + return status; +} diff --git a/deps/SZ/sz/src/sz_uint64.c b/deps/SZ/sz/src/sz_uint64.c new file mode 100644 index 0000000000000000000000000000000000000000..7d2eca843f9205a5b3704d49e5da64a67f315fb9 --- /dev/null +++ b/deps/SZ/sz/src/sz_uint64.c @@ -0,0 +1,1268 @@ +/** + * @file sz_uint64.c + * @author Sheng Di + * @date Aug, 2017 + * @brief sz_uint64, Compression and Decompression functions + * (C) 2017 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. 
+ */ + + +#include +#include +#include +#include +#include +#include "sz.h" +#include "CompressElement.h" +#include "DynamicByteArray.h" +#include "DynamicIntArray.h" +#include "zlib.h" +#include "rw.h" +#include "TightDataPointStorageI.h" +#include "sz_uint64.h" +#include "utility.h" + +unsigned int optimize_intervals_uint64_1D(uint64_t *oriData, size_t dataLength, double realPrecision) +{ + size_t i = 0, radiusIndex; + int64_t pred_value = 0, pred_err; + size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t)); + memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t)); + size_t totalSampleSize = dataLength/confparams_cpr->sampleDistance; + for(i=2;isampleDistance==0) + { + //pred_value = 2*oriData[i-1] - oriData[i-2]; + pred_value = oriData[i-1]; + pred_err = llabs(pred_value - (int64_t)(oriData[i])); + radiusIndex = (uint64_t)((pred_err/realPrecision+1)/2); + if(radiusIndex>=confparams_cpr->maxRangeRadius) + radiusIndex = confparams_cpr->maxRangeRadius - 1; + intervals[radiusIndex]++; + } + } + //compute the appropriate number + size_t targetCount = totalSampleSize*confparams_cpr->predThreshold; + size_t sum = 0; + for(i=0;imaxRangeRadius;i++) + { + sum += intervals[i]; + if(sum>targetCount) + break; + } + if(i>=confparams_cpr->maxRangeRadius) + i = confparams_cpr->maxRangeRadius-1; + + unsigned int accIntervals = 2*(i+1); + unsigned int powerOf2 = roundUpToPowerOf2(accIntervals); + + if(powerOf2<32) + powerOf2 = 32; + + free(intervals); + //printf("accIntervals=%d, powerOf2=%d\n", accIntervals, powerOf2); + return powerOf2; +} + +unsigned int optimize_intervals_uint64_2D(uint64_t *oriData, size_t r1, size_t r2, double realPrecision) +{ + size_t i,j, index; + size_t radiusIndex; + int64_t pred_value = 0, pred_err; + size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t)); + memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t)); + size_t totalSampleSize = 
(r1-1)*(r2-1)/confparams_cpr->sampleDistance; + for(i=1;isampleDistance==0) + { + index = i*r2+j; + pred_value = oriData[index-1] + oriData[index-r2] - oriData[index-r2-1]; + pred_err = llabs(pred_value - (int64_t)(oriData[index])); + radiusIndex = (uint64_t)((pred_err/realPrecision+1)/2); + if(radiusIndex>=confparams_cpr->maxRangeRadius) + radiusIndex = confparams_cpr->maxRangeRadius - 1; + intervals[radiusIndex]++; + } + } + } + //compute the appropriate number + size_t targetCount = totalSampleSize*confparams_cpr->predThreshold; + size_t sum = 0; + for(i=0;imaxRangeRadius;i++) + { + sum += intervals[i]; + if(sum>targetCount) + break; + } + if(i>=confparams_cpr->maxRangeRadius) + i = confparams_cpr->maxRangeRadius-1; + unsigned int accIntervals = 2*(i+1); + unsigned int powerOf2 = roundUpToPowerOf2(accIntervals); + + if(powerOf2<32) + powerOf2 = 32; + + free(intervals); + //printf("confparams_cpr->maxRangeRadius = %d, accIntervals=%d, powerOf2=%d\n", confparams_cpr->maxRangeRadius, accIntervals, powerOf2); + return powerOf2; +} + +unsigned int optimize_intervals_uint64_3D(uint64_t *oriData, size_t r1, size_t r2, size_t r3, double realPrecision) +{ + size_t i,j,k, index; + size_t radiusIndex; + size_t r23=r2*r3; + int64_t pred_value = 0, pred_err; + size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t)); + memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t)); + size_t totalSampleSize = (r1-1)*(r2-1)*(r3-1)/confparams_cpr->sampleDistance; + for(i=1;isampleDistance==0) + { + index = i*r23+j*r3+k; + pred_value = oriData[index-1] + oriData[index-r3] + oriData[index-r23] + - oriData[index-1-r23] - oriData[index-r3-1] - oriData[index-r3-r23] + oriData[index-r3-r23-1]; + pred_err = llabs(pred_value - (int64_t)(oriData[index])); + radiusIndex = (pred_err/realPrecision+1)/2; + if(radiusIndex>=confparams_cpr->maxRangeRadius) + { + radiusIndex = confparams_cpr->maxRangeRadius - 1; + //printf("radiusIndex=%d\n", radiusIndex); + } + 
intervals[radiusIndex]++; + } + } + } + } + //compute the appropriate number + size_t targetCount = totalSampleSize*confparams_cpr->predThreshold; + size_t sum = 0; + for(i=0;imaxRangeRadius;i++) + { + sum += intervals[i]; + if(sum>targetCount) + break; + } + if(i>=confparams_cpr->maxRangeRadius) + i = confparams_cpr->maxRangeRadius-1; + unsigned int accIntervals = 2*(i+1); + unsigned int powerOf2 = roundUpToPowerOf2(accIntervals); + + if(powerOf2<32) + powerOf2 = 32; + + free(intervals); + //printf("targetCount=%d, sum=%d, totalSampleSize=%d, ratio=%f, accIntervals=%d, powerOf2=%d\n", targetCount, sum, totalSampleSize, (double)sum/(double)totalSampleSize, accIntervals, powerOf2); + return powerOf2; +} + + +unsigned int optimize_intervals_uint64_4D(uint64_t *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision) +{ + size_t i,j,k,l, index; + size_t radiusIndex; + size_t r234=r2*r3*r4; + size_t r34=r3*r4; + int64_t pred_value = 0, pred_err; + size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t)); + memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t)); + size_t totalSampleSize = (r1-1)*(r2-1)*(r3-1)*(r4-1)/confparams_cpr->sampleDistance; + for(i=1;isampleDistance==0) + { + index = i*r234+j*r34+k*r4+l; + pred_value = oriData[index-1] + oriData[index-r3] + oriData[index-r34] + - oriData[index-1-r34] - oriData[index-r4-1] - oriData[index-r4-r34] + oriData[index-r4-r34-1]; + pred_err = llabs(pred_value - (int64_t)(oriData[index])); + radiusIndex = (uint64_t)((pred_err/realPrecision+1)/2); + if(radiusIndex>=confparams_cpr->maxRangeRadius) + radiusIndex = confparams_cpr->maxRangeRadius - 1; + intervals[radiusIndex]++; + } + } + } + } + } + //compute the appropriate number + size_t targetCount = totalSampleSize*confparams_cpr->predThreshold; + size_t sum = 0; + for(i=0;imaxRangeRadius;i++) + { + sum += intervals[i]; + if(sum>targetCount) + break; + } + if(i>=confparams_cpr->maxRangeRadius) + i = 
confparams_cpr->maxRangeRadius-1; + + unsigned int accIntervals = 2*(i+1); + unsigned int powerOf2 = roundUpToPowerOf2(accIntervals); + + if(powerOf2<32) + powerOf2 = 32; + + free(intervals); + return powerOf2; +} + +TightDataPointStorageI* SZ_compress_uint64_1D_MDQ(uint64_t *oriData, size_t dataLength, double realPrecision, uint64_t valueRangeSize, uint64_t minValue) +{ + unsigned char bytes[8] = {0,0,0,0,0,0,0,0}; + int byteSize = computeByteSizePerIntValue(valueRangeSize); + unsigned int quantization_intervals; + if(exe_params->optQuantMode==1) + quantization_intervals = optimize_intervals_uint64_1D(oriData, dataLength, realPrecision); + else + quantization_intervals = exe_params->intvCapacity; + updateQuantizationInfo(quantization_intervals); + size_t i; + + int* type = (int*) malloc(dataLength*sizeof(int)); + + uint64_t* spaceFillingValue = oriData; // + + DynamicByteArray *exactDataByteArray; + new_DBA(&exactDataByteArray, DynArrayInitLen); + + int64_t last3CmprsData[3] = {0,0,0}; + + //add the first data + type[0] = 0; + compressUInt64Value(spaceFillingValue[0], minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + listAdd_int(last3CmprsData, spaceFillingValue[0]); + + type[1] = 0; + compressUInt64Value(spaceFillingValue[1], minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + listAdd_int(last3CmprsData, spaceFillingValue[1]); + //printf("%.30G\n",last3CmprsData[0]); + + int state; + double checkRadius = (exe_params->intvCapacity-1)*realPrecision; + int64_t curData; + int64_t pred, predAbsErr; + double interval = 2*realPrecision; + + for(i=2;i=pred) + { + type[i] = exe_params->intvRadius+state; + pred = pred + state*interval; + } + else //curDataintvRadius-state; + pred = pred - state*interval; + } +/* if(type[i]==0) + printf("err:type[%d]=0\n", i);*/ + listAdd_int(last3CmprsData, pred); + continue; + } + + //unpredictable data processing + type[i] = 0; + compressUInt64Value(curData, minValue, 
byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + listAdd_int(last3CmprsData, curData); + }//end of for + + size_t exactDataNum = exactDataByteArray->size / byteSize; + + TightDataPointStorageI* tdps; + + new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize, + type, exactDataByteArray->array, exactDataByteArray->size, + realPrecision, minValue, quantization_intervals, SZ_UINT64); + +//sdi:Debug +/* int sum =0; + for(i=0;iarray has been released in free_TightDataPointStorageF(tdps); + + return tdps; +} + +void SZ_compress_args_uint64_StoreOriData(uint64_t* oriData, size_t dataLength, TightDataPointStorageI* tdps, +unsigned char** newByteData, size_t *outSize) +{ + int intSize=sizeof(uint64_t); + size_t k = 0, i; + tdps->isLossless = 1; + size_t totalByteLength = 3 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 1 + intSize*dataLength; + *newByteData = (unsigned char*)malloc(totalByteLength); + + unsigned char dsLengthBytes[8]; + for (i = 0; i < 3; i++)//3 + (*newByteData)[k++] = versionNumber[i]; + + if(exe_params->SZ_SIZE_TYPE==4)//1 + (*newByteData)[k++] = 16; //00010000 + else + (*newByteData)[k++] = 80; //01010000: 01000000 indicates the SZ_SIZE_TYPE=8 + + convertSZParamsToBytes(confparams_cpr, &((*newByteData)[k])); + k = k + MetaDataByteLength; + + sizeToBytes(dsLengthBytes,dataLength); //SZ_SIZE_TYPE: 4 or 8 + for (i = 0; i < exe_params->SZ_SIZE_TYPE; i++) + (*newByteData)[k++] = dsLengthBytes[i]; + + if(sysEndianType==BIG_ENDIAN_SYSTEM) + memcpy((*newByteData)+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE, oriData, dataLength*intSize); + else + { + unsigned char* p = (*newByteData)+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE; + for(i=0;i dataLength*sizeof(uint64_t)) + SZ_compress_args_uint64_StoreOriData(oriData, dataLength+2, tdps, newByteData, outSize); + free_TightDataPointStorageI(tdps); +} + +TightDataPointStorageI* SZ_compress_uint64_2D_MDQ(uint64_t *oriData, size_t r1, size_t r2, double realPrecision, 
uint64_t valueRangeSize, uint64_t minValue) +{ + unsigned char bytes[8] = {0,0,0,0,0,0,0,0}; + int byteSize = computeByteSizePerIntValue(valueRangeSize); + + unsigned int quantization_intervals; + if(exe_params->optQuantMode==1) + { + quantization_intervals = optimize_intervals_uint64_2D(oriData, r1, r2, realPrecision); + updateQuantizationInfo(quantization_intervals); + } + else + quantization_intervals = exe_params->intvCapacity; + size_t i,j; + int64_t pred1D, pred2D, curValue; + int64_t diff = 0.0; + double itvNum = 0; + uint64_t *P0, *P1; + + size_t dataLength = r1*r2; + + P0 = (uint64_t*)malloc(r2*sizeof(uint64_t)); + memset(P0, 0, r2*sizeof(uint64_t)); + P1 = (uint64_t*)malloc(r2*sizeof(uint64_t)); + memset(P1, 0, r2*sizeof(uint64_t)); + + int* type = (int*) malloc(dataLength*sizeof(int)); + //type[dataLength]=0; + + uint64_t* spaceFillingValue = oriData; // + + DynamicByteArray *exactDataByteArray; + new_DBA(&exactDataByteArray, DynArrayInitLen); + + type[0] = 0; + curValue = P1[0] = spaceFillingValue[0]; + compressUInt64Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + + /* Process Row-0 data 1*/ + pred1D = P1[0]; + diff = (int64_t)(spaceFillingValue[1]) - (int64_t)(pred1D); + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[1] = (int) (itvNum/2) + exe_params->intvRadius; + P1[1] = pred1D + 2 * (type[1] - exe_params->intvRadius) * realPrecision; + } + else + { + type[1] = 0; + curValue = P1[1] = spaceFillingValue[1]; + compressUInt64Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + + /* Process Row-0 data 2 --> data r2-1 */ + for (j = 2; j < r2; j++) + { + pred1D = 2*P1[j-1] - P1[j-2]; + diff = (int64_t)(spaceFillingValue[j]) - (int64_t)(pred1D); + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + 
type[j] = (int) (itvNum/2) + exe_params->intvRadius; + P1[j] = pred1D + 2 * (type[j] - exe_params->intvRadius) * realPrecision; + } + else + { + type[j] = 0; + curValue = P1[j] = spaceFillingValue[j]; + compressUInt64Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + } + + /* Process Row-1 --> Row-r1-1 */ + size_t index; + for (i = 1; i < r1; i++) + { + /* Process row-i data 0 */ + index = i*r2; + pred1D = P1[0]; + diff = (int64_t)(spaceFillingValue[index]) - (int64_t)(pred1D); + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P0[0] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + } + else + { + type[index] = 0; + curValue = P0[0] = spaceFillingValue[index]; + compressUInt64Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + + /* Process row-i data 1 --> r2-1*/ + for (j = 1; j < r2; j++) + { + index = i*r2+j; + pred2D = P0[j-1] + P1[j] - P1[j-1]; + + diff = (int64_t)(spaceFillingValue[index]) - (int64_t)(pred2D); + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P0[j] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + } + else + { + type[index] = 0; + curValue = P0[j] = spaceFillingValue[index]; + compressUInt64Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + } + + uint64_t *Pt; + Pt = P1; + P1 = P0; + P0 = Pt; + } + + if(r2!=1) + free(P0); + free(P1); + + size_t exactDataNum = exactDataByteArray->size; + + TightDataPointStorageI* tdps; + + new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize, + type, exactDataByteArray->array, exactDataByteArray->size, + realPrecision, 
minValue, quantization_intervals, SZ_UINT64); + + //free memory + free(type); + free(exactDataByteArray); //exactDataByteArray->array has been released in free_TightDataPointStorageF(tdps); + + return tdps; +} + +/** + * + * Note: @r1 is high dimension + * @r2 is low dimension + * */ +void SZ_compress_args_uint64_NoCkRngeNoGzip_2D(unsigned char** newByteData, uint64_t *oriData, size_t r1, size_t r2, double realPrecision, size_t *outSize, +int64_t valueRangeSize, uint64_t minValue) +{ + TightDataPointStorageI* tdps = SZ_compress_uint64_2D_MDQ(oriData, r1, r2, realPrecision, valueRangeSize, minValue); + + convertTDPStoFlatBytes_int(tdps, newByteData, outSize); + + size_t dataLength = r1*r2; + if(*outSize>dataLength*sizeof(uint64_t)) + SZ_compress_args_uint64_StoreOriData(oriData, dataLength, tdps, newByteData, outSize); + + free_TightDataPointStorageI(tdps); +} + +TightDataPointStorageI* SZ_compress_uint64_3D_MDQ(uint64_t *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, uint64_t valueRangeSize, uint64_t minValue) +{ + unsigned char bytes[8] = {0,0,0,0,0,0,0,0}; + int byteSize = computeByteSizePerIntValue(valueRangeSize); + + unsigned int quantization_intervals; + if(exe_params->optQuantMode==1) + { + quantization_intervals = optimize_intervals_uint64_3D(oriData, r1, r2, r3, realPrecision); + updateQuantizationInfo(quantization_intervals); + } + else + quantization_intervals = exe_params->intvCapacity; + size_t i,j,k; + int64_t pred1D, pred2D, pred3D, curValue; + int64_t diff = 0.0; + double itvNum = 0; + uint64_t *P0, *P1; + + size_t dataLength = r1*r2*r3; + + size_t r23 = r2*r3; + P0 = (uint64_t*)malloc(r23*sizeof(uint64_t)); + P1 = (uint64_t*)malloc(r23*sizeof(uint64_t)); + + int* type = (int*) malloc(dataLength*sizeof(int)); + + uint64_t* spaceFillingValue = oriData; // + + DynamicByteArray *exactDataByteArray; + new_DBA(&exactDataByteArray, DynArrayInitLen); + + type[0] = 0; + P1[0] = spaceFillingValue[0]; + 
compressUInt64Value(spaceFillingValue[0], minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + + /* Process Row-0 data 1*/ + pred1D = P1[0]; + diff = (int64_t)(spaceFillingValue[1]) - (int64_t)(pred1D); + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[1] = (int) (itvNum/2) + exe_params->intvRadius; + P1[1] = pred1D + 2 * (type[1] - exe_params->intvRadius) * realPrecision; + } + else + { + type[1] = 0; + curValue = P1[1] = spaceFillingValue[1]; + compressUInt64Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + + /* Process Row-0 data 2 --> data r3-1 */ + for (j = 2; j < r3; j++) + { + pred1D = 2*P1[j-1] - P1[j-2]; + diff = (int64_t)(spaceFillingValue[j]) - (int64_t)(pred1D); + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[j] = (int) (itvNum/2) + exe_params->intvRadius; + P1[j] = pred1D + 2 * (type[j] - exe_params->intvRadius) * realPrecision; + } + else + { + type[j] = 0; + curValue = P1[j] = spaceFillingValue[j]; + compressUInt64Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + } + + /* Process Row-1 --> Row-r2-1 */ + size_t index; + for (i = 1; i < r2; i++) + { + /* Process row-i data 0 */ + index = i*r3; + pred1D = P1[index-r3]; + diff = (int64_t)(spaceFillingValue[index]) - (int64_t)(pred1D); + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P1[index] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + } + else + { + type[index] = 0; + curValue = P1[index] = spaceFillingValue[index]; + compressUInt64Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + + 
/* Process row-i data 1 --> data r3-1*/ + for (j = 1; j < r3; j++) + { + index = i*r3+j; + pred2D = P1[index-1] + P1[index-r3] - P1[index-r3-1]; + + diff = (int64_t)(spaceFillingValue[index]) - (int64_t)(pred2D); + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P1[index] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + } + else + { + type[index] = 0; + curValue = P1[index] = spaceFillingValue[index]; + compressUInt64Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + } + } + + + /////////////////////////// Process layer-1 --> layer-r1-1 /////////////////////////// + + for (k = 1; k < r1; k++) + { + /* Process Row-0 data 0*/ + index = k*r23; + pred1D = P1[0]; + diff = (int64_t)(spaceFillingValue[index]) - (int64_t)(pred1D); + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P0[0] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + } + else + { + type[index] = 0; + curValue = P0[0] = spaceFillingValue[index]; + compressUInt64Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + + + /* Process Row-0 data 1 --> data r3-1 */ + for (j = 1; j < r3; j++) + { + //index = k*r2*r3+j; + index ++; + pred2D = P0[j-1] + P1[j] - P1[j-1]; + diff = (int64_t)(spaceFillingValue[index]) - (int64_t)(pred2D); + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P0[j] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; +/* if(type[index]==0) + printf("err:type[%d]=0, index4\n", index); */ + } + else + { + type[index] = 0; + 
curValue = P0[j] = spaceFillingValue[index]; + compressUInt64Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + } + + /* Process Row-1 --> Row-r2-1 */ + size_t index2D; + for (i = 1; i < r2; i++) + { + /* Process Row-i data 0 */ + index = k*r23 + i*r3; + index2D = i*r3; + pred2D = P0[index2D-r3] + P1[index2D] - P1[index2D-r3]; + diff = (int64_t)(spaceFillingValue[index]) - (int64_t)(pred2D); + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P0[index2D] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + } + else + { + type[index] = 0; + curValue = P0[index2D] = spaceFillingValue[index]; + compressUInt64Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + + /* Process Row-i data 1 --> data r3-1 */ + for (j = 1; j < r3; j++) + { +// if(k==63&&i==43&&j==27) +// printf("i=%d\n", i); + //index = k*r2*r3 + i*r3 + j; + index ++; + index2D = i*r3 + j; + pred3D = P0[index2D-1] + P0[index2D-r3]+ P1[index2D] - P0[index2D-r3-1] - P1[index2D-r3] - P1[index2D-1] + P1[index2D-r3-1]; + diff = (int64_t)(spaceFillingValue[index]) - (int64_t)(pred3D); + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P0[index2D] = pred3D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + } + else + { + type[index] = 0; + curValue = P0[index2D] = spaceFillingValue[index]; + compressUInt64Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + } + } + + uint64_t *Pt; + Pt = P1; + P1 = P0; + P0 = Pt; + } + if(r23!=1) + free(P0); + free(P1); + + size_t exactDataNum = exactDataByteArray->size; + + TightDataPointStorageI* tdps; + + 
new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize, + type, exactDataByteArray->array, exactDataByteArray->size, + realPrecision, minValue, quantization_intervals, SZ_UINT64); + + //free memory + free(type); + free(exactDataByteArray); //exactDataByteArray->array has been released in free_TightDataPointStorageF(tdps); + + return tdps; +} + + +void SZ_compress_args_uint64_NoCkRngeNoGzip_3D(unsigned char** newByteData, uint64_t *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t *outSize, +uint64_t valueRangeSize, uint64_t minValue) +{ + TightDataPointStorageI* tdps = SZ_compress_uint64_3D_MDQ(oriData, r1, r2, r3, realPrecision, valueRangeSize, minValue); + + convertTDPStoFlatBytes_int(tdps, newByteData, outSize); + + size_t dataLength = r1*r2*r3; + if(*outSize>dataLength*sizeof(uint64_t)) + SZ_compress_args_uint64_StoreOriData(oriData, dataLength, tdps, newByteData, outSize); + + free_TightDataPointStorageI(tdps); +} + + +TightDataPointStorageI* SZ_compress_uint64_4D_MDQ(uint64_t *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision, uint64_t valueRangeSize, uint64_t minValue) +{ + unsigned char bytes[8] = {0,0,0,0,0,0,0,0}; + int byteSize = computeByteSizePerIntValue(valueRangeSize); + + unsigned int quantization_intervals; + if(exe_params->optQuantMode==1) + { + quantization_intervals = optimize_intervals_uint64_4D(oriData, r1, r2, r3, r4, realPrecision); + updateQuantizationInfo(quantization_intervals); + } + else + quantization_intervals = exe_params->intvCapacity; + size_t i,j,k; + int64_t pred1D, pred2D, pred3D, curValue; + int64_t diff = 0.0; + double itvNum = 0; + uint64_t *P0, *P1; + + size_t dataLength = r1*r2*r3*r4; + + size_t r234 = r2*r3*r4; + size_t r34 = r3*r4; + + P0 = (uint64_t*)malloc(r34*sizeof(uint64_t)); + P1 = (uint64_t*)malloc(r34*sizeof(uint64_t)); + + int* type = (int*) malloc(dataLength*sizeof(int)); + + uint64_t* spaceFillingValue = oriData; // + + DynamicByteArray 
*exactDataByteArray; + new_DBA(&exactDataByteArray, DynArrayInitLen); + + size_t l; + for (l = 0; l < r1; l++) + { + + /////////////////////////// Process layer-0 /////////////////////////// + /* Process Row-0 data 0*/ + size_t index = l*r234; + size_t index2D = 0; + + type[index] = 0; + curValue = P1[index2D] = spaceFillingValue[index]; + compressUInt64Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + + /* Process Row-0 data 1*/ + index = l*r234+1; + index2D = 1; + + pred1D = P1[index2D-1]; + diff = (int64_t)(curValue) - (int64_t)(pred1D); + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P1[index2D] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + } + else + { + type[index] = 0; + + curValue = P1[index2D] = spaceFillingValue[0]; + compressUInt64Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + + /* Process Row-0 data 2 --> data r4-1 */ + for (j = 2; j < r4; j++) + { + index = l*r234+j; + index2D = j; + + pred1D = 2*P1[index2D-1] - P1[index2D-2]; + diff = (int64_t)(spaceFillingValue[index]) - (int64_t)(pred1D); + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P1[index2D] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + } + else + { + type[index] = 0; + + curValue = P1[index2D] = spaceFillingValue[0]; + compressUInt64Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + } + + /* Process Row-1 --> Row-r3-1 */ + for (i = 1; i < r3; i++) + { + /* Process row-i data 0 */ + index = l*r234+i*r4; + index2D = i*r4; + + pred1D = P1[index2D-r4]; + diff = (int64_t)(spaceFillingValue[index]) - 
(int64_t)(pred1D); + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P1[index2D] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + } + else + { + type[index] = 0; + + curValue = P1[index2D] = spaceFillingValue[0]; + compressUInt64Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + + /* Process row-i data 1 --> data r4-1*/ + for (j = 1; j < r4; j++) + { + index = l*r234+i*r4+j; + index2D = i*r4+j; + + pred2D = P1[index2D-1] + P1[index2D-r4] - P1[index2D-r4-1]; + + diff = (int64_t)(spaceFillingValue[index]) - (int64_t)(pred2D); + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P1[index2D] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + } + else + { + type[index] = 0; + + curValue = P1[index2D] = spaceFillingValue[0]; + compressUInt64Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + } + } + + + /////////////////////////// Process layer-1 --> layer-r2-1 /////////////////////////// + + for (k = 1; k < r2; k++) + { + /* Process Row-0 data 0*/ + index = l*r234+k*r34; + index2D = 0; + + pred1D = P1[index2D]; + diff = (int64_t)(spaceFillingValue[index]) - (int64_t)(pred1D); + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P0[index2D] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + } + else + { + type[index] = 0; + + curValue = P0[index2D] = spaceFillingValue[0]; + compressUInt64Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + + /* Process 
Row-0 data 1 --> data r4-1 */ + for (j = 1; j < r4; j++) + { + index = l*r234+k*r34+j; + index2D = j; + + pred2D = P0[index2D-1] + P1[index2D] - P1[index2D-1]; + diff = (int64_t)(spaceFillingValue[index]) - (int64_t)(pred2D); + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P0[index2D] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + } + else + { + type[index] = 0; + + curValue = P0[index2D] = spaceFillingValue[0]; + compressUInt64Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + } + + /* Process Row-1 --> Row-r3-1 */ + for (i = 1; i < r3; i++) + { + /* Process Row-i data 0 */ + index = l*r234+k*r34+i*r4; + index2D = i*r4; + + pred2D = P0[index2D-r4] + P1[index2D] - P1[index2D-r4]; + diff = (int64_t)(spaceFillingValue[index]) - (int64_t)(pred2D); + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P0[index2D] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + } + else + { + type[index] = 0; + + curValue = P0[index2D] = spaceFillingValue[0]; + compressUInt64Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + + /* Process Row-i data 1 --> data r4-1 */ + for (j = 1; j < r4; j++) + { + index = l*r234+k*r34+i*r4+j; + index2D = i*r4+j; + + pred3D = P0[index2D-1] + P0[index2D-r4]+ P1[index2D] - P0[index2D-r4-1] - P1[index2D-r4] - P1[index2D-1] + P1[index2D-r4-1]; + diff = (int64_t)(spaceFillingValue[index]) - (int64_t)(pred3D); + + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P0[index2D] = pred3D + 2 * (type[index] - 
exe_params->intvRadius) * realPrecision; + } + else + { + type[index] = 0; + + curValue = P0[index2D] = spaceFillingValue[0]; + compressUInt64Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + } + } + } + + uint64_t *Pt; + Pt = P1; + P1 = P0; + P0 = Pt; + } + } + + free(P0); + free(P1); + + size_t exactDataNum = exactDataByteArray->size; + + TightDataPointStorageI* tdps; + + new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize, + type, exactDataByteArray->array, exactDataByteArray->size, + realPrecision, minValue, quantization_intervals, SZ_UINT64); + + //free memory + free(type); + free(exactDataByteArray); //exactDataByteArray->array has been released in free_TightDataPointStorageF(tdps); + + return tdps; +} + +void SZ_compress_args_uint64_NoCkRngeNoGzip_4D(unsigned char** newByteData, uint64_t *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision, +size_t *outSize, uint64_t valueRangeSize, uint64_t minValue) +{ + TightDataPointStorageI* tdps = SZ_compress_uint64_4D_MDQ(oriData, r1, r2, r3, r4, realPrecision, valueRangeSize, minValue); + + convertTDPStoFlatBytes_int(tdps, newByteData, outSize); + + size_t dataLength = r1*r2*r3*r4; + if(*outSize>dataLength*sizeof(uint64_t)) + SZ_compress_args_uint64_StoreOriData(oriData, dataLength, tdps, newByteData, outSize); + + free_TightDataPointStorageI(tdps); +} + +void SZ_compress_args_uint64_withinRange(unsigned char** newByteData, uint64_t *oriData, size_t dataLength, size_t *outSize) +{ + TightDataPointStorageI* tdps = (TightDataPointStorageI*) malloc(sizeof(TightDataPointStorageI)); + tdps->typeArray = NULL; + + tdps->allSameData = 1; + tdps->dataSeriesLength = dataLength; + tdps->exactDataBytes = (unsigned char*)malloc(sizeof(unsigned char)*8); + tdps->isLossless = 0; + //tdps->exactByteSize = 8; + tdps->exactDataNum = 1; + tdps->exactDataBytes_size = 8; + + uint64_t value = oriData[0]; + int64ToBytes_bigEndian(tdps->exactDataBytes, 
value); + + size_t tmpOutSize; + convertTDPStoFlatBytes_int(tdps, newByteData, &tmpOutSize); + + *outSize = tmpOutSize;//3+1+sizeof(uint64_t)+SZ_SIZE_TYPE; //8==3+1+4(uint64_size) + free_TightDataPointStorageI(tdps); +} + +int SZ_compress_args_uint64_wRngeNoGzip(unsigned char** newByteData, uint64_t *oriData, +size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize, +int errBoundMode, double absErr_Bound, double relBoundRatio) +{ + int status = SZ_SCES; + size_t dataLength = computeDataLength(r5,r4,r3,r2,r1); + int64_t valueRangeSize = 0; + + uint64_t minValue = computeRangeSize_int(oriData, SZ_UINT64, dataLength, &valueRangeSize); + double realPrecision = getRealPrecision_int(valueRangeSize, errBoundMode, absErr_Bound, relBoundRatio, &status); + + if(valueRangeSize <= realPrecision) + { + SZ_compress_args_uint64_withinRange(newByteData, oriData, dataLength, outSize); + } + else + { +// SZ_compress_args_uint64_NoCkRngeNoGzip_2D(newByteData, oriData, r2, r1, realPrecision, outSize); + if(r5==0&&r4==0&&r3==0&&r2==0) + { + SZ_compress_args_uint64_NoCkRngeNoGzip_1D(newByteData, oriData, r1, realPrecision, outSize, valueRangeSize, minValue); + } + else if(r5==0&&r4==0&&r3==0) + { + SZ_compress_args_uint64_NoCkRngeNoGzip_2D(newByteData, oriData, r2, r1, realPrecision, outSize, valueRangeSize, minValue); + } + else if(r5==0&&r4==0) + { + SZ_compress_args_uint64_NoCkRngeNoGzip_3D(newByteData, oriData, r3, r2, r1, realPrecision, outSize, valueRangeSize, minValue); + } + else if(r5==0) + { + SZ_compress_args_uint64_NoCkRngeNoGzip_3D(newByteData, oriData, r4*r3, r2, r1, realPrecision, outSize, valueRangeSize, minValue); + } + } + return status; +} + +int SZ_compress_args_uint64(unsigned char** newByteData, uint64_t *oriData, +size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize, +int errBoundMode, double absErr_Bound, double relBoundRatio) +{ + confparams_cpr->errorBoundMode = errBoundMode; + + if(errBoundMode>=PW_REL) + { + printf("Error: 
Current SZ version doesn't support integer data compression with point-wise relative error bound being based on pwrType=AVG\n"); + exit(0); + return SZ_NSCS; + } + int status = SZ_SCES; + size_t dataLength = computeDataLength(r5,r4,r3,r2,r1); + int64_t valueRangeSize = 0; + + uint64_t minValue = (uint64_t)computeRangeSize_int(oriData, SZ_UINT64, dataLength, &valueRangeSize); + double realPrecision = 0; + + if(confparams_cpr->errorBoundMode==PSNR) + { + confparams_cpr->errorBoundMode = ABS; + realPrecision = confparams_cpr->absErrBound = computeABSErrBoundFromPSNR(confparams_cpr->psnr, (double)confparams_cpr->predThreshold, (double)valueRangeSize); + //printf("realPrecision=%lf\n", realPrecision); + } + else + realPrecision = getRealPrecision_int(valueRangeSize, errBoundMode, absErr_Bound, relBoundRatio, &status); + + if(valueRangeSize <= realPrecision) + { + SZ_compress_args_uint64_withinRange(newByteData, oriData, dataLength, outSize); + } + else + { + size_t tmpOutSize = 0; + unsigned char* tmpByteData; + if (r2==0) + { + SZ_compress_args_uint64_NoCkRngeNoGzip_1D(&tmpByteData, oriData, r1, realPrecision, &tmpOutSize, valueRangeSize, minValue); + } + else + if (r3==0) + { + SZ_compress_args_uint64_NoCkRngeNoGzip_2D(&tmpByteData, oriData, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, minValue); + } + else + if (r4==0) + { + SZ_compress_args_uint64_NoCkRngeNoGzip_3D(&tmpByteData, oriData, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, minValue); + } + else + if (r5==0) + { + SZ_compress_args_uint64_NoCkRngeNoGzip_4D(&tmpByteData, oriData, r4, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, minValue); + } + else + { + printf("Error: doesn't support 5 dimensions for now.\n"); + status = SZ_DERR; //dimension error + } + //Call Gzip to do the further compression. 
+ if(confparams_cpr->szMode==SZ_BEST_SPEED) + { + *outSize = tmpOutSize; + *newByteData = tmpByteData; + } + else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION || confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION) + { + *outSize = sz_lossless_compress(confparams_cpr->losslessCompressor, confparams_cpr->gzipMode, tmpByteData, tmpOutSize, newByteData); + free(tmpByteData); + } + else + { + printf("Error: Wrong setting of confparams_cpr->szMode in the uint64_t compression.\n"); + status = SZ_MERR; //mode error + } + } + + return status; +} diff --git a/deps/SZ/sz/src/sz_uint8.c b/deps/SZ/sz/src/sz_uint8.c new file mode 100644 index 0000000000000000000000000000000000000000..6865564dd9e8304de3bf973227541775e13b80ea --- /dev/null +++ b/deps/SZ/sz/src/sz_uint8.c @@ -0,0 +1,1385 @@ +/** + * @file sz_uint8.c + * @author Sheng Di + * @date Aug, 2017 + * @brief sz_uint8, Compression and Decompression functions + * (C) 2017 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. 
+ */ + + +#include +#include +#include +#include +#include +#include "sz.h" +#include "CompressElement.h" +#include "DynamicByteArray.h" +#include "DynamicIntArray.h" +#include "zlib.h" +#include "rw.h" +#include "TightDataPointStorageI.h" +#include "sz_uint8.h" +#include "utility.h" + +unsigned int optimize_intervals_uint8_1D(uint8_t *oriData, size_t dataLength, double realPrecision) +{ + size_t i = 0, radiusIndex; + int64_t pred_value = 0, pred_err; + size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t)); + memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t)); + size_t totalSampleSize = dataLength/confparams_cpr->sampleDistance; + for(i=2;isampleDistance==0) + { + //pred_value = 2*oriData[i-1] - oriData[i-2]; + pred_value = oriData[i-1]; + pred_err = llabs(pred_value - oriData[i]); + radiusIndex = (uint64_t)((pred_err/realPrecision+1)/2); + if(radiusIndex>=confparams_cpr->maxRangeRadius) + radiusIndex = confparams_cpr->maxRangeRadius - 1; + intervals[radiusIndex]++; + } + } + //compute the appropriate number + size_t targetCount = totalSampleSize*confparams_cpr->predThreshold; + size_t sum = 0; + for(i=0;imaxRangeRadius;i++) + { + sum += intervals[i]; + if(sum>targetCount) + break; + } + if(i>=confparams_cpr->maxRangeRadius) + i = confparams_cpr->maxRangeRadius-1; + + unsigned int accIntervals = 2*(i+1); + unsigned int powerOf2 = roundUpToPowerOf2(accIntervals); + + if(powerOf2<32) + powerOf2 = 32; + + free(intervals); + //printf("accIntervals=%d, powerOf2=%d\n", accIntervals, powerOf2); + return powerOf2; +} + +unsigned int optimize_intervals_uint8_2D(uint8_t *oriData, size_t r1, size_t r2, double realPrecision) +{ + size_t i,j, index; + size_t radiusIndex; + int64_t pred_value = 0, pred_err; + size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t)); + memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t)); + size_t totalSampleSize = (r1-1)*(r2-1)/confparams_cpr->sampleDistance; 
+ for(i=1;isampleDistance==0) + { + index = i*r2+j; + pred_value = oriData[index-1] + oriData[index-r2] - oriData[index-r2-1]; + pred_err = llabs(pred_value - oriData[index]); + radiusIndex = (uint64_t)((pred_err/realPrecision+1)/2); + if(radiusIndex>=confparams_cpr->maxRangeRadius) + radiusIndex = confparams_cpr->maxRangeRadius - 1; + intervals[radiusIndex]++; + } + } + } + //compute the appropriate number + size_t targetCount = totalSampleSize*confparams_cpr->predThreshold; + size_t sum = 0; + for(i=0;imaxRangeRadius;i++) + { + sum += intervals[i]; + if(sum>targetCount) + break; + } + if(i>=confparams_cpr->maxRangeRadius) + i = confparams_cpr->maxRangeRadius-1; + unsigned int accIntervals = 2*(i+1); + unsigned int powerOf2 = roundUpToPowerOf2(accIntervals); + + if(powerOf2<32) + powerOf2 = 32; + + free(intervals); + //printf("confparams_cpr->maxRangeRadius = %d, accIntervals=%d, powerOf2=%d\n", confparams_cpr->maxRangeRadius, accIntervals, powerOf2); + return powerOf2; +} + +unsigned int optimize_intervals_uint8_3D(uint8_t *oriData, size_t r1, size_t r2, size_t r3, double realPrecision) +{ + size_t i,j,k, index; + size_t radiusIndex; + size_t r23=r2*r3; + int64_t pred_value = 0, pred_err; + size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t)); + memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t)); + size_t totalSampleSize = (r1-1)*(r2-1)*(r3-1)/confparams_cpr->sampleDistance; + for(i=1;isampleDistance==0) + { + index = i*r23+j*r3+k; + pred_value = oriData[index-1] + oriData[index-r3] + oriData[index-r23] + - oriData[index-1-r23] - oriData[index-r3-1] - oriData[index-r3-r23] + oriData[index-r3-r23-1]; + pred_err = llabs(pred_value - oriData[index]); + radiusIndex = (pred_err/realPrecision+1)/2; + if(radiusIndex>=confparams_cpr->maxRangeRadius) + { + radiusIndex = confparams_cpr->maxRangeRadius - 1; + //printf("radiusIndex=%d\n", radiusIndex); + } + intervals[radiusIndex]++; + } + } + } + } + //compute the appropriate 
number + size_t targetCount = totalSampleSize*confparams_cpr->predThreshold; + size_t sum = 0; + for(i=0;imaxRangeRadius;i++) + { + sum += intervals[i]; + if(sum>targetCount) + break; + } + if(i>=confparams_cpr->maxRangeRadius) + i = confparams_cpr->maxRangeRadius-1; + unsigned int accIntervals = 2*(i+1); + unsigned int powerOf2 = roundUpToPowerOf2(accIntervals); + + if(powerOf2<32) + powerOf2 = 32; + + free(intervals); + //printf("targetCount=%d, sum=%d, totalSampleSize=%d, ratio=%f, accIntervals=%d, powerOf2=%d\n", targetCount, sum, totalSampleSize, (double)sum/(double)totalSampleSize, accIntervals, powerOf2); + return powerOf2; +} + + +unsigned int optimize_intervals_uint8_4D(uint8_t *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision) +{ + size_t i,j,k,l, index; + size_t radiusIndex; + size_t r234=r2*r3*r4; + size_t r34=r3*r4; + int64_t pred_value = 0, pred_err; + size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t)); + memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t)); + size_t totalSampleSize = (r1-1)*(r2-1)*(r3-1)*(r4-1)/confparams_cpr->sampleDistance; + for(i=1;isampleDistance==0) + { + index = i*r234+j*r34+k*r4+l; + pred_value = oriData[index-1] + oriData[index-r3] + oriData[index-r34] + - oriData[index-1-r34] - oriData[index-r4-1] - oriData[index-r4-r34] + oriData[index-r4-r34-1]; + pred_err = llabs(pred_value - oriData[index]); + radiusIndex = (uint64_t)((pred_err/realPrecision+1)/2); + if(radiusIndex>=confparams_cpr->maxRangeRadius) + radiusIndex = confparams_cpr->maxRangeRadius - 1; + intervals[radiusIndex]++; + } + } + } + } + } + //compute the appropriate number + size_t targetCount = totalSampleSize*confparams_cpr->predThreshold; + size_t sum = 0; + for(i=0;imaxRangeRadius;i++) + { + sum += intervals[i]; + if(sum>targetCount) + break; + } + if(i>=confparams_cpr->maxRangeRadius) + i = confparams_cpr->maxRangeRadius-1; + + unsigned int accIntervals = 2*(i+1); + unsigned int powerOf2 
= roundUpToPowerOf2(accIntervals); + + if(powerOf2<32) + powerOf2 = 32; + + free(intervals); + return powerOf2; +} + +TightDataPointStorageI* SZ_compress_uint8_1D_MDQ(uint8_t *oriData, size_t dataLength, double realPrecision, int64_t valueRangeSize, int64_t minValue) +{ + unsigned char bytes[8] = {0,0,0,0,0,0,0,0}; + int byteSize = computeByteSizePerIntValue(valueRangeSize); + unsigned int quantization_intervals; + if(exe_params->optQuantMode==1) + quantization_intervals = optimize_intervals_uint8_1D(oriData, dataLength, realPrecision); + else + quantization_intervals = exe_params->intvCapacity; + updateQuantizationInfo(quantization_intervals); + size_t i; + + int* type = (int*) malloc(dataLength*sizeof(int)); + + uint8_t* spaceFillingValue = oriData; // + + DynamicByteArray *exactDataByteArray; + new_DBA(&exactDataByteArray, DynArrayInitLen); + + int64_t last3CmprsData[3] = {0,0,0}; + + //add the first data + type[0] = 0; + compressUInt8Value(spaceFillingValue[0], minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + listAdd_int(last3CmprsData, spaceFillingValue[0]); + + type[1] = 0; + compressUInt8Value(spaceFillingValue[1], minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + listAdd_int(last3CmprsData, spaceFillingValue[1]); + //printf("%.30G\n",last3CmprsData[0]); + + int state; + double checkRadius = (exe_params->intvCapacity-1)*realPrecision; + int64_t curData; + int64_t pred, predAbsErr; + double interval = 2*realPrecision; + + for(i=2;i=pred) + { + type[i] = exe_params->intvRadius+state; + pred = pred + state*interval; + } + else //curDataintvRadius-state; + pred = pred - state*interval; + } + if(pred>SZ_UINT8_MAX) pred = SZ_UINT8_MAX; + if(predsize / byteSize; + + TightDataPointStorageI* tdps; + + new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize, + type, exactDataByteArray->array, exactDataByteArray->size, + realPrecision, minValue, quantization_intervals, SZ_UINT8); + 
+//sdi:Debug +/* int sum =0; + for(i=0;iarray has been released in free_TightDataPointStorageF(tdps); + + return tdps; +} + +void SZ_compress_args_uint8_StoreOriData(uint8_t* oriData, size_t dataLength, TightDataPointStorageI* tdps, +unsigned char** newByteData, size_t *outSize) +{ + int intSize=sizeof(uint8_t); + size_t k = 0, i; + tdps->isLossless = 1; + size_t totalByteLength = 3 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 1 + intSize*dataLength; + *newByteData = (unsigned char*)malloc(totalByteLength); + + unsigned char dsLengthBytes[8]; + for (i = 0; i < 3; i++)//3 + (*newByteData)[k++] = versionNumber[i]; + + if(exe_params->SZ_SIZE_TYPE==4)//1 + (*newByteData)[k++] = 16; //00010000 + else + (*newByteData)[k++] = 80; //01010000: 01000000 indicates the SZ_SIZE_TYPE=8 + + convertSZParamsToBytes(confparams_cpr, &((*newByteData)[k])); + k = k + MetaDataByteLength; + + sizeToBytes(dsLengthBytes,dataLength); //SZ_SIZE_TYPE: 4 or 8 + for (i = 0; i < exe_params->SZ_SIZE_TYPE; i++) + (*newByteData)[k++] = dsLengthBytes[i]; + + if(sysEndianType==BIG_ENDIAN_SYSTEM) + memcpy((*newByteData)+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE, oriData, dataLength*intSize); + else + { + unsigned char* p = (*newByteData)+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE; + for(i=0;i dataLength*sizeof(uint8_t)) + SZ_compress_args_uint8_StoreOriData(oriData, dataLength+2, tdps, newByteData, outSize); + free_TightDataPointStorageI(tdps); +} + +TightDataPointStorageI* SZ_compress_uint8_2D_MDQ(uint8_t *oriData, size_t r1, size_t r2, double realPrecision, int64_t valueRangeSize, int64_t minValue) +{ + unsigned char bytes[8] = {0,0,0,0,0,0,0,0}; + int byteSize = computeByteSizePerIntValue(valueRangeSize); + + unsigned int quantization_intervals; + if(exe_params->optQuantMode==1) + { + quantization_intervals = optimize_intervals_uint8_2D(oriData, r1, r2, realPrecision); + updateQuantizationInfo(quantization_intervals); + } + else + quantization_intervals = exe_params->intvCapacity; + 
size_t i,j; + int64_t pred1D, pred2D, curValue, tmp; + int diff = 0.0; + double itvNum = 0; + uint8_t *P0, *P1; + + size_t dataLength = r1*r2; + + P0 = (uint8_t*)malloc(r2*sizeof(uint8_t)); + memset(P0, 0, r2*sizeof(uint8_t)); + P1 = (uint8_t*)malloc(r2*sizeof(uint8_t)); + memset(P1, 0, r2*sizeof(uint8_t)); + + int* type = (int*) malloc(dataLength*sizeof(int)); + //type[dataLength]=0; + + uint8_t* spaceFillingValue = oriData; // + + DynamicByteArray *exactDataByteArray; + new_DBA(&exactDataByteArray, DynArrayInitLen); + + type[0] = 0; + curValue = P1[0] = spaceFillingValue[0]; + compressUInt8Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + + /* Process Row-0 data 1*/ + pred1D = P1[0]; + diff = spaceFillingValue[1] - pred1D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[1] = (int) (itvNum/2) + exe_params->intvRadius; + tmp = pred1D + 2 * (type[1] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_UINT8_MIN&&tmp data r2-1 */ + for (j = 2; j < r2; j++) + { + pred1D = 2*P1[j-1] - P1[j-2]; + diff = spaceFillingValue[j] - pred1D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[j] = (int) (itvNum/2) + exe_params->intvRadius; + tmp = pred1D + 2 * (type[j] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_UINT8_MIN&&tmp Row-r1-1 */ + size_t index; + for (i = 1; i < r1; i++) + { + /* Process row-i data 0 */ + index = i*r2; + pred1D = P1[0]; + diff = spaceFillingValue[index] - pred1D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + tmp = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_UINT8_MIN&&tmp r2-1*/ + for (j = 1; j < r2; j++) + { + index = i*r2+j; + pred2D = P0[j-1] 
+ P1[j] - P1[j-1]; + + diff = spaceFillingValue[index] - pred2D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + tmp = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_UINT8_MIN&&tmpsize; + + TightDataPointStorageI* tdps; + + new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize, + type, exactDataByteArray->array, exactDataByteArray->size, + realPrecision, minValue, quantization_intervals, SZ_UINT8); + + //free memory + free(type); + free(exactDataByteArray); //exactDataByteArray->array has been released in free_TightDataPointStorageF(tdps); + + return tdps; +} + +/** + * + * Note: @r1 is high dimension + * @r2 is low dimension + * */ +void SZ_compress_args_uint8_NoCkRngeNoGzip_2D(unsigned char** newByteData, uint8_t *oriData, size_t r1, size_t r2, double realPrecision, size_t *outSize, +int64_t valueRangeSize, uint8_t minValue) +{ + TightDataPointStorageI* tdps = SZ_compress_uint8_2D_MDQ(oriData, r1, r2, realPrecision, valueRangeSize, minValue); + + convertTDPStoFlatBytes_int(tdps, newByteData, outSize); + + size_t dataLength = r1*r2; + if(*outSize>dataLength*sizeof(uint8_t)) + SZ_compress_args_uint8_StoreOriData(oriData, dataLength, tdps, newByteData, outSize); + + free_TightDataPointStorageI(tdps); +} + +TightDataPointStorageI* SZ_compress_uint8_3D_MDQ(uint8_t *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, int64_t valueRangeSize, int64_t minValue) +{ + unsigned char bytes[8] = {0,0,0,0,0,0,0,0}; + int byteSize = computeByteSizePerIntValue(valueRangeSize); + + unsigned int quantization_intervals; + if(exe_params->optQuantMode==1) + { + quantization_intervals = optimize_intervals_uint8_3D(oriData, r1, r2, r3, realPrecision); + updateQuantizationInfo(quantization_intervals); + } + else + quantization_intervals = exe_params->intvCapacity; + size_t i,j,k; + int64_t 
pred1D, pred2D, pred3D, curValue, tmp; + int diff = 0.0; + double itvNum = 0; + uint8_t *P0, *P1; + + size_t dataLength = r1*r2*r3; + + size_t r23 = r2*r3; + P0 = (uint8_t*)malloc(r23*sizeof(uint8_t)); + P1 = (uint8_t*)malloc(r23*sizeof(uint8_t)); + + int* type = (int*) malloc(dataLength*sizeof(int)); + + uint8_t* spaceFillingValue = oriData; // + + DynamicByteArray *exactDataByteArray; + new_DBA(&exactDataByteArray, DynArrayInitLen); + + type[0] = 0; + P1[0] = spaceFillingValue[0]; + compressUInt8Value(spaceFillingValue[0], minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + + /* Process Row-0 data 1*/ + pred1D = P1[0]; + diff = spaceFillingValue[1] - pred1D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[1] = (int) (itvNum/2) + exe_params->intvRadius; + tmp = pred1D + 2 * (type[1] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_UINT8_MIN&&tmp data r3-1 */ + for (j = 2; j < r3; j++) + { + pred1D = 2*P1[j-1] - P1[j-2]; + diff = spaceFillingValue[j] - pred1D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[j] = (int) (itvNum/2) + exe_params->intvRadius; + tmp = pred1D + 2 * (type[j] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_UINT8_MIN&&tmp Row-r2-1 */ + size_t index; + for (i = 1; i < r2; i++) + { + /* Process row-i data 0 */ + index = i*r3; + pred1D = P1[index-r3]; + diff = spaceFillingValue[index] - pred1D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + tmp = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_UINT8_MIN&&tmp data r3-1*/ + for (j = 1; j < r3; j++) + { + index = i*r3+j; + pred2D = P1[index-1] + P1[index-r3] - P1[index-r3-1]; + + diff = 
spaceFillingValue[index] - pred2D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + tmp = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_UINT8_MIN&&tmp layer-r1-1 /////////////////////////// + + for (k = 1; k < r1; k++) + { + /* Process Row-0 data 0*/ + index = k*r23; + pred1D = P1[0]; + diff = spaceFillingValue[index] - pred1D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + tmp = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_UINT8_MIN&&tmp data r3-1 */ + for (j = 1; j < r3; j++) + { + //index = k*r2*r3+j; + index ++; + pred2D = P0[j-1] + P1[j] - P1[j-1]; + diff = spaceFillingValue[index] - pred2D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + tmp = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_UINT8_MIN&&tmp Row-r2-1 */ + size_t index2D; + for (i = 1; i < r2; i++) + { + /* Process Row-i data 0 */ + index = k*r23 + i*r3; + index2D = i*r3; + pred2D = P0[index2D-r3] + P1[index2D] - P1[index2D-r3]; + diff = spaceFillingValue[index] - pred2D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + tmp = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_UINT8_MIN&&tmp data r3-1 */ + for (j = 1; j < r3; j++) + { +// if(k==63&&i==43&&j==27) +// printf("i=%d\n", i); + //index = k*r2*r3 + i*r3 + j; + index ++; + index2D = i*r3 + j; + pred3D = P0[index2D-1] + P0[index2D-r3]+ P1[index2D] - 
P0[index2D-r3-1] - P1[index2D-r3] - P1[index2D-1] + P1[index2D-r3-1]; + diff = spaceFillingValue[index] - pred3D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + tmp = pred3D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_UINT8_MIN&&tmpsize; + + TightDataPointStorageI* tdps; + + new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize, + type, exactDataByteArray->array, exactDataByteArray->size, + realPrecision, minValue, quantization_intervals, SZ_UINT8); + + //free memory + free(type); + free(exactDataByteArray); //exactDataByteArray->array has been released in free_TightDataPointStorageF(tdps); + + return tdps; +} + + +void SZ_compress_args_uint8_NoCkRngeNoGzip_3D(unsigned char** newByteData, uint8_t *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t *outSize, +int64_t valueRangeSize, int64_t minValue) +{ + TightDataPointStorageI* tdps = SZ_compress_uint8_3D_MDQ(oriData, r1, r2, r3, realPrecision, valueRangeSize, minValue); + + convertTDPStoFlatBytes_int(tdps, newByteData, outSize); + + size_t dataLength = r1*r2*r3; + if(*outSize>dataLength*sizeof(uint8_t)) + SZ_compress_args_uint8_StoreOriData(oriData, dataLength, tdps, newByteData, outSize); + + free_TightDataPointStorageI(tdps); +} + + +TightDataPointStorageI* SZ_compress_uint8_4D_MDQ(uint8_t *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision, int64_t valueRangeSize, int64_t minValue) +{ + unsigned char bytes[8] = {0,0,0,0,0,0,0,0}; + int byteSize = computeByteSizePerIntValue(valueRangeSize); + + unsigned int quantization_intervals; + if(exe_params->optQuantMode==1) + { + quantization_intervals = optimize_intervals_uint8_4D(oriData, r1, r2, r3, r4, realPrecision); + updateQuantizationInfo(quantization_intervals); + } + else + quantization_intervals = exe_params->intvCapacity; + size_t 
i,j,k; + int64_t pred1D, pred2D, pred3D, curValue, tmp; + int diff = 0.0; + double itvNum = 0; + uint8_t *P0, *P1; + + size_t dataLength = r1*r2*r3*r4; + + size_t r234 = r2*r3*r4; + size_t r34 = r3*r4; + + P0 = (uint8_t*)malloc(r34*sizeof(uint8_t)); + P1 = (uint8_t*)malloc(r34*sizeof(uint8_t)); + + int* type = (int*) malloc(dataLength*sizeof(int)); + + uint8_t* spaceFillingValue = oriData; // + + DynamicByteArray *exactDataByteArray; + new_DBA(&exactDataByteArray, DynArrayInitLen); + + size_t l; + for (l = 0; l < r1; l++) + { + + /////////////////////////// Process layer-0 /////////////////////////// + /* Process Row-0 data 0*/ + size_t index = l*r234; + size_t index2D = 0; + + type[index] = 0; + curValue = P1[index2D] = spaceFillingValue[index]; + compressUInt8Value(curValue, minValue, byteSize, bytes); + memcpyDBA_Data(exactDataByteArray, bytes, byteSize); + + /* Process Row-0 data 1*/ + index = l*r234+1; + index2D = 1; + + pred1D = P1[index2D-1]; + diff = curValue - pred1D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + tmp = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_UINT8_MIN&&tmp data r4-1 */ + for (j = 2; j < r4; j++) + { + index = l*r234+j; + index2D = j; + + pred1D = 2*P1[index2D-1] - P1[index2D-2]; + diff = spaceFillingValue[index] - pred1D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + tmp = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_UINT8_MIN&&tmp Row-r3-1 */ + for (i = 1; i < r3; i++) + { + /* Process row-i data 0 */ + index = l*r234+i*r4; + index2D = i*r4; + + pred1D = P1[index2D-r4]; + diff = spaceFillingValue[index] - pred1D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < 
exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + tmp = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_UINT8_MIN&&tmp data r4-1*/ + for (j = 1; j < r4; j++) + { + index = l*r234+i*r4+j; + index2D = i*r4+j; + + pred2D = P1[index2D-1] + P1[index2D-r4] - P1[index2D-r4-1]; + + diff = spaceFillingValue[index] - pred2D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + tmp = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_UINT8_MIN&&tmp layer-r2-1 /////////////////////////// + + for (k = 1; k < r2; k++) + { + /* Process Row-0 data 0*/ + index = l*r234+k*r34; + index2D = 0; + + pred1D = P1[index2D]; + diff = spaceFillingValue[index] - pred1D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + tmp = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_UINT8_MIN&&tmp data r4-1 */ + for (j = 1; j < r4; j++) + { + index = l*r234+k*r34+j; + index2D = j; + + pred2D = P0[index2D-1] + P1[index2D] - P1[index2D-1]; + diff = spaceFillingValue[index] - pred2D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + tmp = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_UINT8_MIN&&tmp Row-r3-1 */ + for (i = 1; i < r3; i++) + { + /* Process Row-i data 0 */ + index = l*r234+k*r34+i*r4; + index2D = i*r4; + + pred2D = P0[index2D-r4] + P1[index2D] - P1[index2D-r4]; + diff = spaceFillingValue[index] - pred2D; + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < 
exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + tmp = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_UINT8_MIN&&tmp data r4-1 */ + for (j = 1; j < r4; j++) + { + index = l*r234+k*r34+i*r4+j; + index2D = i*r4+j; + + pred3D = P0[index2D-1] + P0[index2D-r4]+ P1[index2D] - P0[index2D-r4-1] - P1[index2D-r4] - P1[index2D-1] + P1[index2D-r4-1]; + diff = spaceFillingValue[index] - pred3D; + + + itvNum = llabs(diff)/realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + tmp = pred3D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_UINT8_MIN&&tmpsize; + + TightDataPointStorageI* tdps; + + new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize, + type, exactDataByteArray->array, exactDataByteArray->size, + realPrecision, minValue, quantization_intervals, SZ_UINT8); + + //free memory + free(type); + free(exactDataByteArray); //exactDataByteArray->array has been released in free_TightDataPointStorageF(tdps); + + return tdps; +} + +void SZ_compress_args_uint8_NoCkRngeNoGzip_4D(unsigned char** newByteData, uint8_t *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision, +size_t *outSize, int64_t valueRangeSize, int64_t minValue) +{ + TightDataPointStorageI* tdps = SZ_compress_uint8_4D_MDQ(oriData, r1, r2, r3, r4, realPrecision, valueRangeSize, minValue); + + convertTDPStoFlatBytes_int(tdps, newByteData, outSize); + + size_t dataLength = r1*r2*r3*r4; + if(*outSize>dataLength*sizeof(uint8_t)) + SZ_compress_args_uint8_StoreOriData(oriData, dataLength, tdps, newByteData, outSize); + + free_TightDataPointStorageI(tdps); +} + +void SZ_compress_args_uint8_withinRange(unsigned char** newByteData, uint8_t *oriData, size_t dataLength, size_t *outSize) +{ + TightDataPointStorageI* tdps = (TightDataPointStorageI*) 
malloc(sizeof(TightDataPointStorageI)); + tdps->typeArray = NULL; + + tdps->allSameData = 1; + tdps->dataSeriesLength = dataLength; + tdps->exactDataBytes = (unsigned char*)malloc(sizeof(unsigned char)); + tdps->isLossless = 0; + //tdps->exactByteSize = 4; + tdps->exactDataNum = 1; + tdps->exactDataBytes_size = 1; + + uint8_t value = oriData[0]; + //intToBytes_bigEndian(tdps->exactDataBytes, value); + memcpy(tdps->exactDataBytes, &value, 1); + + size_t tmpOutSize; + convertTDPStoFlatBytes_int(tdps, newByteData, &tmpOutSize); + + *outSize = tmpOutSize;//3+1+sizeof(uint8_t)+SZ_SIZE_TYPE; //8==3+1+4(uint8_size) + free_TightDataPointStorageI(tdps); +} + +int SZ_compress_args_uint8_wRngeNoGzip(unsigned char** newByteData, uint8_t *oriData, +size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize, +int errBoundMode, double absErr_Bound, double relBoundRatio) +{ + int status = SZ_SCES; + size_t dataLength = computeDataLength(r5,r4,r3,r2,r1); + int64_t valueRangeSize = 0; + + uint8_t minValue = computeRangeSize_int(oriData, SZ_UINT8, dataLength, &valueRangeSize); + double realPrecision = getRealPrecision_int(valueRangeSize, errBoundMode, absErr_Bound, relBoundRatio, &status); + + if(valueRangeSize <= realPrecision) + { + SZ_compress_args_uint8_withinRange(newByteData, oriData, dataLength, outSize); + } + else + { +// SZ_compress_args_uint8_NoCkRngeNoGzip_2D(newByteData, oriData, r2, r1, realPrecision, outSize); + if(r5==0&&r4==0&&r3==0&&r2==0) + { + SZ_compress_args_uint8_NoCkRngeNoGzip_1D(newByteData, oriData, r1, realPrecision, outSize, valueRangeSize, minValue); + } + else if(r5==0&&r4==0&&r3==0) + { + SZ_compress_args_uint8_NoCkRngeNoGzip_2D(newByteData, oriData, r2, r1, realPrecision, outSize, valueRangeSize, minValue); + } + else if(r5==0&&r4==0) + { + SZ_compress_args_uint8_NoCkRngeNoGzip_3D(newByteData, oriData, r3, r2, r1, realPrecision, outSize, valueRangeSize, minValue); + } + else if(r5==0) + { + 
SZ_compress_args_uint8_NoCkRngeNoGzip_3D(newByteData, oriData, r4*r3, r2, r1, realPrecision, outSize, valueRangeSize, minValue); + } + } + return status; +} + +int SZ_compress_args_uint8(unsigned char** newByteData, uint8_t *oriData, +size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize, +int errBoundMode, double absErr_Bound, double relBoundRatio) +{ + confparams_cpr->errorBoundMode = errBoundMode; + + if(errBoundMode>=PW_REL) + { + printf("Error: Current SZ version doesn't support integer data compression with point-wise relative error bound being based on pwrType=AVG\n"); + exit(0); + return SZ_NSCS; + } + int status = SZ_SCES; + size_t dataLength = computeDataLength(r5,r4,r3,r2,r1); + int64_t valueRangeSize = 0; + + uint8_t minValue = (uint8_t)computeRangeSize_int(oriData, SZ_UINT8, dataLength, &valueRangeSize); + double realPrecision = 0; + + if(confparams_cpr->errorBoundMode==PSNR) + { + confparams_cpr->errorBoundMode = ABS; + realPrecision = confparams_cpr->absErrBound = computeABSErrBoundFromPSNR(confparams_cpr->psnr, (double)confparams_cpr->predThreshold, (double)valueRangeSize); + //printf("realPrecision=%lf\n", realPrecision); + } + else + realPrecision = getRealPrecision_int(valueRangeSize, errBoundMode, absErr_Bound, relBoundRatio, &status); + + if(valueRangeSize <= realPrecision) + { + SZ_compress_args_uint8_withinRange(newByteData, oriData, dataLength, outSize); + } + else + { + size_t tmpOutSize = 0; + unsigned char* tmpByteData; + if (r2==0) + { + SZ_compress_args_uint8_NoCkRngeNoGzip_1D(&tmpByteData, oriData, r1, realPrecision, &tmpOutSize, valueRangeSize, minValue); + } + else + if (r3==0) + { + SZ_compress_args_uint8_NoCkRngeNoGzip_2D(&tmpByteData, oriData, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, minValue); + } + else + if (r4==0) + { + SZ_compress_args_uint8_NoCkRngeNoGzip_3D(&tmpByteData, oriData, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, minValue); + } + else + if (r5==0) + { + 
SZ_compress_args_uint8_NoCkRngeNoGzip_4D(&tmpByteData, oriData, r4, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, minValue); + } + else + { + printf("Error: doesn't support 5 dimensions for now.\n"); + status = SZ_DERR; //dimension error + } + //Call Gzip to do the further compression. + if(confparams_cpr->szMode==SZ_BEST_SPEED) + { + *outSize = tmpOutSize; + *newByteData = tmpByteData; + } + else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION || confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION) + { + *outSize = sz_lossless_compress(confparams_cpr->losslessCompressor, confparams_cpr->gzipMode, tmpByteData, tmpOutSize, newByteData); + free(tmpByteData); + } + else + { + printf("Error: Wrong setting of confparams_cpr->szMode in the uint8_t compression.\n"); + status = SZ_MERR; //mode error + } + } + + return status; +} diff --git a/deps/SZ/sz/src/szd_double.c b/deps/SZ/sz/src/szd_double.c new file mode 100644 index 0000000000000000000000000000000000000000..cd3cd1c0d96cb5fea76857bfecf621d86dfbe896 --- /dev/null +++ b/deps/SZ/sz/src/szd_double.c @@ -0,0 +1,5699 @@ +/** + * @file szd_double.c + * @author Sheng Di, Dingwen Tao, Xin Liang, Xiangyu Zou, Tao Lu, Wen Xia, Xuan Wang, Weizhe Zhang + * @date Aug, 2016 + * @brief + * (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. 
+ */ + +#include +#include +#include +#include "szd_double.h" +#include "TightDataPointStorageD.h" +#include "sz.h" +#include "Huffman.h" +#include "szd_double_pwr.h" +#include "szd_double_ts.h" +#include "utility.h" + +int SZ_decompress_args_double(double** newData, size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, unsigned char* cmpBytes, +size_t cmpSize, int compressionType, double* hist_data) +{ + int status = SZ_SCES; + size_t dataLength = computeDataLength(r5,r4,r3,r2,r1); + + //unsigned char* tmpBytes; + size_t targetUncompressSize = dataLength <<3; //i.e., *8 + //tmpSize must be "much" smaller than dataLength + size_t i, tmpSize = 12+MetaDataByteLength_double+exe_params->SZ_SIZE_TYPE; + unsigned char* szTmpBytes; + if(cmpSize!=12+4+MetaDataByteLength_double && cmpSize!=12+8+MetaDataByteLength_double) + { + confparams_dec->losslessCompressor = is_lossless_compressed_data(cmpBytes, cmpSize); + if(confparams_dec->szMode!=SZ_TEMPORAL_COMPRESSION) + { + if(confparams_dec->losslessCompressor!=-1) + confparams_dec->szMode = SZ_BEST_COMPRESSION; + else + confparams_dec->szMode = SZ_BEST_SPEED; + } + if(confparams_dec->szMode==SZ_BEST_SPEED) + { + tmpSize = cmpSize; + szTmpBytes = cmpBytes; + } + else if(confparams_dec->szMode==SZ_BEST_COMPRESSION || confparams_dec->szMode==SZ_DEFAULT_COMPRESSION || confparams_dec->szMode==SZ_TEMPORAL_COMPRESSION) + { + if(targetUncompressSizelosslessCompressor, cmpBytes, (unsigned long)cmpSize, &szTmpBytes, (unsigned long)targetUncompressSize+4+MetaDataByteLength_double+exe_params->SZ_SIZE_TYPE); + //szTmpBytes = (unsigned char*)malloc(sizeof(unsigned char)*tmpSize); + //memcpy(szTmpBytes, tmpBytes, tmpSize); + //free(tmpBytes); //release useless memory + } + else + { + printf("Wrong value of confparams_dec->szMode in the double compressed bytes.\n"); + status = SZ_MERR; + return status; + } + } + else + szTmpBytes = cmpBytes; + + confparams_dec->sol_ID = szTmpBytes[4+14]; //szTmpBytes: version(3bytes), samebyte(1byte), 
[14]:sol_ID=SZ or SZ_Transpose + //TODO: convert szTmpBytes to double array. + TightDataPointStorageD* tdps; + int errBoundMode = new_TightDataPointStorageD_fromFlatBytes(&tdps, szTmpBytes, tmpSize); + + int dim = computeDimension(r5,r4,r3,r2,r1); + int doubleSize = sizeof(double); + if(tdps->isLossless) + { + *newData = (double*)malloc(doubleSize*dataLength); + if(sysEndianType==BIG_ENDIAN_SYSTEM) + { + memcpy(*newData, szTmpBytes+4+MetaDataByteLength_double+exe_params->SZ_SIZE_TYPE, dataLength*doubleSize); + } + else + { + unsigned char* p = szTmpBytes+4+MetaDataByteLength_double+exe_params->SZ_SIZE_TYPE; + for(i=0;isol_ID==SZ_Transpose) + { + getSnapshotData_double_1D(newData,dataLength,tdps, errBoundMode, 0, hist_data); + } + else //confparams_dec->sol_ID==SZ + { + if(tdps->raBytes_size > 0) //v2.0 + { + if (dim == 1) + getSnapshotData_double_1D(newData,r1,tdps, errBoundMode, 0, hist_data); + else if(dim == 2) + decompressDataSeries_double_2D_nonblocked_with_blocked_regression(newData, r2, r1, tdps->raBytes, hist_data); + else if(dim == 3) + decompressDataSeries_double_3D_nonblocked_with_blocked_regression(newData, r3, r2, r1, tdps->raBytes, hist_data); + else if(dim == 4) + decompressDataSeries_double_3D_nonblocked_with_blocked_regression(newData, r4*r3, r2, r1, tdps->raBytes, hist_data); + else + { + printf("Error: currently support only at most 4 dimensions!\n"); + status = SZ_DERR; + } + } + else //1.4.13 or time-based compression + { + if (dim == 1) + getSnapshotData_double_1D(newData,r1,tdps, errBoundMode, compressionType, hist_data); + else + if (dim == 2) + getSnapshotData_double_2D(newData,r2,r1,tdps, errBoundMode, compressionType, hist_data); + else + if (dim == 3) + getSnapshotData_double_3D(newData,r3,r2,r1,tdps, errBoundMode, compressionType, hist_data); + else + if (dim == 4) + getSnapshotData_double_4D(newData,r4,r3,r2,r1,tdps, errBoundMode, compressionType, hist_data); + else + { + printf("Error: currently support only at most 4 dimensions!\n"); 
+ status = SZ_DERR; + } + } + } + + if(confparams_dec->protectValueRange) + { + double* nd = *newData; + double min = confparams_dec->dmin; + double max = confparams_dec->dmax; + for(i=0;i= min) + continue; + if(v < min) + nd[i] = min; + else if(v > max) + nd[i] = max; + } + } + + free_TightDataPointStorageD2(tdps); + if(confparams_dec->szMode!=SZ_BEST_SPEED && cmpSize!=12+MetaDataByteLength_double+exe_params->SZ_SIZE_TYPE) + free(szTmpBytes); + return status; +} + +void decompressDataSeries_double_1D(double** data, size_t dataSeriesLength, double* hist_data, TightDataPointStorageD* tdps) +{ + //updateQuantizationInfo(tdps->intervals); + int intvRadius = tdps->intervals/2; + size_t i, j, k = 0, p = 0, l = 0; // k is to track the location of residual_bit + // in resiMidBits, p is to track the + // byte_index of resiMidBits, l is for + // leadNum + unsigned char* leadNum; + double interval = tdps->realPrecision*2; + + convertByteArray2IntArray_fast_2b(tdps->exactDataNum, tdps->leadNumArray, tdps->leadNumArray_size, &leadNum); + *data = (double*)malloc(sizeof(double)*dataSeriesLength); + + int* type = (int*)malloc(dataSeriesLength*sizeof(int)); + + HuffmanTree* huffmanTree = createHuffmanTree(tdps->stateNum); + decode_withTree(huffmanTree, tdps->typeArray, dataSeriesLength, type); + SZ_ReleaseHuffman(huffmanTree); + + unsigned char preBytes[8]; + unsigned char curBytes[8]; + + memset(preBytes, 0, 8); + + size_t curByteIndex = 0; + int reqBytesLength, resiBitsLength, resiBits; + unsigned char leadingNum; + double medianValue, exactData, predValue; + + reqBytesLength = tdps->reqLength/8; + resiBitsLength = tdps->reqLength%8; + medianValue = tdps->medianValue; + + int type_; + for (i = 0; i < dataSeriesLength; i++) { + type_ = type[i]; + switch (type_) { + case 0: + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = 
getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 8); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToDouble(curBytes); + (*data)[i] = exactData + medianValue; + memcpy(preBytes,curBytes,8); + break; + default: + //predValue = 2 * (*data)[i-1] - (*data)[i-2]; + predValue = (*data)[i-1]; + (*data)[i] = predValue + (type_-intvRadius)*interval; + break; + } + //printf("%.30G\n",(*data)[i]); + } + +#ifdef HAVE_TIMECMPR + if(confparams_dec->szMode == SZ_TEMPORAL_COMPRESSION) + memcpy(hist_data, (*data), dataSeriesLength*sizeof(double)); +#endif + + free(leadNum); + free(type); + return; +} + +void decompressDataSeries_double_2D(double** data, size_t r1, size_t r2, double* hist_data, TightDataPointStorageD* tdps) +{ + //updateQuantizationInfo(tdps->intervals); + int intvRadius = tdps->intervals/2; + //printf("tdps->intervals=%d, exe_params->intvRadius=%d\n", tdps->intervals, exe_params->intvRadius); + + size_t j, k = 0, p = 0, l = 0; // k is to track the location of residual_bit + // in resiMidBits, p is to track the + // byte_index of 
resiMidBits, l is for + // leadNum + size_t dataSeriesLength = r1*r2; + // printf ("%d %d\n", r1, r2); + + unsigned char* leadNum; + double realPrecision = tdps->realPrecision; + + convertByteArray2IntArray_fast_2b(tdps->exactDataNum, tdps->leadNumArray, tdps->leadNumArray_size, &leadNum); + + *data = (double*)malloc(sizeof(double)*dataSeriesLength); + + int* type = (int*)malloc(dataSeriesLength*sizeof(int)); + + HuffmanTree* huffmanTree = createHuffmanTree(tdps->stateNum); + decode_withTree(huffmanTree, tdps->typeArray, dataSeriesLength, type); + SZ_ReleaseHuffman(huffmanTree); + + unsigned char preBytes[8]; + unsigned char curBytes[8]; + + memset(preBytes, 0, 8); + + size_t curByteIndex = 0; + int reqBytesLength, resiBitsLength, resiBits; + unsigned char leadingNum; + double medianValue, exactData; + int type_; + + reqBytesLength = tdps->reqLength/8; + resiBitsLength = tdps->reqLength%8; + medianValue = tdps->medianValue; + + double pred1D, pred2D; + size_t ii, jj; + + /* Process Row-0, data 0 */ + + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 8); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = 
leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToDouble(curBytes); + (*data)[0] = exactData + medianValue; + memcpy(preBytes,curBytes,8); + + /* Process Row-0, data 1 */ + type_ = type[1]; + if (type_ != 0) + { + pred1D = (*data)[0]; + (*data)[1] = pred1D + 2 * (type_ - intvRadius) * realPrecision; + } + else + { + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 8); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToDouble(curBytes); + (*data)[1] = exactData + medianValue; + memcpy(preBytes,curBytes,8); + } + + /* Process Row-0, data 2 --> data r2-1 */ + for (jj = 2; jj < r2; jj++) + { + type_ = type[jj]; + if (type_ != 0) + { + pred1D = 
2*(*data)[jj-1] - (*data)[jj-2]; + (*data)[jj] = pred1D + 2 * (type_ - intvRadius) * realPrecision; + } + else + { + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 8); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToDouble(curBytes); + (*data)[jj] = exactData + medianValue; + memcpy(preBytes,curBytes,8); + } + } + + size_t index; + /* Process Row-1 --> Row-r1-1 */ + for (ii = 1; ii < r1; ii++) + { + /* Process row-ii data 0 */ + index = ii*r2; + + type_ = type[index]; + if (type_ != 0) + { + pred1D = (*data)[index-r2]; + (*data)[index] = pred1D + 2 * (type_ - intvRadius) * realPrecision; + } + else + { + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = 
(tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 8); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToDouble(curBytes); + (*data)[index] = exactData + medianValue; + memcpy(preBytes,curBytes,8); + } + + /* Process row-ii data 1 --> r2-1*/ + for (jj = 1; jj < r2; jj++) + { + index = ii*r2+jj; + pred2D = (*data)[index-1] + (*data)[index-r2] - (*data)[index-r2-1]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred2D + 2 * (type_ - intvRadius) * realPrecision; + } + else + { + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | 
((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 8); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToDouble(curBytes); + (*data)[index] = exactData + medianValue; + memcpy(preBytes,curBytes,8); + } + } + } + +#ifdef HAVE_TIMECMPR + if(confparams_dec->szMode == SZ_TEMPORAL_COMPRESSION) + memcpy(hist_data, (*data), dataSeriesLength*sizeof(double)); +#endif + + free(leadNum); + free(type); + return; +} + +void decompressDataSeries_double_3D(double** data, size_t r1, size_t r2, size_t r3, double* hist_data, TightDataPointStorageD* tdps) +{ + //updateQuantizationInfo(tdps->intervals); + int intvRadius = tdps->intervals/2; + size_t j, k = 0, p = 0, l = 0; // k is to track the location of residual_bit + // in resiMidBits, p is to track the + // byte_index of resiMidBits, l is for + // leadNum + size_t dataSeriesLength = r1*r2*r3; + size_t r23 = r2*r3; +// printf ("%d %d %d\n", r1, r2, r3); + + unsigned char* leadNum; + double realPrecision = tdps->realPrecision; + + convertByteArray2IntArray_fast_2b(tdps->exactDataNum, tdps->leadNumArray, tdps->leadNumArray_size, &leadNum); + + *data = (double*)malloc(sizeof(double)*dataSeriesLength); + + int* type = (int*)malloc(dataSeriesLength*sizeof(int)); + + HuffmanTree* huffmanTree = createHuffmanTree(tdps->stateNum); + decode_withTree(huffmanTree, tdps->typeArray, dataSeriesLength, type); + SZ_ReleaseHuffman(huffmanTree); + + unsigned char preBytes[8]; + unsigned char curBytes[8]; + + memset(preBytes, 0, 8); 
+ + size_t curByteIndex = 0; + int reqBytesLength, resiBitsLength, resiBits; + unsigned char leadingNum; + double medianValue, exactData; + int type_; + + reqBytesLength = tdps->reqLength/8; + resiBitsLength = tdps->reqLength%8; + medianValue = tdps->medianValue; + + double pred1D, pred2D, pred3D; + size_t ii, jj, kk; + + /////////////////////////// Process layer-0 /////////////////////////// + /* Process Row-0 data 0*/ + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 8); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToDouble(curBytes); + (*data)[0] = exactData + medianValue; + memcpy(preBytes,curBytes,8); + + /* Process Row-0, data 1 */ + pred1D = (*data)[0]; + + type_ = type[1]; + if (type_ != 0) + { + (*data)[1] = pred1D + 2 * (type_ - intvRadius) * realPrecision; + } + else + { + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + 
int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 8); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToDouble(curBytes); + (*data)[1] = exactData + medianValue; + memcpy(preBytes,curBytes,8); + } + + /* Process Row-0, data 2 --> data r3-1 */ + for (jj = 2; jj < r3; jj++) + { + pred1D = 2*(*data)[jj-1] - (*data)[jj-2]; + + type_ = type[jj]; + if (type_ != 0) + { + (*data)[jj] = pred1D + 2 * (type_ - intvRadius) * realPrecision; + } + else + { + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 
8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 8); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToDouble(curBytes); + (*data)[jj] = exactData + medianValue; + memcpy(preBytes,curBytes,8); + } + } + + size_t index; + /* Process Row-1 --> Row-r2-1 */ + for (ii = 1; ii < r2; ii++) + { + /* Process row-ii data 0 */ + index = ii*r3; + pred1D = (*data)[index-r3]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred1D + 2 * (type_ - intvRadius) * realPrecision; + } + else + { + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the 
exact data + memset(curBytes, 0, 8); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToDouble(curBytes); + (*data)[index] = exactData + medianValue; + memcpy(preBytes,curBytes,8); + } + + /* Process row-ii data 1 --> r3-1*/ + for (jj = 1; jj < r3; jj++) + { + index = ii*r3+jj; + pred2D = (*data)[index-1] + (*data)[index-r3] - (*data)[index-r3-1]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred2D + 2 * (type_ - intvRadius) * realPrecision; + } + else + { + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 8); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + 
exactData = bytesToDouble(curBytes); + (*data)[index] = exactData + medianValue; + memcpy(preBytes,curBytes,8); + } + } + } + + /////////////////////////// Process layer-1 --> layer-r1-1 /////////////////////////// + + for (kk = 1; kk < r1; kk++) + { + /* Process Row-0 data 0*/ + index = kk*r23; + pred1D = (*data)[index-r23]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred1D + 2 * (type_ - intvRadius) * realPrecision; + } + else + { + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 8); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToDouble(curBytes); + (*data)[index] = exactData + medianValue; + memcpy(preBytes,curBytes,8); + } + + /* Process Row-0 data 1 --> data r3-1 */ + for (jj = 1; jj < r3; jj++) + { + index = kk*r23+jj; + pred2D = (*data)[index-1] + (*data)[index-r23] - (*data)[index-r23-1]; + + type_ = type[index]; + if 
(type_ != 0) + { + (*data)[index] = pred2D + 2 * (type_ - intvRadius) * realPrecision; + } + else + { + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 8); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToDouble(curBytes); + (*data)[index] = exactData + medianValue; + memcpy(preBytes,curBytes,8); + } + } + + /* Process Row-1 --> Row-r2-1 */ + for (ii = 1; ii < r2; ii++) + { + /* Process Row-i data 0 */ + index = kk*r23 + ii*r3; + pred2D = (*data)[index-r3] + (*data)[index-r23] - (*data)[index-r23-r3]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred2D + 2 * (type_ - intvRadius) * realPrecision; + } + else + { + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, 
resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 8); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToDouble(curBytes); + (*data)[index] = exactData + medianValue; + memcpy(preBytes,curBytes,8); + } + + /* Process Row-i data 1 --> data r3-1 */ + for (jj = 1; jj < r3; jj++) + { + index = kk*r23 + ii*r3 + jj; + pred3D = (*data)[index-1] + (*data)[index-r3] + (*data)[index-r23] + - (*data)[index-r3-1] - (*data)[index-r23-r3] - (*data)[index-r23-1] + (*data)[index-r23-r3-1]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred3D + 2 * (type_ - intvRadius) * realPrecision; + } + else + { + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + 
rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 8); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToDouble(curBytes); + (*data)[index] = exactData + medianValue; + memcpy(preBytes,curBytes,8); + } + } + } + } + +#ifdef HAVE_TIMECMPR + if(confparams_dec->szMode == SZ_TEMPORAL_COMPRESSION) + memcpy(hist_data, (*data), dataSeriesLength*sizeof(double)); +#endif + + free(leadNum); + free(type); + return; +} + +void decompressDataSeries_double_4D(double** data, size_t r1, size_t r2, size_t r3, size_t r4, double* hist_data, TightDataPointStorageD* tdps) +{ + //updateQuantizationInfo(tdps->intervals); + int intvRadius = tdps->intervals/2; + size_t j, k = 0, p = 0, l = 0; // k is to track the location of residual_bit + // in resiMidBits, p is to track the + // byte_index of resiMidBits, l is for + // leadNum + size_t dataSeriesLength = r1*r2*r3*r4; + size_t r234 = r2*r3*r4; + size_t r34 = r3*r4; +// printf ("%d %d %d\n", r1, r2, r3, r4); + + unsigned char* leadNum; + double realPrecision = tdps->realPrecision; + + convertByteArray2IntArray_fast_2b(tdps->exactDataNum, tdps->leadNumArray, tdps->leadNumArray_size, &leadNum); + + *data = (double*)malloc(sizeof(double)*dataSeriesLength); + + int* type = (int*)malloc(dataSeriesLength*sizeof(int)); + + HuffmanTree* huffmanTree = createHuffmanTree(tdps->stateNum); + 
decode_withTree(huffmanTree, tdps->typeArray, dataSeriesLength, type); + SZ_ReleaseHuffman(huffmanTree); + + unsigned char preBytes[8]; + unsigned char curBytes[8]; + + memset(preBytes, 0, 8); + + size_t curByteIndex = 0; + int reqBytesLength, resiBitsLength, resiBits; + unsigned char leadingNum; + double medianValue, exactData; + int type_; + + reqBytesLength = tdps->reqLength/8; + resiBitsLength = tdps->reqLength%8; + medianValue = tdps->medianValue; + + double pred1D, pred2D, pred3D; + size_t ii, jj, kk, ll; + size_t index; + + for (ll = 0; ll < r1; ll++) + { + + /////////////////////////// Process layer-0 /////////////////////////// + /* Process Row-0 data 0*/ + index = ll*r234; + + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 8); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToDouble(curBytes); + (*data)[index] = exactData + medianValue; + 
memcpy(preBytes,curBytes,8); + + /* Process Row-0, data 1 */ + index = ll*r234+1; + + pred1D = (*data)[index-1]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred1D + 2 * (type_ - intvRadius) * realPrecision; + } + else + { + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 8); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToDouble(curBytes); + (*data)[index] = exactData + medianValue; + memcpy(preBytes,curBytes,8); + } + + /* Process Row-0, data 2 --> data r4-1 */ + for (jj = 2; jj < r4; jj++) + { + index = ll*r234+jj; + + pred1D = 2*(*data)[index-1] - (*data)[index-2]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred1D + 2 * (type_ - intvRadius) * realPrecision; + } + else + { + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = 
getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 8); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToDouble(curBytes); + (*data)[index] = exactData + medianValue; + memcpy(preBytes,curBytes,8); + } + } + + /* Process Row-1 --> Row-r3-1 */ + for (ii = 1; ii < r3; ii++) + { + /* Process row-ii data 0 */ + index = ll*r234+ii*r4; + + pred1D = (*data)[index-r4]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred1D + 2 * (type_ - intvRadius) * realPrecision; + } + else + { + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + 
rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 8); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToDouble(curBytes); + (*data)[index] = exactData + medianValue; + memcpy(preBytes,curBytes,8); + } + + /* Process row-ii data 1 --> r4-1*/ + for (jj = 1; jj < r4; jj++) + { + index = ll*r234+ii*r4+jj; + + pred2D = (*data)[index-1] + (*data)[index-r4] - (*data)[index-r4-1]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred2D + 2 * (type_ - intvRadius) * realPrecision; + } + else + { + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += 
resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 8); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToDouble(curBytes); + (*data)[index] = exactData + medianValue; + memcpy(preBytes,curBytes,8); + } + } + } + + /////////////////////////// Process layer-1 --> layer-r2-1 /////////////////////////// + + for (kk = 1; kk < r2; kk++) + { + /* Process Row-0 data 0*/ + index = ll*r234+kk*r34; + + pred1D = (*data)[index-r34]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred1D + 2 * (type_ - intvRadius) * realPrecision; + } + else + { + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 8); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = 
(unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToDouble(curBytes); + (*data)[index] = exactData + medianValue; + memcpy(preBytes,curBytes,8); + } + + /* Process Row-0 data 1 --> data r4-1 */ + for (jj = 1; jj < r4; jj++) + { + index = ll*r234+kk*r34+jj; + + pred2D = (*data)[index-1] + (*data)[index-r34] - (*data)[index-r34-1]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred2D + 2 * (type_ - intvRadius) * realPrecision; + } + else + { + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 8); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToDouble(curBytes); + (*data)[index] = exactData + medianValue; + memcpy(preBytes,curBytes,8); + } + } + + /* Process Row-1 --> Row-r3-1 */ + for (ii = 1; ii < r3; ii++) + { + /* Process Row-i data 0 */ + index = 
ll*r234+kk*r34+ii*r4; + + pred2D = (*data)[index-r4] + (*data)[index-r34] - (*data)[index-r34-r4]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred2D + 2 * (type_ - intvRadius) * realPrecision; + } + else + { + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 8); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToDouble(curBytes); + (*data)[index] = exactData + medianValue; + memcpy(preBytes,curBytes,8); + } + + /* Process Row-i data 1 --> data r4-1 */ + for (jj = 1; jj < r4; jj++) + { + index = ll*r234+kk*r34+ii*r4+jj; + + pred3D = (*data)[index-1] + (*data)[index-r4] + (*data)[index-r34] + - (*data)[index-r4-1] - (*data)[index-r34-r4] - (*data)[index-r34-1] + (*data)[index-r34-r4-1]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred3D + 2 * (type_ - intvRadius) * realPrecision; + } + else + { + // compute 
resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 8); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToDouble(curBytes); + (*data)[index] = exactData + medianValue; + memcpy(preBytes,curBytes,8); + } + } + } + } + } + +//I didn't implement time-based compression for 4D actually. 
+//#ifdef HAVE_TIMECMPR +// if(confparams_dec->szMode == SZ_TEMPORAL_COMPRESSION) +// memcpy(multisteps->hist_data, (*data), dataSeriesLength*sizeof(double)); +//#endif + + free(leadNum); + free(type); + return; +} + +/*MSST19*/ +void decompressDataSeries_double_1D_MSST19(double** data, size_t dataSeriesLength, TightDataPointStorageD* tdps) +{ + //updateQuantizationInfo(tdps->intervals); + int intvRadius = tdps->intervals/2; + int intvCapacity = tdps->intervals; + size_t i, j, k = 0, p = 0, l = 0; // k is to track the location of residual_bit + // in resiMidBits, p is to track the + // byte_index of resiMidBits, l is for + // leadNum + unsigned char* leadNum; + //double interval = tdps->realPrecision*2; + + convertByteArray2IntArray_fast_2b(tdps->exactDataNum, tdps->leadNumArray, tdps->leadNumArray_size, &leadNum); + *data = (double*)malloc(sizeof(double)*dataSeriesLength); + + int* type = (int*)malloc(dataSeriesLength*sizeof(int)); + + HuffmanTree* huffmanTree = createHuffmanTree(tdps->stateNum); + decode_withTree_MSST19(huffmanTree, tdps->typeArray, dataSeriesLength, type, tdps->max_bits); + //decode_withTree(huffmanTree, tdps->typeArray, dataSeriesLength, type); + SZ_ReleaseHuffman(huffmanTree); + unsigned char preBytes[8]; + unsigned char curBytes[8]; + + memset(preBytes, 0, 8); + + size_t curByteIndex = 0; + int reqBytesLength, resiBitsLength, resiBits; + unsigned char leadingNum; + double exactData, predValue = 0; + reqBytesLength = tdps->reqLength/8; + resiBitsLength = tdps->reqLength%8; + //float threshold = tdps->minLogValue; + double* precisionTable = (double*)malloc(sizeof(double) * intvCapacity); + double inv = 2.0-pow(2, -(tdps->plus_bits)); + for(int i=0; irealPrecision), inv*(i - intvRadius)); + precisionTable[i] = test; + } + + int type_; + for (i = 0; i < dataSeriesLength; i++) { + type_ = type[i]; + switch (type_) { + case 0: + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = 
getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 8); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToDouble(curBytes); + (*data)[i] = exactData; + memcpy(preBytes,curBytes,8); + predValue = (*data)[i]; + break; + default: + //predValue = 2 * (*data)[i-1] - (*data)[i-2]; + //predValue = (*data)[i-1]; + predValue = fabs(predValue) * precisionTable[type_]; + (*data)[i] = predValue; + break; + } + } + +#ifdef HAVE_TIMECMPR + if(confparams_dec->szMode == SZ_TEMPORAL_COMPRESSION) + memcpy(multisteps->hist_data, (*data), dataSeriesLength*sizeof(double)); +#endif + free(precisionTable); + free(leadNum); + free(type); + return; +} + +void decompressDataSeries_double_2D_MSST19(double** data, size_t r1, size_t r2, TightDataPointStorageD* tdps) +{ + //updateQuantizationInfo(tdps->intervals); + int intvRadius = tdps->intervals/2; + int intvCapacity = tdps->intervals; + + size_t j, k = 0, p = 0, l = 0; // k is to track the location of residual_bit + // in resiMidBits, p 
is to track the + // byte_index of resiMidBits, l is for + // leadNum + size_t dataSeriesLength = r1*r2; + + unsigned char* leadNum; + //double realPrecision = tdps->realPrecision; + + convertByteArray2IntArray_fast_2b(tdps->exactDataNum, tdps->leadNumArray, tdps->leadNumArray_size, &leadNum); + + *data = (double*)malloc(sizeof(double)*dataSeriesLength); + + int* type = (int*)malloc(dataSeriesLength*sizeof(int)); + + HuffmanTree* huffmanTree = createHuffmanTree(tdps->stateNum); + decode_withTree_MSST19(huffmanTree, tdps->typeArray, dataSeriesLength, type, tdps->max_bits); + //decode_withTree(huffmanTree, tdps->typeArray, dataSeriesLength, type); + SZ_ReleaseHuffman(huffmanTree); + + unsigned char preBytes[8]; + unsigned char curBytes[8]; + + memset(preBytes, 0, 8); + + size_t curByteIndex = 0; + int reqBytesLength, resiBitsLength, resiBits; + unsigned char leadingNum; + double exactData; + int type_; + + double* precisionTable = (double*)malloc(sizeof(double) * intvCapacity); + double inv = 2.0-pow(2, -(tdps->plus_bits)); + for(int i=0; irealPrecision), inv*(i - intvRadius)); + precisionTable[i] = test; + } + + reqBytesLength = tdps->reqLength/8; + resiBitsLength = tdps->reqLength%8; + //medianValue = tdps->medianValue; + + double pred1D, pred2D; + size_t ii, jj; + + /* Process Row-0, data 0 */ + + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // 
rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 8); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToDouble(curBytes); + (*data)[0] = exactData; + memcpy(preBytes,curBytes,8); + + /* Process Row-0, data 1 */ + type_ = type[1]; + if (type_ != 0) + { + pred1D = (*data)[0]; + (*data)[1] = fabs(pred1D) * precisionTable[type_]; + } + else + { + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 8); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + 
curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToDouble(curBytes); + (*data)[1] = exactData; + memcpy(preBytes,curBytes,8); + } + + /* Process Row-0, data 2 --> data r2-1 */ + for (jj = 2; jj < r2; jj++) + { + type_ = type[jj]; + if (type_ != 0) + { + pred1D = (*data)[jj-1] * (*data)[jj-1] / (*data)[jj-2]; + (*data)[jj] = fabs(pred1D) * precisionTable[type_]; + } + else + { + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 8); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToDouble(curBytes); + (*data)[jj] = exactData; + memcpy(preBytes,curBytes,8); + } + } + + size_t index; + /* Process Row-1 --> Row-r1-1 */ + for (ii = 1; ii < r1; ii++) + { + /* Process row-ii data 0 */ + index = ii*r2; + + type_ = type[index]; + if (type_ != 0) + { + pred1D = (*data)[index-r2]; + (*data)[index] = fabs(pred1D) * precisionTable[type_]; + } + else + { + 
// compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 8); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToDouble(curBytes); + (*data)[index] = exactData; + memcpy(preBytes,curBytes,8); + } + + /* Process row-ii data 1 --> r2-1*/ + for (jj = 1; jj < r2; jj++) + { + index = ii*r2+jj; + pred2D = (*data)[index-1] * (*data)[index-r2] / (*data)[index-r2-1]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = fabs(pred2D) * precisionTable[type_]; + } + else + { + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = 
getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 8); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToDouble(curBytes); + (*data)[index] = exactData; + memcpy(preBytes,curBytes,8); + } + } + } + +#ifdef HAVE_TIMECMPR + if(confparams_dec->szMode == SZ_TEMPORAL_COMPRESSION) + memcpy(multisteps->hist_data, (*data), dataSeriesLength*sizeof(double)); +#endif + + free(leadNum); + free(type); + return; +} + +void decompressDataSeries_double_3D_MSST19(double** data, size_t r1, size_t r2, size_t r3, TightDataPointStorageD* tdps) +{ + //updateQuantizationInfo(tdps->intervals); + int intvRadius = tdps->intervals/2; + int intvCapacity = tdps->intervals; + size_t j, k = 0, p = 0, l = 0; // k is to track the location of residual_bit + // in resiMidBits, p is to track the + // byte_index of resiMidBits, l is for + // leadNum + size_t dataSeriesLength = r1*r2*r3; + size_t r23 = r2*r3; + unsigned char* leadNum; + //double realPrecision = tdps->realPrecision; + + convertByteArray2IntArray_fast_2b(tdps->exactDataNum, tdps->leadNumArray, tdps->leadNumArray_size, &leadNum); + + *data = (double*)malloc(sizeof(double)*dataSeriesLength); + int* type = (int*)malloc(dataSeriesLength*sizeof(int)); + + double* precisionTable = (double*)malloc(sizeof(double) * 
intvCapacity); + double inv = 2.0-pow(2, -(tdps->plus_bits)); + for(int i=0; irealPrecision), inv*(i - intvRadius)); + precisionTable[i] = test; + } + + HuffmanTree* huffmanTree = createHuffmanTree(tdps->stateNum); + decode_withTree_MSST19(huffmanTree, tdps->typeArray, dataSeriesLength, type, tdps->max_bits); + //decode_withTree(huffmanTree, tdps->typeArray, dataSeriesLength, type); + SZ_ReleaseHuffman(huffmanTree); + + unsigned char preBytes[8]; + unsigned char curBytes[8]; + + memset(preBytes, 0, 8); + size_t curByteIndex = 0; + int reqBytesLength, resiBitsLength, resiBits; + unsigned char leadingNum; + double exactData; + int type_; + + reqBytesLength = tdps->reqLength/8; + resiBitsLength = tdps->reqLength%8; + + double pred1D, pred2D, pred3D; + double temp; + double temp2; + size_t ii, jj, kk; + + /////////////////////////// Process layer-0 /////////////////////////// + /* Process Row-0 data 0*/ + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 8); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength 
!= 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + exactData = bytesToDouble(curBytes); + (*data)[0] = exactData; + memcpy(preBytes,curBytes,8); + + /* Process Row-0, data 1 */ + pred1D = (*data)[0]; + + type_ = type[1]; + if (type_ != 0) + { + (*data)[1] = fabs(pred1D) * precisionTable[type_]; + } + else + { + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 8); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToDouble(curBytes); + (*data)[1] = exactData; + memcpy(preBytes,curBytes,8); + } + /* Process Row-0, data 2 --> data r3-1 */ + for (jj = 2; jj < r3; jj++) + { + temp = (*data)[jj-1]; + pred1D = temp * ( *data)[jj-1] / (*data)[jj-2]; + + type_ = type[jj]; + if (type_ != 0) + { + (*data)[jj] = fabs(pred1D) * precisionTable[type_]; + } + else + { + // compute resiBits + 
resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 8); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToDouble(curBytes); + (*data)[jj] = exactData; + memcpy(preBytes,curBytes,8); + } + } + + size_t index; + /* Process Row-1 --> Row-r2-1 */ + for (ii = 1; ii < r2; ii++) + { + /* Process row-ii data 0 */ + index = ii*r3; + pred1D = (*data)[index-r3]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = fabs(pred1D) * precisionTable[type_]; + } + else + { + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, 
resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 8); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToDouble(curBytes); + (*data)[index] = exactData; + memcpy(preBytes,curBytes,8); + } + + /* Process row-ii data 1 --> r3-1*/ + for (jj = 1; jj < r3; jj++) + { + index = ii*r3+jj; + temp = (*data)[index-1]; + pred2D = temp * (*data)[index-r3] / (*data)[index-r3-1]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = fabs(pred2D) * precisionTable[type_]; + } + else + { + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & 
code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 8); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToDouble(curBytes); + (*data)[index] = exactData; + memcpy(preBytes,curBytes,8); + } + } + } + + /////////////////////////// Process layer-1 --> layer-r1-1 /////////////////////////// + + for (kk = 1; kk < r1; kk++) + { + /* Process Row-0 data 0*/ + index = kk*r23; + pred1D = (*data)[index-r23]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = fabs(pred1D) * precisionTable[type_]; + } + else + { + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 8); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) 
(resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToDouble(curBytes); + (*data)[index] = exactData; + memcpy(preBytes,curBytes,8); + } + + /* Process Row-0 data 1 --> data r3-1 */ + for (jj = 1; jj < r3; jj++) + { + index = kk*r23+jj; + temp = (*data)[index-1]; + pred2D = temp * (*data)[index-r23] / (*data)[index-r23-1]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = fabs(pred2D) * precisionTable[type_]; + } + else + { + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 8); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToDouble(curBytes); + (*data)[index] = exactData; + memcpy(preBytes,curBytes,8); + } + } + + /* Process Row-1 --> Row-r2-1 */ + for (ii = 1; ii < r2; ii++) + { + /* Process Row-i data 0 */ + index = kk*r23 + ii*r3; + temp = (*data)[index-r3]; + pred2D = temp * 
(*data)[index-r23] / (*data)[index-r23-r3]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = fabs(pred2D) * precisionTable[type_]; + } + else + { + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 8); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToDouble(curBytes); + (*data)[index] = exactData; + memcpy(preBytes,curBytes,8); + } + + /* Process Row-i data 1 --> data r3-1 */ + for (jj = 1; jj < r3; jj++) + { + index = kk*r23 + ii*r3 + jj; + //pred3D = (*data)[index-1] + (*data)[index-r3] + (*data)[index-r23] + // - (*data)[index-r3-1] - (*data)[index-r23-r3] - (*data)[index-r23-1] + (*data)[index-r23-r3-1]; + temp = (*data)[index-1]; + temp2 = (*data)[index-r3-1]; + pred3D = temp * (*data)[index-r3] * (*data)[index-r23] * (*data)[index-r23-r3-1] / (temp2 * (*data)[index-r23-r3] * (*data)[index-r23-1]); + + type_ = type[index]; + if 
(type_ != 0) + { + (*data)[index] = fabs(pred3D) * precisionTable[type_]; + } + else + { + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 8); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToDouble(curBytes); + (*data)[index] = exactData; + memcpy(preBytes,curBytes,8); + } + } + } + } + +#ifdef HAVE_TIMECMPR + if(confparams_dec->szMode == SZ_TEMPORAL_COMPRESSION) + memcpy(multisteps->hist_data, (*data), dataSeriesLength*sizeof(double)); +#endif + + free(leadNum); + free(type); + return; +} + +void getSnapshotData_double_1D(double** data, size_t dataSeriesLength, TightDataPointStorageD* tdps, int errBoundMode, int compressionType, double* hist_data) +{ + size_t i; + if (tdps->allSameData) { + double value = bytesToDouble(tdps->exactMidBytes); + *data = (double*)malloc(sizeof(double)*dataSeriesLength); + for (i = 0; i < dataSeriesLength; i++) + 
(*data)[i] = value; + } else { + if (tdps->rtypeArray == NULL) { + if(errBoundMode < PW_REL) + { +#ifdef HAVE_TIMECMPR + if(confparams_dec->szMode == SZ_TEMPORAL_COMPRESSION) + { + if(multisteps->compressionType == 0) //snapshot + decompressDataSeries_double_1D(data, dataSeriesLength, hist_data, tdps); + else + decompressDataSeries_double_1D_ts(data, dataSeriesLength, hist_data, tdps); + } + else +#endif + decompressDataSeries_double_1D(data, dataSeriesLength, hist_data, tdps); + } + else + { + if(confparams_dec->accelerate_pw_rel_compression) + decompressDataSeries_double_1D_pwr_pre_log_MSST19(data, dataSeriesLength, tdps); + else + decompressDataSeries_double_1D_pwr_pre_log(data, dataSeriesLength, tdps); + //decompressDataSeries_double_1D_pwrgroup(data, dataSeriesLength, tdps); + } + return; + } else { + //TODO + } + } +} + +void getSnapshotData_double_2D(double** data, size_t r1, size_t r2, TightDataPointStorageD* tdps, int errBoundMode, int compressionType, double* hist_data) +{ + size_t i; + size_t dataSeriesLength = r1*r2; + if (tdps->allSameData) { + double value = bytesToDouble(tdps->exactMidBytes); + *data = (double*)malloc(sizeof(double)*dataSeriesLength); + for (i = 0; i < dataSeriesLength; i++) + (*data)[i] = value; + } else { + if (tdps->rtypeArray == NULL) { + if(errBoundMode < PW_REL) + { +#ifdef HAVE_TIMECMPR + if(confparams_dec->szMode == SZ_TEMPORAL_COMPRESSION) + { + if(compressionType == 0) //snapshot + decompressDataSeries_double_2D(data, r1, r2, hist_data, tdps); + else + decompressDataSeries_double_1D_ts(data, dataSeriesLength, hist_data, tdps); + } + else +#endif + decompressDataSeries_double_2D(data, r1, r2, hist_data, tdps); + } + else + //decompressDataSeries_double_2D_pwr(data, r1, r2, tdps); + if(confparams_dec->accelerate_pw_rel_compression) + decompressDataSeries_double_2D_pwr_pre_log_MSST19(data, r1, r2, tdps); + else + decompressDataSeries_double_2D_pwr_pre_log(data, r1, r2, tdps); + return; + } else { + //TODO + } + } +} + +void 
getSnapshotData_double_3D(double** data, size_t r1, size_t r2, size_t r3, TightDataPointStorageD* tdps, int errBoundMode, int compressionType, double* hist_data) +{ + size_t i; + size_t dataSeriesLength = r1*r2*r3; + if (tdps->allSameData) { + double value = bytesToDouble(tdps->exactMidBytes); + *data = (double*)malloc(sizeof(double)*dataSeriesLength); + for (i = 0; i < dataSeriesLength; i++) + (*data)[i] = value; + } else { + if (tdps->rtypeArray == NULL) { + if(errBoundMode < PW_REL) + { +#ifdef HAVE_TIMECMPR + if(confparams_dec->szMode == SZ_TEMPORAL_COMPRESSION) + { + if(compressionType == 0) //snapshot + decompressDataSeries_double_3D(data, r1, r2, r3, hist_data, tdps); + else + decompressDataSeries_double_1D_ts(data, dataSeriesLength, hist_data, tdps); + } + else +#endif + decompressDataSeries_double_3D(data, r1, r2, r3, hist_data, tdps); + } + else + { + //decompressDataSeries_double_3D_pwr(data, r1, r2, r3, tdps); + if(confparams_dec->accelerate_pw_rel_compression) + decompressDataSeries_double_3D_pwr_pre_log_MSST19(data, r1, r2, r3, tdps); + else + decompressDataSeries_double_3D_pwr_pre_log(data, r1, r2, r3, tdps); + } + return; + } else { + //TODO + } + } +} + +void getSnapshotData_double_4D(double** data, size_t r1, size_t r2, size_t r3, size_t r4, TightDataPointStorageD* tdps, int errBoundMode, int compressionType, double* hist_data) +{ + size_t i; + size_t dataSeriesLength = r1*r2*r3*r4; + if (tdps->allSameData) { + double value = bytesToDouble(tdps->exactMidBytes); + *data = (double*)malloc(sizeof(double)*dataSeriesLength); + for (i = 0; i < dataSeriesLength; i++) + (*data)[i] = value; + } else { + if (tdps->rtypeArray == NULL) { + if(errBoundMode < PW_REL) + { +#ifdef HAVE_TIMECMPR + if(confparams_dec->szMode == SZ_TEMPORAL_COMPRESSION) + { + if(multisteps->compressionType == 0) + decompressDataSeries_double_4D(data, r1, r2, r3, r4, hist_data, tdps); + else + decompressDataSeries_double_1D_ts(data, r1*r2*r3*r4, hist_data, tdps); + } + else +#endif + 
decompressDataSeries_double_4D(data, r1, r2, r3, r4, hist_data, tdps); + } + else + { + //decompressDataSeries_double_3D_pwr(data, r1*r2, r3, r4, tdps); + if(confparams_dec->accelerate_pw_rel_compression) + decompressDataSeries_double_3D_pwr_pre_log_MSST19(data, r1*r2, r3, r4, tdps); + else + decompressDataSeries_double_3D_pwr_pre_log(data, r1*r2, r3, r4, tdps); + } + return; + } else { + //TODO + } + } +} + +size_t decompressDataSeries_double_3D_RA_block(double * data, double mean, size_t dim_0, size_t dim_1, size_t dim_2, size_t block_dim_0, size_t block_dim_1, size_t block_dim_2, double realPrecision, int * type, double * unpredictable_data) +{ + int intvRadius = exe_params->intvRadius; + + size_t dim0_offset = dim_1 * dim_2; + size_t dim1_offset = dim_2; + + size_t unpredictable_count = 0; + size_t r1, r2, r3; + r1 = block_dim_0; + r2 = block_dim_1; + r3 = block_dim_2; + + double * cur_data_pos = data; + double * last_row_pos; + double pred1D, pred2D, pred3D; + size_t i, j, k; + size_t r23 = r2*r3; + int type_; + // Process Row-0 data 0 + pred1D = mean; + type_ = type[0]; + // printf("Type 0 %d, mean %.4f\n", type_, mean); + if (type_ != 0){ + cur_data_pos[0] = pred1D + 2 * (type_ - intvRadius) * realPrecision; + } + else{ + cur_data_pos[0] = unpredictable_data[unpredictable_count ++]; + } + + /* Process Row-0 data 1*/ + pred1D = cur_data_pos[0]; + type_ = type[1]; + if (type_ != 0){ + cur_data_pos[1] = pred1D + 2 * (type_ - intvRadius) * realPrecision; + } + else{ + cur_data_pos[1] = unpredictable_data[unpredictable_count ++]; + } + /* Process Row-0 data 2 --> data r3-1 */ + for (j = 2; j < r3; j++){ + pred1D = 2*cur_data_pos[j-1] - cur_data_pos[j-2]; + type_ = type[j]; + if (type_ != 0){ + cur_data_pos[j] = pred1D + 2 * (type_ - intvRadius) * realPrecision; + } + else{ + cur_data_pos[j] = unpredictable_data[unpredictable_count ++]; + } + } + + last_row_pos = cur_data_pos; + cur_data_pos += dim1_offset; + + /* Process Row-1 --> Row-r2-1 */ + size_t index; + 
for (i = 1; i < r2; i++) + { + /* Process row-i data 0 */ + index = i*r3; + pred1D = last_row_pos[0]; + type_ = type[index]; + if (type_ != 0){ + cur_data_pos[0] = pred1D + 2 * (type_ - intvRadius) * realPrecision; + } + else{ + cur_data_pos[0] = unpredictable_data[unpredictable_count ++]; + } + /* Process row-i data 1 --> data r3-1*/ + for (j = 1; j < r3; j++) + { + index = i*r3+j; + pred2D = cur_data_pos[j-1] + last_row_pos[j] - last_row_pos[j-1]; + type_ = type[index]; + if (type_ != 0){ + cur_data_pos[j] = pred2D + 2 * (type_ - intvRadius) * realPrecision; + } + else{ + cur_data_pos[j] = unpredictable_data[unpredictable_count ++]; + } + // printf("pred2D %.2f cur_data %.2f last_row_data %.2f %.2f, result %.2f\n", pred2D, cur_data_pos[j-1], last_row_pos[j], last_row_pos[j-1], cur_data_pos[j]); + // getchar(); + } + last_row_pos = cur_data_pos; + cur_data_pos += dim1_offset; + } + cur_data_pos += dim0_offset - r2 * dim1_offset; + /////////////////////////// Process layer-1 --> layer-r1-1 /////////////////////////// + + for (k = 1; k < r1; k++) + { + /* Process Row-0 data 0*/ + index = k*r23; + pred1D = cur_data_pos[- dim0_offset]; + type_ = type[index]; + if (type_ != 0){ + cur_data_pos[0] = pred1D + 2 * (type_ - intvRadius) * realPrecision; + } + else{ + cur_data_pos[0] = unpredictable_data[unpredictable_count ++]; + } + /* Process Row-0 data 1 --> data r3-1 */ + for (j = 1; j < r3; j++) + { + //index = k*r2*r3+j; + index ++; + pred2D = cur_data_pos[j-1] + cur_data_pos[j - dim0_offset] - cur_data_pos[j - 1 - dim0_offset]; + type_ = type[index]; + if (type_ != 0){ + cur_data_pos[j] = pred2D + 2 * (type_ - intvRadius) * realPrecision; + } + else{ + cur_data_pos[j] = unpredictable_data[unpredictable_count ++]; + } + } + last_row_pos = cur_data_pos; + cur_data_pos += dim1_offset; + + /* Process Row-1 --> Row-r2-1 */ + for (i = 1; i < r2; i++) + { + /* Process Row-i data 0 */ + index = k*r23 + i*r3; + pred2D = last_row_pos[0] + cur_data_pos[- dim0_offset] - 
last_row_pos[- dim0_offset]; + type_ = type[index]; + if (type_ != 0){ + cur_data_pos[0] = pred2D + 2 * (type_ - intvRadius) * realPrecision; + } + else{ + cur_data_pos[0] = unpredictable_data[unpredictable_count ++]; + } + + /* Process Row-i data 1 --> data r3-1 */ + for (j = 1; j < r3; j++) + { + //index = k*r2*r3 + i*r3 + j; + index ++; + pred3D = cur_data_pos[j-1] + last_row_pos[j]+ cur_data_pos[j - dim0_offset] - last_row_pos[j-1] - last_row_pos[j - dim0_offset] - cur_data_pos[j-1 - dim0_offset] + last_row_pos[j-1 - dim0_offset]; + type_ = type[index]; + if (type_ != 0){ + cur_data_pos[j] = pred3D + 2 * (type_ - intvRadius) * realPrecision; + } + else{ + cur_data_pos[j] = unpredictable_data[unpredictable_count ++]; + } + } + last_row_pos = cur_data_pos; + cur_data_pos += dim1_offset; + } + cur_data_pos += dim0_offset - r2 * dim1_offset; + } + + return unpredictable_count; +} + +void decompressDataSeries_double_2D_nonblocked_with_blocked_regression(double** data, size_t r1, size_t r2, unsigned char* comp_data, double* hist_data){ + + size_t dim0_offset = r2; + size_t num_elements = r1 * r2; + + *data = (double*)malloc(sizeof(double)*num_elements); + + unsigned char * comp_data_pos = comp_data; + + size_t block_size = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + // calculate block dims + size_t num_x, num_y; + SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r1, num_x, block_size); + SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r2, num_y, block_size); + + size_t split_index_x, split_index_y; + size_t early_blockcount_x, early_blockcount_y; + size_t late_blockcount_x, late_blockcount_y; + SZ_COMPUTE_BLOCKCOUNT(r1, num_x, split_index_x, early_blockcount_x, late_blockcount_x); + SZ_COMPUTE_BLOCKCOUNT(r2, num_y, split_index_y, early_blockcount_y, late_blockcount_y); + + size_t num_blocks = num_x * num_y; + + double realPrecision = bytesToDouble(comp_data_pos); + comp_data_pos += sizeof(double); + unsigned int intervals = bytesToInt_bigEndian(comp_data_pos); + 
comp_data_pos += sizeof(int); + + //updateQuantizationInfo(intervals); + + unsigned int tree_size = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + + int stateNum = 2*intervals; + HuffmanTree* huffmanTree = createHuffmanTree(stateNum); + + int nodeCount = bytesToInt_bigEndian(comp_data_pos); + + node root = reconstruct_HuffTree_from_bytes_anyStates(huffmanTree,comp_data_pos+sizeof(int), nodeCount); + comp_data_pos += sizeof(int) + tree_size; + + double mean; + unsigned char use_mean; + memcpy(&use_mean, comp_data_pos, sizeof(unsigned char)); + comp_data_pos += sizeof(unsigned char); + memcpy(&mean, comp_data_pos, sizeof(double)); + comp_data_pos += sizeof(double); + size_t reg_count = 0; + + unsigned char * indicator; + size_t indicator_bitlength = (num_blocks - 1)/8 + 1; + convertByteArray2IntArray_fast_1b(num_blocks, comp_data_pos, indicator_bitlength, &indicator); + comp_data_pos += indicator_bitlength; + for(size_t i=0; i 0){ + for(int i=0; i<3; i++){ + precision[i] = bytesToDouble(comp_data_pos); + comp_data_pos += sizeof(double); + coeff_intvRadius[i] = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + unsigned int tree_size = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + int stateNum = 2*coeff_intvRadius[i]*2; + HuffmanTree* huffmanTree = createHuffmanTree(stateNum); + int nodeCount = bytesToInt_bigEndian(comp_data_pos); + node root = reconstruct_HuffTree_from_bytes_anyStates(huffmanTree, comp_data_pos+sizeof(int), nodeCount); + comp_data_pos += sizeof(int) + tree_size; + + coeff_type[i] = coeff_result_type + i * num_blocks; + size_t typeArray_size = bytesToSize(comp_data_pos); + decode(comp_data_pos + sizeof(size_t), reg_count, root, coeff_type[i]); + comp_data_pos += sizeof(size_t) + typeArray_size; + int coeff_unpred_count = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + coeff_unpred_data[i] = (double *) comp_data_pos; + comp_data_pos += coeff_unpred_count * 
sizeof(double); + SZ_ReleaseHuffman(huffmanTree); + } + } + double last_coefficients[3] = {0.0}; + int coeff_unpred_data_count[3] = {0}; + int coeff_index = 0; + //updateQuantizationInfo(intervals); + + size_t total_unpred; + memcpy(&total_unpred, comp_data_pos, sizeof(size_t)); + comp_data_pos += sizeof(size_t); + double * unpred_data = (double *) comp_data_pos; + comp_data_pos += total_unpred * sizeof(double); + + int * result_type = (int *) malloc(num_elements * sizeof(int)); + decode(comp_data_pos, num_elements, root, result_type); + SZ_ReleaseHuffman(huffmanTree); + + int intvRadius = intervals/2; + + int * type; + + double * data_pos = *data; + size_t offset_x, offset_y; + size_t current_blockcount_x, current_blockcount_y; + size_t cur_unpred_count; + + unsigned char * indicator_pos = indicator; + if(use_mean){ + type = result_type; + for(size_t i=0; iszMode == SZ_TEMPORAL_COMPRESSION) + memcpy(hist_data, (*data), num_elements*sizeof(double)); +#endif + + free(coeff_result_type); + + free(indicator); + free(result_type); +} + + +void decompressDataSeries_double_3D_nonblocked_with_blocked_regression(double** data, size_t r1, size_t r2, size_t r3, unsigned char* comp_data, double* hist_data){ + + size_t dim0_offset = r2 * r3; + size_t dim1_offset = r3; + size_t num_elements = r1 * r2 * r3; + + *data = (double*)malloc(sizeof(double)*num_elements); + + unsigned char * comp_data_pos = comp_data; + + size_t block_size = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + // calculate block dims + size_t num_x, num_y, num_z; + SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r1, num_x, block_size); + SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r2, num_y, block_size); + SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r3, num_z, block_size); + + size_t split_index_x, split_index_y, split_index_z; + size_t early_blockcount_x, early_blockcount_y, early_blockcount_z; + size_t late_blockcount_x, late_blockcount_y, late_blockcount_z; + SZ_COMPUTE_BLOCKCOUNT(r1, num_x, split_index_x, 
early_blockcount_x, late_blockcount_x); + SZ_COMPUTE_BLOCKCOUNT(r2, num_y, split_index_y, early_blockcount_y, late_blockcount_y); + SZ_COMPUTE_BLOCKCOUNT(r3, num_z, split_index_z, early_blockcount_z, late_blockcount_z); + + size_t num_blocks = num_x * num_y * num_z; + + double realPrecision = bytesToDouble(comp_data_pos); + comp_data_pos += sizeof(double); + unsigned int intervals = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + + //updateQuantizationInfo(intervals); + + unsigned int tree_size = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + + int stateNum = 2*intervals; + HuffmanTree* huffmanTree = createHuffmanTree(stateNum); + + int nodeCount = bytesToInt_bigEndian(comp_data_pos); + node root = reconstruct_HuffTree_from_bytes_anyStates(huffmanTree,comp_data_pos+4, nodeCount); + comp_data_pos += sizeof(int) + tree_size; + + double mean; + unsigned char use_mean; + memcpy(&use_mean, comp_data_pos, sizeof(unsigned char)); + comp_data_pos += sizeof(unsigned char); + memcpy(&mean, comp_data_pos, sizeof(double)); + comp_data_pos += sizeof(double); + size_t reg_count = 0; + + unsigned char * indicator; + size_t indicator_bitlength = (num_blocks - 1)/8 + 1; + convertByteArray2IntArray_fast_1b(num_blocks, comp_data_pos, indicator_bitlength, &indicator); + comp_data_pos += indicator_bitlength; + for(size_t i=0; i 0){ + for(int i=0; i<4; i++){ + precision[i] = bytesToDouble(comp_data_pos); + comp_data_pos += sizeof(double); + coeff_intvRadius[i] = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + unsigned int tree_size = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + int stateNum = 2*coeff_intvRadius[i]*2; + HuffmanTree* huffmanTree = createHuffmanTree(stateNum); + int nodeCount = bytesToInt_bigEndian(comp_data_pos); + node root = reconstruct_HuffTree_from_bytes_anyStates(huffmanTree, comp_data_pos+4, nodeCount); + comp_data_pos += sizeof(int) + tree_size; + + coeff_type[i] = 
coeff_result_type + i * num_blocks; + size_t typeArray_size = bytesToSize(comp_data_pos); + decode(comp_data_pos + sizeof(size_t), reg_count, root, coeff_type[i]); + comp_data_pos += sizeof(size_t) + typeArray_size; + int coeff_unpred_count = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + coeff_unpred_data[i] = (double *) comp_data_pos; + comp_data_pos += coeff_unpred_count * sizeof(double); + SZ_ReleaseHuffman(huffmanTree); + } + } + double last_coefficients[4] = {0.0}; + int coeff_unpred_data_count[4] = {0}; + int coeff_index = 0; + //updateQuantizationInfo(intervals); + + size_t total_unpred; + memcpy(&total_unpred, comp_data_pos, sizeof(size_t)); + comp_data_pos += sizeof(size_t); + double * unpred_data = (double *) comp_data_pos; + comp_data_pos += total_unpred * sizeof(double); + + int * result_type = (int *) malloc(num_elements * sizeof(int)); + decode(comp_data_pos, num_elements, root, result_type); + SZ_ReleaseHuffman(huffmanTree); + + int intvRadius = intervals/2; + + int * type; + double * data_pos = *data; + size_t offset_x, offset_y, offset_z; + size_t current_blockcount_x, current_blockcount_y, current_blockcount_z; + size_t cur_unpred_count; + unsigned char * indicator_pos = indicator; + if(use_mean){ + // type = result_type; + + // for(size_t i=0; i 10000){ + // printf("%d %d %d-%d: pred %.4f type %d precision %.4g last_coefficients %.4g\n", i, j, k, e, pred, type_, precision[e], last_coefficients[e]); + // exit(0); + // } + // } + // coeff_index ++; + // } + // { + // double * block_data_pos = data_pos; + // double pred; + // int type_; + // size_t index = 0; + // size_t unpredictable_count = 0; + // for(size_t ii=0; iiszMode == SZ_TEMPORAL_COMPRESSION) + memcpy(hist_data, (*data), num_elements*sizeof(double)); +#endif + + free(coeff_result_type); + + free(indicator); + free(result_type); +} diff --git a/deps/SZ/sz/src/szd_double_pwr.c b/deps/SZ/sz/src/szd_double_pwr.c new file mode 100644 index 
0000000000000000000000000000000000000000..aa9bb96ab8196a03c5b3c02b134b0e448306f5ba --- /dev/null +++ b/deps/SZ/sz/src/szd_double_pwr.c @@ -0,0 +1,1530 @@ +/** + * @file szd_double_pwr.c + * @author Sheng Di, Dingwen Tao, Xin Liang, Xiangyu Zou, Tao Lu, Wen Xia, Xuan Wang, Weizhe Zhang + * @date Feb, 2019 + * @brief + * (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. + */ + +#include +#include +#include +#include +#include "TightDataPointStorageD.h" +#include "CompressElement.h" +#include "sz.h" +#include "Huffman.h" +#include "sz_double_pwr.h" +#include "utility.h" +//#include "rw.h" + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wchar-subscripts" + +void decompressDataSeries_double_1D_pwr(double** data, size_t dataSeriesLength, TightDataPointStorageD* tdps) +{ + updateQuantizationInfo(tdps->intervals); + unsigned char tmpPrecBytes[8] = {0}; //used when needing to convert bytes to double values + unsigned char* bp = tdps->pwrErrBoundBytes; + size_t i, j, k = 0, p = 0, l = 0; // k is to track the location of residual_bit + // in resiMidBits, p is to track the + // byte_index of resiMidBits, l is for + // leadNum + + unsigned char* leadNum; + double interval = 0;// = (double)tdps->realPrecision*2; + + convertByteArray2IntArray_fast_2b(tdps->exactDataNum, tdps->leadNumArray, tdps->leadNumArray_size, &leadNum); + *data = (double*)malloc(sizeof(double)*dataSeriesLength); + + int* type = (int*)malloc(dataSeriesLength*sizeof(int)); + + HuffmanTree* huffmanTree = createHuffmanTree(tdps->stateNum); + decode_withTree(huffmanTree, tdps->typeArray, dataSeriesLength, type); + SZ_ReleaseHuffman(huffmanTree); + + unsigned char preBytes[8]; + unsigned char curBytes[8]; + + memset(preBytes, 0, 8); + + size_t curByteIndex = 0; + int reqLength = 0, reqBytesLength = 0, resiBitsLength = 0, resiBits = 0; + unsigned char leadingNum; + double medianValue, exactData, predValue = 0, realPrecision = 
0; + + medianValue = tdps->medianValue; + + int type_, updateReqLength = 0; + for (i = 0; i < dataSeriesLength; i++) + { + if(i%tdps->segment_size==0) + { + tmpPrecBytes[0] = *(bp++); + tmpPrecBytes[1] = *(bp++); + memset(&tmpPrecBytes[2], 0, 6*sizeof(unsigned char)); + + realPrecision = bytesToDouble(tmpPrecBytes); + interval = realPrecision*2; + updateReqLength = 0; + } + type_ = type[i]; + switch (type_) { + case 0: + // compute resiBits + if(updateReqLength==0) + { + computeReqLength_double(realPrecision, tdps->radExpo, &reqLength, &medianValue); + reqBytesLength = reqLength/8; + resiBitsLength = reqLength%8; + updateReqLength = 1; + } + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 8); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToDouble(curBytes); + (*data)[i] = exactData + medianValue; + memcpy(preBytes,curBytes,8); + break; + default: + //predValue = 2 * 
(*data)[i-1] - (*data)[i-2]; + predValue = (*data)[i-1]; + (*data)[i] = predValue + (type_-exe_params->intvRadius)*interval; + break; + } + //printf("%.30G\n",(*data)[i]); + } + free(leadNum); + free(type); + return; +} + +double* extractRealPrecision_2D_double(size_t R1, size_t R2, int blockSize, TightDataPointStorageD* tdps) +{ + size_t i,j,k=0, I; + unsigned char* bytes = tdps->pwrErrBoundBytes; + unsigned char tmpBytes[8] = {0}; + double* result = (double*)malloc(sizeof(double)*R1*R2); + for(i=0;iintervals); + //printf("tdps->intervals=%d, exe_params->intvRadius=%d\n", tdps->intervals, exe_params->intvRadius); + + size_t j, k = 0, p = 0, l = 0; // k is to track the location of residual_bit + // in resiMidBits, p is to track the + // byte_index of resiMidBits, l is for + // leadNum + size_t dataSeriesLength = r1*r2; + // printf ("%d %d\n", r1, r2); + + unsigned char* leadNum; + + convertByteArray2IntArray_fast_2b(tdps->exactDataNum, tdps->leadNumArray, tdps->leadNumArray_size, &leadNum); + + *data = (double*)malloc(sizeof(double)*dataSeriesLength); + + int* type = (int*)malloc(dataSeriesLength*sizeof(int)); + + HuffmanTree* huffmanTree = createHuffmanTree(tdps->stateNum); + decode_withTree(huffmanTree, tdps->typeArray, dataSeriesLength, type); + SZ_ReleaseHuffman(huffmanTree); + + unsigned char preBytes[8]; + unsigned char curBytes[8]; + + memset(preBytes, 0, 8); + + size_t curByteIndex = 0; + int reqLength, reqBytesLength, resiBitsLength, resiBits; + unsigned char leadingNum; + double medianValue, exactData, realPrecision; + int type_; + double pred1D, pred2D; + size_t ii, jj, II = 0, JJ = 0, updateReqLength = 1; + + int blockSize = computeBlockEdgeSize_2D(tdps->segment_size); + size_t R1 = 1+(r1-1)/blockSize; + size_t R2 = 1+(r2-1)/blockSize; + double* pwrErrBound = extractRealPrecision_2D_double(R1, R2, blockSize, tdps); + + realPrecision = pwrErrBound[0]; + computeReqLength_double(realPrecision, tdps->radExpo, &reqLength, &medianValue); + reqBytesLength = 
reqLength/8; + resiBitsLength = reqLength%8; + + /* Process Row-0, data 0 */ + + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 8); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToDouble(curBytes); + (*data)[0] = exactData + medianValue; + memcpy(preBytes,curBytes,8); + + /* Process Row-0, data 1 */ + type_ = type[1]; + if (type_ != 0) + { + pred1D = (*data)[0]; + (*data)[1] = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = 
getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 8); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToDouble(curBytes); + (*data)[1] = exactData + medianValue; + memcpy(preBytes,curBytes,8); + } + + /* Process Row-0, data 2 --> data r2-1 */ + for (jj = 2; jj < r2; jj++) + { + if(jj%blockSize==0) + { + II = 0; + JJ++; + realPrecision = pwrErrBound[JJ]; + updateReqLength = 0; + } + + type_ = type[jj]; + if (type_ != 0) + { + pred1D = 2*(*data)[jj-1] - (*data)[jj-2]; + (*data)[jj] = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + if(updateReqLength==0) + { + computeReqLength_double(realPrecision, tdps->radExpo, &reqLength, &medianValue); + reqBytesLength = reqLength/8; + resiBitsLength = reqLength%8; + updateReqLength = 1; + } + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 
8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 8); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToDouble(curBytes); + (*data)[jj] = exactData + medianValue; + memcpy(preBytes,curBytes,8); + } + } + + size_t index; + /* Process Row-1 --> Row-r1-1 */ + for (ii = 1; ii < r1; ii++) + { + /* Process row-ii data 0 */ + if(ii%blockSize==0) + II++; + JJ = 0; + realPrecision = pwrErrBound[II*R2+JJ]; + updateReqLength = 0; + + index = ii*r2; + + type_ = type[index]; + if (type_ != 0) + { + pred1D = (*data)[index-r2]; + (*data)[index] = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + // compute resiBits + if(updateReqLength==0) + { + computeReqLength_double(realPrecision, tdps->radExpo, &reqLength, &medianValue); + reqBytesLength = reqLength/8; + resiBitsLength = reqLength%8; + updateReqLength = 1; + } + + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = 
(tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 8); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToDouble(curBytes); + (*data)[index] = exactData + medianValue; + memcpy(preBytes,curBytes,8); + } + + /* Process row-ii data 1 --> r2-1*/ + for (jj = 1; jj < r2; jj++) + { + index = ii*r2+jj; + + if(jj%blockSize==0) + JJ++; + realPrecision = pwrErrBound[II*R2+JJ]; + updateReqLength = 0; + + type_ = type[index]; + if (type_ != 0) + { + pred2D = (*data)[index-1] + (*data)[index-r2] - (*data)[index-r2-1]; + (*data)[index] = pred2D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + // compute resiBits + if(updateReqLength==0) + { + computeReqLength_double(realPrecision, tdps->radExpo, &reqLength, &medianValue); + reqBytesLength = reqLength/8; + resiBitsLength = reqLength%8; + updateReqLength = 1; + } + + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + 
p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 8); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToDouble(curBytes); + (*data)[index] = exactData + medianValue; + memcpy(preBytes,curBytes,8); + } + } + } + + free(pwrErrBound); + free(leadNum); + free(type); + return; +} + +double* extractRealPrecision_3D_double(size_t R1, size_t R2, size_t R3, int blockSize, TightDataPointStorageD* tdps) +{ + size_t i,j,k=0, IR, JR, p = 0; + size_t R23 = R2*R3; + unsigned char* bytes = tdps->pwrErrBoundBytes; + unsigned char tmpBytes[4] = {0}; + double* result = (double*)malloc(sizeof(double)*R1*R2*R3); + for(i=0;iintervals); + size_t j, k = 0, p = 0, l = 0; // k is to track the location of residual_bit + // in resiMidBits, p is to track the + // byte_index of resiMidBits, l is for + // leadNum + size_t dataSeriesLength = r1*r2*r3; + size_t r23 = r2*r3; +// printf ("%d %d %d\n", r1, r2, r3); + + unsigned char* leadNum; + + convertByteArray2IntArray_fast_2b(tdps->exactDataNum, tdps->leadNumArray, tdps->leadNumArray_size, &leadNum); + + *data = (double*)malloc(sizeof(double)*dataSeriesLength); + + int* type = (int*)malloc(dataSeriesLength*sizeof(int)); + + HuffmanTree* huffmanTree = createHuffmanTree(tdps->stateNum); + decode_withTree(huffmanTree, tdps->typeArray, dataSeriesLength, type); + SZ_ReleaseHuffman(huffmanTree); + + unsigned char preBytes[8]; + unsigned char curBytes[8]; + + memset(preBytes, 0, 8); + + size_t 
curByteIndex = 0; + int reqLength, reqBytesLength, resiBitsLength, resiBits; + unsigned char leadingNum; + double medianValue, exactData, realPrecision; + int type_; + double pred1D, pred2D, pred3D; + size_t ii, jj, kk, II = 0, JJ = 0, KK = 0, updateReqLength = 1; + + int blockSize = computeBlockEdgeSize_3D(tdps->segment_size); + size_t R1 = 1+(r1-1)/blockSize; + size_t R2 = 1+(r2-1)/blockSize; + size_t R3 = 1+(r3-1)/blockSize; + size_t R23 = R2*R3; + double* pwrErrBound = extractRealPrecision_3D_double(R1, R2, R3, blockSize, tdps); + + realPrecision = pwrErrBound[0]; + computeReqLength_double(realPrecision, tdps->radExpo, &reqLength, &medianValue); + reqBytesLength = reqLength/8; + resiBitsLength = reqLength%8; + + /////////////////////////// Process layer-0 /////////////////////////// + /* Process Row-0 data 0*/ + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 8); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + 
curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToDouble(curBytes); + (*data)[0] = exactData + medianValue; + memcpy(preBytes,curBytes,8); + + /* Process Row-0, data 1 */ + pred1D = (*data)[0]; + + type_ = type[1]; + if (type_ != 0) + { + (*data)[1] = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 8); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToDouble(curBytes); + (*data)[1] = exactData + medianValue; + memcpy(preBytes,curBytes,8); + } + + /* Process Row-0, data 2 --> data r3-1 */ + for (jj = 2; jj < r3; jj++) + { + if(jj%blockSize==0) + { + KK = 0;//dimension 1 (top) + II = 0;//dimension 2 (mid) + JJ++; + realPrecision = pwrErrBound[JJ]; + updateReqLength = 0; + } + type_ = type[jj]; + if (type_ != 0) + { + pred1D = 2*(*data)[jj-1] - (*data)[jj-2]; + 
(*data)[jj] = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + // compute resiBits + if(updateReqLength==0) + { + computeReqLength_double(realPrecision, tdps->radExpo, &reqLength, &medianValue); + reqBytesLength = reqLength/8; + resiBitsLength = reqLength%8; + updateReqLength = 1; + } + + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 8); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToDouble(curBytes); + (*data)[jj] = exactData + medianValue; + memcpy(preBytes,curBytes,8); + } + } + + size_t index; + /* Process Row-1 --> Row-r2-1 */ + for (ii = 1; ii < r2; ii++) + { + /* Process row-ii data 0 */ + if(ii%blockSize==0) + II++; + JJ = 0; + realPrecision = pwrErrBound[II*R3+JJ]; + updateReqLength = 0; + + index = ii*r3; + + type_ = type[index]; + if (type_ != 0) + { + pred1D = (*data)[index-r3]; + (*data)[index] = pred1D + 2 * (type_ - 
exe_params->intvRadius) * realPrecision; + } + else + { + // compute resiBits + if(updateReqLength==0) + { + computeReqLength_double(realPrecision, tdps->radExpo, &reqLength, &medianValue); + reqBytesLength = reqLength/8; + resiBitsLength = reqLength%8; + updateReqLength = 1; + } + + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 8); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToDouble(curBytes); + (*data)[index] = exactData + medianValue; + memcpy(preBytes,curBytes,8); + } + + /* Process row-ii data 1 --> r3-1*/ + for (jj = 1; jj < r3; jj++) + { + index = ii*r3+jj; + + if(jj%blockSize==0) + JJ++; + realPrecision = pwrErrBound[II*R3+JJ]; + updateReqLength = 0; + + type_ = type[index]; + if (type_ != 0) + { + pred2D = (*data)[index-1] + (*data)[index-r3] - (*data)[index-r3-1]; + (*data)[index] = pred2D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + 
{ + // compute resiBits + if(updateReqLength==0) + { + computeReqLength_double(realPrecision, tdps->radExpo, &reqLength, &medianValue); + reqBytesLength = reqLength/8; + resiBitsLength = reqLength%8; + updateReqLength = 1; + } + + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 8); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToDouble(curBytes); + (*data)[index] = exactData + medianValue; + memcpy(preBytes,curBytes,8); + } + } + } + + /////////////////////////// Process layer-1 --> layer-r1-1 /////////////////////////// + + for (kk = 1; kk < r1; kk++) + { + /* Process Row-0 data 0*/ + index = kk*r23; + if(kk%blockSize==0) + KK++; + II = 0; + JJ = 0; + + realPrecision = pwrErrBound[KK*R23]; + updateReqLength = 0; + + type_ = type[index]; + if (type_ != 0) + { + pred1D = (*data)[index-r23]; + (*data)[index] = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + 
} + else + { + // compute resiBits + if(updateReqLength==0) + { + computeReqLength_double(realPrecision, tdps->radExpo, &reqLength, &medianValue); + reqBytesLength = reqLength/8; + resiBitsLength = reqLength%8; + updateReqLength = 1; + } + + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 8); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToDouble(curBytes); + (*data)[index] = exactData + medianValue; + memcpy(preBytes,curBytes,8); + } + + /* Process Row-0 data 1 --> data r3-1 */ + for (jj = 1; jj < r3; jj++) + { + index = kk*r23+jj; + + if(jj%blockSize==0) + JJ++; + + realPrecision = pwrErrBound[KK*R23+JJ]; + updateReqLength = 0; + + type_ = type[index]; + if (type_ != 0) + { + pred2D = (*data)[index-1] + (*data)[index-r23] - (*data)[index-r23-1]; + (*data)[index] = pred2D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + // compute resiBits + 
if(updateReqLength==0) + { + computeReqLength_double(realPrecision, tdps->radExpo, &reqLength, &medianValue); + reqBytesLength = reqLength/8; + resiBitsLength = reqLength%8; + updateReqLength = 1; + } + + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 8); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToDouble(curBytes); + (*data)[index] = exactData + medianValue; + memcpy(preBytes,curBytes,8); + } + } + + /* Process Row-1 --> Row-r2-1 */ + for (ii = 1; ii < r2; ii++) + { + /* Process Row-i data 0 */ + index = kk*r23 + ii*r3; + + if(ii%blockSize==0) + II++; + JJ = 0; + + realPrecision = pwrErrBound[KK*R23+II*R3]; + updateReqLength = 0; + + type_ = type[index]; + if (type_ != 0) + { + pred2D = (*data)[index-r3] + (*data)[index-r23] - (*data)[index-r23-r3]; + (*data)[index] = pred2D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + // compute resiBits 
+ if(updateReqLength==0) + { + computeReqLength_double(realPrecision, tdps->radExpo, &reqLength, &medianValue); + reqBytesLength = reqLength/8; + resiBitsLength = reqLength%8; + updateReqLength = 1; + } + + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 8); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToDouble(curBytes); + (*data)[index] = exactData + medianValue; + memcpy(preBytes,curBytes,8); + } + + /* Process Row-i data 1 --> data r3-1 */ + for (jj = 1; jj < r3; jj++) + { + index = kk*r23 + ii*r3 + jj; + if(jj%blockSize==0) + JJ++; + + realPrecision = pwrErrBound[KK*R23+II*R3+JJ]; + updateReqLength = 0; + + type_ = type[index]; + if (type_ != 0) + { + pred3D = (*data)[index-1] + (*data)[index-r3] + (*data)[index-r23] + - (*data)[index-r3-1] - (*data)[index-r23-r3] - (*data)[index-r23-1] + (*data)[index-r23-r3-1]; + (*data)[index] = pred3D + 2 * (type_ - 
exe_params->intvRadius) * realPrecision; + } + else + { + // compute resiBits + if(updateReqLength==0) + { + computeReqLength_double(realPrecision, tdps->radExpo, &reqLength, &medianValue); + reqBytesLength = reqLength/8; + resiBitsLength = reqLength%8; + updateReqLength = 1; + } + + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 8); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToDouble(curBytes); + (*data)[index] = exactData + medianValue; + memcpy(preBytes,curBytes,8); + } + } + } + } + + free(pwrErrBound); + free(leadNum); + free(type); + return; +} + +void decompressDataSeries_double_1D_pwrgroup(double** data, size_t dataSeriesLength, TightDataPointStorageD* tdps) +{ + double *posGroups, *negGroups, *groups; + double pos_01_group, neg_01_group; + int *posFlags, *negFlags; + + updateQuantizationInfo(tdps->intervals); + + unsigned char* leadNum; + double interval;// = 
(double)tdps->realPrecision*2; + + convertByteArray2IntArray_fast_2b(tdps->exactDataNum, tdps->leadNumArray, tdps->leadNumArray_size, &leadNum); + + *data = (double*)malloc(sizeof(double)*dataSeriesLength); + + int* type = (int*)malloc(dataSeriesLength*sizeof(int)); + + HuffmanTree* huffmanTree = createHuffmanTree(tdps->stateNum); + decode_withTree(huffmanTree, tdps->typeArray, dataSeriesLength, type); + SZ_ReleaseHuffman(huffmanTree); + + createRangeGroups_double(&posGroups, &negGroups, &posFlags, &negFlags); + + double realGroupPrecision; + double realPrecision = tdps->realPrecision; + char* groupID = decompressGroupIDArray(tdps->pwrErrBoundBytes, tdps->dataSeriesLength); + + //note that the groupID values here are [1,2,3,....,18] or [-1,-2,...,-18] + + double* groupErrorBounds = generateGroupErrBounds(confparams_dec->errorBoundMode, realPrecision, confparams_dec->pw_relBoundRatio); + exe_params->intvRadius = generateGroupMaxIntervalCount(groupErrorBounds); + + size_t nbBins = (size_t)(1/confparams_dec->pw_relBoundRatio + 0.5); + if(nbBins%2==1) + nbBins++; + exe_params->intvRadius = nbBins; + + + + unsigned char preBytes[8]; + unsigned char curBytes[8]; + + memset(preBytes, 0, 8); + + size_t curByteIndex = 0; + int reqLength, reqBytesLength = 0, resiBitsLength = 0, resiBits; + unsigned char leadingNum; + double medianValue, exactData, curValue, predValue; + + medianValue = tdps->medianValue; + + size_t i, j, k = 0, p = 0, l = 0; // k is to track the location of residual_bit + // in resiMidBits, p is to track the + // byte_index of resiMidBits, l is for + // leadNum + + int type_, updateReqLength = 0; + char rawGrpID = 0, indexGrpID = 0; + for (i = 0; i < dataSeriesLength; i++) + { + rawGrpID = groupID[i]; + + if(rawGrpID >= 2) + { + groups = posGroups; + indexGrpID = rawGrpID - 2; + } + else if(rawGrpID <= -2) + { + groups = negGroups; + indexGrpID = -rawGrpID - 2; } + else if(rawGrpID == 1) + { + groups = &pos_01_group; + indexGrpID = 0; + } + else //rawGrpID 
== -1 + { + groups = &neg_01_group; + indexGrpID = 0; + } + + type_ = type[i]; + switch (type_) { + case 0: + // compute resiBits + if(updateReqLength==0) + { + computeReqLength_double(realPrecision, tdps->radExpo, &reqLength, &medianValue); + reqBytesLength = reqLength/8; + resiBitsLength = reqLength%8; + updateReqLength = 1; + } + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 8); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToDouble(curBytes); + exactData = exactData + medianValue; + (*data)[i] = exactData; + memcpy(preBytes,curBytes,8); + + groups[indexGrpID] = exactData; + + break; + default: + predValue = groups[indexGrpID]; //Here, groups[indexGrpID] is the previous value. 
+ realGroupPrecision = groupErrorBounds[indexGrpID]; + interval = realGroupPrecision*2; + + curValue = predValue + (type_-exe_params->intvRadius)*interval; + + //groupNum = computeGroupNum_double(curValue); + + if((curValue>0&&rawGrpID<0)||(curValue<0&&rawGrpID>0)) + curValue = 0; + //else + //{ + // realGrpID = fabs(rawGrpID)-2; + // if(groupNum0?pow(2,realGrpID):-pow(2,realGrpID); + // else if(groupNum>realGrpID) + // curValue = rawGrpID>0?pow(2,groupNum):-pow(2,groupNum); + //} + + (*data)[i] = curValue; + groups[indexGrpID] = curValue; + break; + } + } + + free(leadNum); + free(type); + + free(posGroups); + free(negGroups); + free(posFlags); + free(negFlags); + free(groupErrorBounds); + free(groupID); +} + +void decompressDataSeries_double_1D_pwr_pre_log(double** data, size_t dataSeriesLength, TightDataPointStorageD* tdps) { + + decompressDataSeries_double_1D(data, dataSeriesLength, NULL, tdps); + double threshold = tdps->minLogValue; + if(tdps->pwrErrBoundBytes_size > 0){ + unsigned char * signs; + sz_lossless_decompress(ZSTD_COMPRESSOR, tdps->pwrErrBoundBytes, tdps->pwrErrBoundBytes_size, &signs, dataSeriesLength); + + for(size_t i=0; iminLogValue; + if(tdps->pwrErrBoundBytes_size > 0){ + unsigned char * signs; + sz_lossless_decompress(ZSTD_COMPRESSOR, tdps->pwrErrBoundBytes, tdps->pwrErrBoundBytes_size, &signs, dataSeriesLength); + + for(size_t i=0; iminLogValue; + if(tdps->pwrErrBoundBytes_size > 0){ + unsigned char * signs; + sz_lossless_decompress(ZSTD_COMPRESSOR, tdps->pwrErrBoundBytes, tdps->pwrErrBoundBytes_size, &signs, dataSeriesLength); + + for(size_t i=0; iminLogValue; + uint64_t* ptr; + + if(tdps->pwrErrBoundBytes_size > 0){ + unsigned char * signs = NULL; + if(tdps->pwrErrBoundBytes_size==0) + { + signs = (unsigned char*)malloc(dataSeriesLength); + memset(signs, 0, dataSeriesLength); + } + else + sz_lossless_decompress(ZSTD_COMPRESSOR, tdps->pwrErrBoundBytes, tdps->pwrErrBoundBytes_size, &signs, dataSeriesLength); + for(size_t i=0; i= 0){ + 
(*data)[i] = 0; + continue; + } + if(signs[i]){ + ptr = (uint64_t*)(*data) + i; + *ptr |= 0x8000000000000000; + } + } + free(signs); + } + else{ + for(size_t i=0; iminLogValue; + uint64_t* ptr; + + if(tdps->pwrErrBoundBytes_size > 0){ + unsigned char * signs = NULL; + if(tdps->pwrErrBoundBytes_size==0) + { + signs = (unsigned char*)malloc(dataSeriesLength); + memset(signs, 0, dataSeriesLength); + } + else + sz_lossless_decompress(ZSTD_COMPRESSOR, tdps->pwrErrBoundBytes, tdps->pwrErrBoundBytes_size, &signs, dataSeriesLength); + for(size_t i=0; i= 0){ + (*data)[i] = 0; + continue; + } + if(signs[i]){ + ptr = (uint64_t*)(*data) + i; + *ptr |= 0x8000000000000000; + } + } + free(signs); + } + else{ + for(size_t i=0; iminLogValue; + if(tdps->pwrErrBoundBytes_size > 0){ + unsigned char * signs = NULL; + uint64_t* ptr; + if(tdps->pwrErrBoundBytes_size==0) + { + signs = (unsigned char*)malloc(dataSeriesLength); + memset(signs, 0, dataSeriesLength); + } + else + sz_lossless_decompress(ZSTD_COMPRESSOR, tdps->pwrErrBoundBytes, tdps->pwrErrBoundBytes_size, &signs, dataSeriesLength); + for(size_t i=0; i= 0) { + (*data)[i] = 0; + continue; + } + if(signs[i]) { + ptr = (uint64_t*)(*data)+i; + *ptr |= 0x8000000000000000; + } + } + free(signs); + } + else{ + for(size_t i=0; i +#include +#include +#include "szd_double.h" +#include "TightDataPointStorageD.h" +#include "sz.h" +#include "Huffman.h" +#include "szd_double_ts.h" + +void decompressDataSeries_double_1D_ts(double** data, size_t dataSeriesLength, double* hist_data, TightDataPointStorageD* tdps) +{ + double* lastSnapshotData = hist_data; + updateQuantizationInfo(tdps->intervals); + size_t i, j, k = 0, p = 0, l = 0; // k is to track the location of residual_bit + // in resiMidBits, p is to track the + // byte_index of resiMidBits, l is for + // leadNum + unsigned char* leadNum; + double interval = tdps->realPrecision*2; + + convertByteArray2IntArray_fast_2b(tdps->exactDataNum, tdps->leadNumArray, tdps->leadNumArray_size, 
&leadNum); + *data = (double*)malloc(sizeof(double)*dataSeriesLength); + + int* type = (int*)malloc(dataSeriesLength*sizeof(int)); + + HuffmanTree* huffmanTree = createHuffmanTree(tdps->stateNum); + decode_withTree(huffmanTree, tdps->typeArray, dataSeriesLength, type); + SZ_ReleaseHuffman(huffmanTree); + + unsigned char preBytes[8]; + unsigned char curBytes[8]; + + memset(preBytes, 0, 8); + + size_t curByteIndex = 0; + int reqBytesLength, resiBitsLength, resiBits; + unsigned char leadingNum; + double medianValue, exactData, predValue = 0; + + reqBytesLength = tdps->reqLength/8; + resiBitsLength = tdps->reqLength%8; + medianValue = tdps->medianValue; + + int type_; + for (i = 0; i < dataSeriesLength; i++) { + type_ = type[i]; + switch (type_) { + case 0: + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 8); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = 
bytesToDouble(curBytes); + (*data)[i] = exactData + medianValue; + memcpy(preBytes,curBytes,8); + break; + default: + //predValue = (*data)[i-1]; + if(confparams_dec->szMode == SZ_TEMPORAL_COMPRESSION) + predValue = lastSnapshotData[i]; + (*data)[i] = predValue + (type_-exe_params->intvRadius)*interval; + break; + } + //printf("%.30G\n",(*data)[i]); + } + + memcpy(hist_data, (*data), dataSeriesLength*sizeof(double)); + + free(leadNum); + free(type); + return; +} diff --git a/deps/SZ/sz/src/szd_float.c b/deps/SZ/sz/src/szd_float.c new file mode 100644 index 0000000000000000000000000000000000000000..2c4a1f12aef18eb6a34497eca34118f291c94759 --- /dev/null +++ b/deps/SZ/sz/src/szd_float.c @@ -0,0 +1,7720 @@ +/** + * @file szd_float.c + * @author Sheng Di, Dingwen Tao, Xin Liang, Xiangyu Zou, Tao Lu, Wen Xia, Xuan Wang, Weizhe Zhang + * @date Aug, 2018 + * @brief + * (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. + */ + +#include +#include +#include +#include "szd_float.h" +#include "TightDataPointStorageF.h" +#include "sz.h" +#include "Huffman.h" +#include "szd_float_pwr.h" +#include "szd_float_ts.h" +#include "utility.h" + + +//struct timeval startTime_; +//struct timeval endTime_; /* Start and end times */ +//struct timeval costStart_; /*only used for recording the cost*/ +//double totalCost_ = 0; + +/*void cost_start_() +{ + totalCost_ = 0; + gettimeofday(&costStart_, NULL); +} + +void cost_end_() +{ + double elapsed; + struct timeval costEnd; + gettimeofday(&costEnd, NULL); + elapsed = ((costEnd.tv_sec*1000000+costEnd.tv_usec)-(costStart_.tv_sec*1000000+costStart_.tv_usec))/1000000.0; + totalCost_ += elapsed; +}*/ + + +/** + * + * int compressionType: 1 (time-based compression) ; 0 (space-based compression) + * hist_data: only valid when compressionType==1, hist_data is the historical dataset such as the data in previous time step + * + * @return status SUCCESSFUL (SZ_SCES) or not (other 
error codes) f + * */ +int SZ_decompress_args_float(float** newData, size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, unsigned char* cmpBytes, +size_t cmpSize, int compressionType, float* hist_data) +{ + int status = SZ_SCES; + size_t dataLength = computeDataLength(r5,r4,r3,r2,r1); + + //unsigned char* tmpBytes; + size_t targetUncompressSize = dataLength <<2; //i.e., *4 + //tmpSize must be "much" smaller than dataLength + size_t i, tmpSize = 8+MetaDataByteLength+exe_params->SZ_SIZE_TYPE; + unsigned char* szTmpBytes; + + if(cmpSize!=8+4+MetaDataByteLength && cmpSize!=8+8+MetaDataByteLength) //4,8 means two posibilities of SZ_SIZE_TYPE + { + confparams_dec->losslessCompressor = is_lossless_compressed_data(cmpBytes, cmpSize); + if(confparams_dec->szMode!=SZ_TEMPORAL_COMPRESSION) + { + if(confparams_dec->losslessCompressor!=-1) + confparams_dec->szMode = SZ_BEST_COMPRESSION; + else + confparams_dec->szMode = SZ_BEST_SPEED; + } + + if(confparams_dec->szMode==SZ_BEST_SPEED) + { + tmpSize = cmpSize; + szTmpBytes = cmpBytes; + } + else if(confparams_dec->szMode==SZ_BEST_COMPRESSION || confparams_dec->szMode==SZ_DEFAULT_COMPRESSION || confparams_dec->szMode==SZ_TEMPORAL_COMPRESSION) + { + if(targetUncompressSizelosslessCompressor, cmpBytes, (unsigned long)cmpSize, &szTmpBytes, (unsigned long)targetUncompressSize+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE);// (unsigned long)targetUncompressSize+8: consider the total length under lossless compression mode is actually 3+4+1+targetUncompressSize + //szTmpBytes = (unsigned char*)malloc(sizeof(unsigned char)*tmpSize); + //memcpy(szTmpBytes, tmpBytes, tmpSize); + //free(tmpBytes); //release useless memory + } + else + { + printf("Wrong value of confparams_dec->szMode in the double compressed bytes.\n"); + status = SZ_MERR; + return status; + } + } + else + szTmpBytes = cmpBytes; + + confparams_dec->sol_ID = szTmpBytes[4+14]; //szTmpBytes: version(3bytes), samebyte(1byte), [14]:sol_ID=SZ or SZ_Transpose + + //TODO: convert 
szTmpBytes to data array. + TightDataPointStorageF* tdps; + int errBoundMode = new_TightDataPointStorageF_fromFlatBytes(&tdps, szTmpBytes, tmpSize); + + //writeByteData(tdps->typeArray, tdps->typeArray_size, "decompress-typebytes.tbt"); + int dim = computeDimension(r5,r4,r3,r2,r1); + int floatSize = sizeof(float); + if(tdps->isLossless) + { + *newData = (float*)malloc(floatSize*dataLength); + if(sysEndianType==BIG_ENDIAN_SYSTEM) + { + memcpy(*newData, szTmpBytes+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE, dataLength*floatSize); + } + else + { + unsigned char* p = szTmpBytes+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE; + for(i=0;isol_ID==SZ_Transpose) + { + getSnapshotData_float_1D(newData,dataLength,tdps, errBoundMode, 0, hist_data); + } + else //confparams_dec->sol_ID==SZ + { + if(tdps->raBytes_size > 0) //v2.0 + { + if (dim == 1) + getSnapshotData_float_1D(newData,r1,tdps, errBoundMode, 0, hist_data); + else if(dim == 2) + decompressDataSeries_float_2D_nonblocked_with_blocked_regression(newData, r2, r1, tdps->raBytes, hist_data); + else if(dim == 3) + decompressDataSeries_float_3D_nonblocked_with_blocked_regression(newData, r3, r2, r1, tdps->raBytes, hist_data); + else if(dim == 4) + decompressDataSeries_float_3D_nonblocked_with_blocked_regression(newData, r4*r3, r2, r1, tdps->raBytes, hist_data); + else + { + printf("Error: currently support only at most 4 dimensions!\n"); + status = SZ_DERR; + } + } + else //1.4.13 or time-based compression + { + if (dim == 1) + getSnapshotData_float_1D(newData,r1,tdps, errBoundMode, compressionType, hist_data); + else if (dim == 2) + getSnapshotData_float_2D(newData,r2,r1,tdps, errBoundMode, compressionType, hist_data); + else if (dim == 3) + getSnapshotData_float_3D(newData,r3,r2,r1,tdps, errBoundMode, compressionType, hist_data); + else if (dim == 4) + getSnapshotData_float_4D(newData,r4,r3,r2,r1,tdps, errBoundMode, compressionType, hist_data); + else + { + printf("Error: currently support only at most 4 
dimensions!\n"); + status = SZ_DERR; + } + } + } + + //cost_start_(); + if(confparams_dec->protectValueRange) + { + float* nd = *newData; + float min = confparams_dec->fmin; + float max = confparams_dec->fmax; + for(i=0;i= min) + continue; + if(v < min) + nd[i] = min; + else if(v > max) + nd[i] = max; + } + } + //cost_end_(); + //printf("totalCost_=%f\n", totalCost_); + free_TightDataPointStorageF2(tdps); + if(confparams_dec->szMode!=SZ_BEST_SPEED && cmpSize!=8+MetaDataByteLength+exe_params->SZ_SIZE_TYPE) + free(szTmpBytes); + return status; +} + +void decompressDataSeries_float_1D(float** data, size_t dataSeriesLength, float* hist_data, TightDataPointStorageF* tdps) +{ + //updateQuantizationInfo(tdps->intervals); + int intvRadius = tdps->intervals/2; + size_t i, j, k = 0, p = 0, l = 0; // k is to track the location of residual_bit + // in resiMidBits, p is to track the + // byte_index of resiMidBits, l is for + // leadNum + unsigned char* leadNum; + float interval = tdps->realPrecision*2; + + convertByteArray2IntArray_fast_2b(tdps->exactDataNum, tdps->leadNumArray, tdps->leadNumArray_size, &leadNum); + *data = (float*)malloc(sizeof(float)*dataSeriesLength); + + int* type = (int*)malloc(dataSeriesLength*sizeof(int)); + + HuffmanTree* huffmanTree = createHuffmanTree(tdps->stateNum); + decode_withTree(huffmanTree, tdps->typeArray, dataSeriesLength, type); + SZ_ReleaseHuffman(huffmanTree); + + unsigned char preBytes[4]; + unsigned char curBytes[4]; + + memset(preBytes, 0, 4); + + size_t curByteIndex = 0; + int reqBytesLength, resiBitsLength, resiBits; + unsigned char leadingNum; + float medianValue, exactData, predValue; + + reqBytesLength = tdps->reqLength/8; + resiBitsLength = tdps->reqLength%8; + medianValue = tdps->medianValue; + + int type_; + for (i = 0; i < dataSeriesLength; i++) { + type_ = type[i]; + switch (type_) { + case 0: + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = 
getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 4); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToFloat(curBytes); + (*data)[i] = exactData + medianValue; + memcpy(preBytes,curBytes,4); + break; + default: + //predValue = 2 * (*data)[i-1] - (*data)[i-2]; + predValue = (*data)[i-1]; + (*data)[i] = predValue + (float)(type_-intvRadius)*interval; + break; + } + //printf("%.30G\n",(*data)[i]); + } + +#ifdef HAVE_TIMECMPR + if(confparams_dec->szMode == SZ_TEMPORAL_COMPRESSION) + memcpy(hist_data, (*data), dataSeriesLength*sizeof(float)); +#endif + + free(leadNum); + free(type); + return; +} + +void decompressDataSeries_float_2D(float** data, size_t r1, size_t r2, float* hist_data, TightDataPointStorageF* tdps) +{ + //updateQuantizationInfo(tdps->intervals); + int intvRadius = tdps->intervals/2; + //printf("tdps->intervals=%d, exe_params->intvRadius=%d\n", tdps->intervals, exe_params->intvRadius); + + size_t j, k = 0, p = 0, l = 0; // k is to track the location 
of residual_bit + // in resiMidBits, p is to track the + // byte_index of resiMidBits, l is for + // leadNum + size_t dataSeriesLength = r1*r2; + // printf ("%d %d\n", r1, r2); + + unsigned char* leadNum; + float realPrecision = tdps->realPrecision; + + convertByteArray2IntArray_fast_2b(tdps->exactDataNum, tdps->leadNumArray, tdps->leadNumArray_size, &leadNum); + + *data = (float*)malloc(sizeof(float)*dataSeriesLength); + + int* type = (int*)malloc(dataSeriesLength*sizeof(int)); + + HuffmanTree* huffmanTree = createHuffmanTree(tdps->stateNum); + decode_withTree(huffmanTree, tdps->typeArray, dataSeriesLength, type); + SZ_ReleaseHuffman(huffmanTree); + + unsigned char preBytes[4]; + unsigned char curBytes[4]; + + memset(preBytes, 0, 4); + + size_t curByteIndex = 0; + int reqBytesLength, resiBitsLength, resiBits; + unsigned char leadingNum; + float medianValue, exactData; + int type_; + + reqBytesLength = tdps->reqLength/8; + resiBitsLength = tdps->reqLength%8; + medianValue = tdps->medianValue; + + float pred1D, pred2D; + size_t ii, jj; + + /* Process Row-0, data 0 */ + + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 4); + leadingNum = 
leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToFloat(curBytes); + (*data)[0] = exactData + medianValue; + memcpy(preBytes,curBytes,4); + + /* Process Row-0, data 1 */ + type_ = type[1]; + if (type_ != 0) + { + pred1D = (*data)[0]; + (*data)[1] = pred1D + 2 * (type_ - intvRadius) * realPrecision; + } + else + { + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 4); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToFloat(curBytes); + (*data)[1] = exactData + medianValue; + memcpy(preBytes,curBytes,4); + } + + /* Process Row-0, data 2 --> data r2-1 */ + for (jj = 2; jj < r2; 
jj++) + { + type_ = type[jj]; + if (type_ != 0) + { + pred1D = 2*(*data)[jj-1] - (*data)[jj-2]; + (*data)[jj] = pred1D + 2 * (type_ - intvRadius) * realPrecision; + } + else + { + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 4); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToFloat(curBytes); + (*data)[jj] = exactData + medianValue; + memcpy(preBytes,curBytes,4); + } + } + + size_t index; + /* Process Row-1 --> Row-r1-1 */ + for (ii = 1; ii < r1; ii++) + { + /* Process row-ii data 0 */ + index = ii*r2; + + type_ = type[index]; + if (type_ != 0) + { + pred1D = (*data)[index-r2]; + (*data)[index] = pred1D + 2 * (type_ - intvRadius) * realPrecision; + } + else + { + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int 
code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 4); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToFloat(curBytes); + (*data)[index] = exactData + medianValue; + memcpy(preBytes,curBytes,4); + } + + /* Process row-ii data 1 --> r2-1*/ + for (jj = 1; jj < r2; jj++) + { + index = ii*r2+jj; + pred2D = (*data)[index-1] + (*data)[index-r2] - (*data)[index-r2-1]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred2D + 2 * (type_ - intvRadius) * realPrecision; + } + else + { + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) 
<< leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 4); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToFloat(curBytes); + (*data)[index] = exactData + medianValue; + memcpy(preBytes,curBytes,4); + } + } + } + +#ifdef HAVE_TIMECMPR + if(confparams_dec->szMode == SZ_TEMPORAL_COMPRESSION) + memcpy(hist_data, (*data), dataSeriesLength*sizeof(float)); +#endif + + free(leadNum); + free(type); + return; +} + +void decompressDataSeries_float_3D(float** data, size_t r1, size_t r2, size_t r3, float* hist_data, TightDataPointStorageF* tdps) +{ + //updateQuantizationInfo(tdps->intervals); + int intvRadius = tdps->intervals/2; + size_t j, k = 0, p = 0, l = 0; // k is to track the location of residual_bit + // in resiMidBits, p is to track the + // byte_index of resiMidBits, l is for + // leadNum + size_t dataSeriesLength = r1*r2*r3; + size_t r23 = r2*r3; + unsigned char* leadNum; + float realPrecision = tdps->realPrecision; + + //TODO + convertByteArray2IntArray_fast_2b(tdps->exactDataNum, tdps->leadNumArray, tdps->leadNumArray_size, &leadNum); + + *data = (float*)malloc(sizeof(float)*dataSeriesLength); + int* type = (int*)malloc(dataSeriesLength*sizeof(int)); + + HuffmanTree* huffmanTree = createHuffmanTree(tdps->stateNum); + decode_withTree(huffmanTree, tdps->typeArray, dataSeriesLength, type); + SZ_ReleaseHuffman(huffmanTree); + + unsigned char preBytes[4]; + unsigned char curBytes[4]; + + 
memset(preBytes, 0, 4); + size_t curByteIndex = 0; + int reqBytesLength, resiBitsLength, resiBits; + unsigned char leadingNum; + float medianValue, exactData; + int type_; + + reqBytesLength = tdps->reqLength/8; + resiBitsLength = tdps->reqLength%8; + medianValue = tdps->medianValue; + + float pred1D, pred2D, pred3D; + size_t ii, jj, kk; + + /////////////////////////// Process layer-0 /////////////////////////// + /* Process Row-0 data 0*/ + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 4); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + exactData = bytesToFloat(curBytes); + (*data)[0] = exactData + medianValue; + memcpy(preBytes,curBytes,4); + + /* Process Row-0, data 1 */ + pred1D = (*data)[0]; + + type_ = type[1]; + if (type_ != 0) + { + (*data)[1] = pred1D + 2 * (type_ - intvRadius) * realPrecision; + } + else + { + // compute resiBits + resiBits = 0; + if 
(resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 4); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToFloat(curBytes); + (*data)[1] = exactData + medianValue; + memcpy(preBytes,curBytes,4); + } + /* Process Row-0, data 2 --> data r3-1 */ + for (jj = 2; jj < r3; jj++) + { + pred1D = 2*(*data)[jj-1] - (*data)[jj-2]; + + type_ = type[jj]; + if (type_ != 0) + { + (*data)[jj] = pred1D + 2 * (type_ - intvRadius) * realPrecision; + } + else + { + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = 
-rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 4); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToFloat(curBytes); + (*data)[jj] = exactData + medianValue; + memcpy(preBytes,curBytes,4); + } + } + + size_t index; + /* Process Row-1 --> Row-r2-1 */ + for (ii = 1; ii < r2; ii++) + { + /* Process row-ii data 0 */ + index = ii*r3; + pred1D = (*data)[index-r3]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred1D + 2 * (type_ - intvRadius) * realPrecision; + } + else + { + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += 
resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 4); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToFloat(curBytes); + (*data)[index] = exactData + medianValue; + memcpy(preBytes,curBytes,4); + } + + /* Process row-ii data 1 --> r3-1*/ + for (jj = 1; jj < r3; jj++) + { + index = ii*r3+jj; + pred2D = (*data)[index-1] + (*data)[index-r3] - (*data)[index-r3-1]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred2D + 2 * (type_ - intvRadius) * realPrecision; + } + else + { + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 4); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + 
curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToFloat(curBytes); + (*data)[index] = exactData + medianValue; + memcpy(preBytes,curBytes,4); + } + } + } + + /////////////////////////// Process layer-1 --> layer-r1-1 /////////////////////////// + + for (kk = 1; kk < r1; kk++) + { + /* Process Row-0 data 0*/ + index = kk*r23; + pred1D = (*data)[index-r23]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred1D + 2 * (type_ - intvRadius) * realPrecision; + } + else + { + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 4); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToFloat(curBytes); + (*data)[index] = exactData + medianValue; + memcpy(preBytes,curBytes,4); + } + + /* Process Row-0 data 1 --> data r3-1 */ + for (jj = 1; jj < r3; jj++) + { + index = kk*r23+jj; + pred2D = (*data)[index-1] + (*data)[index-r23] - 
(*data)[index-r23-1]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred2D + 2 * (type_ - intvRadius) * realPrecision; + } + else + { + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 4); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToFloat(curBytes); + (*data)[index] = exactData + medianValue; + memcpy(preBytes,curBytes,4); + } + } + + /* Process Row-1 --> Row-r2-1 */ + for (ii = 1; ii < r2; ii++) + { + /* Process Row-i data 0 */ + index = kk*r23 + ii*r3; + pred2D = (*data)[index-r3] + (*data)[index-r23] - (*data)[index-r23-r3]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred2D + 2 * (type_ - intvRadius) * realPrecision; + } + else + { + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps 
> 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 4); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToFloat(curBytes); + (*data)[index] = exactData + medianValue; + memcpy(preBytes,curBytes,4); + } + + /* Process Row-i data 1 --> data r3-1 */ + for (jj = 1; jj < r3; jj++) + { + index = kk*r23 + ii*r3 + jj; + pred3D = (*data)[index-1] + (*data)[index-r3] + (*data)[index-r23] + - (*data)[index-r3-1] - (*data)[index-r23-r3] - (*data)[index-r23-1] + (*data)[index-r23-r3-1]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred3D + 2 * (type_ - intvRadius) * realPrecision; + } + else + { + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, 
resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 4); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToFloat(curBytes); + (*data)[index] = exactData + medianValue; + memcpy(preBytes,curBytes,4); + } + } + } + } + +#ifdef HAVE_TIMECMPR + if(confparams_dec->szMode == SZ_TEMPORAL_COMPRESSION) + memcpy(hist_data, (*data), dataSeriesLength*sizeof(float)); +#endif + + free(leadNum); + free(type); + return; +} + +// Decompress an r1 x r2 x r3 x r4 float array from tdps into a newly malloc'ed *data; hist_data is not used by the 4D path. +void decompressDataSeries_float_4D(float** data, size_t r1, size_t r2, size_t r3, size_t r4, float* hist_data, TightDataPointStorageF* tdps) +{ + //updateQuantizationInfo(tdps->intervals); + // radius is half of the quantization interval count, as in the other decompressDataSeries_float_* routines + int intvRadius = tdps->intervals/2; + size_t j, k = 0, p = 0, l = 0; // k is to track the location of residual_bit + // in resiMidBits, p is to track the + // byte_index of resiMidBits, l is for + // leadNum + size_t dataSeriesLength = r1*r2*r3*r4; + size_t r234 = r2*r3*r4; + size_t r34 = r3*r4; +// printf ("%d %d %d %d\n", r1, r2, r3, r4); + unsigned char* leadNum; + double realPrecision = tdps->realPrecision; + + convertByteArray2IntArray_fast_2b(tdps->exactDataNum, tdps->leadNumArray, tdps->leadNumArray_size, &leadNum); + + *data = (float*)malloc(sizeof(float)*dataSeriesLength); + int* type = (int*)malloc(dataSeriesLength*sizeof(int)); + + HuffmanTree* huffmanTree = 
createHuffmanTree(tdps->stateNum); + decode_withTree(huffmanTree, tdps->typeArray, dataSeriesLength, type); + SZ_ReleaseHuffman(huffmanTree); + + unsigned char preBytes[4]; + unsigned char curBytes[4]; + + memset(preBytes, 0, 4); + size_t curByteIndex = 0; + int reqBytesLength, resiBitsLength, resiBits; + unsigned char leadingNum; + float medianValue, exactData; + int type_; + + reqBytesLength = tdps->reqLength/8; + resiBitsLength = tdps->reqLength%8; + medianValue = tdps->medianValue; + + float pred1D, pred2D, pred3D; + size_t ii, jj, kk, ll; + size_t index; + + for (ll = 0; ll < r1; ll++) + { + + /////////////////////////// Process layer-0 /////////////////////////// + /* Process Row-0 data 0*/ + index = ll*r234; + + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 4); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + exactData = bytesToFloat(curBytes); + (*data)[index] = 
exactData + medianValue; + memcpy(preBytes,curBytes,4); + + /* Process Row-0, data 1 */ + index = ll*r234+1; + + pred1D = (*data)[index-1]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred1D + 2 * (type_ - intvRadius) * realPrecision; + } + else + { + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 4); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToFloat(curBytes); + (*data)[index] = exactData + medianValue; + memcpy(preBytes,curBytes,4); + } + + /* Process Row-0, data 2 --> data r4-1 */ + for (jj = 2; jj < r4; jj++) + { + index = ll*r234+jj; + + pred1D = 2*(*data)[index-1] - (*data)[index-2]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred1D + 2 * (type_ - intvRadius) * realPrecision; + } + else + { + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps 
= getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 4); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToFloat(curBytes); + (*data)[index] = exactData + medianValue; + memcpy(preBytes,curBytes,4); + } + } + + /* Process Row-1 --> Row-r3-1 */ + for (ii = 1; ii < r3; ii++) + { + /* Process row-ii data 0 */ + index = ll*r234+ii*r4; + + pred1D = (*data)[index-r4]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred1D + 2 * (type_ - intvRadius) * realPrecision; + } + else + { + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + 
rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 4); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToFloat(curBytes); + (*data)[index] = exactData + medianValue; + memcpy(preBytes,curBytes,4); + } + + /* Process row-ii data 1 --> r4-1*/ + for (jj = 1; jj < r4; jj++) + { + index = ll*r234+ii*r4+jj; + + pred2D = (*data)[index-1] + (*data)[index-r4] - (*data)[index-r4-1]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred2D + 2 * (type_ - intvRadius) * realPrecision; + } + else + { + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += 
resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 4); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToFloat(curBytes); + (*data)[index] = exactData + medianValue; + memcpy(preBytes,curBytes,4); + } + } + } + + /////////////////////////// Process layer-1 --> layer-r2-1 /////////////////////////// + + for (kk = 1; kk < r2; kk++) + { + /* Process Row-0 data 0*/ + index = ll*r234+kk*r34; + + pred1D = (*data)[index-r34]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred1D + 2 * (type_ - intvRadius) * realPrecision; + } + else + { + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 4); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = 
(unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToFloat(curBytes); + (*data)[index] = exactData + medianValue; + memcpy(preBytes,curBytes,4); + } + + /* Process Row-0 data 1 --> data r4-1 */ + for (jj = 1; jj < r4; jj++) + { + index = ll*r234+kk*r34+jj; + + pred2D = (*data)[index-1] + (*data)[index-r34] - (*data)[index-r34-1]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred2D + 2 * (type_ - intvRadius) * realPrecision; + } + else + { + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 4); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToFloat(curBytes); + (*data)[index] = exactData + medianValue; + memcpy(preBytes,curBytes,4); + } + } + + /* Process Row-1 --> Row-r3-1 */ + for (ii = 1; ii < r3; ii++) + { + /* Process Row-i data 0 */ + index = ll*r234+kk*r34+ii*r4; 
+ + pred2D = (*data)[index-r4] + (*data)[index-r34] - (*data)[index-r34-r4]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred2D + 2 * (type_ - intvRadius) * realPrecision; + } + else + { + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 4); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToFloat(curBytes); + (*data)[index] = exactData + medianValue; + memcpy(preBytes,curBytes,4); + } + + /* Process Row-i data 1 --> data r4-1 */ + for (jj = 1; jj < r4; jj++) + { + index = ll*r234+kk*r34+ii*r4+jj; + + pred3D = (*data)[index-1] + (*data)[index-r4] + (*data)[index-r34] + - (*data)[index-r4-1] - (*data)[index-r34-r4] - (*data)[index-r34-1] + (*data)[index-r34-r4-1]; + + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred3D + 2 * (type_ - intvRadius) * realPrecision; + } + else + { + // compute resiBits + resiBits = 0; + 
if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 4); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToFloat(curBytes); + (*data)[index] = exactData + medianValue; + memcpy(preBytes,curBytes,4); + } + } + } + + } + } + +//I didn't implement time-based compression for 4D actually. 
+//#ifdef HAVE_TIMECMPR +// if(confparams_dec->szMode == SZ_TEMPORAL_COMPRESSION) +// memcpy(multisteps->hist_data, (*data), dataSeriesLength*sizeof(float)); +//#endif + + free(leadNum); + free(type); + return; +} + +/*MSST19*/ +// Decompress a 1D float series of dataSeriesLength points from tdps into a newly malloc'ed *data (MSST19 variant: predicted points are |previous value| scaled by a per-code factor). +void decompressDataSeries_float_1D_MSST19(float** data, size_t dataSeriesLength, TightDataPointStorageF* tdps) +{ + //updateQuantizationInfo(tdps->intervals); + int intvRadius = tdps->intervals/2; + int intvCapacity = tdps->intervals; + size_t i, j, k = 0, p = 0, l = 0; // k is to track the location of residual_bit + // in resiMidBits, p is to track the + // byte_index of resiMidBits, l is for + // leadNum + unsigned char* leadNum; + //double interval = tdps->realPrecision*2; + + convertByteArray2IntArray_fast_2b(tdps->exactDataNum, tdps->leadNumArray, tdps->leadNumArray_size, &leadNum); + *data = (float*)malloc(sizeof(float)*dataSeriesLength); + + int* type = (int*)malloc(dataSeriesLength*sizeof(int)); + + HuffmanTree* huffmanTree = createHuffmanTree(tdps->stateNum); + decode_withTree_MSST19(huffmanTree, tdps->typeArray, dataSeriesLength, type, tdps->max_bits); + SZ_ReleaseHuffman(huffmanTree); + unsigned char preBytes[4]; + unsigned char curBytes[4]; + + memset(preBytes, 0, 4); + + size_t curByteIndex = 0; + int reqBytesLength, resiBitsLength, resiBits; + unsigned char leadingNum; + float exactData, predValue = 0; + reqBytesLength = tdps->reqLength/8; + resiBitsLength = tdps->reqLength%8; + //float threshold = tdps->minLogValue; + double* precisionTable = (double*)malloc(sizeof(double) * intvCapacity); + double inv = 2.0-pow(2, -(tdps->plus_bits)); + // precisionTable[i] = (1+realPrecision)^(inv*(i-intvRadius)): the factor applied to |predValue| for quantization code i + for(int i=0; i<intvCapacity; i++){ + double test = pow((1+tdps->realPrecision), inv*(i - intvRadius)); + precisionTable[i] = test; + } + + int type_; + for (i = 0; i < dataSeriesLength; i++) { + type_ = type[i]; + switch (type_) { + case 0: + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = 
getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 4); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToFloat(curBytes); + (*data)[i] = exactData; + memcpy(preBytes,curBytes,4); + predValue = (*data)[i]; + break; + default: + //predValue = 2 * (*data)[i-1] - (*data)[i-2]; + //predValue = (*data)[i-1]; + predValue = fabs(predValue) * precisionTable[type_]; + (*data)[i] = predValue; + break; + } + //printf("%.30G\n",(*data)[i]); + } + +#ifdef HAVE_TIMECMPR + if(confparams_dec->szMode == SZ_TEMPORAL_COMPRESSION) + memcpy(multisteps->hist_data, (*data), dataSeriesLength*sizeof(float)); +#endif + free(precisionTable); + free(leadNum); + free(type); + return; +} + +void decompressDataSeries_float_2D_MSST19(float** data, size_t r1, size_t r2, TightDataPointStorageF* tdps) +{ + //updateQuantizationInfo(tdps->intervals); + int intvRadius = tdps->intervals/2; + int intvCapacity = tdps->intervals; + + size_t j, k = 0, p = 0, l = 0; // k is to track the location of residual_bit + // in resiMidBits, p is to track the + // byte_index of resiMidBits, l is 
for + // leadNum + size_t dataSeriesLength = r1*r2; + + unsigned char* leadNum; + //double realPrecision = tdps->realPrecision; + + convertByteArray2IntArray_fast_2b(tdps->exactDataNum, tdps->leadNumArray, tdps->leadNumArray_size, &leadNum); + + *data = (float*)malloc(sizeof(float)*dataSeriesLength); + + int* type = (int*)malloc(dataSeriesLength*sizeof(int)); + + HuffmanTree* huffmanTree = createHuffmanTree(tdps->stateNum); + decode_withTree_MSST19(huffmanTree, tdps->typeArray, dataSeriesLength, type, tdps->max_bits); + SZ_ReleaseHuffman(huffmanTree); + + unsigned char preBytes[4]; + unsigned char curBytes[4]; + + memset(preBytes, 0, 4); + + size_t curByteIndex = 0; + int reqBytesLength, resiBitsLength, resiBits; + unsigned char leadingNum; + float exactData; + int type_; + + double* precisionTable = (double*)malloc(sizeof(double) * intvCapacity); + double inv = 2.0-pow(2, -(tdps->plus_bits)); + for(int i=0; irealPrecision), inv*(i - intvRadius)); + precisionTable[i] = test; + } + + reqBytesLength = tdps->reqLength/8; + resiBitsLength = tdps->reqLength%8; + + float pred1D, pred2D; + size_t ii, jj; + + /* Process Row-0, data 0 */ + + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover 
the exact data + memset(curBytes, 0, 4); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToFloat(curBytes); + (*data)[0] = exactData; + memcpy(preBytes,curBytes,4); + + /* Process Row-0, data 1 */ + type_ = type[1]; + if (type_ != 0) + { + pred1D = (*data)[0]; + (*data)[1] = fabs(pred1D) * precisionTable[type_]; + } + else + { + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 4); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToFloat(curBytes); + (*data)[1] = exactData; + memcpy(preBytes,curBytes,4); + } + + /* Process Row-0, data 2 --> data r2-1 */ + for (jj = 
2; jj < r2; jj++) + { + type_ = type[jj]; + if (type_ != 0) + { + pred1D = (*data)[jj-1] * (*data)[jj-1] / (*data)[jj-2]; + (*data)[jj] = fabs(pred1D) * precisionTable[type_]; + } + else + { + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 4); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToFloat(curBytes); + (*data)[jj] = exactData; + memcpy(preBytes,curBytes,4); + } + } + + size_t index; + /* Process Row-1 --> Row-r1-1 */ + for (ii = 1; ii < r1; ii++) + { + /* Process row-ii data 0 */ + index = ii*r2; + + type_ = type[index]; + if (type_ != 0) + { + pred1D = (*data)[index-r2]; + (*data)[index] = fabs(pred1D) * precisionTable[type_]; + } + else + { + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = 
getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 4); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToFloat(curBytes); + (*data)[index] = exactData; + memcpy(preBytes,curBytes,4); + } + + /* Process row-ii data 1 --> r2-1*/ + for (jj = 1; jj < r2; jj++) + { + index = ii*r2+jj; + pred2D = (*data)[index-1] * (*data)[index-r2] / (*data)[index-r2-1]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = fabs(pred2D) * precisionTable[type_]; + } + else + { + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits 
= resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 4); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToFloat(curBytes); + (*data)[index] = exactData; + memcpy(preBytes,curBytes,4); + } + } + } + +#ifdef HAVE_TIMECMPR + if(confparams_dec->szMode == SZ_TEMPORAL_COMPRESSION) + memcpy(multisteps->hist_data, (*data), dataSeriesLength*sizeof(float)); +#endif + + free(leadNum); + free(type); + return; +} + +void decompressDataSeries_float_3D_MSST19(float** data, size_t r1, size_t r2, size_t r3, TightDataPointStorageF* tdps) +{ + //updateQuantizationInfo(tdps->intervals); + int intvRadius = tdps->intervals/2; + int intvCapacity = tdps->intervals; + size_t j, k = 0, p = 0, l = 0; // k is to track the location of residual_bit + // in resiMidBits, p is to track the + // byte_index of resiMidBits, l is for + // leadNum + size_t dataSeriesLength = r1*r2*r3; + size_t r23 = r2*r3; + unsigned char* leadNum; + //double realPrecision = tdps->realPrecision; + + convertByteArray2IntArray_fast_2b(tdps->exactDataNum, tdps->leadNumArray, tdps->leadNumArray_size, &leadNum); + + *data = (float*)malloc(sizeof(float)*dataSeriesLength); + int* type = (int*)malloc(dataSeriesLength*sizeof(int)); + + double* precisionTable = (double*)malloc(sizeof(double) * intvCapacity); + double inv = 2.0-pow(2, -(tdps->plus_bits)); + for(int i=0; irealPrecision), inv*(i - intvRadius)); + precisionTable[i] = test; + } + + HuffmanTree* huffmanTree = 
createHuffmanTree(tdps->stateNum); + decode_withTree_MSST19(huffmanTree, tdps->typeArray, dataSeriesLength, type, tdps->max_bits); + SZ_ReleaseHuffman(huffmanTree); + + unsigned char preBytes[4]; + unsigned char curBytes[4]; + + memset(preBytes, 0, 4); + size_t curByteIndex = 0; + int reqBytesLength, resiBitsLength, resiBits; + unsigned char leadingNum; + float exactData; + int type_; + + reqBytesLength = tdps->reqLength/8; + resiBitsLength = tdps->reqLength%8; + + float pred1D, pred2D, pred3D; + double temp; + double temp2; + size_t ii, jj, kk; + + /////////////////////////// Process layer-0 /////////////////////////// + /* Process Row-0 data 0*/ + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 4); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + exactData = bytesToFloat(curBytes); + (*data)[0] = exactData; + memcpy(preBytes,curBytes,4); + + /* Process Row-0, data 1 */ + 
pred1D = (*data)[0]; + + type_ = type[1]; + if (type_ != 0) + { + (*data)[1] = fabs(pred1D) * precisionTable[type_]; + } + else + { + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 4); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToFloat(curBytes); + (*data)[1] = exactData; + memcpy(preBytes,curBytes,4); + } + /* Process Row-0, data 2 --> data r3-1 */ + for (jj = 2; jj < r3; jj++) + { + temp = (*data)[jj-1]; + pred1D = temp * ( *data)[jj-1] / (*data)[jj-2]; + + type_ = type[jj]; + if (type_ != 0) + { + (*data)[jj] = fabsf(pred1D) * precisionTable[type_]; + } + else + { + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) 
>> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 4); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToFloat(curBytes); + (*data)[jj] = exactData; + memcpy(preBytes,curBytes,4); + } + } + + size_t index; + /* Process Row-1 --> Row-r2-1 */ + for (ii = 1; ii < r2; ii++) + { + /* Process row-ii data 0 */ + index = ii*r3; + pred1D = (*data)[index-r3]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = fabsf(pred1D) * precisionTable[type_]; + } + else + { + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // 
rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 4); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToFloat(curBytes); + (*data)[index] = exactData; + memcpy(preBytes,curBytes,4); + } + + /* Process row-ii data 1 --> r3-1*/ + for (jj = 1; jj < r3; jj++) + { + index = ii*r3+jj; + temp = (*data)[index-1]; + pred2D = temp * (*data)[index-r3] / (*data)[index-r3-1]; + + type_ = type[index]; + if (type_ != 0) + { + //float ppp = precisionTable[type_]; + //float test = fabsf(pred2D) * precisionTable[type_]; + (*data)[index] = fabsf(pred2D) * precisionTable[type_]; + } + else + { + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 4); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, 
leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToFloat(curBytes); + (*data)[index] = exactData; + memcpy(preBytes,curBytes,4); + } + } + } + + /////////////////////////// Process layer-1 --> layer-r1-1 /////////////////////////// + + for (kk = 1; kk < r1; kk++) + { + /* Process Row-0 data 0*/ + index = kk*r23; + pred1D = (*data)[index-r23]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = fabsf(pred1D) * precisionTable[type_]; + } + else + { + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 4); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToFloat(curBytes); + (*data)[index] = exactData; + 
memcpy(preBytes,curBytes,4); + } + + /* Process Row-0 data 1 --> data r3-1 */ + for (jj = 1; jj < r3; jj++) + { + index = kk*r23+jj; + temp = (*data)[index-1]; + pred2D = temp * (*data)[index-r23] / (*data)[index-r23-1]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = fabsf(pred2D) * precisionTable[type_]; + } + else + { + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 4); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToFloat(curBytes); + (*data)[index] = exactData; + memcpy(preBytes,curBytes,4); + } + } + + /* Process Row-1 --> Row-r2-1 */ + for (ii = 1; ii < r2; ii++) + { + /* Process Row-i data 0 */ + index = kk*r23 + ii*r3; + temp = (*data)[index-r3]; + pred2D = temp * (*data)[index-r23] / (*data)[index-r23-r3]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = fabsf(pred2D) * precisionTable[type_]; + } + else + 
{ + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 4); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToFloat(curBytes); + (*data)[index] = exactData; + memcpy(preBytes,curBytes,4); + } + + /* Process Row-i data 1 --> data r3-1 */ + for (jj = 1; jj < r3; jj++) + { + index = kk*r23 + ii*r3 + jj; + //pred3D = (*data)[index-1] + (*data)[index-r3] + (*data)[index-r23] + // - (*data)[index-r3-1] - (*data)[index-r23-r3] - (*data)[index-r23-1] + (*data)[index-r23-r3-1]; + temp = (*data)[index-1]; + temp2 = (*data)[index-r3-1]; + pred3D = temp * (*data)[index-r3] * (*data)[index-r23] * (*data)[index-r23-r3-1] / (temp2 * (*data)[index-r23-r3] * (*data)[index-r23-1]); + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = fabsf(pred3D) * precisionTable[type_]; + } + else + { + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int 
kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 4); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToFloat(curBytes); + (*data)[index] = exactData; + memcpy(preBytes,curBytes,4); + } + } + } + } + +#ifdef HAVE_TIMECMPR + if(confparams_dec->szMode == SZ_TEMPORAL_COMPRESSION) + memcpy(multisteps->hist_data, (*data), dataSeriesLength*sizeof(float)); +#endif + + free(leadNum); + free(type); + return; +} + +void getSnapshotData_float_1D(float** data, size_t dataSeriesLength, TightDataPointStorageF* tdps, int errBoundMode, int compressionType, float* hist_data) +{ + size_t i; + + if (tdps->allSameData) { + float value = bytesToFloat(tdps->exactMidBytes); + *data = (float*)malloc(sizeof(float)*dataSeriesLength); + for (i = 0; i < dataSeriesLength; i++) + (*data)[i] = value; + } else { + if (tdps->rtypeArray == NULL) { + if(errBoundMode < PW_REL) + { +#ifdef HAVE_TIMECMPR + if(confparams_dec->szMode == SZ_TEMPORAL_COMPRESSION) + 
{ + if(compressionType == 0) //snapshot + decompressDataSeries_float_1D(data, dataSeriesLength, hist_data, tdps); + else + decompressDataSeries_float_1D_ts(data, dataSeriesLength, hist_data, tdps); + } + else +#endif + decompressDataSeries_float_1D(data, dataSeriesLength, hist_data, tdps); + } + else + { + if(confparams_dec->accelerate_pw_rel_compression) + decompressDataSeries_float_1D_pwr_pre_log_MSST19(data, dataSeriesLength, tdps); + else + decompressDataSeries_float_1D_pwr_pre_log(data, dataSeriesLength, tdps); + //decompressDataSeries_float_1D_pwrgroup(data, dataSeriesLength, tdps); + } + return; + } else { //the special version supporting one value to reserve + //TODO + } + } +} + +void getSnapshotData_float_2D(float** data, size_t r1, size_t r2, TightDataPointStorageF* tdps, int errBoundMode, int compressionType, float* hist_data) +{ + size_t i; + size_t dataSeriesLength = r1*r2; + if (tdps->allSameData) { + float value = bytesToFloat(tdps->exactMidBytes); + *data = (float*)malloc(sizeof(float)*dataSeriesLength); + for (i = 0; i < dataSeriesLength; i++) + (*data)[i] = value; + } else { + if (tdps->rtypeArray == NULL) { + if(errBoundMode < PW_REL) + { +#ifdef HAVE_TIMECMPR + if(confparams_dec->szMode == SZ_TEMPORAL_COMPRESSION) + { + if(compressionType == 0) + decompressDataSeries_float_2D(data, r1, r2, hist_data, tdps); + else + decompressDataSeries_float_1D_ts(data, dataSeriesLength, hist_data, tdps); + } + else +#endif + decompressDataSeries_float_2D(data, r1, r2, hist_data, tdps); + } + else + { + //decompressDataSeries_float_2D_pwr(data, r1, r2, tdps); + if(confparams_dec->accelerate_pw_rel_compression) + decompressDataSeries_float_2D_pwr_pre_log_MSST19(data, r1, r2, tdps); + else + decompressDataSeries_float_2D_pwr_pre_log(data, r1, r2, tdps); + } + + return; + } else { + //TODO + } + } +} + +void getSnapshotData_float_3D(float** data, size_t r1, size_t r2, size_t r3, TightDataPointStorageF* tdps, int errBoundMode, int compressionType, float* 
hist_data) +{ + size_t i; + size_t dataSeriesLength = r1*r2*r3; + if (tdps->allSameData) { + float value = bytesToFloat(tdps->exactMidBytes); + *data = (float*)malloc(sizeof(float)*dataSeriesLength); + for (i = 0; i < dataSeriesLength; i++) + (*data)[i] = value; + } else { + if (tdps->rtypeArray == NULL) { + if(errBoundMode < PW_REL) + { +#ifdef HAVE_TIMECMPR + if(confparams_dec->szMode == SZ_TEMPORAL_COMPRESSION) + { + if(compressionType == 0) + decompressDataSeries_float_3D(data, r1, r2, r3, hist_data, tdps); + else + decompressDataSeries_float_1D_ts(data, dataSeriesLength, hist_data, tdps); + } + else +#endif + decompressDataSeries_float_3D(data, r1, r2, r3, hist_data, tdps); + } + else + { + //decompressDataSeries_float_3D_pwr(data, r1, r2, r3, tdps); + if(confparams_dec->accelerate_pw_rel_compression) + decompressDataSeries_float_3D_pwr_pre_log_MSST19(data, r1, r2, r3, tdps); + else + decompressDataSeries_float_3D_pwr_pre_log(data, r1, r2, r3, tdps); + } + + return; + } else { + //TODO + } + } +} + +void getSnapshotData_float_4D(float** data, size_t r1, size_t r2, size_t r3, size_t r4, TightDataPointStorageF* tdps, int errBoundMode, int compressionType, float* hist_data) +{ + size_t i; + size_t dataSeriesLength = r1*r2*r3*r4; + if (tdps->allSameData) { + float value = bytesToFloat(tdps->exactMidBytes); + *data = (float*)malloc(sizeof(float)*dataSeriesLength); + for (i = 0; i < dataSeriesLength; i++) + (*data)[i] = value; + } else { + if (tdps->rtypeArray == NULL) { + if(errBoundMode < PW_REL) + { +#ifdef HAVE_TIMECMPR + if(confparams_dec->szMode == SZ_TEMPORAL_COMPRESSION) + { + if(compressionType == 0) + decompressDataSeries_float_4D(data, r1, r2, r3, r4, hist_data, tdps); + else + decompressDataSeries_float_1D_ts(data, r1*r2*r3*r4, hist_data, tdps); + } + else +#endif + decompressDataSeries_float_4D(data, r1, r2, r3, r4, hist_data, tdps); + } + else + { + if(confparams_dec->accelerate_pw_rel_compression) + 
decompressDataSeries_float_3D_pwr_pre_log_MSST19(data, r1*r2, r3, r4, tdps); + else + decompressDataSeries_float_3D_pwr_pre_log(data, r1*r2, r3, r4, tdps); + //decompressDataSeries_float_4D_pwr(data, r1, r2, r3, r4, tdps); + } + return; + } else { + //TODO + } + } +} + +size_t decompressDataSeries_float_3D_RA_block(float * data, float mean, size_t dim_0, size_t dim_1, size_t dim_2, size_t block_dim_0, size_t block_dim_1, size_t block_dim_2, double realPrecision, int * type, float * unpredictable_data){ + int intvRadius = exe_params->intvRadius; + size_t dim0_offset = dim_1 * dim_2; + size_t dim1_offset = dim_2; + // printf("SZ_compress_float_3D_MDQ_RA_block real dim: %d %d %d\n", real_block_dims[0], real_block_dims[1], real_block_dims[2]); + // fflush(stdout); + + size_t unpredictable_count = 0; + size_t r1, r2, r3; + r1 = block_dim_0; + r2 = block_dim_1; + r3 = block_dim_2; + + float * cur_data_pos = data; + float * last_row_pos; + float pred1D, pred2D, pred3D; + size_t i, j, k; + size_t r23 = r2*r3; + int type_; + // Process Row-0 data 0 + pred1D = mean; + type_ = type[0]; + // printf("Type 0 %d, mean %.4f\n", type_, mean); + if (type_ != 0){ + cur_data_pos[0] = pred1D + 2 * (type_ - intvRadius) * realPrecision; + } + else{ + cur_data_pos[0] = unpredictable_data[unpredictable_count ++]; + } + + /* Process Row-0 data 1*/ + pred1D = cur_data_pos[0]; + type_ = type[1]; + if (type_ != 0){ + cur_data_pos[1] = pred1D + 2 * (type_ - intvRadius) * realPrecision; + } + else{ + cur_data_pos[1] = unpredictable_data[unpredictable_count ++]; + } + /* Process Row-0 data 2 --> data r3-1 */ + for (j = 2; j < r3; j++){ + pred1D = 2*cur_data_pos[j-1] - cur_data_pos[j-2]; + type_ = type[j]; + if (type_ != 0){ + cur_data_pos[j] = pred1D + 2 * (type_ - intvRadius) * realPrecision; + } + else{ + cur_data_pos[j] = unpredictable_data[unpredictable_count ++]; + } + } + + last_row_pos = cur_data_pos; + cur_data_pos += dim1_offset; + // printf("SZ_compress_float_3D_MDQ_RA_block row 0 done, 
cur_data_pos: %ld\n", cur_data_pos - block_ori_data); + // fflush(stdout); + + /* Process Row-1 --> Row-r2-1 */ + size_t index; + for (i = 1; i < r2; i++) + { + /* Process row-i data 0 */ + index = i*r3; + pred1D = last_row_pos[0]; + type_ = type[index]; + if (type_ != 0){ + cur_data_pos[0] = pred1D + 2 * (type_ - intvRadius) * realPrecision; + } + else{ + cur_data_pos[0] = unpredictable_data[unpredictable_count ++]; + } + /* Process row-i data 1 --> data r3-1*/ + for (j = 1; j < r3; j++) + { + index = i*r3+j; + pred2D = cur_data_pos[j-1] + last_row_pos[j] - last_row_pos[j-1]; + type_ = type[index]; + if (type_ != 0){ + cur_data_pos[j] = pred2D + 2 * (type_ - intvRadius) * realPrecision; + } + else{ + cur_data_pos[j] = unpredictable_data[unpredictable_count ++]; + } + // printf("pred2D %.2f cur_data %.2f last_row_data %.2f %.2f, result %.2f\n", pred2D, cur_data_pos[j-1], last_row_pos[j], last_row_pos[j-1], cur_data_pos[j]); + // getchar(); + } + last_row_pos = cur_data_pos; + cur_data_pos += dim1_offset; + } + cur_data_pos += dim0_offset - r2 * dim1_offset; + + // printf("SZ_compress_float_3D_MDQ_RA_block layer 0 done, cur_data_pos: %ld\n", cur_data_pos - block_ori_data); + // fflush(stdout); + // exit(0); + + /////////////////////////// Process layer-1 --> layer-r1-1 /////////////////////////// + + for (k = 1; k < r1; k++) + { + // if(idx == 63 && idy == 63 && idz == 63){ + // printf("SZ_compress_float_3D_MDQ_RA_block layer %d done, cur_data_pos: %ld\n", k-1, cur_data_pos - data); + // fflush(stdout); + // } + /* Process Row-0 data 0*/ + index = k*r23; + pred1D = cur_data_pos[- dim0_offset]; + type_ = type[index]; + if (type_ != 0){ + cur_data_pos[0] = pred1D + 2 * (type_ - intvRadius) * realPrecision; + } + else{ + cur_data_pos[0] = unpredictable_data[unpredictable_count ++]; + } + /* Process Row-0 data 1 --> data r3-1 */ + for (j = 1; j < r3; j++) + { + //index = k*r2*r3+j; + index ++; + pred2D = cur_data_pos[j-1] + cur_data_pos[j - dim0_offset] - cur_data_pos[j 
- 1 - dim0_offset]; + type_ = type[index]; + if (type_ != 0){ + cur_data_pos[j] = pred2D + 2 * (type_ - intvRadius) * realPrecision; + } + else{ + cur_data_pos[j] = unpredictable_data[unpredictable_count ++]; + } + // printf("pred2D %.2f cur_data %.2f %.2f %.2f, result %.2f\n", pred2D, cur_data_pos[j-1], cur_data_pos[j - dim0_offset], cur_data_pos[j - 1 - dim0_offset], cur_data_pos[j]); + // getchar(); + } + last_row_pos = cur_data_pos; + cur_data_pos += dim1_offset; + + // if(idx == 63 && idy == 63 && idz == 63){ + // printf("SZ_compress_float_3D_MDQ_RA_block layer row 0 done, cur_data_pos: %ld\n", k-1, cur_data_pos - data); + // fflush(stdout); + // } + + /* Process Row-1 --> Row-r2-1 */ + for (i = 1; i < r2; i++) + { + // if(idx == 63 && idy == 63 && idz == 63){ + // printf("SZ_compress_float_3D_MDQ_RA_block layer row %d done, cur_data_pos: %ld\n", i-1, cur_data_pos - data); + // fflush(stdout); + // } + /* Process Row-i data 0 */ + index = k*r23 + i*r3; + pred2D = last_row_pos[0] + cur_data_pos[- dim0_offset] - last_row_pos[- dim0_offset]; + type_ = type[index]; + if (type_ != 0){ + cur_data_pos[0] = pred2D + 2 * (type_ - intvRadius) * realPrecision; + } + else{ + cur_data_pos[0] = unpredictable_data[unpredictable_count ++]; + } + + /* Process Row-i data 1 --> data r3-1 */ + for (j = 1; j < r3; j++) + { +// if(k==63&&i==43&&j==27) +// printf("i=%d\n", i); + //index = k*r2*r3 + i*r3 + j; + index ++; + pred3D = cur_data_pos[j-1] + last_row_pos[j]+ cur_data_pos[j - dim0_offset] - last_row_pos[j-1] - last_row_pos[j - dim0_offset] - cur_data_pos[j-1 - dim0_offset] + last_row_pos[j-1 - dim0_offset]; + type_ = type[index]; + if (type_ != 0){ + cur_data_pos[j] = pred3D + 2 * (type_ - intvRadius) * realPrecision; + } + else{ + cur_data_pos[j] = unpredictable_data[unpredictable_count ++]; + } + } + last_row_pos = cur_data_pos; + cur_data_pos += dim1_offset; + } + cur_data_pos += dim0_offset - r2 * dim1_offset; + } + + return unpredictable_count; +} + +size_t 
decompressDataSeries_float_1D_RA_block(float * data, float mean, size_t dim_0, size_t block_dim_0, double realPrecision, int * type, float * unpredictable_data){ + int intvRadius = exe_params->intvRadius; + size_t unpredictable_count = 0; + + float * cur_data_pos = data; + size_t type_index = 0; + int type_; + float last_over_thres = mean; + for(size_t i=0; iintvRadius; + size_t dim0_offset = dim_1; + // printf("SZ_compress_float_3D_MDQ_RA_block real dim: %d %d %d\n", real_block_dims[0], real_block_dims[1], real_block_dims[2]); + // fflush(stdout); + + size_t unpredictable_count = 0; + size_t r1, r2; + r1 = block_dim_0; + r2 = block_dim_1; + + float * cur_data_pos = data; + float * last_row_pos; + float pred1D, pred2D; + size_t i, j; + int type_; + // Process Row-0 data 0 + pred1D = mean; + type_ = type[0]; + // printf("Type 0 %d, mean %.4f\n", type_, mean); + if (type_ != 0){ + cur_data_pos[0] = pred1D + 2 * (type_ - intvRadius) * realPrecision; + } + else{ + cur_data_pos[0] = unpredictable_data[unpredictable_count ++]; + } + + /* Process Row-0 data 1*/ + pred1D = cur_data_pos[0]; + type_ = type[1]; + if (type_ != 0){ + cur_data_pos[1] = pred1D + 2 * (type_ - intvRadius) * realPrecision; + } + else{ + cur_data_pos[1] = unpredictable_data[unpredictable_count ++]; + } + /* Process Row-0 data 2 --> data r3-1 */ + for (j = 2; j < r2; j++){ + pred1D = 2*cur_data_pos[j-1] - cur_data_pos[j-2]; + type_ = type[j]; + if (type_ != 0){ + cur_data_pos[j] = pred1D + 2 * (type_ - intvRadius) * realPrecision; + } + else{ + cur_data_pos[j] = unpredictable_data[unpredictable_count ++]; + } + } + + last_row_pos = cur_data_pos; + cur_data_pos += dim0_offset; + // printf("SZ_compress_float_3D_MDQ_RA_block row 0 done, cur_data_pos: %ld\n", cur_data_pos - block_ori_data); + // fflush(stdout); + + /* Process Row-1 --> Row-r2-1 */ + size_t index; + for (i = 1; i < r1; i++) + { + /* Process row-i data 0 */ + index = i*r2; + type_ = type[index]; + if (type_ != 0){ + pred1D = 
last_row_pos[0]; + cur_data_pos[0] = pred1D + 2 * (type_ - intvRadius) * realPrecision; + } + else{ + cur_data_pos[0] = unpredictable_data[unpredictable_count ++]; + } + /* Process row-i data 1 --> data r3-1*/ + for (j = 1; j < r2; j++) + { + index = i*r2+j; + pred2D = cur_data_pos[j-1] + last_row_pos[j] - last_row_pos[j-1]; + type_ = type[index]; + if (type_ != 0){ + cur_data_pos[j] = pred2D + 2 * (type_ - intvRadius) * realPrecision; + } + else{ + cur_data_pos[j] = unpredictable_data[unpredictable_count ++]; + } + // printf("pred2D %.2f cur_data %.2f last_row_data %.2f %.2f, result %.2f\n", pred2D, cur_data_pos[j-1], last_row_pos[j], last_row_pos[j-1], cur_data_pos[j]); + // getchar(); + } + last_row_pos = cur_data_pos; + cur_data_pos += dim0_offset; + } + return unpredictable_count; +} + +void decompressDataSeries_float_2D_nonblocked_with_blocked_regression(float** data, size_t r1, size_t r2, unsigned char* comp_data, float* hist_data){ + + size_t dim0_offset = r2; + size_t num_elements = r1 * r2; + + *data = (float*)malloc(sizeof(float)*num_elements); + + unsigned char * comp_data_pos = comp_data; + + size_t block_size = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + // calculate block dims + size_t num_x, num_y; + SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r1, num_x, block_size); + SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r2, num_y, block_size); + + size_t split_index_x, split_index_y; + size_t early_blockcount_x, early_blockcount_y; + size_t late_blockcount_x, late_blockcount_y; + SZ_COMPUTE_BLOCKCOUNT(r1, num_x, split_index_x, early_blockcount_x, late_blockcount_x); + SZ_COMPUTE_BLOCKCOUNT(r2, num_y, split_index_y, early_blockcount_y, late_blockcount_y); + + size_t num_blocks = num_x * num_y; + + float realPrecision = bytesToFloat(comp_data_pos); + comp_data_pos += sizeof(float); + unsigned int intervals = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + + //updateQuantizationInfo(intervals); + + unsigned int tree_size = 
bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + + int stateNum = 2*intervals; + HuffmanTree* huffmanTree = createHuffmanTree(stateNum); + + int nodeCount = bytesToInt_bigEndian(comp_data_pos); + + node root = reconstruct_HuffTree_from_bytes_anyStates(huffmanTree,comp_data_pos+sizeof(int), nodeCount); + comp_data_pos += sizeof(int) + tree_size; + + float mean; + unsigned char use_mean; + memcpy(&use_mean, comp_data_pos, sizeof(unsigned char)); + comp_data_pos += sizeof(unsigned char); + memcpy(&mean, comp_data_pos, sizeof(float)); + comp_data_pos += sizeof(float); + size_t reg_count = 0; + + unsigned char * indicator; + size_t indicator_bitlength = (num_blocks - 1)/8 + 1; + convertByteArray2IntArray_fast_1b(num_blocks, comp_data_pos, indicator_bitlength, &indicator); + comp_data_pos += indicator_bitlength; + for(size_t i=0; i 0){ + for(int i=0; i<3; i++){ + precision[i] = bytesToFloat(comp_data_pos); + comp_data_pos += sizeof(float); + coeff_intvRadius[i] = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + unsigned int tree_size = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + int stateNum = 2*coeff_intvRadius[i]*2; + HuffmanTree* huffmanTree = createHuffmanTree(stateNum); + int nodeCount = bytesToInt_bigEndian(comp_data_pos); + node root = reconstruct_HuffTree_from_bytes_anyStates(huffmanTree, comp_data_pos+sizeof(int), nodeCount); + comp_data_pos += sizeof(int) + tree_size; + + coeff_type[i] = coeff_result_type + i * num_blocks; + size_t typeArray_size = bytesToSize(comp_data_pos); + decode(comp_data_pos + sizeof(size_t), reg_count, root, coeff_type[i]); + comp_data_pos += sizeof(size_t) + typeArray_size; + int coeff_unpred_count = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + coeff_unpred_data[i] = (float *) comp_data_pos; + comp_data_pos += coeff_unpred_count * sizeof(float); + SZ_ReleaseHuffman(huffmanTree); + } + } + float last_coefficients[3] = {0.0}; + int 
coeff_unpred_data_count[3] = {0}; + int coeff_index = 0; + //updateQuantizationInfo(intervals); + + size_t total_unpred; + memcpy(&total_unpred, comp_data_pos, sizeof(size_t)); + comp_data_pos += sizeof(size_t); + float * unpred_data = (float *) comp_data_pos; + comp_data_pos += total_unpred * sizeof(float); + + int * result_type = (int *) malloc(num_elements * sizeof(int)); + decode(comp_data_pos, num_elements, root, result_type); + SZ_ReleaseHuffman(huffmanTree); + + int intvRadius = intervals/2; + + int * type; + + float * data_pos = *data; + size_t offset_x, offset_y; + size_t current_blockcount_x, current_blockcount_y; + size_t cur_unpred_count; + + unsigned char * indicator_pos = indicator; + if(use_mean){ + type = result_type; + for(size_t i=0; iszMode == SZ_TEMPORAL_COMPRESSION) + memcpy(hist_data, (*data), num_elements*sizeof(float)); +#endif + + free(coeff_result_type); + + free(indicator); + free(result_type); +} + + +void decompressDataSeries_float_3D_nonblocked_with_blocked_regression(float** data, size_t r1, size_t r2, size_t r3, unsigned char* comp_data, float* hist_data){ + + size_t dim0_offset = r2 * r3; + size_t dim1_offset = r3; + size_t num_elements = r1 * r2 * r3; + + *data = (float*)malloc(sizeof(float)*num_elements); + + unsigned char * comp_data_pos = comp_data; + + size_t block_size = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + // calculate block dims + size_t num_x, num_y, num_z; + SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r1, num_x, block_size); + SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r2, num_y, block_size); + SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r3, num_z, block_size); + + size_t split_index_x, split_index_y, split_index_z; + size_t early_blockcount_x, early_blockcount_y, early_blockcount_z; + size_t late_blockcount_x, late_blockcount_y, late_blockcount_z; + SZ_COMPUTE_BLOCKCOUNT(r1, num_x, split_index_x, early_blockcount_x, late_blockcount_x); + SZ_COMPUTE_BLOCKCOUNT(r2, num_y, split_index_y, early_blockcount_y, late_blockcount_y); + 
SZ_COMPUTE_BLOCKCOUNT(r3, num_z, split_index_z, early_blockcount_z, late_blockcount_z); + + size_t num_blocks = num_x * num_y * num_z; + + float realPrecision = bytesToFloat(comp_data_pos); + comp_data_pos += sizeof(float); + unsigned int intervals = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + + //updateQuantizationInfo(intervals); + + unsigned int tree_size = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + + int stateNum = 2*intervals; + HuffmanTree* huffmanTree = createHuffmanTree(stateNum); + + int nodeCount = bytesToInt_bigEndian(comp_data_pos); + node root = reconstruct_HuffTree_from_bytes_anyStates(huffmanTree,comp_data_pos+sizeof(int), nodeCount); + comp_data_pos += sizeof(int) + tree_size; + + float mean; + unsigned char use_mean; + memcpy(&use_mean, comp_data_pos, sizeof(unsigned char)); + comp_data_pos += sizeof(unsigned char); + memcpy(&mean, comp_data_pos, sizeof(float)); + comp_data_pos += sizeof(float); + size_t reg_count = 0; + + unsigned char * indicator; + size_t indicator_bitlength = (num_blocks - 1)/8 + 1; + convertByteArray2IntArray_fast_1b(num_blocks, comp_data_pos, indicator_bitlength, &indicator); + comp_data_pos += indicator_bitlength; + for(size_t i=0; i 0){ + for(int i=0; i<4; i++){ + precision[i] = bytesToFloat(comp_data_pos); + comp_data_pos += sizeof(float); + coeff_intvRadius[i] = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + unsigned int tree_size = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + int stateNum = 2*coeff_intvRadius[i]*2; + HuffmanTree* huffmanTree = createHuffmanTree(stateNum); + int nodeCount = bytesToInt_bigEndian(comp_data_pos); + node root = reconstruct_HuffTree_from_bytes_anyStates(huffmanTree, comp_data_pos+sizeof(int), nodeCount); + comp_data_pos += sizeof(int) + tree_size; + + coeff_type[i] = coeff_result_type + i * num_blocks; + size_t typeArray_size = bytesToSize(comp_data_pos); + decode(comp_data_pos + 
sizeof(size_t), reg_count, root, coeff_type[i]); + comp_data_pos += sizeof(size_t) + typeArray_size; + int coeff_unpred_count = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + coeff_unpred_data[i] = (float *) comp_data_pos; + comp_data_pos += coeff_unpred_count * sizeof(float); + SZ_ReleaseHuffman(huffmanTree); + } + } + float last_coefficients[4] = {0.0}; + int coeff_unpred_data_count[4] = {0}; + int coeff_index = 0; + //updateQuantizationInfo(intervals); + + size_t total_unpred; + memcpy(&total_unpred, comp_data_pos, sizeof(size_t)); + comp_data_pos += sizeof(size_t); + float * unpred_data = (float *) comp_data_pos; + comp_data_pos += total_unpred * sizeof(float); + + int * result_type = (int *) malloc(num_elements * sizeof(int)); + decode(comp_data_pos, num_elements, root, result_type); + SZ_ReleaseHuffman(huffmanTree); + + int intvRadius = intervals/2; + + int * type; + float * data_pos = *data; + size_t offset_x, offset_y, offset_z; + size_t current_blockcount_x, current_blockcount_y, current_blockcount_z; + size_t cur_unpred_count; + unsigned char * indicator_pos = indicator; + if(use_mean){ + // type = result_type; + + // for(size_t i=0; i 10000){ + // printf("%d %d %d-%d: pred %.4f type %d precision %.4g last_coefficients %.4g\n", i, j, k, e, pred, type_, precision[e], last_coefficients[e]); + // exit(0); + // } + // } + // coeff_index ++; + // } + // { + // float * block_data_pos = data_pos; + // float pred; + // int type_; + // size_t index = 0; + // size_t unpredictable_count = 0; + // for(size_t ii=0; iiszMode == SZ_TEMPORAL_COMPRESSION) + memcpy(hist_data, (*data), num_elements*sizeof(float)); +#endif + + free(coeff_result_type); + + free(indicator); + free(result_type); +} + +void decompressDataSeries_float_3D_random_access_with_blocked_regression(float** data, size_t r1, size_t r2, size_t r3, unsigned char* comp_data){ + + size_t dim0_offset = r2 * r3; + size_t dim1_offset = r3; + size_t num_elements = r1 * r2 * r3; + + *data = 
(float*)malloc(sizeof(float)*num_elements); + + unsigned char * comp_data_pos = comp_data; + + size_t block_size = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + // calculate block dims + size_t num_x, num_y, num_z; + num_x = (r1 - 1) / block_size + 1; + num_y = (r2 - 1) / block_size + 1; + num_z = (r3 - 1) / block_size + 1; + + size_t max_num_block_elements = block_size * block_size * block_size; + size_t num_blocks = num_x * num_y * num_z; + + double realPrecision = bytesToDouble(comp_data_pos); + comp_data_pos += sizeof(double); + unsigned int intervals = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + + //updateQuantizationInfo(intervals); + + unsigned int tree_size = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + + int stateNum = 2*intervals; + HuffmanTree* huffmanTree = createHuffmanTree(stateNum); + + int nodeCount = bytesToInt_bigEndian(comp_data_pos); + node root = reconstruct_HuffTree_from_bytes_anyStates(huffmanTree,comp_data_pos+sizeof(int), nodeCount); + comp_data_pos += sizeof(int) + tree_size; + + float mean; + unsigned char use_mean; + memcpy(&use_mean, comp_data_pos, sizeof(unsigned char)); + comp_data_pos += sizeof(unsigned char); + memcpy(&mean, comp_data_pos, sizeof(float)); + comp_data_pos += sizeof(float); + size_t reg_count = 0; + + unsigned char * indicator; + size_t indicator_bitlength = (num_blocks - 1)/8 + 1; + convertByteArray2IntArray_fast_1b(num_blocks, comp_data_pos, indicator_bitlength, &indicator); + comp_data_pos += indicator_bitlength; + for(size_t i=0; i 0){ + for(int i=0; i<4; i++){ + precision[i] = bytesToDouble(comp_data_pos); + comp_data_pos += sizeof(double); + coeff_intvRadius[i] = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + unsigned int tree_size = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + int stateNum = 2*coeff_intvRadius[i]*2; + HuffmanTree* huffmanTree = createHuffmanTree(stateNum); + int 
nodeCount = bytesToInt_bigEndian(comp_data_pos); + node root = reconstruct_HuffTree_from_bytes_anyStates(huffmanTree, comp_data_pos+sizeof(int), nodeCount); + comp_data_pos += sizeof(int) + tree_size; + + coeff_type[i] = coeff_result_type + i * num_blocks; + size_t typeArray_size = bytesToSize(comp_data_pos); + decode(comp_data_pos + sizeof(size_t), reg_count, root, coeff_type[i]); + comp_data_pos += sizeof(size_t) + typeArray_size; + int coeff_unpred_count = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + coeff_unpred_data[i] = (float *) comp_data_pos; + comp_data_pos += coeff_unpred_count * sizeof(float); + SZ_ReleaseHuffman(huffmanTree); + } + } + float last_coefficients[4] = {0.0}; + int coeff_unpred_data_count[4] = {0}; + int coeff_index = 0; + //updateQuantizationInfo(intervals); + + size_t total_unpred; + memcpy(&total_unpred, comp_data_pos, sizeof(size_t)); + comp_data_pos += sizeof(size_t); + float * unpred_data = (float *) comp_data_pos; + comp_data_pos += total_unpred * sizeof(float); + + int * result_type = (int *) malloc(num_blocks*max_num_block_elements * sizeof(int)); + decode(comp_data_pos, num_blocks*max_num_block_elements, root, result_type); + SZ_ReleaseHuffman(huffmanTree); + + int intvRadius = intervals/2; + + int * type; + float * data_pos = *data; + size_t cur_unpred_count; + unsigned char * indicator_pos = indicator; + int dec_buffer_size = block_size + 1; + float * dec_buffer = (float *) malloc(dec_buffer_size*dec_buffer_size*dec_buffer_size*sizeof(float)); + memset(dec_buffer, 0, dec_buffer_size*dec_buffer_size*dec_buffer_size*sizeof(float)); + float * block_data_pos_x = NULL; + float * block_data_pos_y = NULL; + float * block_data_pos_z = NULL; + int block_dim0_offset = dec_buffer_size*dec_buffer_size; + int block_dim1_offset = dec_buffer_size; + if(use_mean){ + type = result_type; + for(size_t i=0; i= r1) break; + block_data_pos_y = block_data_pos_x; + for(int jj=0; jj= r2) break; + block_data_pos_z = 
block_data_pos_y; + for(int kk=0; kk= r3) break; + *block_data_pos_z = data_pos[ii*dec_buffer_size*dec_buffer_size + jj*dec_buffer_size + kk]; + block_data_pos_z ++; + } + block_data_pos_y += dim1_offset; + } + block_data_pos_x += dim0_offset; + } + + } + } + } + + } + else{ + type = result_type; + for(size_t i=0; i= r1) break; + block_data_pos_y = block_data_pos_x; + for(int jj=0; jj= r2) break; + block_data_pos_z = block_data_pos_y; + for(int kk=0; kk= r3) break; + *block_data_pos_z = data_pos[ii*dec_buffer_size*dec_buffer_size + jj*dec_buffer_size + kk]; + block_data_pos_z ++; + } + block_data_pos_y += dim1_offset; + } + block_data_pos_x += dim0_offset; + } + } + } + } + } + free(dec_buffer); + free(coeff_result_type); + + free(indicator); + free(result_type); +} + +void decompressDataSeries_float_3D_decompression_random_access_with_blocked_regression(float** data, size_t r1, size_t r2, size_t r3, unsigned char* comp_data){ + + size_t dim0_offset = r2 * r3; + size_t dim1_offset = r3; + size_t num_elements = r1 * r2 * r3; + + *data = (float*)malloc(sizeof(float)*num_elements); + + unsigned char * comp_data_pos = comp_data; + + size_t block_size = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + // calculate block dims + size_t num_x, num_y, num_z; + num_x = (r1 - 1) / block_size + 1; + num_y = (r2 - 1) / block_size + 1; + num_z = (r3 - 1) / block_size + 1; + + size_t max_num_block_elements = block_size * block_size * block_size; + size_t num_blocks = num_x * num_y * num_z; + + double realPrecision = bytesToDouble(comp_data_pos); + comp_data_pos += sizeof(double); + unsigned int intervals = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + + //updateQuantizationInfo(intervals); + + unsigned int tree_size = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + + int stateNum = 2*intervals; + HuffmanTree* huffmanTree = createHuffmanTree(stateNum); + + int nodeCount = bytesToInt_bigEndian(comp_data_pos); + 
node root = reconstruct_HuffTree_from_bytes_anyStates(huffmanTree,comp_data_pos+sizeof(int), nodeCount); + comp_data_pos += sizeof(int) + tree_size; + + float mean; + unsigned char use_mean; + memcpy(&use_mean, comp_data_pos, sizeof(unsigned char)); + comp_data_pos += sizeof(unsigned char); + memcpy(&mean, comp_data_pos, sizeof(float)); + comp_data_pos += sizeof(float); + size_t reg_count = 0; + + unsigned char * indicator; + size_t indicator_bitlength = (num_blocks - 1)/8 + 1; + convertByteArray2IntArray_fast_1b(num_blocks, comp_data_pos, indicator_bitlength, &indicator); + comp_data_pos += indicator_bitlength; + for(size_t i=0; i 0){ + for(int i=0; i<4; i++){ + precision[i] = bytesToDouble(comp_data_pos); + comp_data_pos += sizeof(double); + coeff_intvRadius[i] = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + unsigned int tree_size = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + int stateNum = 2*coeff_intvRadius[i]*2; + HuffmanTree* huffmanTree = createHuffmanTree(stateNum); + int nodeCount = bytesToInt_bigEndian(comp_data_pos); + node root = reconstruct_HuffTree_from_bytes_anyStates(huffmanTree, comp_data_pos+sizeof(int), nodeCount); + comp_data_pos += sizeof(int) + tree_size; + + coeff_type[i] = coeff_result_type + i * num_blocks; + size_t typeArray_size = bytesToSize(comp_data_pos); + decode(comp_data_pos + sizeof(size_t), reg_count, root, coeff_type[i]); + comp_data_pos += sizeof(size_t) + typeArray_size; + int coeff_unpred_count = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + coeff_unpred_data[i] = (float *) comp_data_pos; + comp_data_pos += coeff_unpred_count * sizeof(float); + SZ_ReleaseHuffman(huffmanTree); + } + } + float last_coefficients[4] = {0.0}; + int coeff_unpred_data_count[4] = {0}; + int coeff_index = 0; + //updateQuantizationInfo(intervals); + int intvRadius = intervals/2; + + size_t total_unpred; + memcpy(&total_unpred, comp_data_pos, sizeof(size_t)); + comp_data_pos += 
sizeof(size_t); + size_t compressed_blockwise_unpred_count_size; + memcpy(&compressed_blockwise_unpred_count_size, comp_data_pos, sizeof(size_t)); + comp_data_pos += sizeof(size_t); + int * blockwise_unpred_count = (int *)SZ_decompress(SZ_INT32, comp_data_pos, compressed_blockwise_unpred_count_size, 0, 0, 0, 0, num_blocks); + comp_data_pos += compressed_blockwise_unpred_count_size; + + float * unpred_data = (float *) comp_data_pos; + comp_data_pos += total_unpred * sizeof(float); + + size_t compressed_type_array_block_size; + memcpy(&compressed_type_array_block_size, comp_data_pos, sizeof(size_t)); + comp_data_pos += sizeof(size_t); + unsigned short * type_array_block_size = (unsigned short *)SZ_decompress(SZ_INT16, comp_data_pos, compressed_type_array_block_size, 0, 0, 0, 0, num_blocks); + comp_data_pos += compressed_type_array_block_size; + + int * result_type = (int *) malloc(num_blocks*max_num_block_elements * sizeof(int)); + // decode(comp_data_pos, num_blocks*max_num_block_elements, root, result_type); + int * block_type = result_type; + unsigned short * type_array_block_size_pos = type_array_block_size; + for(size_t i=0; i= r1) break; + block_data_pos_y = block_data_pos_x; + for(int jj=0; jj= r2) break; + block_data_pos_z = block_data_pos_y; + for(int kk=0; kk= r3) break; + *block_data_pos_z = data_pos[ii*dec_buffer_size*dec_buffer_size + jj*dec_buffer_size + kk]; + block_data_pos_z ++; + } + block_data_pos_y += dim1_offset; + } + block_data_pos_x += dim0_offset; + } + + } + } + } + + } + else{ + type = result_type; + for(size_t i=0; i= r1) break; + block_data_pos_y = block_data_pos_x; + for(int jj=0; jj= r2) break; + block_data_pos_z = block_data_pos_y; + for(int kk=0; kk= r3) break; + *block_data_pos_z = data_pos[ii*dec_buffer_size*dec_buffer_size + jj*dec_buffer_size + kk]; + block_data_pos_z ++; + } + block_data_pos_y += dim1_offset; + } + block_data_pos_x += dim0_offset; + } + } + } + } + } + free(blockwise_unpred_count); + free(dec_buffer); + 
free(coeff_result_type); + + free(indicator); + free(result_type); +} + + +#ifdef HAVE_RANDOMACCESS +void decompressDataSeries_float_1D_decompression_given_areas_with_blocked_regression(float** data, size_t r1, size_t s1, size_t e1, unsigned char* comp_data){ + + unsigned char * comp_data_pos = comp_data; + + size_t block_size = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + // calculate block dims + size_t num_x; + num_x = (r1 - 1) / block_size + 1; + + size_t max_num_block_elements = block_size; + size_t num_blocks = num_x; + + double realPrecision = bytesToDouble(comp_data_pos); + comp_data_pos += sizeof(double); + unsigned int intervals = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + + //updateQuantizationInfo(intervals); + + unsigned int tree_size = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + + int stateNum = 2*intervals; + HuffmanTree* huffmanTree = createHuffmanTree(stateNum); + + int nodeCount = bytesToInt_bigEndian(comp_data_pos); + node root = reconstruct_HuffTree_from_bytes_anyStates(huffmanTree,comp_data_pos+sizeof(int), nodeCount); + comp_data_pos += sizeof(int) + tree_size; + + float mean; + unsigned char use_mean; + memcpy(&use_mean, comp_data_pos, sizeof(unsigned char)); + comp_data_pos += sizeof(unsigned char); + memcpy(&mean, comp_data_pos, sizeof(float)); + comp_data_pos += sizeof(float); + size_t reg_count = 0; + + unsigned char * indicator; + size_t indicator_bitlength = (num_blocks - 1)/8 + 1; + convertByteArray2IntArray_fast_1b(num_blocks, comp_data_pos, indicator_bitlength, &indicator); + comp_data_pos += indicator_bitlength; + for(size_t i=0; i 0){ + for(int i=0; i<2; i++){ + precision[i] = bytesToDouble(comp_data_pos); + comp_data_pos += sizeof(double); + coeff_intvRadius[i] = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + unsigned int tree_size = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + int stateNum = 
2*coeff_intvRadius[i]*2; + HuffmanTree* huffmanTree = createHuffmanTree(stateNum); + int nodeCount = bytesToInt_bigEndian(comp_data_pos); + node root = reconstruct_HuffTree_from_bytes_anyStates(huffmanTree, comp_data_pos+sizeof(int), nodeCount); + comp_data_pos += sizeof(int) + tree_size; + + coeff_type[i] = coeff_result_type + i * num_blocks; + size_t typeArray_size = bytesToSize(comp_data_pos); + decode(comp_data_pos + sizeof(size_t), reg_count, root, coeff_type[i]); + comp_data_pos += sizeof(size_t) + typeArray_size; + int coeff_unpred_count = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + coeff_unpred_data[i] = (float *) comp_data_pos; + comp_data_pos += coeff_unpred_count * sizeof(float); + SZ_ReleaseHuffman(huffmanTree); + } + } + float last_coefficients[2] = {0.0}; + int coeff_unpred_data_count[2] = {0}; + // decompress coeffcients + float * reg_params = (float *) malloc(2*num_blocks*sizeof(float)); + memset(reg_params, 0, 2*num_blocks*sizeof(float)); + float * reg_params_pos = reg_params; + size_t coeff_index = 0; + for(size_t i=0; i= r1) break; + *block_data_pos_x = data_pos[ii]; + block_data_pos_x ++; + } + } + + } + else{ + for(size_t i=sx; i= r1) break; + *block_data_pos_x = data_pos[ii]; + block_data_pos_x ++; + } + } + } + free(unpred_offset); + free(reg_params); + free(blockwise_unpred_count); + free(dec_buffer); + free(coeff_result_type); + + free(indicator); + free(result_type); + + // extract data + int resi_x = s1 % block_size; + *data = (float*) malloc(sizeof(float)*(e1 - s1)); + float * final_data_pos = *data; + float * block_data_pos = dec_block_data + resi_x; + for(int i=0; i<(e1 - s1); i++){ + *(final_data_pos++) = *(block_data_pos++); + } + free(dec_block_data); +} + +void decompressDataSeries_float_2D_decompression_given_areas_with_blocked_regression(float** data, size_t r1, size_t r2, size_t s1, size_t s2, size_t e1, size_t e2, unsigned char* comp_data){ + + unsigned char * comp_data_pos = comp_data; + + size_t 
block_size = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + // calculate block dims + size_t num_x, num_y; + num_x = (r1 - 1) / block_size + 1; + num_y = (r2 - 1) / block_size + 1; + + size_t max_num_block_elements = block_size * block_size; + size_t num_blocks = num_x * num_y; + + double realPrecision = bytesToDouble(comp_data_pos); + comp_data_pos += sizeof(double); + unsigned int intervals = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + + //updateQuantizationInfo(intervals); + + unsigned int tree_size = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + + int stateNum = 2*intervals; + HuffmanTree* huffmanTree = createHuffmanTree(stateNum); + + int nodeCount = bytesToInt_bigEndian(comp_data_pos); + node root = reconstruct_HuffTree_from_bytes_anyStates(huffmanTree,comp_data_pos+sizeof(int), nodeCount); + comp_data_pos += sizeof(int) + tree_size; + + float mean; + unsigned char use_mean; + memcpy(&use_mean, comp_data_pos, sizeof(unsigned char)); + comp_data_pos += sizeof(unsigned char); + memcpy(&mean, comp_data_pos, sizeof(float)); + comp_data_pos += sizeof(float); + size_t reg_count = 0; + + unsigned char * indicator; + size_t indicator_bitlength = (num_blocks - 1)/8 + 1; + convertByteArray2IntArray_fast_1b(num_blocks, comp_data_pos, indicator_bitlength, &indicator); + comp_data_pos += indicator_bitlength; + for(size_t i=0; i 0){ + for(int i=0; i<3; i++){ + precision[i] = bytesToDouble(comp_data_pos); + comp_data_pos += sizeof(double); + coeff_intvRadius[i] = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + unsigned int tree_size = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + int stateNum = 2*coeff_intvRadius[i]*2; + HuffmanTree* huffmanTree = createHuffmanTree(stateNum); + int nodeCount = bytesToInt_bigEndian(comp_data_pos); + node root = reconstruct_HuffTree_from_bytes_anyStates(huffmanTree, comp_data_pos+sizeof(int), nodeCount); + comp_data_pos 
+= sizeof(int) + tree_size; + + coeff_type[i] = coeff_result_type + i * num_blocks; + size_t typeArray_size = bytesToSize(comp_data_pos); + decode(comp_data_pos + sizeof(size_t), reg_count, root, coeff_type[i]); + comp_data_pos += sizeof(size_t) + typeArray_size; + int coeff_unpred_count = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + coeff_unpred_data[i] = (float *) comp_data_pos; + comp_data_pos += coeff_unpred_count * sizeof(float); + SZ_ReleaseHuffman(huffmanTree); + } + } + float last_coefficients[3] = {0.0}; + int coeff_unpred_data_count[3] = {0}; + // decompress coeffcients + float * reg_params = (float *) malloc(3*num_blocks*sizeof(float)); + memset(reg_params, 0, 3*num_blocks*sizeof(float)); + float * reg_params_pos = reg_params; + size_t coeff_index = 0; + for(size_t i=0; i= r1) break; + block_data_pos_y = block_data_pos_x; + for(int jj=0; jj= r2) break; + *block_data_pos_y = data_pos[ii*dec_buffer_size + jj]; + block_data_pos_y ++; + } + block_data_pos_x += dec_block_dim0_offset; + } + + } + } + + } + else{ + for(size_t i=sx; i= r1) break; + block_data_pos_y = block_data_pos_x; + for(int jj=0; jj= r2) break; + *block_data_pos_y = data_pos[ii*dec_buffer_size + jj]; + block_data_pos_y ++; + } + block_data_pos_x += dec_block_dim0_offset; + } + } + } + } + free(unpred_offset); + free(reg_params); + free(blockwise_unpred_count); + free(dec_buffer); + free(coeff_result_type); + + free(indicator); + free(result_type); + + // extract data + int resi_x = s1 % block_size; + int resi_y = s2 % block_size; + *data = (float*) malloc(sizeof(float)*(e1 - s1) * (e2 - s2)); + float * final_data_pos = *data; + for(int i=0; i<(e1 - s1); i++){ + float * block_data_pos = dec_block_data + (i+resi_x)*dec_block_dim0_offset + resi_y; + for(int j=0; j<(e2 - s2); j++){ + *(final_data_pos++) = *(block_data_pos++); + } + } + free(dec_block_data); +} + +void decompressDataSeries_float_3D_decompression_given_areas_with_blocked_regression(float** data, size_t 
r1, size_t r2, size_t r3, size_t s1, size_t s2, size_t s3, size_t e1, size_t e2, size_t e3, unsigned char* comp_data){ + + // size_t dim0_offset = r2 * r3; + // size_t dim1_offset = r3; + + unsigned char * comp_data_pos = comp_data; + + size_t block_size = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + // calculate block dims + size_t num_x, num_y, num_z; + num_x = (r1 - 1) / block_size + 1; + num_y = (r2 - 1) / block_size + 1; + num_z = (r3 - 1) / block_size + 1; + + size_t max_num_block_elements = block_size * block_size * block_size; + size_t num_blocks = num_x * num_y * num_z; + + double realPrecision = bytesToDouble(comp_data_pos); + comp_data_pos += sizeof(double); + unsigned int intervals = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + + //updateQuantizationInfo(intervals); + + unsigned int tree_size = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + + int stateNum = 2*intervals; + HuffmanTree* huffmanTree = createHuffmanTree(stateNum); + + int nodeCount = bytesToInt_bigEndian(comp_data_pos); + node root = reconstruct_HuffTree_from_bytes_anyStates(huffmanTree,comp_data_pos+sizeof(int), nodeCount); + comp_data_pos += sizeof(int) + tree_size; + + float mean; + unsigned char use_mean; + memcpy(&use_mean, comp_data_pos, sizeof(unsigned char)); + comp_data_pos += sizeof(unsigned char); + memcpy(&mean, comp_data_pos, sizeof(float)); + comp_data_pos += sizeof(float); + size_t reg_count = 0; + + unsigned char * indicator; + size_t indicator_bitlength = (num_blocks - 1)/8 + 1; + convertByteArray2IntArray_fast_1b(num_blocks, comp_data_pos, indicator_bitlength, &indicator); + comp_data_pos += indicator_bitlength; + for(size_t i=0; i 0){ + for(int i=0; i<4; i++){ + precision[i] = bytesToDouble(comp_data_pos); + comp_data_pos += sizeof(double); + coeff_intvRadius[i] = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + unsigned int tree_size = bytesToInt_bigEndian(comp_data_pos); + 
comp_data_pos += sizeof(int); + int stateNum = 2*coeff_intvRadius[i]*2; + HuffmanTree* huffmanTree = createHuffmanTree(stateNum); + int nodeCount = bytesToInt_bigEndian(comp_data_pos); + node root = reconstruct_HuffTree_from_bytes_anyStates(huffmanTree, comp_data_pos+sizeof(int), nodeCount); + comp_data_pos += sizeof(int) + tree_size; + + coeff_type[i] = coeff_result_type + i * num_blocks; + size_t typeArray_size = bytesToSize(comp_data_pos); + decode(comp_data_pos + sizeof(size_t), reg_count, root, coeff_type[i]); + comp_data_pos += sizeof(size_t) + typeArray_size; + int coeff_unpred_count = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + coeff_unpred_data[i] = (float *) comp_data_pos; + comp_data_pos += coeff_unpred_count * sizeof(float); + SZ_ReleaseHuffman(huffmanTree); + } + } + float last_coefficients[4] = {0.0}; + int coeff_unpred_data_count[4] = {0}; + // decompress coeffcients + float * reg_params = (float *) malloc(4*num_blocks*sizeof(float)); + memset(reg_params, 0, 4*num_blocks*sizeof(float)); + float * reg_params_pos = reg_params; + size_t coeff_index = 0; + for(size_t i=0; i= r1) break; + block_data_pos_y = block_data_pos_x; + for(int jj=0; jj= r2) break; + block_data_pos_z = block_data_pos_y; + for(int kk=0; kk= r3) break; + *block_data_pos_z = data_pos[ii*dec_buffer_size*dec_buffer_size + jj*dec_buffer_size + kk]; + block_data_pos_z ++; + } + block_data_pos_y += dec_block_dim1_offset; + } + block_data_pos_x += dec_block_dim0_offset; + } + + } + } + } + + } + else{ + for(size_t i=sx; i= r1) break; + block_data_pos_y = block_data_pos_x; + for(int jj=0; jj= r2) break; + block_data_pos_z = block_data_pos_y; + for(int kk=0; kk= r3) break; + *block_data_pos_z = data_pos[ii*dec_buffer_size*dec_buffer_size + jj*dec_buffer_size + kk]; + block_data_pos_z ++; + } + block_data_pos_y += dec_block_dim1_offset; + } + block_data_pos_x += dec_block_dim0_offset; + } + + } + } + } + } + free(unpred_offset); + free(reg_params); + 
free(blockwise_unpred_count); + free(dec_buffer); + free(coeff_result_type); + + free(indicator); + free(result_type); + + // extract data + int resi_x = s1 % block_size; + int resi_y = s2 % block_size; + int resi_z = s3 % block_size; + *data = (float*) malloc(sizeof(float)*(e1 - s1) * (e2 - s2) * (e3 - s3)); + float * final_data_pos = *data; + for(int i=0; i<(e1 - s1); i++){ + for(int j=0; j<(e2 - s2); j++){ + float * block_data_pos = dec_block_data + (i+resi_x)*dec_block_dim0_offset + (j+resi_y)*dec_block_dim1_offset + resi_z; + for(int k=0; k<(e3 - s3); k++){ + *(final_data_pos++) = *(block_data_pos++); + } + } + } + free(dec_block_data); + +} + +int SZ_decompress_args_randomaccess_float(float** newData, +size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, +size_t s5, size_t s4, size_t s3, size_t s2, size_t s1, // start point +size_t e5, size_t e4, size_t e3, size_t e2, size_t e1, // end point +unsigned char* cmpBytes, size_t cmpSize) +{ + if(confparams_dec==NULL) + confparams_dec = (sz_params*)malloc(sizeof(sz_params)); + memset(confparams_dec, 0, sizeof(sz_params)); + if(exe_params==NULL) + exe_params = (sz_exedata*)malloc(sizeof(sz_exedata)); + memset(exe_params, 0, sizeof(sz_exedata)); + + int x = 1; + char *y = (char*)&x; + if(*y==1) + sysEndianType = LITTLE_ENDIAN_SYSTEM; + else //=0 + sysEndianType = BIG_ENDIAN_SYSTEM; + + confparams_dec->randomAccess = 1; + + int status = SZ_SCES; + size_t dataLength = computeDataLength(r5,r4,r3,r2,r1); + + //unsigned char* tmpBytes; + size_t targetUncompressSize = dataLength <<2; //i.e., *4 + //tmpSize must be "much" smaller than dataLength + size_t i, tmpSize = 8+MetaDataByteLength+exe_params->SZ_SIZE_TYPE; + unsigned char* szTmpBytes; + + if(cmpSize!=8+4+MetaDataByteLength && cmpSize!=8+8+MetaDataByteLength) //4,8 means two posibilities of SZ_SIZE_TYPE + { + confparams_dec->losslessCompressor = is_lossless_compressed_data(cmpBytes, cmpSize); + if(confparams_dec->szMode!=SZ_TEMPORAL_COMPRESSION) + { + 
if(confparams_dec->losslessCompressor!=-1) + confparams_dec->szMode = SZ_BEST_COMPRESSION; + else + confparams_dec->szMode = SZ_BEST_SPEED; + } + + if(confparams_dec->szMode==SZ_BEST_SPEED) + { + tmpSize = cmpSize; + szTmpBytes = cmpBytes; + } + else if(confparams_dec->szMode==SZ_BEST_COMPRESSION || confparams_dec->szMode==SZ_DEFAULT_COMPRESSION || confparams_dec->szMode==SZ_TEMPORAL_COMPRESSION) + { + if(targetUncompressSizelosslessCompressor, cmpBytes, (unsigned long)cmpSize, &szTmpBytes, (unsigned long)targetUncompressSize+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE);// (unsigned long)targetUncompressSize+8: consider the total length under lossless compression mode is actually 3+4+1+targetUncompressSize + } + else + { + printf("Wrong value of confparams_dec->szMode in the double compressed bytes.\n"); + status = SZ_MERR; + return status; + } + } + else + szTmpBytes = cmpBytes; + + TightDataPointStorageF* tdps; + new_TightDataPointStorageF_fromFlatBytes(&tdps, szTmpBytes, tmpSize); + + int dim = computeDimension(r5,r4,r3,r2,r1); + int floatSize = sizeof(float); + if(tdps->isLossless) + { + *newData = (float*)malloc(floatSize*dataLength); + if(sysEndianType==BIG_ENDIAN_SYSTEM) + { + memcpy(*newData, szTmpBytes+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE, dataLength*floatSize); + } + else + { + unsigned char* p = szTmpBytes+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE; + for(i=0;irandomAccess == 0 && (s1+s2+s3+s4+s5>0 || (r5-e5+r4-e4+r3-e3+r2-e2+r1-e1 > 0))) + { + printf("Error: you specified the random access mode for decompression, but the compressed data were generate in the non-random-access way.!\n"); + status = SZ_DERR; + } + else if (dim == 1) + { + //printf("Error: random access mode doesn't support 1D yet, but only 3D.\n"); + decompressDataSeries_float_1D_decompression_given_areas_with_blocked_regression(newData, r1, s1, e1, tdps->raBytes); + //status = SZ_DERR; + } + else if(dim == 2) + { + //printf("Error: random access mode doesn't support 2D yet, 
but only 3D.\n"); + decompressDataSeries_float_2D_decompression_given_areas_with_blocked_regression(newData, r2, r1, s2, s1, e2, e1, tdps->raBytes); + //status = SZ_DERR; + } + else if(dim == 3) + { + decompressDataSeries_float_3D_decompression_given_areas_with_blocked_regression(newData, r3, r2, r1, s3, s2, s1, e3, e2, e1, tdps->raBytes); + status = SZ_SCES; + } + else if(dim == 4) + { + printf("Error: random access mode doesn't support 4D yet, but only 3D.\n"); + status = SZ_DERR; + } + else + { + printf("Error: currently support only at most 4 dimensions!\n"); + status = SZ_DERR; + } + } + + free_TightDataPointStorageF2(tdps); + if(confparams_dec->szMode!=SZ_BEST_SPEED && cmpSize!=8+MetaDataByteLength+exe_params->SZ_SIZE_TYPE) + free(szTmpBytes); + return status; +} +#endif diff --git a/deps/SZ/sz/src/szd_float_pwr.c b/deps/SZ/sz/src/szd_float_pwr.c new file mode 100644 index 0000000000000000000000000000000000000000..46b8f1d7e719a73b7f412921a54cd59241fad41c --- /dev/null +++ b/deps/SZ/sz/src/szd_float_pwr.c @@ -0,0 +1,1528 @@ +/** + * @file szd_float_pwr.c + * @author Sheng Di, Dingwen Tao, Xin Liang, Xiangyu Zou, Tao Lu, Wen Xia, Xuan Wang, Weizhe Zhang + * @date Feb., 2019 + * @brief + * (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. 
+ */ + +#include +#include +#include +#include +#include "TightDataPointStorageF.h" +#include "CompressElement.h" +#include "sz.h" +#include "Huffman.h" +#include "sz_float_pwr.h" +#include "utility.h" +//#include "rw.h" +// +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wchar-subscripts" + + +void decompressDataSeries_float_1D_pwr(float** data, size_t dataSeriesLength, TightDataPointStorageF* tdps) +{ + updateQuantizationInfo(tdps->intervals); + unsigned char tmpPrecBytes[4] = {0}; //used when needing to convert bytes to float values + unsigned char* bp = tdps->pwrErrBoundBytes; + size_t i, j, k = 0, p = 0, l = 0; // k is to track the location of residual_bit + // in resiMidBits, p is to track the + // byte_index of resiMidBits, l is for + // leadNum + unsigned char* leadNum; + float interval = 0;// = (float)tdps->realPrecision*2; + + convertByteArray2IntArray_fast_2b(tdps->exactDataNum, tdps->leadNumArray, tdps->leadNumArray_size, &leadNum); + + *data = (float*)malloc(sizeof(float)*dataSeriesLength); + + int* type = (int*)malloc(dataSeriesLength*sizeof(int)); + + HuffmanTree* huffmanTree = createHuffmanTree(tdps->stateNum); + decode_withTree(huffmanTree, tdps->typeArray, dataSeriesLength, type); + SZ_ReleaseHuffman(huffmanTree); + + //sdi:Debug + //writeUShortData(type, dataSeriesLength, "decompressStateBytes.sb"); + + unsigned char preBytes[4]; + unsigned char curBytes[4]; + + memset(preBytes, 0, 4); + + size_t curByteIndex = 0; + int reqLength = 0, reqBytesLength = 0, resiBitsLength = 0, resiBits = 0; + unsigned char leadingNum; + float medianValue, exactData, predValue = 0, realPrecision = 0; + + medianValue = tdps->medianValue; + + int type_, updateReqLength = 0; + for (i = 0; i < dataSeriesLength; i++) + { + if(i%tdps->segment_size==0) + { + tmpPrecBytes[0] = *(bp++); + tmpPrecBytes[1] = *(bp++); + tmpPrecBytes[2] = 0; + tmpPrecBytes[3] = 0; + realPrecision = bytesToFloat(tmpPrecBytes); + interval = realPrecision*2; + updateReqLength = 0; + } 
+ + type_ = type[i]; + switch (type_) { + case 0: + // compute resiBits + if(updateReqLength==0) + { + computeReqLength_float(realPrecision, tdps->radExpo, &reqLength, &medianValue); + reqBytesLength = reqLength/8; + resiBitsLength = reqLength%8; + updateReqLength = 1; + } + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 4); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToFloat(curBytes); + (*data)[i] = exactData + medianValue; + memcpy(preBytes,curBytes,4); + break; + default: + //predValue = 2 * (*data)[i-1] - (*data)[i-2]; + predValue = (*data)[i-1]; + (*data)[i] = predValue + (type_-exe_params->intvRadius)*interval; + break; + } + //printf("%.30G\n",(*data)[i]); + } + free(leadNum); + free(type); + return; +} + +float* extractRealPrecision_2D_float(size_t R1, size_t R2, int blockSize, TightDataPointStorageF* tdps) +{ + size_t i,j,k=0, I; + unsigned char* bytes = 
tdps->pwrErrBoundBytes; + unsigned char tmpBytes[4] = {0}; + float* result = (float*)malloc(sizeof(float)*R1*R2); + for(i=0;iintervals); + //printf("tdps->intervals=%d, exe_params->intvRadius=%d\n", tdps->intervals, exe_params->intvRadius); + + size_t j, k = 0, p = 0, l = 0; // k is to track the location of residual_bit + // in resiMidBits, p is to track the + // byte_index of resiMidBits, l is for + // leadNum + size_t dataSeriesLength = r1*r2; + // printf ("%d %d\n", r1, r2); + + unsigned char* leadNum; + + convertByteArray2IntArray_fast_2b(tdps->exactDataNum, tdps->leadNumArray, tdps->leadNumArray_size, &leadNum); + + *data = (float*)malloc(sizeof(float)*dataSeriesLength); + + int* type = (int*)malloc(dataSeriesLength*sizeof(int)); + + HuffmanTree* huffmanTree = createHuffmanTree(tdps->stateNum); + decode_withTree(huffmanTree, tdps->typeArray, dataSeriesLength, type); + SZ_ReleaseHuffman(huffmanTree); + + unsigned char preBytes[4]; + unsigned char curBytes[4]; + + memset(preBytes, 0, 4); + + size_t curByteIndex = 0; + int reqLength, reqBytesLength, resiBitsLength, resiBits; + unsigned char leadingNum; + float medianValue, exactData, realPrecision; + int type_; + float pred1D, pred2D; + size_t ii, jj, II = 0, JJ = 0, updateReqLength = 1; + + int blockSize = computeBlockEdgeSize_2D(tdps->segment_size); + size_t R1 = 1+(r1-1)/blockSize; + size_t R2 = 1+(r2-1)/blockSize; + float* pwrErrBound = extractRealPrecision_2D_float(R1, R2, blockSize, tdps); + + realPrecision = pwrErrBound[0]; + computeReqLength_float(realPrecision, tdps->radExpo, &reqLength, &medianValue); + reqBytesLength = reqLength/8; + resiBitsLength = reqLength%8; + + /* Process Row-0, data 0 */ + + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else 
if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 4); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToFloat(curBytes); + (*data)[0] = exactData + medianValue; + memcpy(preBytes,curBytes,4); + + /* Process Row-0, data 1 */ + type_ = type[1]; + if (type_ != 0) + { + pred1D = (*data)[0]; + (*data)[1] = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & 
code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 4); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToFloat(curBytes); + (*data)[1] = exactData + medianValue; + memcpy(preBytes,curBytes,4); + } + + /* Process Row-0, data 2 --> data r2-1 */ + for (jj = 2; jj < r2; jj++) + { + if(jj%blockSize==0) + { + II = 0; + JJ++; + realPrecision = pwrErrBound[JJ]; + updateReqLength = 0; + } + + type_ = type[jj]; + if (type_ != 0) + { + pred1D = 2*(*data)[jj-1] - (*data)[jj-2]; + (*data)[jj] = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + // compute resiBits + if(updateReqLength==0) + { + computeReqLength_float(realPrecision, tdps->radExpo, &reqLength, &medianValue); + reqBytesLength = reqLength/8; + resiBitsLength = reqLength%8; + updateReqLength = 1; + } + + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 
4); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToFloat(curBytes); + (*data)[jj] = exactData + medianValue; + memcpy(preBytes,curBytes,4); + } + } + + size_t index; + /* Process Row-1 --> Row-r1-1 */ + for (ii = 1; ii < r1; ii++) + { + /* Process row-ii data 0 */ + if(ii%blockSize==0) + II++; + JJ = 0; + realPrecision = pwrErrBound[II*R2+JJ]; + updateReqLength = 0; + + index = ii*r2; + + type_ = type[index]; + if (type_ != 0) + { + pred1D = (*data)[index-r2]; + (*data)[index] = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + // compute resiBits + if(updateReqLength==0) + { + computeReqLength_float(realPrecision, tdps->radExpo, &reqLength, &medianValue); + reqBytesLength = reqLength/8; + resiBitsLength = reqLength%8; + updateReqLength = 1; + } + + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 4); + leadingNum = leadNum[l++]; + 
memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToFloat(curBytes); + (*data)[index] = exactData + medianValue; + memcpy(preBytes,curBytes,4); + } + + /* Process row-ii data 1 --> r2-1*/ + for (jj = 1; jj < r2; jj++) + { + index = ii*r2+jj; + + if(jj%blockSize==0) + JJ++; + realPrecision = pwrErrBound[II*R2+JJ]; + updateReqLength = 0; + + type_ = type[index]; + if (type_ != 0) + { + pred2D = (*data)[index-1] + (*data)[index-r2] - (*data)[index-r2-1]; + (*data)[index] = pred2D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + // compute resiBits + if(updateReqLength==0) + { + computeReqLength_float(realPrecision, tdps->radExpo, &reqLength, &medianValue); + reqBytesLength = reqLength/8; + resiBitsLength = reqLength%8; + updateReqLength = 1; + } + + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 4); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = 
leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToFloat(curBytes); + (*data)[index] = exactData + medianValue; + memcpy(preBytes,curBytes,4); + } + } + } + + free(pwrErrBound); + free(leadNum); + free(type); + return; +} + +float* extractRealPrecision_3D_float(size_t R1, size_t R2, size_t R3, int blockSize, TightDataPointStorageF* tdps) +{ + size_t i,j,k=0, IR, JR, p = 0; + size_t R23 = R2*R3; + unsigned char* bytes = tdps->pwrErrBoundBytes; + unsigned char tmpBytes[4] = {0}; + float* result = (float*)malloc(sizeof(float)*R1*R2*R3); + for(i=0;iintervals); + size_t j, k = 0, p = 0, l = 0; // k is to track the location of residual_bit + // in resiMidBits, p is to track the + // byte_index of resiMidBits, l is for + // leadNum + size_t dataSeriesLength = r1*r2*r3; + size_t r23 = r2*r3; +// printf ("%d %d %d\n", r1, r2, r3); + unsigned char* leadNum; + + convertByteArray2IntArray_fast_2b(tdps->exactDataNum, tdps->leadNumArray, tdps->leadNumArray_size, &leadNum); + + *data = (float*)malloc(sizeof(float)*dataSeriesLength); + int* type = (int*)malloc(dataSeriesLength*sizeof(int)); + + HuffmanTree* huffmanTree = createHuffmanTree(tdps->stateNum); + decode_withTree(huffmanTree, tdps->typeArray, dataSeriesLength, type); + SZ_ReleaseHuffman(huffmanTree); + + unsigned char preBytes[4]; + unsigned char curBytes[4]; + + memset(preBytes, 0, 4); + size_t curByteIndex = 0; + int reqLength, reqBytesLength, resiBitsLength, resiBits; + unsigned char leadingNum; + float medianValue, exactData, realPrecision; + int type_; + float pred1D, pred2D, pred3D; + size_t ii, jj, kk, II = 0, JJ = 0, KK = 0, updateReqLength = 1; + + int blockSize = computeBlockEdgeSize_3D(tdps->segment_size); + size_t R1 = 1+(r1-1)/blockSize; + size_t R2 = 1+(r2-1)/blockSize; + size_t R3 = 
1+(r3-1)/blockSize; + size_t R23 = R2*R3; + float* pwrErrBound = extractRealPrecision_3D_float(R1, R2, R3, blockSize, tdps); + + realPrecision = pwrErrBound[0]; + computeReqLength_float(realPrecision, tdps->radExpo, &reqLength, &medianValue); + reqBytesLength = reqLength/8; + resiBitsLength = reqLength%8; + + /////////////////////////// Process layer-0 /////////////////////////// + /* Process Row-0 data 0*/ + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 4); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + exactData = bytesToFloat(curBytes); + (*data)[0] = exactData + medianValue; + memcpy(preBytes,curBytes,4); + + /* Process Row-0, data 1 */ + pred1D = (*data)[0]; + + type_ = type[1]; + if (type_ != 0) + { + (*data)[1] = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int 
kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 4); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToFloat(curBytes); + (*data)[1] = exactData + medianValue; + memcpy(preBytes,curBytes,4); + } + /* Process Row-0, data 2 --> data r3-1 */ + for (jj = 2; jj < r3; jj++) + { + if(jj%blockSize==0) + { + KK = 0;//dimension 1 (top) + II = 0;//dimension 2 (mid) + JJ++; + realPrecision = pwrErrBound[JJ]; + updateReqLength = 0; + } + type_ = type[jj]; + if (type_ != 0) + { + pred1D = 2*(*data)[jj-1] - (*data)[jj-2]; + (*data)[jj] = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + // compute resiBits + if(updateReqLength==0) + { + computeReqLength_float(realPrecision, tdps->radExpo, &reqLength, &medianValue); + reqBytesLength = reqLength/8; + resiBitsLength = reqLength%8; + updateReqLength = 1; + } + + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = 
getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 4); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToFloat(curBytes); + (*data)[jj] = exactData + medianValue; + memcpy(preBytes,curBytes,4); + } + } + size_t index; + /* Process Row-1 --> Row-r2-1 */ + for (ii = 1; ii < r2; ii++) + { + /* Process row-ii data 0 */ + if(ii%blockSize==0) + II++; + JJ = 0; + realPrecision = pwrErrBound[II*R3+JJ]; + updateReqLength = 0; + + index = ii*r3; + + type_ = type[index]; + if (type_ != 0) + { + pred1D = (*data)[index-r3]; + (*data)[index] = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + // compute resiBits + if(updateReqLength==0) + { + computeReqLength_float(realPrecision, tdps->radExpo, &reqLength, &medianValue); + reqBytesLength = reqLength/8; + resiBitsLength = reqLength%8; + updateReqLength = 1; + } + + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if 
(rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 4); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToFloat(curBytes); + (*data)[index] = exactData + medianValue; + memcpy(preBytes,curBytes,4); + } + + /* Process row-ii data 1 --> r3-1*/ + for (jj = 1; jj < r3; jj++) + { + index = ii*r3+jj; + + if(jj%blockSize==0) + JJ++; + realPrecision = pwrErrBound[II*R3+JJ]; + updateReqLength = 0; + + type_ = type[index]; + if (type_ != 0) + { + pred2D = (*data)[index-1] + (*data)[index-r3] - (*data)[index-r3-1]; + (*data)[index] = pred2D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + // compute resiBits + if(updateReqLength==0) + { + computeReqLength_float(realPrecision, tdps->radExpo, &reqLength, &medianValue); + reqBytesLength = reqLength/8; + resiBitsLength = reqLength%8; + updateReqLength = 1; + } + + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = 
getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 4); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToFloat(curBytes); + (*data)[index] = exactData + medianValue; + memcpy(preBytes,curBytes,4); + } + } + } + + /////////////////////////// Process layer-1 --> layer-r1-1 /////////////////////////// + + for (kk = 1; kk < r1; kk++) + { + /* Process Row-0 data 0*/ + index = kk*r23; + if(kk%blockSize==0) + KK++; + II = 0; + JJ = 0; + + realPrecision = pwrErrBound[KK*R23]; + updateReqLength = 0; + + type_ = type[index]; + if (type_ != 0) + { + pred1D = (*data)[index-r23]; + (*data)[index] = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + // compute resiBits + if(updateReqLength==0) + { + computeReqLength_float(realPrecision, tdps->radExpo, &reqLength, &medianValue); + reqBytesLength = reqLength/8; + resiBitsLength = reqLength%8; + updateReqLength = 1; + } + + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + 
int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 4); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToFloat(curBytes); + (*data)[index] = exactData + medianValue; + memcpy(preBytes,curBytes,4); + } + + /* Process Row-0 data 1 --> data r3-1 */ + for (jj = 1; jj < r3; jj++) + { + index = kk*r23+jj; + + if(jj%blockSize==0) + JJ++; + + realPrecision = pwrErrBound[KK*R23+JJ]; + updateReqLength = 0; + + type_ = type[index]; + if (type_ != 0) + { + pred2D = (*data)[index-1] + (*data)[index-r23] - (*data)[index-r23-1]; + (*data)[index] = pred2D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + // compute resiBits + if(updateReqLength==0) + { + computeReqLength_float(realPrecision, tdps->radExpo, &reqLength, &medianValue); + reqBytesLength = reqLength/8; + resiBitsLength = reqLength%8; + updateReqLength = 1; + } + + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, 
resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 4); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToFloat(curBytes); + (*data)[index] = exactData + medianValue; + memcpy(preBytes,curBytes,4); + } + } + + /* Process Row-1 --> Row-r2-1 */ + for (ii = 1; ii < r2; ii++) + { + /* Process Row-i data 0 */ + index = kk*r23 + ii*r3; + + if(ii%blockSize==0) + II++; + JJ = 0; + + realPrecision = pwrErrBound[KK*R23+II*R3]; + updateReqLength = 0; + + type_ = type[index]; + if (type_ != 0) + { + pred2D = (*data)[index-r3] + (*data)[index-r23] - (*data)[index-r23-r3]; + (*data)[index] = pred2D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + // compute resiBits + if(updateReqLength==0) + { + computeReqLength_float(realPrecision, tdps->radExpo, &reqLength, &medianValue); + reqBytesLength = reqLength/8; + resiBitsLength = reqLength%8; + updateReqLength = 1; + } + + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, 
resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 4); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToFloat(curBytes); + (*data)[index] = exactData + medianValue; + memcpy(preBytes,curBytes,4); + } + + /* Process Row-i data 1 --> data r3-1 */ + for (jj = 1; jj < r3; jj++) + { + index = kk*r23 + ii*r3 + jj; + if(jj%blockSize==0) + JJ++; + + realPrecision = pwrErrBound[KK*R23+II*R3+JJ]; + updateReqLength = 0; + + type_ = type[index]; + if (type_ != 0) + { + pred3D = (*data)[index-1] + (*data)[index-r3] + (*data)[index-r23] + - (*data)[index-r3-1] - (*data)[index-r23-r3] - (*data)[index-r23-1] + (*data)[index-r23-r3-1]; + (*data)[index] = pred3D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + // compute resiBits + if(updateReqLength==0) + { + computeReqLength_float(realPrecision, tdps->radExpo, &reqLength, &medianValue); + reqBytesLength = reqLength/8; + resiBitsLength = reqLength%8; + updateReqLength = 1; + } + + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + 
if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 4); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToFloat(curBytes); + (*data)[index] = exactData + medianValue; + memcpy(preBytes,curBytes,4); + } + } + } + + } + + free(pwrErrBound); + free(leadNum); + free(type); + return; +} + +void decompressDataSeries_float_1D_pwrgroup(float** data, size_t dataSeriesLength, TightDataPointStorageF* tdps) +{ + float *posGroups, *negGroups, *groups; + float pos_01_group, neg_01_group; + int *posFlags, *negFlags; + + updateQuantizationInfo(tdps->intervals); + + unsigned char* leadNum; + double interval;// = (float)tdps->realPrecision*2; + + convertByteArray2IntArray_fast_2b(tdps->exactDataNum, tdps->leadNumArray, tdps->leadNumArray_size, &leadNum); + + *data = (float*)malloc(sizeof(float)*dataSeriesLength); + + int* type = (int*)malloc(dataSeriesLength*sizeof(int)); + + HuffmanTree* huffmanTree = createHuffmanTree(tdps->stateNum); + decode_withTree(huffmanTree, tdps->typeArray, dataSeriesLength, type); + 
SZ_ReleaseHuffman(huffmanTree); + + createRangeGroups_float(&posGroups, &negGroups, &posFlags, &negFlags); + + float realGroupPrecision; + float realPrecision = tdps->realPrecision; + char* groupID = decompressGroupIDArray(tdps->pwrErrBoundBytes, tdps->dataSeriesLength); + + //note that the groupID values here are [1,2,3,....,18] or [-1,-2,...,-18] + + double* groupErrorBounds = generateGroupErrBounds(confparams_dec->errorBoundMode, realPrecision, confparams_dec->pw_relBoundRatio); + exe_params->intvRadius = generateGroupMaxIntervalCount(groupErrorBounds); + + size_t nbBins = (size_t)(1/confparams_dec->pw_relBoundRatio + 0.5); + if(nbBins%2==1) + nbBins++; + exe_params->intvRadius = nbBins; + + unsigned char preBytes[4]; + unsigned char curBytes[4]; + + memset(preBytes, 0, 4); + + size_t curByteIndex = 0; + int reqLength, reqBytesLength = 0, resiBitsLength = 0, resiBits; + unsigned char leadingNum; + float medianValue, exactData, curValue, predValue; + + medianValue = tdps->medianValue; + + size_t i, j, k = 0, p = 0, l = 0; // k is to track the location of residual_bit + // in resiMidBits, p is to track the + // byte_index of resiMidBits, l is for + // leadNum + + int type_, updateReqLength = 0; + char rawGrpID = 0, indexGrpID = 0; + for (i = 0; i < dataSeriesLength; i++) + { + rawGrpID = groupID[i]; + + if(rawGrpID >= 2) + { + groups = posGroups; + indexGrpID = rawGrpID - 2; + } + else if(rawGrpID <= -2) + { + groups = negGroups; + indexGrpID = -rawGrpID - 2; } + else if(rawGrpID == 1) + { + groups = &pos_01_group; + indexGrpID = 0; + } + else //rawGrpID == -1 + { + groups = &neg_01_group; + indexGrpID = 0; + } + + type_ = type[i]; + switch (type_) { + case 0: + // compute resiBits + if(updateReqLength==0) + { + computeReqLength_float(realPrecision, tdps->radExpo, &reqLength, &medianValue); + reqBytesLength = reqLength/8; + resiBitsLength = reqLength%8; + updateReqLength = 1; + } + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps 
= getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 4); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToFloat(curBytes); + exactData = exactData + medianValue; + (*data)[i] = exactData; + memcpy(preBytes,curBytes,4); + + groups[indexGrpID] = exactData; + + break; + default: + predValue = groups[indexGrpID]; //Here, groups[indexGrpID] is the previous value. 
+ realGroupPrecision = groupErrorBounds[indexGrpID]; + interval = realGroupPrecision*2; + + curValue = predValue + (type_-exe_params->intvRadius)*interval; + + //groupNum = computeGroupNum_float(curValue); + + if((curValue>0&&rawGrpID<0)||(curValue<0&&rawGrpID>0)) + curValue = 0; + //else + //{ + // realGrpID = fabs(rawGrpID)-2; + // if(groupNum0?pow(2,realGrpID):-pow(2,realGrpID); + // else if(groupNum>realGrpID) + // curValue = rawGrpID>0?pow(2,groupNum):-pow(2,groupNum); + //} + + (*data)[i] = curValue; + groups[indexGrpID] = curValue; + break; + } + } + + free(leadNum); + free(type); + + free(posGroups); + free(negGroups); + free(posFlags); + free(negFlags); + free(groupErrorBounds); + free(groupID); +} + +void decompressDataSeries_float_1D_pwr_pre_log(float** data, size_t dataSeriesLength, TightDataPointStorageF* tdps) { + + decompressDataSeries_float_1D(data, dataSeriesLength, NULL, tdps); + float threshold = tdps->minLogValue; + if(tdps->pwrErrBoundBytes_size > 0){ + unsigned char * signs; + sz_lossless_decompress(ZSTD_COMPRESSOR, tdps->pwrErrBoundBytes, tdps->pwrErrBoundBytes_size, &signs, dataSeriesLength); + for(size_t i=0; iminLogValue; + if(tdps->pwrErrBoundBytes_size > 0){ + unsigned char * signs; + sz_lossless_decompress(ZSTD_COMPRESSOR, tdps->pwrErrBoundBytes, tdps->pwrErrBoundBytes_size, &signs, dataSeriesLength); + for(size_t i=0; iminLogValue; + if(tdps->pwrErrBoundBytes_size > 0){ + unsigned char * signs; + sz_lossless_decompress(ZSTD_COMPRESSOR, tdps->pwrErrBoundBytes, tdps->pwrErrBoundBytes_size, &signs, dataSeriesLength); + for(size_t i=0; iminLogValue; + uint32_t* ptr; + + if(tdps->pwrErrBoundBytes_size > 0){ + unsigned char * signs = NULL; + if(tdps->pwrErrBoundBytes_size==0) + { + signs = (unsigned char*)malloc(dataSeriesLength); + memset(signs, 0, dataSeriesLength); + } + else + sz_lossless_decompress(ZSTD_COMPRESSOR, tdps->pwrErrBoundBytes, tdps->pwrErrBoundBytes_size, &signs, dataSeriesLength); + for(size_t i=0; i= 0){ + (*data)[i] = 0; 
+ continue; + } + if(signs[i]){ + ptr = (uint32_t*)(*data) + i; + *ptr |= 0x80000000; + } + } + free(signs); + } + else{ + for(size_t i=0; iminLogValue; + uint32_t* ptr; + + if(tdps->pwrErrBoundBytes_size > 0){ + unsigned char * signs; + if(tdps->pwrErrBoundBytes_size==0) + { + signs = (unsigned char*)malloc(dataSeriesLength); + memset(signs, 0, dataSeriesLength); + } + else + sz_lossless_decompress(ZSTD_COMPRESSOR, tdps->pwrErrBoundBytes, tdps->pwrErrBoundBytes_size, &signs, dataSeriesLength); + for(size_t i=0; i= 0){ + (*data)[i] = 0; + continue; + } + if(signs[i]){ + ptr = (uint32_t*)(*data) + i; + *ptr |= 0x80000000; + } + } + free(signs); + } + else{ + for(size_t i=0; iminLogValue; + if(tdps->pwrErrBoundBytes_size > 0){ + unsigned char * signs; + uint32_t* ptr; + if(tdps->pwrErrBoundBytes_size==0) + { + signs = (unsigned char*)malloc(dataSeriesLength); + memset(signs, 0, dataSeriesLength); + } + else + sz_lossless_decompress(ZSTD_COMPRESSOR, tdps->pwrErrBoundBytes, tdps->pwrErrBoundBytes_size, &signs, dataSeriesLength); + for(size_t i=0; i= 0) { + (*data)[i] = 0; + continue; + } + if(signs[i]) { + ptr = (uint32_t*)(*data)+i; + *ptr |= 0x80000000; + } + } + free(signs); + } + else{ + for(size_t i=0; i +#include +#include +#include "szd_float.h" +#include "TightDataPointStorageF.h" +#include "sz.h" +#include "Huffman.h" +#include "szd_float_ts.h" + +void decompressDataSeries_float_1D_ts(float** data, size_t dataSeriesLength, float* hist_data, TightDataPointStorageF* tdps) +{ + float* lastSnapshotData = hist_data; + updateQuantizationInfo(tdps->intervals); + size_t i, j, k = 0, p = 0, l = 0; // k is to track the location of residual_bit + // in resiMidBits, p is to track the + // byte_index of resiMidBits, l is for + // leadNum + unsigned char* leadNum; + double interval = tdps->realPrecision*2; + + convertByteArray2IntArray_fast_2b(tdps->exactDataNum, tdps->leadNumArray, tdps->leadNumArray_size, &leadNum); + + *data = 
(float*)malloc(sizeof(float)*dataSeriesLength); + + int* type = (int*)malloc(dataSeriesLength*sizeof(int)); + + HuffmanTree* huffmanTree = createHuffmanTree(tdps->stateNum); + decode_withTree(huffmanTree, tdps->typeArray, dataSeriesLength, type); + SZ_ReleaseHuffman(huffmanTree); + + unsigned char preBytes[4]; + unsigned char curBytes[4]; + + memset(preBytes, 0, 4); + + size_t curByteIndex = 0; + int reqBytesLength, resiBitsLength, resiBits; + unsigned char leadingNum; + float medianValue, exactData, predValue = 0; + + reqBytesLength = tdps->reqLength/8; + resiBitsLength = tdps->reqLength%8; + medianValue = tdps->medianValue; + + int type_; + for (i = 0; i < dataSeriesLength; i++) { + type_ = type[i]; + switch (type_) { + case 0: + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (tdps->residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((tdps->residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (tdps->residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 4); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = tdps->exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToFloat(curBytes); + 
(*data)[i] = exactData + medianValue; + memcpy(preBytes,curBytes,4); + break; + default: + //predValue = (*data)[i-1]; + if(confparams_dec->szMode == SZ_TEMPORAL_COMPRESSION) + predValue = lastSnapshotData[i]; + (*data)[i] = predValue + (type_-exe_params->intvRadius)*interval; + break; + } + //printf("%.30G\n",(*data)[i]); + } + + memcpy(hist_data, (*data), dataSeriesLength*sizeof(float)); + + free(leadNum); + free(type); + return; +} diff --git a/deps/SZ/sz/src/szd_int16.c b/deps/SZ/sz/src/szd_int16.c new file mode 100644 index 0000000000000000000000000000000000000000..b751c3f3dc968686ee3ffded667b1532914a0f31 --- /dev/null +++ b/deps/SZ/sz/src/szd_int16.c @@ -0,0 +1,924 @@ +/** + * @file szd_int16.c + * @author Sheng Di + * @date Aug, 2017 + * @brief + * (C) 2017 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. + */ + +#include +#include +#include +#include +#include "TightDataPointStorageI.h" +#include "sz.h" +#include "szd_int16.h" +#include "Huffman.h" +#include "utility.h" + +/** + * + * + * @return status SUCCESSFUL (SZ_SCES) or not (other error codes) f + * */ +int SZ_decompress_args_int16(int16_t** newData, size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, unsigned char* cmpBytes, size_t cmpSize) +{ + int status = SZ_SCES; + size_t dataLength = computeDataLength(r5,r4,r3,r2,r1); + + //unsigned char* tmpBytes; + size_t targetUncompressSize = dataLength <<2; //i.e., *4 + //tmpSize must be "much" smaller than dataLength + size_t i, tmpSize = 3+MetaDataByteLength+1+sizeof(int16_t)+exe_params->SZ_SIZE_TYPE; + unsigned char* szTmpBytes; + + if(cmpSize!=4+2+4+MetaDataByteLength && cmpSize!=4+2+8+MetaDataByteLength) + { + confparams_dec->losslessCompressor = is_lossless_compressed_data(cmpBytes, cmpSize); + if(confparams_dec->losslessCompressor!=-1) + confparams_dec->szMode = SZ_BEST_COMPRESSION; + else + confparams_dec->szMode = SZ_BEST_SPEED; + if(confparams_dec->szMode==SZ_BEST_SPEED) + { + 
tmpSize = cmpSize; + szTmpBytes = cmpBytes; + } + else if(confparams_dec->szMode==SZ_BEST_COMPRESSION || confparams_dec->szMode==SZ_DEFAULT_COMPRESSION) + { + if(targetUncompressSizelosslessCompressor, cmpBytes, (unsigned long)cmpSize, &szTmpBytes, (unsigned long)targetUncompressSize+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE);// (unsigned long)targetUncompressSize+8: consider the total length under lossless compression mode is actually 3+4+1+targetUncompressSize + //szTmpBytes = (unsigned char*)malloc(sizeof(unsigned char)*tmpSize); + //memcpy(szTmpBytes, tmpBytes, tmpSize); + //free(tmpBytes); //release useless memory + } + else + { + printf("Wrong value of confparams_dec->szMode in the double compressed bytes.\n"); + status = SZ_MERR; + return status; + } + } + else + szTmpBytes = cmpBytes; + //TODO: convert szTmpBytes to data array. + TightDataPointStorageI* tdps; + int errBoundMode = new_TightDataPointStorageI_fromFlatBytes(&tdps, szTmpBytes, tmpSize); + //writeByteData(tdps->typeArray, tdps->typeArray_size, "decompress-typebytes.tbt"); + int dim = computeDimension(r5,r4,r3,r2,r1); + int intSize = sizeof(int16_t); + if(tdps->isLossless) + { + *newData = (int16_t*)malloc(intSize*dataLength); + if(sysEndianType==BIG_ENDIAN_SYSTEM) + { + memcpy(*newData, szTmpBytes+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE, dataLength*intSize); + } + else + { + unsigned char* p = szTmpBytes+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE; + for(i=0;isol_ID==SZ_Transpose) + { + getSnapshotData_int16_1D(newData,dataLength,tdps, errBoundMode); + } + else //confparams_dec->sol_ID==SZ + { + if (dim == 1) + getSnapshotData_int16_1D(newData,r1,tdps, errBoundMode); + else + if (dim == 2) + getSnapshotData_int16_2D(newData,r2,r1,tdps, errBoundMode); + else + if (dim == 3) + getSnapshotData_int16_3D(newData,r3,r2,r1,tdps, errBoundMode); + else + if (dim == 4) + getSnapshotData_int16_4D(newData,r4,r3,r2,r1,tdps, errBoundMode); + else + { + printf("Error: currently support only at most 
4 dimensions!\n"); + status = SZ_DERR; + } + } + + free_TightDataPointStorageI2(tdps); + if(confparams_dec->szMode!=SZ_BEST_SPEED && cmpSize!=4+sizeof(int16_t)+exe_params->SZ_SIZE_TYPE+MetaDataByteLength) + free(szTmpBytes); + return status; +} + + +void decompressDataSeries_int16_1D(int16_t** data, size_t dataSeriesLength, TightDataPointStorageI* tdps) +{ + updateQuantizationInfo(tdps->intervals); + size_t i; + double interval = tdps->realPrecision*2; + + *data = (int16_t*)malloc(sizeof(int16_t)*dataSeriesLength); + + int* type = (int*)malloc(dataSeriesLength*sizeof(int)); + + HuffmanTree* huffmanTree = createHuffmanTree(tdps->stateNum); + decode_withTree(huffmanTree, tdps->typeArray, dataSeriesLength, type); + SZ_ReleaseHuffman(huffmanTree); + + //sdi:Debug + //writeUShortData(type, dataSeriesLength, "decompressStateBytes.sb"); + + long predValue, tmp; + int16_t minValue, exactData; + + minValue = tdps->minValue; + + int exactByteSize = tdps->exactByteSize; + unsigned char* exactDataBytePointer = tdps->exactDataBytes; + + unsigned char curBytes[8] = {0,0,0,0,0,0,0,0}; + + int rightShiftBits = computeRightShiftBits(exactByteSize, SZ_INT16); + if(rightShiftBits<0) + { + printf("Error: rightShift < 0!\n"); + exit(0); + } + int type_; + for (i = 0; i < dataSeriesLength; i++) { + type_ = type[i]; + switch (type_) { + case 0: + // recover the exact data + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToInt16_bigEndian(curBytes); + exactData = (uint16_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[i] = exactData + minValue; + break; + default: + //predValue = 2 * (*data)[i-1] - (*data)[i-2]; + predValue = (*data)[i-1]; + tmp = predValue + (type_-exe_params->intvRadius)*interval; + if(tmp >= SZ_INT16_MIN&&tmpintervals); + //printf("tdps->intervals=%d, exe_params->intvRadius=%d\n", tdps->intervals, exe_params->intvRadius); + + size_t dataSeriesLength = r1*r2; + // printf ("%d %d\n", r1, r2); + + double 
realPrecision = tdps->realPrecision; + + *data = (int16_t*)malloc(sizeof(int16_t)*dataSeriesLength); + + int* type = (int*)malloc(dataSeriesLength*sizeof(int)); + + HuffmanTree* huffmanTree = createHuffmanTree(tdps->stateNum); + decode_withTree(huffmanTree, tdps->typeArray, dataSeriesLength, type); + SZ_ReleaseHuffman(huffmanTree); + + int16_t minValue, exactData; + + minValue = tdps->minValue; + + int exactByteSize = tdps->exactByteSize; + unsigned char* exactDataBytePointer = tdps->exactDataBytes; + + unsigned char curBytes[8] = {0,0,0,0,0,0,0,0}; + + int rightShiftBits = computeRightShiftBits(exactByteSize, SZ_INT16); + + long pred1D, pred2D, tmp; + size_t ii, jj; + + /* Process Row-0, data 0 */ + + // recover the exact data + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToInt16_bigEndian(curBytes); + exactData = (uint16_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[0] = exactData + minValue; + + /* Process Row-0, data 1 */ + int type_ = type[1]; + if (type_ != 0) + { + pred1D = (*data)[0]; + tmp = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_INT16_MIN&&tmp> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[1] = exactData + minValue; + } + + /* Process Row-0, data 2 --> data r2-1 */ + for (jj = 2; jj < r2; jj++) + { + type_ = type[jj]; + if (type_ != 0) + { + pred1D = 2*(*data)[jj-1] - (*data)[jj-2]; + tmp = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_INT16_MIN&&tmp> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[jj] = exactData + minValue; + } + } + + size_t index; + /* Process Row-1 --> Row-r1-1 */ + for (ii = 1; ii < r1; ii++) + { + /* Process row-ii data 0 */ + index = ii*r2; + + type_ = type[index]; + if (type_ != 0) + { + pred1D = (*data)[index-r2]; + tmp = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_INT16_MIN&&tmp> rightShiftBits; + exactDataBytePointer += 
exactByteSize; + (*data)[index] = exactData + minValue; + } + + /* Process row-ii data 1 --> r2-1*/ + for (jj = 1; jj < r2; jj++) + { + index = ii*r2+jj; + pred2D = (*data)[index-1] + (*data)[index-r2] - (*data)[index-r2-1]; + + type_ = type[index]; + if (type_ != 0) + { + tmp = pred2D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_INT16_MIN&&tmp> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + } + } + + free(type); + return; +} + +void decompressDataSeries_int16_3D(int16_t** data, size_t r1, size_t r2, size_t r3, TightDataPointStorageI* tdps) +{ + updateQuantizationInfo(tdps->intervals); + size_t dataSeriesLength = r1*r2*r3; + size_t r23 = r2*r3; +// printf ("%d %d %d\n", r1, r2, r3); + double realPrecision = tdps->realPrecision; + + *data = (int16_t*)malloc(sizeof(int16_t)*dataSeriesLength); + int* type = (int*)malloc(dataSeriesLength*sizeof(int)); + + HuffmanTree* huffmanTree = createHuffmanTree(tdps->stateNum); + decode_withTree(huffmanTree, tdps->typeArray, dataSeriesLength, type); + SZ_ReleaseHuffman(huffmanTree); + + int16_t minValue, exactData; + + minValue = tdps->minValue; + + int exactByteSize = tdps->exactByteSize; + unsigned char* exactDataBytePointer = tdps->exactDataBytes; + + unsigned char curBytes[8] = {0,0,0,0,0,0,0,0}; + + int rightShiftBits = computeRightShiftBits(exactByteSize, SZ_INT16); + + long pred1D, pred2D, pred3D, tmp; + size_t ii, jj, kk; + + /////////////////////////// Process layer-0 /////////////////////////// + /* Process Row-0 data 0*/ + + // recover the exact data + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToInt16_bigEndian(curBytes); + exactData = (uint16_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[0] = exactData + minValue; + + /* Process Row-0, data 1 */ + pred1D = (*data)[0]; + + int type_ = type[1]; + if (type_ != 0) + { + tmp = pred1D + 2 * (type_ - exe_params->intvRadius) * 
realPrecision; + if(tmp >= SZ_INT16_MIN&&tmp> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[1] = exactData + minValue; + } + /* Process Row-0, data 2 --> data r3-1 */ + for (jj = 2; jj < r3; jj++) + { + pred1D = 2*(*data)[jj-1] - (*data)[jj-2]; + + type_ = type[jj]; + if (type_ != 0) + { + tmp = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_INT16_MIN&&tmp> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[jj] = exactData + minValue; + } + } + + size_t index; + /* Process Row-1 --> Row-r2-1 */ + for (ii = 1; ii < r2; ii++) + { + /* Process row-ii data 0 */ + index = ii*r3; + pred1D = (*data)[index-r3]; + + type_ = type[index]; + if (type_ != 0) + { + tmp = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_INT16_MIN&&tmp> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + + /* Process row-ii data 1 --> r3-1*/ + for (jj = 1; jj < r3; jj++) + { + index = ii*r3+jj; + pred2D = (*data)[index-1] + (*data)[index-r3] - (*data)[index-r3-1]; + + type_ = type[index]; + if (type_ != 0) + { + tmp = pred2D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_INT16_MIN&&tmp> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + } + } + + /////////////////////////// Process layer-1 --> layer-r1-1 /////////////////////////// + + for (kk = 1; kk < r1; kk++) + { + /* Process Row-0 data 0*/ + index = kk*r23; + pred1D = (*data)[index-r23]; + + type_ = type[index]; + if (type_ != 0) + { + tmp = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_INT16_MIN&&tmp> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + + /* Process Row-0 data 1 --> data r3-1 */ + for (jj = 1; jj < r3; jj++) + { + index = kk*r23+jj; + pred2D = (*data)[index-1] + (*data)[index-r23] - (*data)[index-r23-1]; + + type_ = 
type[index]; + if (type_ != 0) + { + tmp = pred2D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_INT16_MIN&&tmp> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + } + + /* Process Row-1 --> Row-r2-1 */ + for (ii = 1; ii < r2; ii++) + { + /* Process Row-i data 0 */ + index = kk*r23 + ii*r3; + pred2D = (*data)[index-r3] + (*data)[index-r23] - (*data)[index-r23-r3]; + + type_ = type[index]; + if (type_ != 0) + { + tmp = pred2D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_INT16_MIN&&tmp> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + + /* Process Row-i data 1 --> data r3-1 */ + for (jj = 1; jj < r3; jj++) + { + index = kk*r23 + ii*r3 + jj; + pred3D = (*data)[index-1] + (*data)[index-r3] + (*data)[index-r23] + - (*data)[index-r3-1] - (*data)[index-r23-r3] - (*data)[index-r23-1] + (*data)[index-r23-r3-1]; + + type_ = type[index]; + if (type_ != 0) + { + tmp = pred3D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_INT16_MIN&&tmp> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + } + } + } + + free(type); + return; +} + + +void decompressDataSeries_int16_4D(int16_t** data, size_t r1, size_t r2, size_t r3, size_t r4, TightDataPointStorageI* tdps) +{ + updateQuantizationInfo(tdps->intervals); + size_t dataSeriesLength = r1*r2*r3*r4; + size_t r234 = r2*r3*r4; + size_t r34 = r3*r4; + + double realPrecision = tdps->realPrecision; + + *data = (int16_t*)malloc(sizeof(int16_t)*dataSeriesLength); + int* type = (int*)malloc(dataSeriesLength*sizeof(int)); + + HuffmanTree* huffmanTree = createHuffmanTree(tdps->stateNum); + decode_withTree(huffmanTree, tdps->typeArray, dataSeriesLength, type); + SZ_ReleaseHuffman(huffmanTree); + + int16_t minValue, exactData; + + minValue = tdps->minValue; + + int exactByteSize = tdps->exactByteSize; + unsigned char* 
exactDataBytePointer = tdps->exactDataBytes; + + unsigned char curBytes[8] = {0,0,0,0,0,0,0,0}; + + int rightShiftBits = computeRightShiftBits(exactByteSize, SZ_INT16); + + int type_; + + long pred1D, pred2D, pred3D, tmp; + size_t ii, jj, kk, ll; + size_t index; + + for (ll = 0; ll < r1; ll++) + { + /////////////////////////// Process layer-0 /////////////////////////// + /* Process Row-0 data 0*/ + index = ll*r234; + + // recover the exact data + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToInt16_bigEndian(curBytes); + exactData = (uint16_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + + /* Process Row-0, data 1 */ + index = ll*r234+1; + + pred1D = (*data)[index-1]; + + type_ = type[index]; + if (type_ != 0) + { + tmp = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_INT16_MIN&&tmp> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + + /* Process Row-0, data 2 --> data r4-1 */ + for (jj = 2; jj < r4; jj++) + { + index = ll*r234+jj; + + pred1D = 2*(*data)[index-1] - (*data)[index-2]; + + type_ = type[index]; + if (type_ != 0) + { + tmp = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_INT16_MIN&&tmp> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + } + + /* Process Row-1 --> Row-r3-1 */ + for (ii = 1; ii < r3; ii++) + { + /* Process row-ii data 0 */ + index = ll*r234+ii*r4; + + pred1D = (*data)[index-r4]; + + type_ = type[index]; + if (type_ != 0) + { + tmp = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_INT16_MIN&&tmp> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + + /* Process row-ii data 1 --> r4-1*/ + for (jj = 1; jj < r4; jj++) + { + index = ll*r234+ii*r4+jj; + + pred2D = (*data)[index-1] + (*data)[index-r4] - 
(*data)[index-r4-1]; + + type_ = type[index]; + if (type_ != 0) + { + tmp = pred2D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_INT16_MIN&&tmp> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + } + } + + /////////////////////////// Process layer-1 --> layer-r2-1 /////////////////////////// + + for (kk = 1; kk < r2; kk++) + { + /* Process Row-0 data 0*/ + index = ll*r234+kk*r34; + + pred1D = (*data)[index-r34]; + + type_ = type[index]; + if (type_ != 0) + { + tmp = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_INT16_MIN&&tmp> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + + /* Process Row-0 data 1 --> data r4-1 */ + for (jj = 1; jj < r4; jj++) + { + index = ll*r234+kk*r34+jj; + + pred2D = (*data)[index-1] + (*data)[index-r34] - (*data)[index-r34-1]; + + type_ = type[index]; + if (type_ != 0) + { + tmp = pred2D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_INT16_MIN&&tmp> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + } + + /* Process Row-1 --> Row-r3-1 */ + for (ii = 1; ii < r3; ii++) + { + /* Process Row-i data 0 */ + index = ll*r234+kk*r34+ii*r4; + + pred2D = (*data)[index-r4] + (*data)[index-r34] - (*data)[index-r34-r4]; + + type_ = type[index]; + if (type_ != 0) + { + tmp = pred2D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_INT16_MIN&&tmp> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + + /* Process Row-i data 1 --> data r4-1 */ + for (jj = 1; jj < r4; jj++) + { + index = ll*r234+kk*r34+ii*r4+jj; + + pred3D = (*data)[index-1] + (*data)[index-r4] + (*data)[index-r34] + - (*data)[index-r4-1] - (*data)[index-r34-r4] - (*data)[index-r34-1] + (*data)[index-r34-r4-1]; + + + type_ = type[index]; + if (type_ != 0) + { + tmp = pred3D + 2 * 
(type_ - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_INT16_MIN&&tmp> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + } + } + } + } + + free(type); + return; +} + +void getSnapshotData_int16_1D(int16_t** data, size_t dataSeriesLength, TightDataPointStorageI* tdps, int errBoundMode) +{ + size_t i; + + if (tdps->allSameData) { + int16_t value = bytesToInt16_bigEndian(tdps->exactDataBytes); + *data = (int16_t*)malloc(sizeof(int16_t)*dataSeriesLength); + for (i = 0; i < dataSeriesLength; i++) + (*data)[i] = value; + } else { + decompressDataSeries_int16_1D(data, dataSeriesLength, tdps); + } +} + +void getSnapshotData_int16_2D(int16_t** data, size_t r1, size_t r2, TightDataPointStorageI* tdps, int errBoundMode) +{ + size_t i; + size_t dataSeriesLength = r1*r2; + if (tdps->allSameData) { + int16_t value = bytesToInt16_bigEndian(tdps->exactDataBytes); + *data = (int16_t*)malloc(sizeof(int16_t)*dataSeriesLength); + for (i = 0; i < dataSeriesLength; i++) + (*data)[i] = value; + } else { + decompressDataSeries_int16_2D(data, r1, r2, tdps); + } +} + +void getSnapshotData_int16_3D(int16_t** data, size_t r1, size_t r2, size_t r3, TightDataPointStorageI* tdps, int errBoundMode) +{ + size_t i; + size_t dataSeriesLength = r1*r2*r3; + if (tdps->allSameData) { + int16_t value = bytesToInt16_bigEndian(tdps->exactDataBytes); + *data = (int16_t*)malloc(sizeof(int16_t)*dataSeriesLength); + for (i = 0; i < dataSeriesLength; i++) + (*data)[i] = value; + } else { + decompressDataSeries_int16_3D(data, r1, r2, r3, tdps); + } +} + +void getSnapshotData_int16_4D(int16_t** data, size_t r1, size_t r2, size_t r3, size_t r4, TightDataPointStorageI* tdps, int errBoundMode) +{ + size_t i; + size_t dataSeriesLength = r1*r2*r3*r4; + if (tdps->allSameData) { + int16_t value = bytesToInt16_bigEndian(tdps->exactDataBytes); + *data = (int16_t*)malloc(sizeof(int16_t)*dataSeriesLength); + for (i = 0; i < dataSeriesLength; i++) + (*data)[i] = 
value; + } else { + decompressDataSeries_int16_4D(data, r1, r2, r3, r4, tdps); + } +} diff --git a/deps/SZ/sz/src/szd_int32.c b/deps/SZ/sz/src/szd_int32.c new file mode 100644 index 0000000000000000000000000000000000000000..b5f31b09aba44de0a1cc1687cb07bd405f2136b1 --- /dev/null +++ b/deps/SZ/sz/src/szd_int32.c @@ -0,0 +1,789 @@ +/** + * @file szd_int32.c + * @author Sheng Di + * @date Aug, 2017 + * @brief + * (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. + */ + +#include +#include +#include +#include +#include "TightDataPointStorageI.h" +#include "sz.h" +#include "szd_int32.h" +#include "Huffman.h" +#include "utility.h" + +/** + * + * + * @return status SUCCESSFUL (SZ_SCES) or not (other error codes) f + * */ +int SZ_decompress_args_int32(int32_t** newData, size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, unsigned char* cmpBytes, size_t cmpSize) +{ + int status = SZ_SCES; + size_t dataLength = computeDataLength(r5,r4,r3,r2,r1); + + //unsigned char* tmpBytes; + size_t targetUncompressSize = dataLength <<2; //i.e., *4 + //tmpSize must be "much" smaller than dataLength + size_t i, tmpSize = 3+MetaDataByteLength+1+sizeof(int32_t)+exe_params->SZ_SIZE_TYPE; + unsigned char* szTmpBytes; + + if(cmpSize!=4+4+4+MetaDataByteLength && cmpSize!=4+4+8+MetaDataByteLength) + { + confparams_dec->losslessCompressor = is_lossless_compressed_data(cmpBytes, cmpSize); + if(confparams_dec->losslessCompressor!=-1) + confparams_dec->szMode = SZ_BEST_COMPRESSION; + else + confparams_dec->szMode = SZ_BEST_SPEED; + if(confparams_dec->szMode==SZ_BEST_SPEED) + { + tmpSize = cmpSize; + szTmpBytes = cmpBytes; + } + else if(confparams_dec->szMode==SZ_BEST_COMPRESSION || confparams_dec->szMode==SZ_DEFAULT_COMPRESSION) + { + if(targetUncompressSizelosslessCompressor, cmpBytes, (unsigned long)cmpSize, &szTmpBytes, (unsigned long)targetUncompressSize+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE);// (unsigned 
long)targetUncompressSize+8: consider the total length under lossless compression mode is actually 3+4+1+targetUncompressSize + //szTmpBytes = (unsigned char*)malloc(sizeof(unsigned char)*tmpSize); + //memcpy(szTmpBytes, tmpBytes, tmpSize); + //free(tmpBytes); //release useless memory + } + else + { + printf("Wrong value of confparams_dec->szMode in the double compressed bytes.\n"); + status = SZ_MERR; + return status; + } + } + else + szTmpBytes = cmpBytes; + //TODO: convert szTmpBytes to data array. + TightDataPointStorageI* tdps; + int errBoundMode = new_TightDataPointStorageI_fromFlatBytes(&tdps, szTmpBytes, tmpSize); + //writeByteData(tdps->typeArray, tdps->typeArray_size, "decompress-typebytes.tbt"); + int dim = computeDimension(r5,r4,r3,r2,r1); + int intSize = sizeof(int32_t); + if(tdps->isLossless) + { + *newData = (int32_t*)malloc(intSize*dataLength); + if(sysEndianType==BIG_ENDIAN_SYSTEM) + { + memcpy(*newData, szTmpBytes+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE, dataLength*intSize); + } + else + { + unsigned char* p = szTmpBytes+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE; + for(i=0;iszMode!=SZ_BEST_SPEED && cmpSize!=4+sizeof(int32_t)+exe_params->SZ_SIZE_TYPE+MetaDataByteLength) + free(szTmpBytes); + return status; +} + + +void decompressDataSeries_int32_1D(int32_t** data, size_t dataSeriesLength, TightDataPointStorageI* tdps) +{ + updateQuantizationInfo(tdps->intervals); + size_t i; + double interval = tdps->realPrecision*2; + + *data = (int32_t*)malloc(sizeof(int32_t)*dataSeriesLength); + + int* type = (int*)malloc(dataSeriesLength*sizeof(int)); + + HuffmanTree* huffmanTree = createHuffmanTree(tdps->stateNum); + decode_withTree(huffmanTree, tdps->typeArray, dataSeriesLength, type); + SZ_ReleaseHuffman(huffmanTree); + + //sdi:Debug + //writeUShortData(type, dataSeriesLength, "decompressStateBytes.sb"); + + int32_t minValue, exactData, predValue; + + minValue = tdps->minValue; + + int exactByteSize = tdps->exactByteSize; + unsigned char* 
exactDataBytePointer = tdps->exactDataBytes; + + unsigned char curBytes[8] = {0,0,0,0,0,0,0,0}; + + int rightShiftBits = computeRightShiftBits(exactByteSize, SZ_INT32); + if(rightShiftBits<0) + { + printf("Error: rightShift < 0!\n"); + exit(0); + } + int type_; + for (i = 0; i < dataSeriesLength; i++) { + type_ = type[i]; + switch (type_) { + case 0: + // recover the exact data + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToInt32_bigEndian(curBytes); + exactData = (uint32_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[i] = exactData + minValue; + break; + default: + //predValue = 2 * (*data)[i-1] - (*data)[i-2]; + predValue = (*data)[i-1]; + (*data)[i] = predValue + (type_-exe_params->intvRadius)*interval; + break; + } + //printf("%.30G\n",(*data)[i]); + } + free(type); + return; +} + +void decompressDataSeries_int32_2D(int32_t** data, size_t r1, size_t r2, TightDataPointStorageI* tdps) +{ + updateQuantizationInfo(tdps->intervals); + //printf("tdps->intervals=%d, exe_params->intvRadius=%d\n", tdps->intervals, exe_params->intvRadius); + + size_t dataSeriesLength = r1*r2; + // printf ("%d %d\n", r1, r2); + + double realPrecision = tdps->realPrecision; + + *data = (int32_t*)malloc(sizeof(int32_t)*dataSeriesLength); + + int* type = (int*)malloc(dataSeriesLength*sizeof(int)); + + HuffmanTree* huffmanTree = createHuffmanTree(tdps->stateNum); + decode_withTree(huffmanTree, tdps->typeArray, dataSeriesLength, type); + SZ_ReleaseHuffman(huffmanTree); + + int32_t minValue, exactData; + + minValue = tdps->minValue; + + int exactByteSize = tdps->exactByteSize; + unsigned char* exactDataBytePointer = tdps->exactDataBytes; + + unsigned char curBytes[8] = {0,0,0,0,0,0,0,0}; + + int rightShiftBits = computeRightShiftBits(exactByteSize, SZ_INT32); + + int32_t pred1D, pred2D; + size_t ii, jj; + + /* Process Row-0, data 0 */ + + // recover the exact data + memcpy(curBytes, exactDataBytePointer, exactByteSize); + 
exactData = bytesToInt32_bigEndian(curBytes); + exactData = (uint32_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[0] = exactData + minValue; + + /* Process Row-0, data 1 */ + int type_ = type[1]; + if (type_ != 0) + { + pred1D = (*data)[0]; + (*data)[1] = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + // recover the exact data + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToInt32_bigEndian(curBytes); + exactData = (uint32_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[1] = exactData + minValue; + } + + /* Process Row-0, data 2 --> data r2-1 */ + for (jj = 2; jj < r2; jj++) + { + type_ = type[jj]; + if (type_ != 0) + { + pred1D = 2*(*data)[jj-1] - (*data)[jj-2]; + (*data)[jj] = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + // recover the exact data + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToInt32_bigEndian(curBytes); + exactData = (uint32_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[jj] = exactData + minValue; + } + } + + size_t index; + /* Process Row-1 --> Row-r1-1 */ + for (ii = 1; ii < r1; ii++) + { + /* Process row-ii data 0 */ + index = ii*r2; + + type_ = type[index]; + if (type_ != 0) + { + pred1D = (*data)[index-r2]; + (*data)[index] = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + // recover the exact data + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToInt32_bigEndian(curBytes); + exactData = (uint32_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + + /* Process row-ii data 1 --> r2-1*/ + for (jj = 1; jj < r2; jj++) + { + index = ii*r2+jj; + pred2D = (*data)[index-1] + (*data)[index-r2] - (*data)[index-r2-1]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred2D + 2 * 
(type_ - exe_params->intvRadius) * realPrecision; + } + else + { + // recover the exact data + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToInt32_bigEndian(curBytes); + exactData = (uint32_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + } + } + + free(type); + return; +} + +void decompressDataSeries_int32_3D(int32_t** data, size_t r1, size_t r2, size_t r3, TightDataPointStorageI* tdps) +{ + updateQuantizationInfo(tdps->intervals); + size_t dataSeriesLength = r1*r2*r3; + size_t r23 = r2*r3; +// printf ("%d %d %d\n", r1, r2, r3); + double realPrecision = tdps->realPrecision; + + *data = (int32_t*)malloc(sizeof(int32_t)*dataSeriesLength); + int* type = (int*)malloc(dataSeriesLength*sizeof(int)); + + HuffmanTree* huffmanTree = createHuffmanTree(tdps->stateNum); + decode_withTree(huffmanTree, tdps->typeArray, dataSeriesLength, type); + SZ_ReleaseHuffman(huffmanTree); + + int32_t minValue, exactData; + + minValue = tdps->minValue; + + int exactByteSize = tdps->exactByteSize; + unsigned char* exactDataBytePointer = tdps->exactDataBytes; + + unsigned char curBytes[8] = {0,0,0,0,0,0,0,0}; + + int rightShiftBits = computeRightShiftBits(exactByteSize, SZ_INT32); + + int32_t pred1D, pred2D, pred3D; + size_t ii, jj, kk; + + /////////////////////////// Process layer-0 /////////////////////////// + /* Process Row-0 data 0*/ + + // recover the exact data + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToInt32_bigEndian(curBytes); + exactData = (uint32_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[0] = exactData + minValue; + + /* Process Row-0, data 1 */ + pred1D = (*data)[0]; + + int type_ = type[1]; + if (type_ != 0) + { + (*data)[1] = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToInt32_bigEndian(curBytes); 
+ exactData = (uint32_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[1] = exactData + minValue; + } + /* Process Row-0, data 2 --> data r3-1 */ + for (jj = 2; jj < r3; jj++) + { + pred1D = 2*(*data)[jj-1] - (*data)[jj-2]; + + type_ = type[jj]; + if (type_ != 0) + { + (*data)[jj] = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToInt32_bigEndian(curBytes); + exactData = (uint32_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[jj] = exactData + minValue; + } + } + + size_t index; + /* Process Row-1 --> Row-r2-1 */ + for (ii = 1; ii < r2; ii++) + { + /* Process row-ii data 0 */ + index = ii*r3; + pred1D = (*data)[index-r3]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToInt32_bigEndian(curBytes); + exactData = (uint32_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + + /* Process row-ii data 1 --> r3-1*/ + for (jj = 1; jj < r3; jj++) + { + index = ii*r3+jj; + pred2D = (*data)[index-1] + (*data)[index-r3] - (*data)[index-r3-1]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred2D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToInt32_bigEndian(curBytes); + exactData = (uint32_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + } + } + + /////////////////////////// Process layer-1 --> layer-r1-1 /////////////////////////// + + for (kk = 1; kk < r1; kk++) + { + /* Process Row-0 data 0*/ + index = kk*r23; + pred1D = (*data)[index-r23]; + + type_ = type[index]; + if (type_ != 0) 
+ { + (*data)[index] = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToInt32_bigEndian(curBytes); + exactData = (uint32_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + + /* Process Row-0 data 1 --> data r3-1 */ + for (jj = 1; jj < r3; jj++) + { + index = kk*r23+jj; + pred2D = (*data)[index-1] + (*data)[index-r23] - (*data)[index-r23-1]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred2D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToInt32_bigEndian(curBytes); + exactData = (uint32_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + } + + /* Process Row-1 --> Row-r2-1 */ + for (ii = 1; ii < r2; ii++) + { + /* Process Row-i data 0 */ + index = kk*r23 + ii*r3; + pred2D = (*data)[index-r3] + (*data)[index-r23] - (*data)[index-r23-r3]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred2D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToInt32_bigEndian(curBytes); + exactData = (uint32_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + + /* Process Row-i data 1 --> data r3-1 */ + for (jj = 1; jj < r3; jj++) + { + index = kk*r23 + ii*r3 + jj; + pred3D = (*data)[index-1] + (*data)[index-r3] + (*data)[index-r23] + - (*data)[index-r3-1] - (*data)[index-r23-r3] - (*data)[index-r23-1] + (*data)[index-r23-r3-1]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred3D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = 
bytesToInt32_bigEndian(curBytes); + exactData = (uint32_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + } + } + } + + free(type); + return; +} + + +void decompressDataSeries_int32_4D(int32_t** data, size_t r1, size_t r2, size_t r3, size_t r4, TightDataPointStorageI* tdps) +{ + updateQuantizationInfo(tdps->intervals); + size_t dataSeriesLength = r1*r2*r3*r4; + size_t r234 = r2*r3*r4; + size_t r34 = r3*r4; + + double realPrecision = tdps->realPrecision; + + *data = (int32_t*)malloc(sizeof(int32_t)*dataSeriesLength); + int* type = (int*)malloc(dataSeriesLength*sizeof(int)); + + HuffmanTree* huffmanTree = createHuffmanTree(tdps->stateNum); + decode_withTree(huffmanTree, tdps->typeArray, dataSeriesLength, type); + SZ_ReleaseHuffman(huffmanTree); + + int32_t minValue, exactData; + + minValue = tdps->minValue; + + int exactByteSize = tdps->exactByteSize; + unsigned char* exactDataBytePointer = tdps->exactDataBytes; + + unsigned char curBytes[8] = {0,0,0,0,0,0,0,0}; + + int rightShiftBits = computeRightShiftBits(exactByteSize, SZ_INT32); + + int type_; + + int32_t pred1D, pred2D, pred3D; + size_t ii, jj, kk, ll; + size_t index; + + for (ll = 0; ll < r1; ll++) + { + /////////////////////////// Process layer-0 /////////////////////////// + /* Process Row-0 data 0*/ + index = ll*r234; + + // recover the exact data + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToInt32_bigEndian(curBytes); + exactData = (uint32_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + + /* Process Row-0, data 1 */ + index = ll*r234+1; + + pred1D = (*data)[index-1]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToInt32_bigEndian(curBytes); + exactData = 
(uint32_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + + /* Process Row-0, data 2 --> data r4-1 */ + for (jj = 2; jj < r4; jj++) + { + index = ll*r234+jj; + + pred1D = 2*(*data)[index-1] - (*data)[index-2]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToInt32_bigEndian(curBytes); + exactData = (uint32_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + } + + /* Process Row-1 --> Row-r3-1 */ + for (ii = 1; ii < r3; ii++) + { + /* Process row-ii data 0 */ + index = ll*r234+ii*r4; + + pred1D = (*data)[index-r4]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToInt32_bigEndian(curBytes); + exactData = (uint32_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + + /* Process row-ii data 1 --> r4-1*/ + for (jj = 1; jj < r4; jj++) + { + index = ll*r234+ii*r4+jj; + + pred2D = (*data)[index-1] + (*data)[index-r4] - (*data)[index-r4-1]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred2D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToInt32_bigEndian(curBytes); + exactData = (uint32_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + } + } + + /////////////////////////// Process layer-1 --> layer-r2-1 /////////////////////////// + + for (kk = 1; kk < r2; kk++) + { + /* Process Row-0 data 0*/ + index = ll*r234+kk*r34; + + pred1D = 
(*data)[index-r34]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToInt32_bigEndian(curBytes); + exactData = (uint32_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + + /* Process Row-0 data 1 --> data r4-1 */ + for (jj = 1; jj < r4; jj++) + { + index = ll*r234+kk*r34+jj; + + pred2D = (*data)[index-1] + (*data)[index-r34] - (*data)[index-r34-1]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred2D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToInt32_bigEndian(curBytes); + exactData = (uint32_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + } + + /* Process Row-1 --> Row-r3-1 */ + for (ii = 1; ii < r3; ii++) + { + /* Process Row-i data 0 */ + index = ll*r234+kk*r34+ii*r4; + + pred2D = (*data)[index-r4] + (*data)[index-r34] - (*data)[index-r34-r4]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred2D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToInt32_bigEndian(curBytes); + exactData = (uint32_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + + /* Process Row-i data 1 --> data r4-1 */ + for (jj = 1; jj < r4; jj++) + { + index = ll*r234+kk*r34+ii*r4+jj; + + pred3D = (*data)[index-1] + (*data)[index-r4] + (*data)[index-r34] + - (*data)[index-r4-1] - (*data)[index-r34-r4] - (*data)[index-r34-1] + (*data)[index-r34-r4-1]; + + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred3D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + 
else + { + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToInt32_bigEndian(curBytes); + exactData = (uint32_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + } + } + } + } + + free(type); + return; +} + +void getSnapshotData_int32_1D(int32_t** data, size_t dataSeriesLength, TightDataPointStorageI* tdps, int errBoundMode) +{ + size_t i; + + if (tdps->allSameData) { + int32_t value = bytesToInt32_bigEndian(tdps->exactDataBytes); + *data = (int32_t*)malloc(sizeof(int32_t)*dataSeriesLength); + for (i = 0; i < dataSeriesLength; i++) + (*data)[i] = value; + } else { + decompressDataSeries_int32_1D(data, dataSeriesLength, tdps); + } +} + +void getSnapshotData_int32_2D(int32_t** data, size_t r1, size_t r2, TightDataPointStorageI* tdps, int errBoundMode) +{ + size_t i; + size_t dataSeriesLength = r1*r2; + if (tdps->allSameData) { + int32_t value = bytesToInt32_bigEndian(tdps->exactDataBytes); + *data = (int32_t*)malloc(sizeof(int32_t)*dataSeriesLength); + for (i = 0; i < dataSeriesLength; i++) + (*data)[i] = value; + } else { + decompressDataSeries_int32_2D(data, r1, r2, tdps); + } +} + +void getSnapshotData_int32_3D(int32_t** data, size_t r1, size_t r2, size_t r3, TightDataPointStorageI* tdps, int errBoundMode) +{ + size_t i; + size_t dataSeriesLength = r1*r2*r3; + if (tdps->allSameData) { + int32_t value = bytesToInt32_bigEndian(tdps->exactDataBytes); + *data = (int32_t*)malloc(sizeof(int32_t)*dataSeriesLength); + for (i = 0; i < dataSeriesLength; i++) + (*data)[i] = value; + } else { + decompressDataSeries_int32_3D(data, r1, r2, r3, tdps); + } +} + +void getSnapshotData_int32_4D(int32_t** data, size_t r1, size_t r2, size_t r3, size_t r4, TightDataPointStorageI* tdps, int errBoundMode) +{ + size_t i; + size_t dataSeriesLength = r1*r2*r3*r4; + if (tdps->allSameData) { + int32_t value = bytesToInt32_bigEndian(tdps->exactDataBytes); + *data = 
(int32_t*)malloc(sizeof(int32_t)*dataSeriesLength); + for (i = 0; i < dataSeriesLength; i++) + (*data)[i] = value; + } else { + decompressDataSeries_int32_4D(data, r1, r2, r3, r4, tdps); + } +} diff --git a/deps/SZ/sz/src/szd_int64.c b/deps/SZ/sz/src/szd_int64.c new file mode 100644 index 0000000000000000000000000000000000000000..07a054f54a196f31fe6e9b3ab1eafc532cdad4cf --- /dev/null +++ b/deps/SZ/sz/src/szd_int64.c @@ -0,0 +1,789 @@ +/** + * @file szd_int64.c + * @author Sheng Di + * @date Aug, 2017 + * @brief + * (C) 2017 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. + */ + +#include +#include +#include +#include +#include "TightDataPointStorageI.h" +#include "sz.h" +#include "szd_int64.h" +#include "Huffman.h" +#include "utility.h" + +/** + * + * + * @return status SUCCESSFUL (SZ_SCES) or not (other error codes) f + * */ +int SZ_decompress_args_int64(int64_t** newData, size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, unsigned char* cmpBytes, size_t cmpSize) +{ + int status = SZ_SCES; + size_t dataLength = computeDataLength(r5,r4,r3,r2,r1); + + //unsigned char* tmpBytes; + size_t targetUncompressSize = dataLength <<2; //i.e., *4 + //tmpSize must be "much" smaller than dataLength + size_t i, tmpSize = 3+MetaDataByteLength+1+sizeof(int64_t)+exe_params->SZ_SIZE_TYPE; + unsigned char* szTmpBytes; + + if(cmpSize!=4+8+4+MetaDataByteLength && cmpSize!=4+8+8+MetaDataByteLength) + { + confparams_dec->losslessCompressor = is_lossless_compressed_data(cmpBytes, cmpSize); + if(confparams_dec->losslessCompressor!=-1) + confparams_dec->szMode = SZ_BEST_COMPRESSION; + else + confparams_dec->szMode = SZ_BEST_SPEED; + if(confparams_dec->szMode==SZ_BEST_SPEED) + { + tmpSize = cmpSize; + szTmpBytes = cmpBytes; + } + else if(confparams_dec->szMode==SZ_BEST_COMPRESSION || confparams_dec->szMode==SZ_DEFAULT_COMPRESSION) + { + if(targetUncompressSizelosslessCompressor, cmpBytes, (unsigned long)cmpSize, 
&szTmpBytes, (unsigned long)targetUncompressSize+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE);// (unsigned long)targetUncompressSize+8: consider the total length under lossless compression mode is actually 3+4+1+targetUncompressSize + //szTmpBytes = (unsigned char*)malloc(sizeof(unsigned char)*tmpSize); + //memcpy(szTmpBytes, tmpBytes, tmpSize); + //free(tmpBytes); //release useless memory + } + else + { + printf("Wrong value of confparams_dec->szMode in the double compressed bytes.\n"); + status = SZ_MERR; + return status; + } + } + else + szTmpBytes = cmpBytes; + //TODO: convert szTmpBytes to data array. + TightDataPointStorageI* tdps; + int errBoundMode = new_TightDataPointStorageI_fromFlatBytes(&tdps, szTmpBytes, tmpSize); + //writeByteData(tdps->typeArray, tdps->typeArray_size, "decompress-typebytes.tbt"); + int dim = computeDimension(r5,r4,r3,r2,r1); + int intSize = sizeof(int64_t); + if(tdps->isLossless) + { + *newData = (int64_t*)malloc(intSize*dataLength); + if(sysEndianType==BIG_ENDIAN_SYSTEM) + { + memcpy(*newData, szTmpBytes+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE, dataLength*intSize); + } + else + { + unsigned char* p = szTmpBytes+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE; + for(i=0;iszMode!=SZ_BEST_SPEED && cmpSize!=4+sizeof(int64_t)+exe_params->SZ_SIZE_TYPE+MetaDataByteLength) + free(szTmpBytes); + return status; +} + + +void decompressDataSeries_int64_1D(int64_t** data, size_t dataSeriesLength, TightDataPointStorageI* tdps) +{ + updateQuantizationInfo(tdps->intervals); + size_t i; + double interval = tdps->realPrecision*2; + + *data = (int64_t*)malloc(sizeof(int64_t)*dataSeriesLength); + + int* type = (int*)malloc(dataSeriesLength*sizeof(int)); + + HuffmanTree* huffmanTree = createHuffmanTree(tdps->stateNum); + decode_withTree(huffmanTree, tdps->typeArray, dataSeriesLength, type); + SZ_ReleaseHuffman(huffmanTree); + + //sdi:Debug + //writeUShortData(type, dataSeriesLength, "decompressStateBytes.sb"); + + int64_t minValue, exactData, 
predValue; + + minValue = tdps->minValue; + + int exactByteSize = tdps->exactByteSize; + unsigned char* exactDataBytePointer = tdps->exactDataBytes; + + unsigned char curBytes[8] = {0,0,0,0,0,0,0,0}; + + int rightShiftBits = computeRightShiftBits(exactByteSize, SZ_INT64); + if(rightShiftBits<0) + { + printf("Error: rightShift < 0!\n"); + exit(0); + } + int type_; + for (i = 0; i < dataSeriesLength; i++) { + type_ = type[i]; + switch (type_) { + case 0: + // recover the exact data + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToInt64_bigEndian(curBytes); + exactData = (uint64_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[i] = exactData + minValue; + break; + default: + //predValue = 2 * (*data)[i-1] - (*data)[i-2]; + predValue = (*data)[i-1]; + (*data)[i] = predValue + (type_-exe_params->intvRadius)*interval; + break; + } + //printf("%.30G\n",(*data)[i]); + } + free(type); + return; +} + +void decompressDataSeries_int64_2D(int64_t** data, size_t r1, size_t r2, TightDataPointStorageI* tdps) +{ + updateQuantizationInfo(tdps->intervals); + //printf("tdps->intervals=%d, exe_params->intvRadius=%d\n", tdps->intervals, exe_params->intvRadius); + + size_t dataSeriesLength = r1*r2; + // printf ("%d %d\n", r1, r2); + + double realPrecision = tdps->realPrecision; + + *data = (int64_t*)malloc(sizeof(int64_t)*dataSeriesLength); + + int* type = (int*)malloc(dataSeriesLength*sizeof(int)); + + HuffmanTree* huffmanTree = createHuffmanTree(tdps->stateNum); + decode_withTree(huffmanTree, tdps->typeArray, dataSeriesLength, type); + SZ_ReleaseHuffman(huffmanTree); + + int64_t minValue, exactData; + + minValue = tdps->minValue; + + int exactByteSize = tdps->exactByteSize; + unsigned char* exactDataBytePointer = tdps->exactDataBytes; + + unsigned char curBytes[8] = {0,0,0,0,0,0,0,0}; + + int rightShiftBits = computeRightShiftBits(exactByteSize, SZ_INT64); + + int64_t pred1D, pred2D; + size_t ii, jj; + + /* Process Row-0, 
data 0 */ + + // recover the exact data + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToInt64_bigEndian(curBytes); + exactData = (uint64_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[0] = exactData + minValue; + + /* Process Row-0, data 1 */ + int type_ = type[1]; + if (type_ != 0) + { + pred1D = (*data)[0]; + (*data)[1] = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + // recover the exact data + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToInt64_bigEndian(curBytes); + exactData = (uint64_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[1] = exactData + minValue; + } + + /* Process Row-0, data 2 --> data r2-1 */ + for (jj = 2; jj < r2; jj++) + { + type_ = type[jj]; + if (type_ != 0) + { + pred1D = 2*(*data)[jj-1] - (*data)[jj-2]; + (*data)[jj] = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + // recover the exact data + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToInt64_bigEndian(curBytes); + exactData = (uint64_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[jj] = exactData + minValue; + } + } + + size_t index; + /* Process Row-1 --> Row-r1-1 */ + for (ii = 1; ii < r1; ii++) + { + /* Process row-ii data 0 */ + index = ii*r2; + + type_ = type[index]; + if (type_ != 0) + { + pred1D = (*data)[index-r2]; + (*data)[index] = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + // recover the exact data + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToInt64_bigEndian(curBytes); + exactData = (uint64_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + + /* Process row-ii data 1 --> r2-1*/ + for (jj = 1; jj < r2; jj++) + { + index = ii*r2+jj; + pred2D = (*data)[index-1] + (*data)[index-r2] - 
(*data)[index-r2-1]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred2D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + // recover the exact data + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToInt64_bigEndian(curBytes); + exactData = (uint64_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + } + } + + free(type); + return; +} + +void decompressDataSeries_int64_3D(int64_t** data, size_t r1, size_t r2, size_t r3, TightDataPointStorageI* tdps) +{ + updateQuantizationInfo(tdps->intervals); + size_t dataSeriesLength = r1*r2*r3; + size_t r23 = r2*r3; +// printf ("%d %d %d\n", r1, r2, r3); + double realPrecision = tdps->realPrecision; + + *data = (int64_t*)malloc(sizeof(int64_t)*dataSeriesLength); + int* type = (int*)malloc(dataSeriesLength*sizeof(int)); + + HuffmanTree* huffmanTree = createHuffmanTree(tdps->stateNum); + decode_withTree(huffmanTree, tdps->typeArray, dataSeriesLength, type); + SZ_ReleaseHuffman(huffmanTree); + + int64_t minValue, exactData; + + minValue = tdps->minValue; + + int exactByteSize = tdps->exactByteSize; + unsigned char* exactDataBytePointer = tdps->exactDataBytes; + + unsigned char curBytes[8] = {0,0,0,0,0,0,0,0}; + + int rightShiftBits = computeRightShiftBits(exactByteSize, SZ_INT64); + + int64_t pred1D, pred2D, pred3D; + size_t ii, jj, kk; + + /////////////////////////// Process layer-0 /////////////////////////// + /* Process Row-0 data 0*/ + + // recover the exact data + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToInt64_bigEndian(curBytes); + exactData = (uint64_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[0] = exactData + minValue; + + /* Process Row-0, data 1 */ + pred1D = (*data)[0]; + + int type_ = type[1]; + if (type_ != 0) + { + (*data)[1] = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + 
memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToInt64_bigEndian(curBytes); + exactData = (uint64_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[1] = exactData + minValue; + } + /* Process Row-0, data 2 --> data r3-1 */ + for (jj = 2; jj < r3; jj++) + { + pred1D = 2*(*data)[jj-1] - (*data)[jj-2]; + + type_ = type[jj]; + if (type_ != 0) + { + (*data)[jj] = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToInt64_bigEndian(curBytes); + exactData = (uint64_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[jj] = exactData + minValue; + } + } + + size_t index; + /* Process Row-1 --> Row-r2-1 */ + for (ii = 1; ii < r2; ii++) + { + /* Process row-ii data 0 */ + index = ii*r3; + pred1D = (*data)[index-r3]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToInt64_bigEndian(curBytes); + exactData = (uint64_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + + /* Process row-ii data 1 --> r3-1*/ + for (jj = 1; jj < r3; jj++) + { + index = ii*r3+jj; + pred2D = (*data)[index-1] + (*data)[index-r3] - (*data)[index-r3-1]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred2D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToInt64_bigEndian(curBytes); + exactData = (uint64_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + } + } + + /////////////////////////// Process layer-1 --> layer-r1-1 /////////////////////////// + + for (kk = 1; kk < r1; kk++) + { + /* Process 
Row-0 data 0*/ + index = kk*r23; + pred1D = (*data)[index-r23]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToInt64_bigEndian(curBytes); + exactData = (uint64_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + + /* Process Row-0 data 1 --> data r3-1 */ + for (jj = 1; jj < r3; jj++) + { + index = kk*r23+jj; + pred2D = (*data)[index-1] + (*data)[index-r23] - (*data)[index-r23-1]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred2D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToInt64_bigEndian(curBytes); + exactData = (uint64_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + } + + /* Process Row-1 --> Row-r2-1 */ + for (ii = 1; ii < r2; ii++) + { + /* Process Row-i data 0 */ + index = kk*r23 + ii*r3; + pred2D = (*data)[index-r3] + (*data)[index-r23] - (*data)[index-r23-r3]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred2D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToInt64_bigEndian(curBytes); + exactData = (uint64_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + + /* Process Row-i data 1 --> data r3-1 */ + for (jj = 1; jj < r3; jj++) + { + index = kk*r23 + ii*r3 + jj; + pred3D = (*data)[index-1] + (*data)[index-r3] + (*data)[index-r23] + - (*data)[index-r3-1] - (*data)[index-r23-r3] - (*data)[index-r23-1] + (*data)[index-r23-r3-1]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred3D + 2 * (type_ - exe_params->intvRadius) * 
realPrecision; + } + else + { + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToInt64_bigEndian(curBytes); + exactData = (uint64_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + } + } + } + + free(type); + return; +} + + +void decompressDataSeries_int64_4D(int64_t** data, size_t r1, size_t r2, size_t r3, size_t r4, TightDataPointStorageI* tdps) +{ + updateQuantizationInfo(tdps->intervals); + size_t dataSeriesLength = r1*r2*r3*r4; + size_t r234 = r2*r3*r4; + size_t r34 = r3*r4; + + double realPrecision = tdps->realPrecision; + + *data = (int64_t*)malloc(sizeof(int64_t)*dataSeriesLength); + int* type = (int*)malloc(dataSeriesLength*sizeof(int)); + + HuffmanTree* huffmanTree = createHuffmanTree(tdps->stateNum); + decode_withTree(huffmanTree, tdps->typeArray, dataSeriesLength, type); + SZ_ReleaseHuffman(huffmanTree); + + int64_t minValue, exactData; + + minValue = tdps->minValue; + + int exactByteSize = tdps->exactByteSize; + unsigned char* exactDataBytePointer = tdps->exactDataBytes; + + unsigned char curBytes[8] = {0,0,0,0,0,0,0,0}; + + int rightShiftBits = computeRightShiftBits(exactByteSize, SZ_INT64); + + int type_; + + int64_t pred1D, pred2D, pred3D; + size_t ii, jj, kk, ll; + size_t index; + + for (ll = 0; ll < r1; ll++) + { + /////////////////////////// Process layer-0 /////////////////////////// + /* Process Row-0 data 0*/ + index = ll*r234; + + // recover the exact data + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToInt64_bigEndian(curBytes); + exactData = (uint64_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + + /* Process Row-0, data 1 */ + index = ll*r234+1; + + pred1D = (*data)[index-1]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + memcpy(curBytes, 
exactDataBytePointer, exactByteSize); + exactData = bytesToInt64_bigEndian(curBytes); + exactData = (uint64_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + + /* Process Row-0, data 2 --> data r4-1 */ + for (jj = 2; jj < r4; jj++) + { + index = ll*r234+jj; + + pred1D = 2*(*data)[index-1] - (*data)[index-2]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToInt64_bigEndian(curBytes); + exactData = (uint64_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + } + + /* Process Row-1 --> Row-r3-1 */ + for (ii = 1; ii < r3; ii++) + { + /* Process row-ii data 0 */ + index = ll*r234+ii*r4; + + pred1D = (*data)[index-r4]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToInt64_bigEndian(curBytes); + exactData = (uint64_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + + /* Process row-ii data 1 --> r4-1*/ + for (jj = 1; jj < r4; jj++) + { + index = ll*r234+ii*r4+jj; + + pred2D = (*data)[index-1] + (*data)[index-r4] - (*data)[index-r4-1]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred2D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToInt64_bigEndian(curBytes); + exactData = (uint64_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + } + } + + /////////////////////////// Process layer-1 --> layer-r2-1 /////////////////////////// + + for (kk = 1; kk 
< r2; kk++) + { + /* Process Row-0 data 0*/ + index = ll*r234+kk*r34; + + pred1D = (*data)[index-r34]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToInt64_bigEndian(curBytes); + exactData = (uint64_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + + /* Process Row-0 data 1 --> data r4-1 */ + for (jj = 1; jj < r4; jj++) + { + index = ll*r234+kk*r34+jj; + + pred2D = (*data)[index-1] + (*data)[index-r34] - (*data)[index-r34-1]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred2D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToInt64_bigEndian(curBytes); + exactData = (uint64_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + } + + /* Process Row-1 --> Row-r3-1 */ + for (ii = 1; ii < r3; ii++) + { + /* Process Row-i data 0 */ + index = ll*r234+kk*r34+ii*r4; + + pred2D = (*data)[index-r4] + (*data)[index-r34] - (*data)[index-r34-r4]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred2D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToInt64_bigEndian(curBytes); + exactData = (uint64_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + + /* Process Row-i data 1 --> data r4-1 */ + for (jj = 1; jj < r4; jj++) + { + index = ll*r234+kk*r34+ii*r4+jj; + + pred3D = (*data)[index-1] + (*data)[index-r4] + (*data)[index-r34] + - (*data)[index-r4-1] - (*data)[index-r34-r4] - (*data)[index-r34-1] + (*data)[index-r34-r4-1]; + + + type_ = type[index]; + if (type_ != 0) + { + 
(*data)[index] = pred3D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToInt64_bigEndian(curBytes); + exactData = (uint64_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + } + } + } + } + + free(type); + return; +} + +void getSnapshotData_int64_1D(int64_t** data, size_t dataSeriesLength, TightDataPointStorageI* tdps, int errBoundMode) +{ + size_t i; + + if (tdps->allSameData) { + int64_t value = bytesToInt64_bigEndian(tdps->exactDataBytes); + *data = (int64_t*)malloc(sizeof(int64_t)*dataSeriesLength); + for (i = 0; i < dataSeriesLength; i++) + (*data)[i] = value; + } else { + decompressDataSeries_int64_1D(data, dataSeriesLength, tdps); + } +} + +void getSnapshotData_int64_2D(int64_t** data, size_t r1, size_t r2, TightDataPointStorageI* tdps, int errBoundMode) +{ + size_t i; + size_t dataSeriesLength = r1*r2; + if (tdps->allSameData) { + int64_t value = bytesToInt64_bigEndian(tdps->exactDataBytes); + *data = (int64_t*)malloc(sizeof(int64_t)*dataSeriesLength); + for (i = 0; i < dataSeriesLength; i++) + (*data)[i] = value; + } else { + decompressDataSeries_int64_2D(data, r1, r2, tdps); + } +} + +void getSnapshotData_int64_3D(int64_t** data, size_t r1, size_t r2, size_t r3, TightDataPointStorageI* tdps, int errBoundMode) +{ + size_t i; + size_t dataSeriesLength = r1*r2*r3; + if (tdps->allSameData) { + int64_t value = bytesToInt64_bigEndian(tdps->exactDataBytes); + *data = (int64_t*)malloc(sizeof(int64_t)*dataSeriesLength); + for (i = 0; i < dataSeriesLength; i++) + (*data)[i] = value; + } else { + decompressDataSeries_int64_3D(data, r1, r2, r3, tdps); + } +} + +void getSnapshotData_int64_4D(int64_t** data, size_t r1, size_t r2, size_t r3, size_t r4, TightDataPointStorageI* tdps, int errBoundMode) +{ + size_t i; + size_t dataSeriesLength = r1*r2*r3*r4; + if (tdps->allSameData) { + int64_t value = 
bytesToInt64_bigEndian(tdps->exactDataBytes); + *data = (int64_t*)malloc(sizeof(int64_t)*dataSeriesLength); + for (i = 0; i < dataSeriesLength; i++) + (*data)[i] = value; + } else { + decompressDataSeries_int64_4D(data, r1, r2, r3, r4, tdps); + } +} diff --git a/deps/SZ/sz/src/szd_int8.c b/deps/SZ/sz/src/szd_int8.c new file mode 100644 index 0000000000000000000000000000000000000000..850b4595b7501e8651c43efa3b48d77b5f4f12eb --- /dev/null +++ b/deps/SZ/sz/src/szd_int8.c @@ -0,0 +1,913 @@ +/** + * @file szd_int8.c + * @author Sheng Di + * @date Aug, 2017 + * @brief + * (C) 2017 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. + */ + +#include +#include +#include +#include +#include "TightDataPointStorageI.h" +#include "sz.h" +#include "szd_int8.h" +#include "Huffman.h" +#include "utility.h" + +/** + * + * + * @return status SUCCESSFUL (SZ_SCES) or not (other error codes) f + * */ +int SZ_decompress_args_int8(int8_t** newData, size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, unsigned char* cmpBytes, size_t cmpSize) +{ + int status = SZ_SCES; + size_t dataLength = computeDataLength(r5,r4,r3,r2,r1); + + //unsigned char* tmpBytes; + size_t targetUncompressSize = dataLength <<2; //i.e., *4 + //tmpSize must be "much" smaller than dataLength + size_t i, tmpSize = 3+MetaDataByteLength+1+sizeof(int8_t)+exe_params->SZ_SIZE_TYPE; + unsigned char* szTmpBytes; + + if(cmpSize!=4+1+4+MetaDataByteLength && cmpSize!=4+1+8+MetaDataByteLength) + { + confparams_dec->losslessCompressor = is_lossless_compressed_data(cmpBytes, cmpSize); + if(confparams_dec->losslessCompressor!=-1) + confparams_dec->szMode = SZ_BEST_COMPRESSION; + else + confparams_dec->szMode = SZ_BEST_SPEED; + if(confparams_dec->szMode==SZ_BEST_SPEED) + { + tmpSize = cmpSize; + szTmpBytes = cmpBytes; + } + else if(confparams_dec->szMode==SZ_BEST_COMPRESSION || confparams_dec->szMode==SZ_DEFAULT_COMPRESSION) + { + 
if(targetUncompressSizelosslessCompressor, cmpBytes, (unsigned long)cmpSize, &szTmpBytes, (unsigned long)targetUncompressSize+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE);// (unsigned long)targetUncompressSize+8: consider the total length under lossless compression mode is actually 3+4+1+targetUncompressSize + //szTmpBytes = (unsigned char*)malloc(sizeof(unsigned char)*tmpSize); + //memcpy(szTmpBytes, tmpBytes, tmpSize); + //free(tmpBytes); //release useless memory + } + else + { + printf("Wrong value of confparams_dec->szMode in the double compressed bytes.\n"); + status = SZ_MERR; + return status; + } + } + else + szTmpBytes = cmpBytes; + //TODO: convert szTmpBytes to data array. + TightDataPointStorageI* tdps; + int errBoundMode = new_TightDataPointStorageI_fromFlatBytes(&tdps, szTmpBytes, tmpSize); + //writeByteData(tdps->typeArray, tdps->typeArray_size, "decompress-typebytes.tbt"); + int dim = computeDimension(r5,r4,r3,r2,r1); + int intSize = sizeof(int8_t); + if(tdps->isLossless) + { + *newData = (int8_t*)malloc(intSize*dataLength); + if(sysEndianType==BIG_ENDIAN_SYSTEM) + { + memcpy(*newData, szTmpBytes+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE, dataLength*intSize); + } + else + { + unsigned char* p = szTmpBytes+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE; + for(i=0;iszMode!=SZ_BEST_SPEED && cmpSize!=4+sizeof(int8_t)+exe_params->SZ_SIZE_TYPE+MetaDataByteLength) + free(szTmpBytes); + return status; +} + + +void decompressDataSeries_int8_1D(int8_t** data, size_t dataSeriesLength, TightDataPointStorageI* tdps) +{ + updateQuantizationInfo(tdps->intervals); + double interval = tdps->realPrecision*2; + + *data = (int8_t*)malloc(sizeof(int8_t)*dataSeriesLength); + + int* type = (int*)malloc(dataSeriesLength*sizeof(int)); + + HuffmanTree* huffmanTree = createHuffmanTree(tdps->stateNum); + decode_withTree(huffmanTree, tdps->typeArray, dataSeriesLength, type); + SZ_ReleaseHuffman(huffmanTree); + + //sdi:Debug + //writeUShortData(type, dataSeriesLength, 
"decompressStateBytes.sb"); + + long predValue, tmp; + int8_t minValue, exactData; + + minValue = tdps->minValue; + + int exactByteSize = tdps->exactByteSize; + unsigned char* exactDataBytePointer = tdps->exactDataBytes; + + unsigned char curBytes[8] = {0,0,0,0,0,0,0,0}; + + int rightShiftBits = computeRightShiftBits(exactByteSize, SZ_INT8); + if(rightShiftBits<0) + { + printf("Error: rightShift < 0!\n"); + exit(0); + } + int type_; + for (size_t i = 0; i < dataSeriesLength; i++) { + type_ = type[i]; + switch (type_) { + case 0: + // recover the exact data + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = curBytes[0]; + exactData = (uint8_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[i] = exactData + minValue; + break; + default: + //predValue = 2 * (*data)[i-1] - (*data)[i-2]; + predValue = (*data)[i-1]; + tmp = predValue + (type_-exe_params->intvRadius)*interval; + if(tmp >= SZ_INT8_MIN&&tmpintervals); + //printf("tdps->intervals=%d, exe_params->intvRadius=%d\n", tdps->intervals, exe_params->intvRadius); + + size_t dataSeriesLength = r1*r2; + // printf ("%d %d\n", r1, r2); + + double realPrecision = tdps->realPrecision; + + *data = (int8_t*)malloc(sizeof(int8_t)*dataSeriesLength); + + int* type = (int*)malloc(dataSeriesLength*sizeof(int)); + + HuffmanTree* huffmanTree = createHuffmanTree(tdps->stateNum); + decode_withTree(huffmanTree, tdps->typeArray, dataSeriesLength, type); + SZ_ReleaseHuffman(huffmanTree); + + int8_t minValue, exactData; + + minValue = tdps->minValue; + + int exactByteSize = tdps->exactByteSize; + unsigned char* exactDataBytePointer = tdps->exactDataBytes; + + unsigned char curBytes[8] = {0,0,0,0,0,0,0,0}; + + int rightShiftBits = computeRightShiftBits(exactByteSize, SZ_INT8); + + long pred1D, pred2D, tmp; + size_t ii, jj; + + /* Process Row-0, data 0 */ + + // recover the exact data + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = curBytes[0]; + exactData = 
(uint8_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[0] = exactData + minValue; + + /* Process Row-0, data 1 */ + int type_ = type[1]; + if (type_ != 0) + { + pred1D = (*data)[0]; + tmp = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_INT8_MIN&&tmp> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[1] = exactData + minValue; + } + + /* Process Row-0, data 2 --> data r2-1 */ + for (jj = 2; jj < r2; jj++) + { + type_ = type[jj]; + if (type_ != 0) + { + pred1D = 2*(*data)[jj-1] - (*data)[jj-2]; + tmp = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_INT8_MIN&&tmp> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[jj] = exactData + minValue; + } + } + + size_t index; + /* Process Row-1 --> Row-r1-1 */ + for (ii = 1; ii < r1; ii++) + { + /* Process row-ii data 0 */ + index = ii*r2; + + type_ = type[index]; + if (type_ != 0) + { + pred1D = (*data)[index-r2]; + tmp = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_INT8_MIN&&tmp> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + + /* Process row-ii data 1 --> r2-1*/ + for (jj = 1; jj < r2; jj++) + { + index = ii*r2+jj; + pred2D = (*data)[index-1] + (*data)[index-r2] - (*data)[index-r2-1]; + + type_ = type[index]; + if (type_ != 0) + { + tmp = pred2D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_INT8_MIN&&tmp> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + } + } + + free(type); + return; +} + +void decompressDataSeries_int8_3D(int8_t** data, size_t r1, size_t r2, size_t r3, TightDataPointStorageI* tdps) +{ + updateQuantizationInfo(tdps->intervals); + size_t dataSeriesLength = r1*r2*r3; + size_t r23 = r2*r3; +// printf ("%d %d %d\n", r1, r2, r3); + double realPrecision = tdps->realPrecision; + + *data = 
(int8_t*)malloc(sizeof(int8_t)*dataSeriesLength); + int* type = (int*)malloc(dataSeriesLength*sizeof(int)); + + HuffmanTree* huffmanTree = createHuffmanTree(tdps->stateNum); + decode_withTree(huffmanTree, tdps->typeArray, dataSeriesLength, type); + SZ_ReleaseHuffman(huffmanTree); + + int8_t minValue, exactData; + + minValue = tdps->minValue; + + int exactByteSize = tdps->exactByteSize; + unsigned char* exactDataBytePointer = tdps->exactDataBytes; + + unsigned char curBytes[8] = {0,0,0,0,0,0,0,0}; + + int rightShiftBits = computeRightShiftBits(exactByteSize, SZ_INT8); + + long pred1D, pred2D, pred3D, tmp; + size_t ii, jj, kk; + + /////////////////////////// Process layer-0 /////////////////////////// + /* Process Row-0 data 0*/ + + // recover the exact data + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = curBytes[0]; + exactData = (uint8_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[0] = exactData + minValue; + + /* Process Row-0, data 1 */ + pred1D = (*data)[0]; + + int type_ = type[1]; + if (type_ != 0) + { + tmp = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_INT8_MIN&&tmp> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[1] = exactData + minValue; + } + /* Process Row-0, data 2 --> data r3-1 */ + for (jj = 2; jj < r3; jj++) + { + pred1D = 2*(*data)[jj-1] - (*data)[jj-2]; + + type_ = type[jj]; + if (type_ != 0) + { + tmp = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_INT8_MIN&&tmp> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[jj] = exactData + minValue; + } + } + + size_t index; + /* Process Row-1 --> Row-r2-1 */ + for (ii = 1; ii < r2; ii++) + { + /* Process row-ii data 0 */ + index = ii*r3; + pred1D = (*data)[index-r3]; + + type_ = type[index]; + if (type_ != 0) + { + tmp = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_INT8_MIN&&tmp> rightShiftBits; + 
exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + + /* Process row-ii data 1 --> r3-1*/ + for (jj = 1; jj < r3; jj++) + { + index = ii*r3+jj; + pred2D = (*data)[index-1] + (*data)[index-r3] - (*data)[index-r3-1]; + + type_ = type[index]; + if (type_ != 0) + { + tmp = pred2D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_INT8_MIN&&tmp> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + } + } + + /////////////////////////// Process layer-1 --> layer-r1-1 /////////////////////////// + + for (kk = 1; kk < r1; kk++) + { + /* Process Row-0 data 0*/ + index = kk*r23; + pred1D = (*data)[index-r23]; + + type_ = type[index]; + if (type_ != 0) + { + tmp = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_INT8_MIN&&tmp> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + + /* Process Row-0 data 1 --> data r3-1 */ + for (jj = 1; jj < r3; jj++) + { + index = kk*r23+jj; + pred2D = (*data)[index-1] + (*data)[index-r23] - (*data)[index-r23-1]; + + type_ = type[index]; + if (type_ != 0) + { + tmp = pred2D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_INT8_MIN&&tmp> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + } + + /* Process Row-1 --> Row-r2-1 */ + for (ii = 1; ii < r2; ii++) + { + /* Process Row-i data 0 */ + index = kk*r23 + ii*r3; + pred2D = (*data)[index-r3] + (*data)[index-r23] - (*data)[index-r23-r3]; + + type_ = type[index]; + if (type_ != 0) + { + tmp = pred2D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_INT8_MIN&&tmp> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + + /* Process Row-i data 1 --> data r3-1 */ + for (jj = 1; jj < r3; jj++) + { + index = kk*r23 + ii*r3 + jj; + pred3D = (*data)[index-1] + (*data)[index-r3] + 
(*data)[index-r23] + - (*data)[index-r3-1] - (*data)[index-r23-r3] - (*data)[index-r23-1] + (*data)[index-r23-r3-1]; + + type_ = type[index]; + if (type_ != 0) + { + tmp = pred3D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_INT8_MIN&&tmp> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + } + } + } + + free(type); + return; +} + + +void decompressDataSeries_int8_4D(int8_t** data, size_t r1, size_t r2, size_t r3, size_t r4, TightDataPointStorageI* tdps) +{ + updateQuantizationInfo(tdps->intervals); + size_t dataSeriesLength = r1*r2*r3*r4; + size_t r234 = r2*r3*r4; + size_t r34 = r3*r4; + + double realPrecision = tdps->realPrecision; + + *data = (int8_t*)malloc(sizeof(int8_t)*dataSeriesLength); + int* type = (int*)malloc(dataSeriesLength*sizeof(int)); + + HuffmanTree* huffmanTree = createHuffmanTree(tdps->stateNum); + decode_withTree(huffmanTree, tdps->typeArray, dataSeriesLength, type); + SZ_ReleaseHuffman(huffmanTree); + + int8_t minValue, exactData; + + minValue = tdps->minValue; + + int exactByteSize = tdps->exactByteSize; + unsigned char* exactDataBytePointer = tdps->exactDataBytes; + + unsigned char curBytes[8] = {0,0,0,0,0,0,0,0}; + + int rightShiftBits = computeRightShiftBits(exactByteSize, SZ_INT8); + + int type_; + + long pred1D, pred2D, pred3D, tmp; + size_t ii, jj, kk, ll; + size_t index; + + for (ll = 0; ll < r1; ll++) + { + /////////////////////////// Process layer-0 /////////////////////////// + /* Process Row-0 data 0*/ + index = ll*r234; + // recover the exact data + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = curBytes[0]; + exactData = (uint8_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + + /* Process Row-0, data 1 */ + index = ll*r234+1; + + pred1D = (*data)[index-1]; + + type_ = type[index]; + if (type_ != 0) + { + tmp = pred1D + 2 * (type_ - exe_params->intvRadius) * 
realPrecision; + if(tmp >= SZ_INT8_MIN&&tmp> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + + /* Process Row-0, data 2 --> data r4-1 */ + for (jj = 2; jj < r4; jj++) + { + index = ll*r234+jj; + + pred1D = 2*(*data)[index-1] - (*data)[index-2]; + + type_ = type[index]; + if (type_ != 0) + { + tmp = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_INT8_MIN&&tmp> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + } + + /* Process Row-1 --> Row-r3-1 */ + for (ii = 1; ii < r3; ii++) + { + /* Process row-ii data 0 */ + index = ll*r234+ii*r4; + + pred1D = (*data)[index-r4]; + + type_ = type[index]; + if (type_ != 0) + { + tmp = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_INT8_MIN&&tmp> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + + /* Process row-ii data 1 --> r4-1*/ + for (jj = 1; jj < r4; jj++) + { + index = ll*r234+ii*r4+jj; + + pred2D = (*data)[index-1] + (*data)[index-r4] - (*data)[index-r4-1]; + + type_ = type[index]; + if (type_ != 0) + { + tmp = pred2D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_INT8_MIN&&tmp> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + } + } + + /////////////////////////// Process layer-1 --> layer-r2-1 /////////////////////////// + + for (kk = 1; kk < r2; kk++) + { + /* Process Row-0 data 0*/ + index = ll*r234+kk*r34; + + pred1D = (*data)[index-r34]; + + type_ = type[index]; + if (type_ != 0) + { + tmp = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_INT8_MIN&&tmp> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + + /* Process Row-0 data 1 --> data r4-1 */ + for (jj = 1; jj < r4; jj++) + { + index = ll*r234+kk*r34+jj; + + pred2D = (*data)[index-1] + 
(*data)[index-r34] - (*data)[index-r34-1]; + + type_ = type[index]; + if (type_ != 0) + { + tmp = pred2D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_INT8_MIN&&tmp> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + } + + /* Process Row-1 --> Row-r3-1 */ + for (ii = 1; ii < r3; ii++) + { + /* Process Row-i data 0 */ + index = ll*r234+kk*r34+ii*r4; + + pred2D = (*data)[index-r4] + (*data)[index-r34] - (*data)[index-r34-r4]; + + type_ = type[index]; + if (type_ != 0) + { + tmp = pred2D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_INT8_MIN&&tmp> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + + /* Process Row-i data 1 --> data r4-1 */ + for (jj = 1; jj < r4; jj++) + { + index = ll*r234+kk*r34+ii*r4+jj; + + pred3D = (*data)[index-1] + (*data)[index-r4] + (*data)[index-r34] + - (*data)[index-r4-1] - (*data)[index-r34-r4] - (*data)[index-r34-1] + (*data)[index-r34-r4-1]; + + type_ = type[index]; + if (type_ != 0) + { + tmp = pred3D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_INT8_MIN&&tmp> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + } + } + } + } + + free(type); + return; +} + +void getSnapshotData_int8_1D(int8_t** data, size_t dataSeriesLength, TightDataPointStorageI* tdps, int errBoundMode) +{ + size_t i; + + if (tdps->allSameData) { + int8_t value = tdps->exactDataBytes[0]; + *data = (int8_t*)malloc(sizeof(int8_t)*dataSeriesLength); + for (i = 0; i < dataSeriesLength; i++) + (*data)[i] = value; + } else { + decompressDataSeries_int8_1D(data, dataSeriesLength, tdps); + } +} + +void getSnapshotData_int8_2D(int8_t** data, size_t r1, size_t r2, TightDataPointStorageI* tdps, int errBoundMode) +{ + size_t i; + size_t dataSeriesLength = r1*r2; + if (tdps->allSameData) { + int8_t value = tdps->exactDataBytes[0]; + *data = 
(int8_t*)malloc(sizeof(int8_t)*dataSeriesLength); + for (i = 0; i < dataSeriesLength; i++) + (*data)[i] = value; + } else { + decompressDataSeries_int8_2D(data, r1, r2, tdps); + } +} + +void getSnapshotData_int8_3D(int8_t** data, size_t r1, size_t r2, size_t r3, TightDataPointStorageI* tdps, int errBoundMode) +{ + size_t i; + size_t dataSeriesLength = r1*r2*r3; + if (tdps->allSameData) { + int8_t value = tdps->exactDataBytes[0]; + *data = (int8_t*)malloc(sizeof(int8_t)*dataSeriesLength); + for (i = 0; i < dataSeriesLength; i++) + (*data)[i] = value; + } else { + decompressDataSeries_int8_3D(data, r1, r2, r3, tdps); + } +} + +void getSnapshotData_int8_4D(int8_t** data, size_t r1, size_t r2, size_t r3, size_t r4, TightDataPointStorageI* tdps, int errBoundMode) +{ + size_t i; + size_t dataSeriesLength = r1*r2*r3*r4; + if (tdps->allSameData) { + int8_t value = tdps->exactDataBytes[0]; + *data = (int8_t*)malloc(sizeof(int8_t)*dataSeriesLength); + for (i = 0; i < dataSeriesLength; i++) + (*data)[i] = value; + } else { + decompressDataSeries_int8_4D(data, r1, r2, r3, r4, tdps); + } +} diff --git a/deps/SZ/sz/src/szd_uint16.c b/deps/SZ/sz/src/szd_uint16.c new file mode 100644 index 0000000000000000000000000000000000000000..ecf42302ab38dc85edb0cb4f2ff14fd4dbc9eb48 --- /dev/null +++ b/deps/SZ/sz/src/szd_uint16.c @@ -0,0 +1,922 @@ +/** + * @file szd_uint16.c + * @author Sheng Di + * @date Aug, 2017 + * @brief + * (C) 2017 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. 
+ */ + +#include +#include +#include +#include +#include "TightDataPointStorageI.h" +#include "sz.h" +#include "szd_uint16.h" +#include "Huffman.h" +#include "utility.h" + +/** + * + * + * @return status SUCCESSFUL (SZ_SCES) or not (other error codes) f + * */ +int SZ_decompress_args_uint16(uint16_t** newData, size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, unsigned char* cmpBytes, size_t cmpSize) +{ + int status = SZ_SCES; + size_t dataLength = computeDataLength(r5,r4,r3,r2,r1); + + //unsigned char* tmpBytes; + size_t targetUncompressSize = dataLength <<2; //i.e., *4 + //tmpSize must be "much" smaller than dataLength + size_t i, tmpSize = 3+MetaDataByteLength+1+sizeof(uint16_t)+exe_params->SZ_SIZE_TYPE; + unsigned char* szTmpBytes; + + if(cmpSize!=4+2+4+MetaDataByteLength && cmpSize!=4+2+8+MetaDataByteLength) + { + confparams_dec->losslessCompressor = is_lossless_compressed_data(cmpBytes, cmpSize); + if(confparams_dec->losslessCompressor!=-1) + confparams_dec->szMode = SZ_BEST_COMPRESSION; + else + confparams_dec->szMode = SZ_BEST_SPEED; + if(confparams_dec->szMode==SZ_BEST_SPEED) + { + tmpSize = cmpSize; + szTmpBytes = cmpBytes; + } + else if(confparams_dec->szMode==SZ_BEST_COMPRESSION || confparams_dec->szMode==SZ_DEFAULT_COMPRESSION) + { + if(targetUncompressSizelosslessCompressor, cmpBytes, (unsigned long)cmpSize, &szTmpBytes, (unsigned long)targetUncompressSize+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE);// (unsigned long)targetUncompressSize+8: consider the total length under lossless compression mode is actually 3+4+1+targetUncompressSize + //szTmpBytes = (unsigned char*)malloc(sizeof(unsigned char)*tmpSize); + //memcpy(szTmpBytes, tmpBytes, tmpSize); + //free(tmpBytes); //release useless memory + } + else + { + printf("Wrong value of confparams_dec->szMode in the double compressed bytes.\n"); + status = SZ_MERR; + return status; + } + } + else + szTmpBytes = cmpBytes; + //TODO: convert szTmpBytes to data array. 
+ TightDataPointStorageI* tdps; + int errBoundMode = new_TightDataPointStorageI_fromFlatBytes(&tdps, szTmpBytes, tmpSize); + //writeByteData(tdps->typeArray, tdps->typeArray_size, "decompress-typebytes.tbt"); + int dim = computeDimension(r5,r4,r3,r2,r1); + int intSize = sizeof(uint16_t); + if(tdps->isLossless) + { + *newData = (uint16_t*)malloc(intSize*dataLength); + if(sysEndianType==BIG_ENDIAN_SYSTEM) + { + memcpy(*newData, szTmpBytes+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE, dataLength*intSize); + } + else + { + unsigned char* p = szTmpBytes+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE; + for(i=0;isol_ID==SZ_Transpose) + { + getSnapshotData_uint16_1D(newData,dataLength,tdps, errBoundMode); + } + else //confparams_dec->sol_ID==SZ + { + if (dim == 1) + getSnapshotData_uint16_1D(newData,r1,tdps, errBoundMode); + else + if (dim == 2) + getSnapshotData_uint16_2D(newData,r2,r1,tdps, errBoundMode); + else + if (dim == 3) + getSnapshotData_uint16_3D(newData,r3,r2,r1,tdps, errBoundMode); + else + if (dim == 4) + getSnapshotData_uint16_4D(newData,r4,r3,r2,r1,tdps, errBoundMode); + else + { + printf("Error: currently support only at most 4 dimensions!\n"); + status = SZ_DERR; + } + } + free_TightDataPointStorageI2(tdps); + if(confparams_dec->szMode!=SZ_BEST_SPEED && cmpSize!=4+sizeof(uint16_t)+exe_params->SZ_SIZE_TYPE+MetaDataByteLength) + free(szTmpBytes); + return status; +} + + +void decompressDataSeries_uint16_1D(uint16_t** data, size_t dataSeriesLength, TightDataPointStorageI* tdps) +{ + updateQuantizationInfo(tdps->intervals); + double interval = tdps->realPrecision*2; + + *data = (uint16_t*)malloc(sizeof(uint16_t)*dataSeriesLength); + + int* type = (int*)malloc(dataSeriesLength*sizeof(int)); + + HuffmanTree* huffmanTree = createHuffmanTree(tdps->stateNum); + decode_withTree(huffmanTree, tdps->typeArray, dataSeriesLength, type); + SZ_ReleaseHuffman(huffmanTree); + + //sdi:Debug + //writeUShortData(type, dataSeriesLength, "decompressStateBytes.sb"); + + long 
predValue, tmp; + uint16_t minValue, exactData; + + minValue = tdps->minValue; + + int exactByteSize = tdps->exactByteSize; + unsigned char* exactDataBytePointer = tdps->exactDataBytes; + + unsigned char curBytes[8] = {0,0,0,0,0,0,0,0}; + + int rightShiftBits = computeRightShiftBits(exactByteSize, SZ_UINT16); + if(rightShiftBits<0) + { + printf("Error: rightShift < 0!\n"); + exit(0); + } + int type_; + for (size_t i = 0; i < dataSeriesLength; i++) { + type_ = type[i]; + switch (type_) { + case 0: + // recover the exact data + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToUInt16_bigEndian(curBytes); + exactData = (uint16_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[i] = exactData + minValue; + break; + default: + //predValue = 2 * (*data)[i-1] - (*data)[i-2]; + predValue = (*data)[i-1]; + tmp = predValue + (type_-exe_params->intvRadius)*interval; + if(tmp >= SZ_UINT16_MIN&&tmpintervals); + //printf("tdps->intervals=%d, exe_params->intvRadius=%d\n", tdps->intervals, exe_params->intvRadius); + + size_t dataSeriesLength = r1*r2; + // printf ("%d %d\n", r1, r2); + + double realPrecision = tdps->realPrecision; + + *data = (uint16_t*)malloc(sizeof(uint16_t)*dataSeriesLength); + + int* type = (int*)malloc(dataSeriesLength*sizeof(int)); + + HuffmanTree* huffmanTree = createHuffmanTree(tdps->stateNum); + decode_withTree(huffmanTree, tdps->typeArray, dataSeriesLength, type); + SZ_ReleaseHuffman(huffmanTree); + + uint16_t minValue, exactData; + + minValue = tdps->minValue; + + int exactByteSize = tdps->exactByteSize; + unsigned char* exactDataBytePointer = tdps->exactDataBytes; + + unsigned char curBytes[8] = {0,0,0,0,0,0,0,0}; + + int rightShiftBits = computeRightShiftBits(exactByteSize, SZ_UINT16); + + long pred1D, pred2D, tmp; + size_t ii, jj; + + /* Process Row-0, data 0 */ + + // recover the exact data + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToUInt16_bigEndian(curBytes); 
+ exactData = (uint16_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[0] = exactData + minValue; + + /* Process Row-0, data 1 */ + int type_ = type[1]; + if (type_ != 0) + { + pred1D = (*data)[0]; + tmp = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_UINT16_MIN&&tmp> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[1] = exactData + minValue; + } + + /* Process Row-0, data 2 --> data r2-1 */ + for (jj = 2; jj < r2; jj++) + { + type_ = type[jj]; + if (type_ != 0) + { + pred1D = 2*(*data)[jj-1] - (*data)[jj-2]; + tmp = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_UINT16_MIN&&tmp> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[jj] = exactData + minValue; + } + } + + size_t index; + /* Process Row-1 --> Row-r1-1 */ + for (ii = 1; ii < r1; ii++) + { + /* Process row-ii data 0 */ + index = ii*r2; + + type_ = type[index]; + if (type_ != 0) + { + pred1D = (*data)[index-r2]; + tmp = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_UINT16_MIN&&tmp> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + + /* Process row-ii data 1 --> r2-1*/ + for (jj = 1; jj < r2; jj++) + { + index = ii*r2+jj; + pred2D = (*data)[index-1] + (*data)[index-r2] - (*data)[index-r2-1]; + + type_ = type[index]; + if (type_ != 0) + { + tmp = pred2D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_UINT16_MIN&&tmp> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + } + } + + free(type); + return; +} + +void decompressDataSeries_uint16_3D(uint16_t** data, size_t r1, size_t r2, size_t r3, TightDataPointStorageI* tdps) +{ + updateQuantizationInfo(tdps->intervals); + size_t dataSeriesLength = r1*r2*r3; + size_t r23 = r2*r3; +// printf ("%d %d %d\n", r1, r2, r3); + double realPrecision = tdps->realPrecision; + + *data = 
(uint16_t*)malloc(sizeof(uint16_t)*dataSeriesLength); + int* type = (int*)malloc(dataSeriesLength*sizeof(int)); + + HuffmanTree* huffmanTree = createHuffmanTree(tdps->stateNum); + decode_withTree(huffmanTree, tdps->typeArray, dataSeriesLength, type); + SZ_ReleaseHuffman(huffmanTree); + + uint16_t minValue, exactData; + + minValue = tdps->minValue; + + int exactByteSize = tdps->exactByteSize; + unsigned char* exactDataBytePointer = tdps->exactDataBytes; + + unsigned char curBytes[8] = {0,0,0,0,0,0,0,0}; + + int rightShiftBits = computeRightShiftBits(exactByteSize, SZ_UINT16); + + long pred1D, pred2D, pred3D, tmp; + size_t ii, jj, kk; + + /////////////////////////// Process layer-0 /////////////////////////// + /* Process Row-0 data 0*/ + + // recover the exact data + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToUInt16_bigEndian(curBytes); + exactData = (uint16_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[0] = exactData + minValue; + + /* Process Row-0, data 1 */ + pred1D = (*data)[0]; + + int type_ = type[1]; + if (type_ != 0) + { + tmp = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_UINT16_MIN&&tmp> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[1] = exactData + minValue; + } + /* Process Row-0, data 2 --> data r3-1 */ + for (jj = 2; jj < r3; jj++) + { + pred1D = 2*(*data)[jj-1] - (*data)[jj-2]; + + type_ = type[jj]; + if (type_ != 0) + { + tmp = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_UINT16_MIN&&tmp> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[jj] = exactData + minValue; + } + } + + size_t index; + /* Process Row-1 --> Row-r2-1 */ + for (ii = 1; ii < r2; ii++) + { + /* Process row-ii data 0 */ + index = ii*r3; + pred1D = (*data)[index-r3]; + + type_ = type[index]; + if (type_ != 0) + { + tmp = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + if(tmp >= 
SZ_UINT16_MIN&&tmp> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + + /* Process row-ii data 1 --> r3-1*/ + for (jj = 1; jj < r3; jj++) + { + index = ii*r3+jj; + pred2D = (*data)[index-1] + (*data)[index-r3] - (*data)[index-r3-1]; + + type_ = type[index]; + if (type_ != 0) + { + tmp = pred2D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_UINT16_MIN&&tmp> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + } + } + + /////////////////////////// Process layer-1 --> layer-r1-1 /////////////////////////// + + for (kk = 1; kk < r1; kk++) + { + /* Process Row-0 data 0*/ + index = kk*r23; + pred1D = (*data)[index-r23]; + + type_ = type[index]; + if (type_ != 0) + { + tmp = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_UINT16_MIN&&tmp> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + + /* Process Row-0 data 1 --> data r3-1 */ + for (jj = 1; jj < r3; jj++) + { + index = kk*r23+jj; + pred2D = (*data)[index-1] + (*data)[index-r23] - (*data)[index-r23-1]; + + type_ = type[index]; + if (type_ != 0) + { + tmp = pred2D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_UINT16_MIN&&tmp> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + } + + /* Process Row-1 --> Row-r2-1 */ + for (ii = 1; ii < r2; ii++) + { + /* Process Row-i data 0 */ + index = kk*r23 + ii*r3; + pred2D = (*data)[index-r3] + (*data)[index-r23] - (*data)[index-r23-r3]; + + type_ = type[index]; + if (type_ != 0) + { + tmp = pred2D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_UINT16_MIN&&tmp> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + + /* Process Row-i data 1 --> data r3-1 */ + for (jj = 1; jj < r3; jj++) + { + index = kk*r23 + ii*r3 + jj; + 
pred3D = (*data)[index-1] + (*data)[index-r3] + (*data)[index-r23] + - (*data)[index-r3-1] - (*data)[index-r23-r3] - (*data)[index-r23-1] + (*data)[index-r23-r3-1]; + + type_ = type[index]; + if (type_ != 0) + { + tmp = pred3D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_UINT16_MIN&&tmp> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + } + } + } + + free(type); + return; +} + + +void decompressDataSeries_uint16_4D(uint16_t** data, size_t r1, size_t r2, size_t r3, size_t r4, TightDataPointStorageI* tdps) +{ + updateQuantizationInfo(tdps->intervals); + size_t dataSeriesLength = r1*r2*r3*r4; + size_t r234 = r2*r3*r4; + size_t r34 = r3*r4; + + double realPrecision = tdps->realPrecision; + + *data = (uint16_t*)malloc(sizeof(uint16_t)*dataSeriesLength); + int* type = (int*)malloc(dataSeriesLength*sizeof(int)); + + HuffmanTree* huffmanTree = createHuffmanTree(tdps->stateNum); + decode_withTree(huffmanTree, tdps->typeArray, dataSeriesLength, type); + SZ_ReleaseHuffman(huffmanTree); + + uint16_t minValue, exactData; + + minValue = tdps->minValue; + + int exactByteSize = tdps->exactByteSize; + unsigned char* exactDataBytePointer = tdps->exactDataBytes; + + unsigned char curBytes[8] = {0,0,0,0,0,0,0,0}; + + int rightShiftBits = computeRightShiftBits(exactByteSize, SZ_UINT16); + + int type_; + + long pred1D, pred2D, pred3D, tmp; + size_t ii, jj, kk, ll; + size_t index; + + for (ll = 0; ll < r1; ll++) + { + /////////////////////////// Process layer-0 /////////////////////////// + /* Process Row-0 data 0*/ + index = ll*r234; + + // recover the exact data + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToUInt16_bigEndian(curBytes); + exactData = (uint16_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + + /* Process Row-0, data 1 */ + index = ll*r234+1; + + pred1D = (*data)[index-1]; + + type_ = type[index]; + 
if (type_ != 0) + { + tmp = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_UINT16_MIN&&tmp> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + + /* Process Row-0, data 2 --> data r4-1 */ + for (jj = 2; jj < r4; jj++) + { + index = ll*r234+jj; + + pred1D = 2*(*data)[index-1] - (*data)[index-2]; + + type_ = type[index]; + if (type_ != 0) + { + tmp = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_UINT16_MIN&&tmp> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + } + + /* Process Row-1 --> Row-r3-1 */ + for (ii = 1; ii < r3; ii++) + { + /* Process row-ii data 0 */ + index = ll*r234+ii*r4; + + pred1D = (*data)[index-r4]; + + type_ = type[index]; + if (type_ != 0) + { + tmp = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_UINT16_MIN&&tmp> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + + /* Process row-ii data 1 --> r4-1*/ + for (jj = 1; jj < r4; jj++) + { + index = ll*r234+ii*r4+jj; + + pred2D = (*data)[index-1] + (*data)[index-r4] - (*data)[index-r4-1]; + + type_ = type[index]; + if (type_ != 0) + { + tmp = pred2D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_UINT16_MIN&&tmp> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + } + } + + /////////////////////////// Process layer-1 --> layer-r2-1 /////////////////////////// + + for (kk = 1; kk < r2; kk++) + { + /* Process Row-0 data 0*/ + index = ll*r234+kk*r34; + + pred1D = (*data)[index-r34]; + + type_ = type[index]; + if (type_ != 0) + { + tmp = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_UINT16_MIN&&tmp> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + + /* Process Row-0 data 1 --> data r4-1 */ + for 
(jj = 1; jj < r4; jj++) + { + index = ll*r234+kk*r34+jj; + + pred2D = (*data)[index-1] + (*data)[index-r34] - (*data)[index-r34-1]; + + type_ = type[index]; + if (type_ != 0) + { + tmp = pred2D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_UINT16_MIN&&tmp> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + } + + /* Process Row-1 --> Row-r3-1 */ + for (ii = 1; ii < r3; ii++) + { + /* Process Row-i data 0 */ + index = ll*r234+kk*r34+ii*r4; + + pred2D = (*data)[index-r4] + (*data)[index-r34] - (*data)[index-r34-r4]; + + type_ = type[index]; + if (type_ != 0) + { + tmp = pred2D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_UINT16_MIN&&tmp> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + + /* Process Row-i data 1 --> data r4-1 */ + for (jj = 1; jj < r4; jj++) + { + index = ll*r234+kk*r34+ii*r4+jj; + + pred3D = (*data)[index-1] + (*data)[index-r4] + (*data)[index-r34] + - (*data)[index-r4-1] - (*data)[index-r34-r4] - (*data)[index-r34-1] + (*data)[index-r34-r4-1]; + + + type_ = type[index]; + if (type_ != 0) + { + tmp = pred3D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_UINT16_MIN&&tmp> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + } + } + } + } + + free(type); + return; +} + +void getSnapshotData_uint16_1D(uint16_t** data, size_t dataSeriesLength, TightDataPointStorageI* tdps, int errBoundMode) +{ + size_t i; + + if (tdps->allSameData) { + uint16_t value = bytesToUInt16_bigEndian(tdps->exactDataBytes); + *data = (uint16_t*)malloc(sizeof(uint16_t)*dataSeriesLength); + for (i = 0; i < dataSeriesLength; i++) + (*data)[i] = value; + } else { + decompressDataSeries_uint16_1D(data, dataSeriesLength, tdps); + } +} + +void getSnapshotData_uint16_2D(uint16_t** data, size_t r1, size_t r2, TightDataPointStorageI* tdps, int errBoundMode) +{ + 
size_t i; + size_t dataSeriesLength = r1*r2; + if (tdps->allSameData) { + uint16_t value = bytesToUInt16_bigEndian(tdps->exactDataBytes); + *data = (uint16_t*)malloc(sizeof(uint16_t)*dataSeriesLength); + for (i = 0; i < dataSeriesLength; i++) + (*data)[i] = value; + } else { + decompressDataSeries_uint16_2D(data, r1, r2, tdps); + } +} + +void getSnapshotData_uint16_3D(uint16_t** data, size_t r1, size_t r2, size_t r3, TightDataPointStorageI* tdps, int errBoundMode) +{ + size_t i; + size_t dataSeriesLength = r1*r2*r3; + if (tdps->allSameData) { + uint16_t value = bytesToUInt16_bigEndian(tdps->exactDataBytes); + *data = (uint16_t*)malloc(sizeof(uint16_t)*dataSeriesLength); + for (i = 0; i < dataSeriesLength; i++) + (*data)[i] = value; + } else { + decompressDataSeries_uint16_3D(data, r1, r2, r3, tdps); + } +} + +void getSnapshotData_uint16_4D(uint16_t** data, size_t r1, size_t r2, size_t r3, size_t r4, TightDataPointStorageI* tdps, int errBoundMode) +{ + size_t i; + size_t dataSeriesLength = r1*r2*r3*r4; + if (tdps->allSameData) { + uint16_t value = bytesToUInt16_bigEndian(tdps->exactDataBytes); + *data = (uint16_t*)malloc(sizeof(uint16_t)*dataSeriesLength); + for (i = 0; i < dataSeriesLength; i++) + (*data)[i] = value; + } else { + decompressDataSeries_uint16_4D(data, r1, r2, r3, r4, tdps); + } +} diff --git a/deps/SZ/sz/src/szd_uint32.c b/deps/SZ/sz/src/szd_uint32.c new file mode 100644 index 0000000000000000000000000000000000000000..04e8049f9dc9f3a8cf6ba01aa0fc4bb691b4d735 --- /dev/null +++ b/deps/SZ/sz/src/szd_uint32.c @@ -0,0 +1,789 @@ +/** + * @file szd_uint32.c + * @author Sheng Di + * @date Aug, 2017 + * @brief + * (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. 
+ */ + +#include +#include +#include +#include +#include "TightDataPointStorageI.h" +#include "sz.h" +#include "szd_uint32.h" +#include "Huffman.h" +#include "utility.h" + +/** + * + * + * @return status SUCCESSFUL (SZ_SCES) or not (other error codes) f + * */ +int SZ_decompress_args_uint32(uint32_t** newData, size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, unsigned char* cmpBytes, size_t cmpSize) +{ + int status = SZ_SCES; + size_t dataLength = computeDataLength(r5,r4,r3,r2,r1); + + //unsigned char* tmpBytes; + size_t targetUncompressSize = dataLength <<2; //i.e., *4 + //tmpSize must be "much" smaller than dataLength + size_t i, tmpSize = 3+MetaDataByteLength+1+sizeof(uint32_t)+exe_params->SZ_SIZE_TYPE; + unsigned char* szTmpBytes; + + if(cmpSize!=4+4+4+MetaDataByteLength && cmpSize!=4+4+8+MetaDataByteLength) + { + confparams_dec->losslessCompressor = is_lossless_compressed_data(cmpBytes, cmpSize); + if(confparams_dec->losslessCompressor!=-1) + confparams_dec->szMode = SZ_BEST_COMPRESSION; + else + confparams_dec->szMode = SZ_BEST_SPEED; + if(confparams_dec->szMode==SZ_BEST_SPEED) + { + tmpSize = cmpSize; + szTmpBytes = cmpBytes; + } + else if(confparams_dec->szMode==SZ_BEST_COMPRESSION || confparams_dec->szMode==SZ_DEFAULT_COMPRESSION) + { + if(targetUncompressSizelosslessCompressor, cmpBytes, (unsigned long)cmpSize, &szTmpBytes, (unsigned long)targetUncompressSize+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE);// (unsigned long)targetUncompressSize+8: consider the total length under lossless compression mode is actually 3+4+1+targetUncompressSize + //szTmpBytes = (unsigned char*)malloc(sizeof(unsigned char)*tmpSize); + //memcpy(szTmpBytes, tmpBytes, tmpSize); + //free(tmpBytes); //release useless memory + } + else + { + printf("Wrong value of confparams_dec->szMode in the double compressed bytes.\n"); + status = SZ_MERR; + return status; + } + } + else + szTmpBytes = cmpBytes; + //TODO: convert szTmpBytes to data array. 
+ TightDataPointStorageI* tdps; + int errBoundMode = new_TightDataPointStorageI_fromFlatBytes(&tdps, szTmpBytes, tmpSize); + //writeByteData(tdps->typeArray, tdps->typeArray_size, "decompress-typebytes.tbt"); + int dim = computeDimension(r5,r4,r3,r2,r1); + int intSize = sizeof(uint32_t); + if(tdps->isLossless) + { + *newData = (uint32_t*)malloc(intSize*dataLength); + if(sysEndianType==BIG_ENDIAN_SYSTEM) + { + memcpy(*newData, szTmpBytes+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE, dataLength*intSize); + } + else + { + unsigned char* p = szTmpBytes+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE; + for(i=0;iszMode!=SZ_BEST_SPEED && cmpSize!=4+sizeof(uint32_t)+exe_params->SZ_SIZE_TYPE+MetaDataByteLength) + free(szTmpBytes); + return status; +} + + +void decompressDataSeries_uint32_1D(uint32_t** data, size_t dataSeriesLength, TightDataPointStorageI* tdps) +{ + updateQuantizationInfo(tdps->intervals); + size_t i; + double interval = tdps->realPrecision*2; + + *data = (uint32_t*)malloc(sizeof(uint32_t)*dataSeriesLength); + + int* type = (int*)malloc(dataSeriesLength*sizeof(int)); + + HuffmanTree* huffmanTree = createHuffmanTree(tdps->stateNum); + decode_withTree(huffmanTree, tdps->typeArray, dataSeriesLength, type); + SZ_ReleaseHuffman(huffmanTree); + + //sdi:Debug + //writeUShortData(type, dataSeriesLength, "decompressStateBytes.sb"); + + uint32_t minValue, exactData, predValue; + + minValue = tdps->minValue; + + int exactByteSize = tdps->exactByteSize; + unsigned char* exactDataBytePointer = tdps->exactDataBytes; + + unsigned char curBytes[8] = {0,0,0,0,0,0,0,0}; + + int rightShiftBits = computeRightShiftBits(exactByteSize, SZ_UINT32); + if(rightShiftBits<0) + { + printf("Error: rightShift < 0!\n"); + exit(0); + } + int type_; + for (i = 0; i < dataSeriesLength; i++) { + type_ = type[i]; + switch (type_) { + case 0: + // recover the exact data + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToUInt32_bigEndian(curBytes); + exactData = 
(uint32_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[i] = exactData + minValue; + break; + default: + //predValue = 2 * (*data)[i-1] - (*data)[i-2]; + predValue = (*data)[i-1]; + (*data)[i] = predValue + (type_-exe_params->intvRadius)*interval; + break; + } + //printf("%.30G\n",(*data)[i]); + } + free(type); + return; +} + +void decompressDataSeries_uint32_2D(uint32_t** data, size_t r1, size_t r2, TightDataPointStorageI* tdps) +{ + updateQuantizationInfo(tdps->intervals); + //printf("tdps->intervals=%d, exe_params->intvRadius=%d\n", tdps->intervals, exe_params->intvRadius); + + size_t dataSeriesLength = r1*r2; + // printf ("%d %d\n", r1, r2); + + double realPrecision = tdps->realPrecision; + + *data = (uint32_t*)malloc(sizeof(uint32_t)*dataSeriesLength); + + int* type = (int*)malloc(dataSeriesLength*sizeof(int)); + + HuffmanTree* huffmanTree = createHuffmanTree(tdps->stateNum); + decode_withTree(huffmanTree, tdps->typeArray, dataSeriesLength, type); + SZ_ReleaseHuffman(huffmanTree); + + uint32_t minValue, exactData; + + minValue = tdps->minValue; + + int exactByteSize = tdps->exactByteSize; + unsigned char* exactDataBytePointer = tdps->exactDataBytes; + + unsigned char curBytes[8] = {0,0,0,0,0,0,0,0}; + + int rightShiftBits = computeRightShiftBits(exactByteSize, SZ_UINT32); + + uint32_t pred1D, pred2D; + size_t ii, jj; + + /* Process Row-0, data 0 */ + + // recover the exact data + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToUInt32_bigEndian(curBytes); + exactData = (uint32_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[0] = exactData + minValue; + + /* Process Row-0, data 1 */ + int type_ = type[1]; + if (type_ != 0) + { + pred1D = (*data)[0]; + (*data)[1] = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + // recover the exact data + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = 
bytesToUInt32_bigEndian(curBytes); + exactData = (uint32_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[1] = exactData + minValue; + } + + /* Process Row-0, data 2 --> data r2-1 */ + for (jj = 2; jj < r2; jj++) + { + type_ = type[jj]; + if (type_ != 0) + { + pred1D = 2*(*data)[jj-1] - (*data)[jj-2]; + (*data)[jj] = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + // recover the exact data + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToUInt32_bigEndian(curBytes); + exactData = (uint32_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[jj] = exactData + minValue; + } + } + + size_t index; + /* Process Row-1 --> Row-r1-1 */ + for (ii = 1; ii < r1; ii++) + { + /* Process row-ii data 0 */ + index = ii*r2; + + type_ = type[index]; + if (type_ != 0) + { + pred1D = (*data)[index-r2]; + (*data)[index] = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + // recover the exact data + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToUInt32_bigEndian(curBytes); + exactData = (uint32_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + + /* Process row-ii data 1 --> r2-1*/ + for (jj = 1; jj < r2; jj++) + { + index = ii*r2+jj; + pred2D = (*data)[index-1] + (*data)[index-r2] - (*data)[index-r2-1]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred2D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + // recover the exact data + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToUInt32_bigEndian(curBytes); + exactData = (uint32_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + } + } + + free(type); + return; +} + +void decompressDataSeries_uint32_3D(uint32_t** data, size_t r1, size_t r2, size_t r3, 
TightDataPointStorageI* tdps) +{ + updateQuantizationInfo(tdps->intervals); + size_t dataSeriesLength = r1*r2*r3; + size_t r23 = r2*r3; +// printf ("%d %d %d\n", r1, r2, r3); + double realPrecision = tdps->realPrecision; + + *data = (uint32_t*)malloc(sizeof(uint32_t)*dataSeriesLength); + int* type = (int*)malloc(dataSeriesLength*sizeof(int)); + + HuffmanTree* huffmanTree = createHuffmanTree(tdps->stateNum); + decode_withTree(huffmanTree, tdps->typeArray, dataSeriesLength, type); + SZ_ReleaseHuffman(huffmanTree); + + uint32_t minValue, exactData; + + minValue = tdps->minValue; + + int exactByteSize = tdps->exactByteSize; + unsigned char* exactDataBytePointer = tdps->exactDataBytes; + + unsigned char curBytes[8] = {0,0,0,0,0,0,0,0}; + + int rightShiftBits = computeRightShiftBits(exactByteSize, SZ_UINT32); + + uint32_t pred1D, pred2D, pred3D; + size_t ii, jj, kk; + + /////////////////////////// Process layer-0 /////////////////////////// + /* Process Row-0 data 0*/ + + // recover the exact data + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToUInt32_bigEndian(curBytes); + exactData = (uint32_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[0] = exactData + minValue; + + /* Process Row-0, data 1 */ + pred1D = (*data)[0]; + + int type_ = type[1]; + if (type_ != 0) + { + (*data)[1] = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToUInt32_bigEndian(curBytes); + exactData = (uint32_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[1] = exactData + minValue; + } + /* Process Row-0, data 2 --> data r3-1 */ + for (jj = 2; jj < r3; jj++) + { + pred1D = 2*(*data)[jj-1] - (*data)[jj-2]; + + type_ = type[jj]; + if (type_ != 0) + { + (*data)[jj] = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + memcpy(curBytes, exactDataBytePointer, exactByteSize); + 
exactData = bytesToUInt32_bigEndian(curBytes); + exactData = (uint32_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[jj] = exactData + minValue; + } + } + + size_t index; + /* Process Row-1 --> Row-r2-1 */ + for (ii = 1; ii < r2; ii++) + { + /* Process row-ii data 0 */ + index = ii*r3; + pred1D = (*data)[index-r3]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToUInt32_bigEndian(curBytes); + exactData = (uint32_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + + /* Process row-ii data 1 --> r3-1*/ + for (jj = 1; jj < r3; jj++) + { + index = ii*r3+jj; + pred2D = (*data)[index-1] + (*data)[index-r3] - (*data)[index-r3-1]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred2D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToUInt32_bigEndian(curBytes); + exactData = (uint32_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + } + } + + /////////////////////////// Process layer-1 --> layer-r1-1 /////////////////////////// + + for (kk = 1; kk < r1; kk++) + { + /* Process Row-0 data 0*/ + index = kk*r23; + pred1D = (*data)[index-r23]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToUInt32_bigEndian(curBytes); + exactData = (uint32_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + + /* Process Row-0 data 1 --> data r3-1 */ + for (jj = 1; jj < r3; jj++) + { + index = kk*r23+jj; + 
pred2D = (*data)[index-1] + (*data)[index-r23] - (*data)[index-r23-1]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred2D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToUInt32_bigEndian(curBytes); + exactData = (uint32_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + } + + /* Process Row-1 --> Row-r2-1 */ + for (ii = 1; ii < r2; ii++) + { + /* Process Row-i data 0 */ + index = kk*r23 + ii*r3; + pred2D = (*data)[index-r3] + (*data)[index-r23] - (*data)[index-r23-r3]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred2D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToUInt32_bigEndian(curBytes); + exactData = (uint32_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + + /* Process Row-i data 1 --> data r3-1 */ + for (jj = 1; jj < r3; jj++) + { + index = kk*r23 + ii*r3 + jj; + pred3D = (*data)[index-1] + (*data)[index-r3] + (*data)[index-r23] + - (*data)[index-r3-1] - (*data)[index-r23-r3] - (*data)[index-r23-1] + (*data)[index-r23-r3-1]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred3D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToUInt32_bigEndian(curBytes); + exactData = (uint32_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + } + } + } + + free(type); + return; +} + + +void decompressDataSeries_uint32_4D(uint32_t** data, size_t r1, size_t r2, size_t r3, size_t r4, TightDataPointStorageI* tdps) +{ + updateQuantizationInfo(tdps->intervals); + size_t dataSeriesLength = r1*r2*r3*r4; + size_t r234 = 
r2*r3*r4; + size_t r34 = r3*r4; + + double realPrecision = tdps->realPrecision; + + *data = (uint32_t*)malloc(sizeof(uint32_t)*dataSeriesLength); + int* type = (int*)malloc(dataSeriesLength*sizeof(int)); + + HuffmanTree* huffmanTree = createHuffmanTree(tdps->stateNum); + decode_withTree(huffmanTree, tdps->typeArray, dataSeriesLength, type); + SZ_ReleaseHuffman(huffmanTree); + + uint32_t minValue, exactData; + + minValue = tdps->minValue; + + int exactByteSize = tdps->exactByteSize; + unsigned char* exactDataBytePointer = tdps->exactDataBytes; + + unsigned char curBytes[8] = {0,0,0,0,0,0,0,0}; + + int rightShiftBits = computeRightShiftBits(exactByteSize, SZ_UINT32); + + int type_; + + uint32_t pred1D, pred2D, pred3D; + size_t ii, jj, kk, ll; + size_t index; + + for (ll = 0; ll < r1; ll++) + { + /////////////////////////// Process layer-0 /////////////////////////// + /* Process Row-0 data 0*/ + index = ll*r234; + + // recover the exact data + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToUInt32_bigEndian(curBytes); + exactData = (uint32_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + + /* Process Row-0, data 1 */ + index = ll*r234+1; + + pred1D = (*data)[index-1]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToUInt32_bigEndian(curBytes); + exactData = (uint32_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + + /* Process Row-0, data 2 --> data r4-1 */ + for (jj = 2; jj < r4; jj++) + { + index = ll*r234+jj; + + pred1D = 2*(*data)[index-1] - (*data)[index-2]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + memcpy(curBytes, 
exactDataBytePointer, exactByteSize); + exactData = bytesToUInt32_bigEndian(curBytes); + exactData = (uint32_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + } + + /* Process Row-1 --> Row-r3-1 */ + for (ii = 1; ii < r3; ii++) + { + /* Process row-ii data 0 */ + index = ll*r234+ii*r4; + + pred1D = (*data)[index-r4]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToUInt32_bigEndian(curBytes); + exactData = (uint32_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + + /* Process row-ii data 1 --> r4-1*/ + for (jj = 1; jj < r4; jj++) + { + index = ll*r234+ii*r4+jj; + + pred2D = (*data)[index-1] + (*data)[index-r4] - (*data)[index-r4-1]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred2D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToUInt32_bigEndian(curBytes); + exactData = (uint32_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + } + } + + /////////////////////////// Process layer-1 --> layer-r2-1 /////////////////////////// + + for (kk = 1; kk < r2; kk++) + { + /* Process Row-0 data 0*/ + index = ll*r234+kk*r34; + + pred1D = (*data)[index-r34]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToUInt32_bigEndian(curBytes); + exactData = (uint32_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + + /* Process Row-0 data 1 --> data r4-1 */ + 
for (jj = 1; jj < r4; jj++) + { + index = ll*r234+kk*r34+jj; + + pred2D = (*data)[index-1] + (*data)[index-r34] - (*data)[index-r34-1]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred2D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToUInt32_bigEndian(curBytes); + exactData = (uint32_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + } + + /* Process Row-1 --> Row-r3-1 */ + for (ii = 1; ii < r3; ii++) + { + /* Process Row-i data 0 */ + index = ll*r234+kk*r34+ii*r4; + + pred2D = (*data)[index-r4] + (*data)[index-r34] - (*data)[index-r34-r4]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred2D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToUInt32_bigEndian(curBytes); + exactData = (uint32_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + + /* Process Row-i data 1 --> data r4-1 */ + for (jj = 1; jj < r4; jj++) + { + index = ll*r234+kk*r34+ii*r4+jj; + + pred3D = (*data)[index-1] + (*data)[index-r4] + (*data)[index-r34] + - (*data)[index-r4-1] - (*data)[index-r34-r4] - (*data)[index-r34-1] + (*data)[index-r34-r4-1]; + + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred3D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToUInt32_bigEndian(curBytes); + exactData = (uint32_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + } + } + } + } + + free(type); + return; +} + +void getSnapshotData_uint32_1D(uint32_t** data, size_t dataSeriesLength, TightDataPointStorageI* tdps, int errBoundMode) +{ + size_t i; + + if 
(tdps->allSameData) { + uint32_t value = bytesToUInt32_bigEndian(tdps->exactDataBytes); + *data = (uint32_t*)malloc(sizeof(uint32_t)*dataSeriesLength); + for (i = 0; i < dataSeriesLength; i++) + (*data)[i] = value; + } else { + decompressDataSeries_uint32_1D(data, dataSeriesLength, tdps); + } +} + +void getSnapshotData_uint32_2D(uint32_t** data, size_t r1, size_t r2, TightDataPointStorageI* tdps, int errBoundMode) +{ + size_t i; + size_t dataSeriesLength = r1*r2; + if (tdps->allSameData) { + uint32_t value = bytesToUInt32_bigEndian(tdps->exactDataBytes); + *data = (uint32_t*)malloc(sizeof(uint32_t)*dataSeriesLength); + for (i = 0; i < dataSeriesLength; i++) + (*data)[i] = value; + } else { + decompressDataSeries_uint32_2D(data, r1, r2, tdps); + } +} + +void getSnapshotData_uint32_3D(uint32_t** data, size_t r1, size_t r2, size_t r3, TightDataPointStorageI* tdps, int errBoundMode) +{ + size_t i; + size_t dataSeriesLength = r1*r2*r3; + if (tdps->allSameData) { + uint32_t value = bytesToUInt32_bigEndian(tdps->exactDataBytes); + *data = (uint32_t*)malloc(sizeof(uint32_t)*dataSeriesLength); + for (i = 0; i < dataSeriesLength; i++) + (*data)[i] = value; + } else { + decompressDataSeries_uint32_3D(data, r1, r2, r3, tdps); + } +} + +void getSnapshotData_uint32_4D(uint32_t** data, size_t r1, size_t r2, size_t r3, size_t r4, TightDataPointStorageI* tdps, int errBoundMode) +{ + size_t i; + size_t dataSeriesLength = r1*r2*r3*r4; + if (tdps->allSameData) { + uint32_t value = bytesToUInt32_bigEndian(tdps->exactDataBytes); + *data = (uint32_t*)malloc(sizeof(uint32_t)*dataSeriesLength); + for (i = 0; i < dataSeriesLength; i++) + (*data)[i] = value; + } else { + decompressDataSeries_uint32_4D(data, r1, r2, r3, r4, tdps); + } +} diff --git a/deps/SZ/sz/src/szd_uint64.c b/deps/SZ/sz/src/szd_uint64.c new file mode 100644 index 0000000000000000000000000000000000000000..84d57168c4f7eed0bd49bf60cf8b8a3d19271b27 --- /dev/null +++ b/deps/SZ/sz/src/szd_uint64.c @@ -0,0 +1,789 @@ +/** + * 
@file szd_uint64.c + * @author Sheng Di + * @date Aug, 2017 + * @brief + * (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. + */ + +#include +#include +#include +#include +#include "TightDataPointStorageI.h" +#include "sz.h" +#include "szd_uint64.h" +#include "Huffman.h" +#include "utility.h" + +/** + * + * + * @return status SUCCESSFUL (SZ_SCES) or not (other error codes) f + * */ +int SZ_decompress_args_uint64(uint64_t** newData, size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, unsigned char* cmpBytes, size_t cmpSize) +{ + int status = SZ_SCES; + size_t dataLength = computeDataLength(r5,r4,r3,r2,r1); + + //unsigned char* tmpBytes; + size_t targetUncompressSize = dataLength <<2; //i.e., *4 + //tmpSize must be "much" smaller than dataLength + size_t i, tmpSize = 3+MetaDataByteLength+1+sizeof(uint64_t)+exe_params->SZ_SIZE_TYPE; + unsigned char* szTmpBytes; + + if(cmpSize!=4+8+4+MetaDataByteLength && cmpSize!=4+8+8+MetaDataByteLength) + { + confparams_dec->losslessCompressor = is_lossless_compressed_data(cmpBytes, cmpSize); + if(confparams_dec->losslessCompressor!=-1) + confparams_dec->szMode = SZ_BEST_COMPRESSION; + else + confparams_dec->szMode = SZ_BEST_SPEED; + if(confparams_dec->szMode==SZ_BEST_SPEED) + { + tmpSize = cmpSize; + szTmpBytes = cmpBytes; + } + else if(confparams_dec->szMode==SZ_BEST_COMPRESSION || confparams_dec->szMode==SZ_DEFAULT_COMPRESSION) + { + if(targetUncompressSizelosslessCompressor, cmpBytes, (unsigned long)cmpSize, &szTmpBytes, (unsigned long)targetUncompressSize+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE);// (unsigned long)targetUncompressSize+8: consider the total length under lossless compression mode is actually 3+4+1+targetUncompressSize + //szTmpBytes = (unsigned char*)malloc(sizeof(unsigned char)*tmpSize); + //memcpy(szTmpBytes, tmpBytes, tmpSize); + //free(tmpBytes); //release useless memory + } + else + { + printf("Wrong value of 
confparams_dec->szMode in the double compressed bytes.\n"); + status = SZ_MERR; + return status; + } + } + else + szTmpBytes = cmpBytes; + //TODO: convert szTmpBytes to data array. + TightDataPointStorageI* tdps; + int errBoundMode = new_TightDataPointStorageI_fromFlatBytes(&tdps, szTmpBytes, tmpSize); + //writeByteData(tdps->typeArray, tdps->typeArray_size, "decompress-typebytes.tbt"); + int dim = computeDimension(r5,r4,r3,r2,r1); + int intSize = sizeof(uint64_t); + if(tdps->isLossless) + { + *newData = (uint64_t*)malloc(intSize*dataLength); + if(sysEndianType==BIG_ENDIAN_SYSTEM) + { + memcpy(*newData, szTmpBytes+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE, dataLength*intSize); + } + else + { + unsigned char* p = szTmpBytes+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE; + for(i=0;iszMode!=SZ_BEST_SPEED && cmpSize!=4+sizeof(uint64_t)+exe_params->SZ_SIZE_TYPE+MetaDataByteLength) + free(szTmpBytes); + return status; +} + + +void decompressDataSeries_uint64_1D(uint64_t** data, size_t dataSeriesLength, TightDataPointStorageI* tdps) +{ + updateQuantizationInfo(tdps->intervals); + size_t i; + double interval = tdps->realPrecision*2; + + *data = (uint64_t*)malloc(sizeof(uint64_t)*dataSeriesLength); + + int* type = (int*)malloc(dataSeriesLength*sizeof(int)); + + HuffmanTree* huffmanTree = createHuffmanTree(tdps->stateNum); + decode_withTree(huffmanTree, tdps->typeArray, dataSeriesLength, type); + SZ_ReleaseHuffman(huffmanTree); + + //sdi:Debug + //writeUShortData(type, dataSeriesLength, "decompressStateBytes.sb"); + + uint64_t minValue, exactData, predValue; + + minValue = tdps->minValue; + + int exactByteSize = tdps->exactByteSize; + unsigned char* exactDataBytePointer = tdps->exactDataBytes; + + unsigned char curBytes[8] = {0,0,0,0,0,0,0,0}; + + int rightShiftBits = computeRightShiftBits(exactByteSize, SZ_UINT64); + if(rightShiftBits<0) + { + printf("Error: rightShift < 0!\n"); + exit(0); + } + int type_; + for (i = 0; i < dataSeriesLength; i++) { + type_ = type[i]; + 
switch (type_) { + case 0: + // recover the exact data + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToUInt64_bigEndian(curBytes); + exactData = (uint64_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[i] = exactData + minValue; + break; + default: + //predValue = 2 * (*data)[i-1] - (*data)[i-2]; + predValue = (*data)[i-1]; + (*data)[i] = predValue + (type_-exe_params->intvRadius)*interval; + break; + } + //printf("%.30G\n",(*data)[i]); + } + free(type); + return; +} + +void decompressDataSeries_uint64_2D(uint64_t** data, size_t r1, size_t r2, TightDataPointStorageI* tdps) +{ + updateQuantizationInfo(tdps->intervals); + //printf("tdps->intervals=%d, exe_params->intvRadius=%d\n", tdps->intervals, exe_params->intvRadius); + + size_t dataSeriesLength = r1*r2; + // printf ("%d %d\n", r1, r2); + + double realPrecision = tdps->realPrecision; + + *data = (uint64_t*)malloc(sizeof(uint64_t)*dataSeriesLength); + + int* type = (int*)malloc(dataSeriesLength*sizeof(int)); + + HuffmanTree* huffmanTree = createHuffmanTree(tdps->stateNum); + decode_withTree(huffmanTree, tdps->typeArray, dataSeriesLength, type); + SZ_ReleaseHuffman(huffmanTree); + + uint64_t minValue, exactData; + + minValue = tdps->minValue; + + int exactByteSize = tdps->exactByteSize; + unsigned char* exactDataBytePointer = tdps->exactDataBytes; + + unsigned char curBytes[8] = {0,0,0,0,0,0,0,0}; + + int rightShiftBits = computeRightShiftBits(exactByteSize, SZ_UINT64); + + uint64_t pred1D, pred2D; + size_t ii, jj; + + /* Process Row-0, data 0 */ + + // recover the exact data + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToUInt64_bigEndian(curBytes); + exactData = (uint64_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[0] = exactData + minValue; + + /* Process Row-0, data 1 */ + int type_ = type[1]; + if (type_ != 0) + { + pred1D = (*data)[0]; + (*data)[1] = pred1D + 2 * (type_ - 
exe_params->intvRadius) * realPrecision; + } + else + { + // recover the exact data + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToUInt64_bigEndian(curBytes); + exactData = (uint64_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[1] = exactData + minValue; + } + + /* Process Row-0, data 2 --> data r2-1 */ + for (jj = 2; jj < r2; jj++) + { + type_ = type[jj]; + if (type_ != 0) + { + pred1D = 2*(*data)[jj-1] - (*data)[jj-2]; + (*data)[jj] = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + // recover the exact data + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToUInt64_bigEndian(curBytes); + exactData = (uint64_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[jj] = exactData + minValue; + } + } + + size_t index; + /* Process Row-1 --> Row-r1-1 */ + for (ii = 1; ii < r1; ii++) + { + /* Process row-ii data 0 */ + index = ii*r2; + + type_ = type[index]; + if (type_ != 0) + { + pred1D = (*data)[index-r2]; + (*data)[index] = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + // recover the exact data + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToUInt64_bigEndian(curBytes); + exactData = (uint64_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + + /* Process row-ii data 1 --> r2-1*/ + for (jj = 1; jj < r2; jj++) + { + index = ii*r2+jj; + pred2D = (*data)[index-1] + (*data)[index-r2] - (*data)[index-r2-1]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred2D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + // recover the exact data + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToUInt64_bigEndian(curBytes); + exactData = (uint64_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = 
exactData + minValue; + } + } + } + + free(type); + return; +} + +void decompressDataSeries_uint64_3D(uint64_t** data, size_t r1, size_t r2, size_t r3, TightDataPointStorageI* tdps) +{ + updateQuantizationInfo(tdps->intervals); + size_t dataSeriesLength = r1*r2*r3; + size_t r23 = r2*r3; +// printf ("%d %d %d\n", r1, r2, r3); + double realPrecision = tdps->realPrecision; + + *data = (uint64_t*)malloc(sizeof(uint64_t)*dataSeriesLength); + int* type = (int*)malloc(dataSeriesLength*sizeof(int)); + + HuffmanTree* huffmanTree = createHuffmanTree(tdps->stateNum); + decode_withTree(huffmanTree, tdps->typeArray, dataSeriesLength, type); + SZ_ReleaseHuffman(huffmanTree); + + uint64_t minValue, exactData; + + minValue = tdps->minValue; + + int exactByteSize = tdps->exactByteSize; + unsigned char* exactDataBytePointer = tdps->exactDataBytes; + + unsigned char curBytes[8] = {0,0,0,0,0,0,0,0}; + + int rightShiftBits = computeRightShiftBits(exactByteSize, SZ_UINT64); + + uint64_t pred1D, pred2D, pred3D; + size_t ii, jj, kk; + + /////////////////////////// Process layer-0 /////////////////////////// + /* Process Row-0 data 0*/ + + // recover the exact data + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToUInt64_bigEndian(curBytes); + exactData = (uint64_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[0] = exactData + minValue; + + /* Process Row-0, data 1 */ + pred1D = (*data)[0]; + + int type_ = type[1]; + if (type_ != 0) + { + (*data)[1] = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToUInt64_bigEndian(curBytes); + exactData = (uint64_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[1] = exactData + minValue; + } + /* Process Row-0, data 2 --> data r3-1 */ + for (jj = 2; jj < r3; jj++) + { + pred1D = 2*(*data)[jj-1] - (*data)[jj-2]; + + type_ = type[jj]; + if (type_ != 0) + { + 
(*data)[jj] = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToUInt64_bigEndian(curBytes); + exactData = (uint64_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[jj] = exactData + minValue; + } + } + + size_t index; + /* Process Row-1 --> Row-r2-1 */ + for (ii = 1; ii < r2; ii++) + { + /* Process row-ii data 0 */ + index = ii*r3; + pred1D = (*data)[index-r3]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToUInt64_bigEndian(curBytes); + exactData = (uint64_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + + /* Process row-ii data 1 --> r3-1*/ + for (jj = 1; jj < r3; jj++) + { + index = ii*r3+jj; + pred2D = (*data)[index-1] + (*data)[index-r3] - (*data)[index-r3-1]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred2D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToUInt64_bigEndian(curBytes); + exactData = (uint64_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + } + } + + /////////////////////////// Process layer-1 --> layer-r1-1 /////////////////////////// + + for (kk = 1; kk < r1; kk++) + { + /* Process Row-0 data 0*/ + index = kk*r23; + pred1D = (*data)[index-r23]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToUInt64_bigEndian(curBytes); + exactData = (uint64_t)exactData >> rightShiftBits; + exactDataBytePointer += 
exactByteSize; + (*data)[index] = exactData + minValue; + } + + /* Process Row-0 data 1 --> data r3-1 */ + for (jj = 1; jj < r3; jj++) + { + index = kk*r23+jj; + pred2D = (*data)[index-1] + (*data)[index-r23] - (*data)[index-r23-1]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred2D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToUInt64_bigEndian(curBytes); + exactData = (uint64_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + } + + /* Process Row-1 --> Row-r2-1 */ + for (ii = 1; ii < r2; ii++) + { + /* Process Row-i data 0 */ + index = kk*r23 + ii*r3; + pred2D = (*data)[index-r3] + (*data)[index-r23] - (*data)[index-r23-r3]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred2D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToUInt64_bigEndian(curBytes); + exactData = (uint64_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + + /* Process Row-i data 1 --> data r3-1 */ + for (jj = 1; jj < r3; jj++) + { + index = kk*r23 + ii*r3 + jj; + pred3D = (*data)[index-1] + (*data)[index-r3] + (*data)[index-r23] + - (*data)[index-r3-1] - (*data)[index-r23-r3] - (*data)[index-r23-1] + (*data)[index-r23-r3-1]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred3D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToUInt64_bigEndian(curBytes); + exactData = (uint64_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + } + } + } + + free(type); + return; +} + + +void decompressDataSeries_uint64_4D(uint64_t** data, size_t r1, size_t 
r2, size_t r3, size_t r4, TightDataPointStorageI* tdps) +{ + updateQuantizationInfo(tdps->intervals); + size_t dataSeriesLength = r1*r2*r3*r4; + size_t r234 = r2*r3*r4; + size_t r34 = r3*r4; + + double realPrecision = tdps->realPrecision; + + *data = (uint64_t*)malloc(sizeof(uint64_t)*dataSeriesLength); + int* type = (int*)malloc(dataSeriesLength*sizeof(int)); + + HuffmanTree* huffmanTree = createHuffmanTree(tdps->stateNum); + decode_withTree(huffmanTree, tdps->typeArray, dataSeriesLength, type); + SZ_ReleaseHuffman(huffmanTree); + + uint64_t minValue, exactData; + + minValue = tdps->minValue; + + int exactByteSize = tdps->exactByteSize; + unsigned char* exactDataBytePointer = tdps->exactDataBytes; + + unsigned char curBytes[8] = {0,0,0,0,0,0,0,0}; + + int rightShiftBits = computeRightShiftBits(exactByteSize, SZ_UINT64); + + int type_; + + uint64_t pred1D, pred2D, pred3D; + size_t ii, jj, kk, ll; + size_t index; + + for (ll = 0; ll < r1; ll++) + { + /////////////////////////// Process layer-0 /////////////////////////// + /* Process Row-0 data 0*/ + index = ll*r234; + + // recover the exact data + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToUInt64_bigEndian(curBytes); + exactData = (uint64_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + + /* Process Row-0, data 1 */ + index = ll*r234+1; + + pred1D = (*data)[index-1]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToUInt64_bigEndian(curBytes); + exactData = (uint64_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + + /* Process Row-0, data 2 --> data r4-1 */ + for (jj = 2; jj < r4; jj++) + { + index = ll*r234+jj; + + pred1D = 2*(*data)[index-1] - (*data)[index-2]; + + type_ = 
type[index]; + if (type_ != 0) + { + (*data)[index] = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToUInt64_bigEndian(curBytes); + exactData = (uint64_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + } + + /* Process Row-1 --> Row-r3-1 */ + for (ii = 1; ii < r3; ii++) + { + /* Process row-ii data 0 */ + index = ll*r234+ii*r4; + + pred1D = (*data)[index-r4]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToUInt64_bigEndian(curBytes); + exactData = (uint64_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + + /* Process row-ii data 1 --> r4-1*/ + for (jj = 1; jj < r4; jj++) + { + index = ll*r234+ii*r4+jj; + + pred2D = (*data)[index-1] + (*data)[index-r4] - (*data)[index-r4-1]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred2D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToUInt64_bigEndian(curBytes); + exactData = (uint64_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + } + } + + /////////////////////////// Process layer-1 --> layer-r2-1 /////////////////////////// + + for (kk = 1; kk < r2; kk++) + { + /* Process Row-0 data 0*/ + index = ll*r234+kk*r34; + + pred1D = (*data)[index-r34]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToUInt64_bigEndian(curBytes); + exactData = (uint64_t)exactData 
>> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + + /* Process Row-0 data 1 --> data r4-1 */ + for (jj = 1; jj < r4; jj++) + { + index = ll*r234+kk*r34+jj; + + pred2D = (*data)[index-1] + (*data)[index-r34] - (*data)[index-r34-1]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred2D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToUInt64_bigEndian(curBytes); + exactData = (uint64_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + } + + /* Process Row-1 --> Row-r3-1 */ + for (ii = 1; ii < r3; ii++) + { + /* Process Row-i data 0 */ + index = ll*r234+kk*r34+ii*r4; + + pred2D = (*data)[index-r4] + (*data)[index-r34] - (*data)[index-r34-r4]; + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred2D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToUInt64_bigEndian(curBytes); + exactData = (uint64_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + + /* Process Row-i data 1 --> data r4-1 */ + for (jj = 1; jj < r4; jj++) + { + index = ll*r234+kk*r34+ii*r4+jj; + + pred3D = (*data)[index-1] + (*data)[index-r4] + (*data)[index-r34] + - (*data)[index-r4-1] - (*data)[index-r34-r4] - (*data)[index-r34-1] + (*data)[index-r34-r4-1]; + + + type_ = type[index]; + if (type_ != 0) + { + (*data)[index] = pred3D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else + { + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = bytesToUInt64_bigEndian(curBytes); + exactData = (uint64_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + } + } + } + } + + free(type); + return; +} + 
+void getSnapshotData_uint64_1D(uint64_t** data, size_t dataSeriesLength, TightDataPointStorageI* tdps, int errBoundMode) +{ + size_t i; + + if (tdps->allSameData) { + uint64_t value = bytesToUInt64_bigEndian(tdps->exactDataBytes); + *data = (uint64_t*)malloc(sizeof(uint64_t)*dataSeriesLength); + for (i = 0; i < dataSeriesLength; i++) + (*data)[i] = value; + } else { + decompressDataSeries_uint64_1D(data, dataSeriesLength, tdps); + } +} + +void getSnapshotData_uint64_2D(uint64_t** data, size_t r1, size_t r2, TightDataPointStorageI* tdps, int errBoundMode) +{ + size_t i; + size_t dataSeriesLength = r1*r2; + if (tdps->allSameData) { + uint64_t value = bytesToUInt64_bigEndian(tdps->exactDataBytes); + *data = (uint64_t*)malloc(sizeof(uint64_t)*dataSeriesLength); + for (i = 0; i < dataSeriesLength; i++) + (*data)[i] = value; + } else { + decompressDataSeries_uint64_2D(data, r1, r2, tdps); + } +} + +void getSnapshotData_uint64_3D(uint64_t** data, size_t r1, size_t r2, size_t r3, TightDataPointStorageI* tdps, int errBoundMode) +{ + size_t i; + size_t dataSeriesLength = r1*r2*r3; + if (tdps->allSameData) { + uint64_t value = bytesToUInt64_bigEndian(tdps->exactDataBytes); + *data = (uint64_t*)malloc(sizeof(uint64_t)*dataSeriesLength); + for (i = 0; i < dataSeriesLength; i++) + (*data)[i] = value; + } else { + decompressDataSeries_uint64_3D(data, r1, r2, r3, tdps); + } +} + +void getSnapshotData_uint64_4D(uint64_t** data, size_t r1, size_t r2, size_t r3, size_t r4, TightDataPointStorageI* tdps, int errBoundMode) +{ + size_t i; + size_t dataSeriesLength = r1*r2*r3*r4; + if (tdps->allSameData) { + uint64_t value = bytesToUInt64_bigEndian(tdps->exactDataBytes); + *data = (uint64_t*)malloc(sizeof(uint64_t)*dataSeriesLength); + for (i = 0; i < dataSeriesLength; i++) + (*data)[i] = value; + } else { + decompressDataSeries_uint64_4D(data, r1, r2, r3, r4, tdps); + } +} diff --git a/deps/SZ/sz/src/szd_uint8.c b/deps/SZ/sz/src/szd_uint8.c new file mode 100644 index 
0000000000000000000000000000000000000000..8b992bc2d4d9400325936648479e2cf31151c5f4 --- /dev/null +++ b/deps/SZ/sz/src/szd_uint8.c @@ -0,0 +1,914 @@ +/** + * @file szd_uint8.c + * @author Sheng Di + * @date Aug, 2017 + * @brief + * (C) 2017 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. + */ + +#include +#include +#include +#include +#include "TightDataPointStorageI.h" +#include "sz.h" +#include "szd_uint8.h" +#include "Huffman.h" +#include "utility.h" + +/** + * + * + * @return status SUCCESSFUL (SZ_SCES) or not (other error codes) f + * */ +int SZ_decompress_args_uint8(uint8_t** newData, size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, unsigned char* cmpBytes, size_t cmpSize) +{ + int status = SZ_SCES; + size_t dataLength = computeDataLength(r5,r4,r3,r2,r1); + + //unsigned char* tmpBytes; + size_t targetUncompressSize = dataLength <<2; //i.e., *4 + //tmpSize must be "much" smaller than dataLength + size_t i, tmpSize = 3+MetaDataByteLength+1+sizeof(uint8_t)+exe_params->SZ_SIZE_TYPE; + unsigned char* szTmpBytes; + + if(cmpSize!=4+1+4+MetaDataByteLength && cmpSize!=4+1+8+MetaDataByteLength) + { + confparams_dec->losslessCompressor = is_lossless_compressed_data(cmpBytes, cmpSize); + if(confparams_dec->losslessCompressor!=-1) + confparams_dec->szMode = SZ_BEST_COMPRESSION; + else + confparams_dec->szMode = SZ_BEST_SPEED; + if(confparams_dec->szMode==SZ_BEST_SPEED) + { + tmpSize = cmpSize; + szTmpBytes = cmpBytes; + } + else if(confparams_dec->szMode==SZ_BEST_COMPRESSION || confparams_dec->szMode==SZ_DEFAULT_COMPRESSION) + { + if(targetUncompressSizelosslessCompressor, cmpBytes, (unsigned long)cmpSize, &szTmpBytes, (unsigned long)targetUncompressSize+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE);// (unsigned long)targetUncompressSize+8: consider the total length under lossless compression mode is actually 3+4+1+targetUncompressSize + //szTmpBytes = (unsigned char*)malloc(sizeof(unsigned 
char)*tmpSize); + //memcpy(szTmpBytes, tmpBytes, tmpSize); + //free(tmpBytes); //release useless memory + } + else + { + printf("Wrong value of confparams_dec->szMode in the double compressed bytes.\n"); + status = SZ_MERR; + return status; + } + } + else + szTmpBytes = cmpBytes; + //TODO: convert szTmpBytes to data array. + TightDataPointStorageI* tdps; + int errBoundMode = new_TightDataPointStorageI_fromFlatBytes(&tdps, szTmpBytes, tmpSize); + //writeByteData(tdps->typeArray, tdps->typeArray_size, "decompress-typebytes.tbt"); + int dim = computeDimension(r5,r4,r3,r2,r1); + int intSize = sizeof(uint8_t); + if(tdps->isLossless) + { + *newData = (uint8_t*)malloc(intSize*dataLength); + if(sysEndianType==BIG_ENDIAN_SYSTEM) + { + memcpy(*newData, szTmpBytes+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE, dataLength*intSize); + } + else + { + unsigned char* p = szTmpBytes+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE; + for(i=0;iszMode!=SZ_BEST_SPEED && cmpSize!=4+sizeof(uint8_t)+exe_params->SZ_SIZE_TYPE+MetaDataByteLength) + free(szTmpBytes); + return status; +} + + +void decompressDataSeries_uint8_1D(uint8_t** data, size_t dataSeriesLength, TightDataPointStorageI* tdps) +{ + updateQuantizationInfo(tdps->intervals); + size_t i; + double interval = tdps->realPrecision*2; + + *data = (uint8_t*)malloc(sizeof(uint8_t)*dataSeriesLength); + + int* type = (int*)malloc(dataSeriesLength*sizeof(int)); + + HuffmanTree* huffmanTree = createHuffmanTree(tdps->stateNum); + decode_withTree(huffmanTree, tdps->typeArray, dataSeriesLength, type); + SZ_ReleaseHuffman(huffmanTree); + + //sdi:Debug + //writeUShortData(type, dataSeriesLength, "decompressStateBytes.sb"); + + long predValue, tmp; + uint8_t minValue, exactData; + + minValue = tdps->minValue; + + int exactByteSize = tdps->exactByteSize; + unsigned char* exactDataBytePointer = tdps->exactDataBytes; + + unsigned char curBytes[8] = {0,0,0,0,0,0,0,0}; + + int rightShiftBits = computeRightShiftBits(exactByteSize, SZ_UINT8); + 
if(rightShiftBits<0) + { + printf("Error: rightShift < 0!\n"); + exit(0); + } + int type_; + for (i = 0; i < dataSeriesLength; i++) { + type_ = type[i]; + switch (type_) { + case 0: + // recover the exact data + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = curBytes[0]; + exactData = (uint8_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[i] = exactData + minValue; + break; + default: + //predValue = 2 * (*data)[i-1] - (*data)[i-2]; + predValue = (*data)[i-1]; + tmp = predValue + (type_-exe_params->intvRadius)*interval; + if(tmp >= SZ_UINT8_MIN&&tmpintervals); + //printf("tdps->intervals=%d, exe_params->intvRadius=%d\n", tdps->intervals, exe_params->intvRadius); + + size_t dataSeriesLength = r1*r2; + // printf ("%d %d\n", r1, r2); + + double realPrecision = tdps->realPrecision; + + *data = (uint8_t*)malloc(sizeof(uint8_t)*dataSeriesLength); + + int* type = (int*)malloc(dataSeriesLength*sizeof(int)); + + HuffmanTree* huffmanTree = createHuffmanTree(tdps->stateNum); + decode_withTree(huffmanTree, tdps->typeArray, dataSeriesLength, type); + SZ_ReleaseHuffman(huffmanTree); + + uint8_t minValue, exactData; + + minValue = tdps->minValue; + + int exactByteSize = tdps->exactByteSize; + unsigned char* exactDataBytePointer = tdps->exactDataBytes; + + unsigned char curBytes[8] = {0,0,0,0,0,0,0,0}; + + int rightShiftBits = computeRightShiftBits(exactByteSize, SZ_UINT8); + + long pred1D, pred2D, tmp; + size_t ii, jj; + + /* Process Row-0, data 0 */ + + // recover the exact data + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = curBytes[0]; + exactData = (uint8_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[0] = exactData + minValue; + + /* Process Row-0, data 1 */ + int type_ = type[1]; + if (type_ != 0) + { + pred1D = (*data)[0]; + tmp = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_UINT8_MIN&&tmp> rightShiftBits; + 
exactDataBytePointer += exactByteSize; + (*data)[1] = exactData + minValue; + } + + /* Process Row-0, data 2 --> data r2-1 */ + for (jj = 2; jj < r2; jj++) + { + type_ = type[jj]; + if (type_ != 0) + { + pred1D = 2*(*data)[jj-1] - (*data)[jj-2]; + tmp = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_UINT8_MIN&&tmp> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[jj] = exactData + minValue; + } + } + + size_t index; + /* Process Row-1 --> Row-r1-1 */ + for (ii = 1; ii < r1; ii++) + { + /* Process row-ii data 0 */ + index = ii*r2; + + type_ = type[index]; + if (type_ != 0) + { + pred1D = (*data)[index-r2]; + tmp = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_UINT8_MIN&&tmp> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + + /* Process row-ii data 1 --> r2-1*/ + for (jj = 1; jj < r2; jj++) + { + index = ii*r2+jj; + pred2D = (*data)[index-1] + (*data)[index-r2] - (*data)[index-r2-1]; + + type_ = type[index]; + if (type_ != 0) + { + tmp = pred2D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_UINT8_MIN&&tmp> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + } + } + + free(type); + return; +} + +void decompressDataSeries_uint8_3D(uint8_t** data, size_t r1, size_t r2, size_t r3, TightDataPointStorageI* tdps) +{ + updateQuantizationInfo(tdps->intervals); + size_t dataSeriesLength = r1*r2*r3; + size_t r23 = r2*r3; +// printf ("%d %d %d\n", r1, r2, r3); + double realPrecision = tdps->realPrecision; + + *data = (uint8_t*)malloc(sizeof(uint8_t)*dataSeriesLength); + int* type = (int*)malloc(dataSeriesLength*sizeof(int)); + + HuffmanTree* huffmanTree = createHuffmanTree(tdps->stateNum); + decode_withTree(huffmanTree, tdps->typeArray, dataSeriesLength, type); + SZ_ReleaseHuffman(huffmanTree); + + uint8_t minValue, exactData; + + minValue = tdps->minValue; + + int 
exactByteSize = tdps->exactByteSize; + unsigned char* exactDataBytePointer = tdps->exactDataBytes; + + unsigned char curBytes[8] = {0,0,0,0,0,0,0,0}; + + int rightShiftBits = computeRightShiftBits(exactByteSize, SZ_UINT8); + + long pred1D, pred2D, pred3D, tmp; + size_t ii, jj, kk; + + /////////////////////////// Process layer-0 /////////////////////////// + /* Process Row-0 data 0*/ + + // recover the exact data + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = curBytes[0]; + exactData = (uint8_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[0] = exactData + minValue; + + /* Process Row-0, data 1 */ + pred1D = (*data)[0]; + + int type_ = type[1]; + if (type_ != 0) + { + tmp = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_UINT8_MIN&&tmp> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[1] = exactData + minValue; + } + /* Process Row-0, data 2 --> data r3-1 */ + for (jj = 2; jj < r3; jj++) + { + pred1D = 2*(*data)[jj-1] - (*data)[jj-2]; + + type_ = type[jj]; + if (type_ != 0) + { + tmp = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_UINT8_MIN&&tmp> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[jj] = exactData + minValue; + } + } + + size_t index; + /* Process Row-1 --> Row-r2-1 */ + for (ii = 1; ii < r2; ii++) + { + /* Process row-ii data 0 */ + index = ii*r3; + pred1D = (*data)[index-r3]; + + type_ = type[index]; + if (type_ != 0) + { + tmp = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_UINT8_MIN&&tmp> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + + /* Process row-ii data 1 --> r3-1*/ + for (jj = 1; jj < r3; jj++) + { + index = ii*r3+jj; + pred2D = (*data)[index-1] + (*data)[index-r3] - (*data)[index-r3-1]; + + type_ = type[index]; + if (type_ != 0) + { + tmp = pred2D + 2 * (type_ - exe_params->intvRadius) * 
realPrecision; + if(tmp >= SZ_UINT8_MIN&&tmp> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + } + } + + /////////////////////////// Process layer-1 --> layer-r1-1 /////////////////////////// + + for (kk = 1; kk < r1; kk++) + { + /* Process Row-0 data 0*/ + index = kk*r23; + pred1D = (*data)[index-r23]; + + type_ = type[index]; + if (type_ != 0) + { + tmp = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_UINT8_MIN&&tmp> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + + /* Process Row-0 data 1 --> data r3-1 */ + for (jj = 1; jj < r3; jj++) + { + index = kk*r23+jj; + pred2D = (*data)[index-1] + (*data)[index-r23] - (*data)[index-r23-1]; + + type_ = type[index]; + if (type_ != 0) + { + tmp = pred2D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_UINT8_MIN&&tmp> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + } + + /* Process Row-1 --> Row-r2-1 */ + for (ii = 1; ii < r2; ii++) + { + /* Process Row-i data 0 */ + index = kk*r23 + ii*r3; + pred2D = (*data)[index-r3] + (*data)[index-r23] - (*data)[index-r23-r3]; + + type_ = type[index]; + if (type_ != 0) + { + tmp = pred2D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_UINT8_MIN&&tmp> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + + /* Process Row-i data 1 --> data r3-1 */ + for (jj = 1; jj < r3; jj++) + { + index = kk*r23 + ii*r3 + jj; + pred3D = (*data)[index-1] + (*data)[index-r3] + (*data)[index-r23] + - (*data)[index-r3-1] - (*data)[index-r23-r3] - (*data)[index-r23-1] + (*data)[index-r23-r3-1]; + + type_ = type[index]; + if (type_ != 0) + { + tmp = pred3D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_UINT8_MIN&&tmp> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = 
exactData + minValue; + } + } + } + } + + free(type); + return; +} + + +void decompressDataSeries_uint8_4D(uint8_t** data, size_t r1, size_t r2, size_t r3, size_t r4, TightDataPointStorageI* tdps) +{ + updateQuantizationInfo(tdps->intervals); + size_t dataSeriesLength = r1*r2*r3*r4; + size_t r234 = r2*r3*r4; + size_t r34 = r3*r4; + + double realPrecision = tdps->realPrecision; + + *data = (uint8_t*)malloc(sizeof(uint8_t)*dataSeriesLength); + int* type = (int*)malloc(dataSeriesLength*sizeof(int)); + + HuffmanTree* huffmanTree = createHuffmanTree(tdps->stateNum); + decode_withTree(huffmanTree, tdps->typeArray, dataSeriesLength, type); + SZ_ReleaseHuffman(huffmanTree); + + uint8_t minValue, exactData; + + minValue = tdps->minValue; + + int exactByteSize = tdps->exactByteSize; + unsigned char* exactDataBytePointer = tdps->exactDataBytes; + + unsigned char curBytes[8] = {0,0,0,0,0,0,0,0}; + + int rightShiftBits = computeRightShiftBits(exactByteSize, SZ_UINT8); + + int type_; + + long pred1D, pred2D, pred3D, tmp; + size_t ii, jj, kk, ll; + size_t index; + + for (ll = 0; ll < r1; ll++) + { + /////////////////////////// Process layer-0 /////////////////////////// + /* Process Row-0 data 0*/ + index = ll*r234; + // recover the exact data + memcpy(curBytes, exactDataBytePointer, exactByteSize); + exactData = curBytes[0]; + exactData = (uint8_t)exactData >> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + + /* Process Row-0, data 1 */ + index = ll*r234+1; + + pred1D = (*data)[index-1]; + + type_ = type[index]; + if (type_ != 0) + { + tmp = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_UINT8_MIN&&tmp> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + + /* Process Row-0, data 2 --> data r4-1 */ + for (jj = 2; jj < r4; jj++) + { + index = ll*r234+jj; + + pred1D = 2*(*data)[index-1] - (*data)[index-2]; + + type_ = type[index]; + if (type_ != 0) 
+ { + tmp = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_UINT8_MIN&&tmp> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + } + + /* Process Row-1 --> Row-r3-1 */ + for (ii = 1; ii < r3; ii++) + { + /* Process row-ii data 0 */ + index = ll*r234+ii*r4; + + pred1D = (*data)[index-r4]; + + type_ = type[index]; + if (type_ != 0) + { + tmp = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_UINT8_MIN&&tmp> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + + /* Process row-ii data 1 --> r4-1*/ + for (jj = 1; jj < r4; jj++) + { + index = ll*r234+ii*r4+jj; + + pred2D = (*data)[index-1] + (*data)[index-r4] - (*data)[index-r4-1]; + + type_ = type[index]; + if (type_ != 0) + { + tmp = pred2D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_UINT8_MIN&&tmp> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + } + } + + /////////////////////////// Process layer-1 --> layer-r2-1 /////////////////////////// + + for (kk = 1; kk < r2; kk++) + { + /* Process Row-0 data 0*/ + index = ll*r234+kk*r34; + + pred1D = (*data)[index-r34]; + + type_ = type[index]; + if (type_ != 0) + { + tmp = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_UINT8_MIN&&tmp> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + + /* Process Row-0 data 1 --> data r4-1 */ + for (jj = 1; jj < r4; jj++) + { + index = ll*r234+kk*r34+jj; + + pred2D = (*data)[index-1] + (*data)[index-r34] - (*data)[index-r34-1]; + + type_ = type[index]; + if (type_ != 0) + { + tmp = pred2D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_UINT8_MIN&&tmp> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + } + + /* Process Row-1 --> Row-r3-1 */ + 
for (ii = 1; ii < r3; ii++) + { + /* Process Row-i data 0 */ + index = ll*r234+kk*r34+ii*r4; + + pred2D = (*data)[index-r4] + (*data)[index-r34] - (*data)[index-r34-r4]; + + type_ = type[index]; + if (type_ != 0) + { + tmp = pred2D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_UINT8_MIN&&tmp> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + + /* Process Row-i data 1 --> data r4-1 */ + for (jj = 1; jj < r4; jj++) + { + index = ll*r234+kk*r34+ii*r4+jj; + + pred3D = (*data)[index-1] + (*data)[index-r4] + (*data)[index-r34] + - (*data)[index-r4-1] - (*data)[index-r34-r4] - (*data)[index-r34-1] + (*data)[index-r34-r4-1]; + + type_ = type[index]; + if (type_ != 0) + { + tmp = pred3D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + if(tmp >= SZ_UINT8_MIN&&tmp> rightShiftBits; + exactDataBytePointer += exactByteSize; + (*data)[index] = exactData + minValue; + } + } + } + } + } + + free(type); + return; +} + +void getSnapshotData_uint8_1D(uint8_t** data, size_t dataSeriesLength, TightDataPointStorageI* tdps, int errBoundMode) +{ + size_t i; + + if (tdps->allSameData) { + uint8_t value = tdps->exactDataBytes[0]; + *data = (uint8_t*)malloc(sizeof(uint8_t)*dataSeriesLength); + for (i = 0; i < dataSeriesLength; i++) + (*data)[i] = value; + } else { + decompressDataSeries_uint8_1D(data, dataSeriesLength, tdps); + } +} + +void getSnapshotData_uint8_2D(uint8_t** data, size_t r1, size_t r2, TightDataPointStorageI* tdps, int errBoundMode) +{ + size_t i; + size_t dataSeriesLength = r1*r2; + if (tdps->allSameData) { + uint8_t value = tdps->exactDataBytes[0]; + *data = (uint8_t*)malloc(sizeof(uint8_t)*dataSeriesLength); + for (i = 0; i < dataSeriesLength; i++) + (*data)[i] = value; + } else { + decompressDataSeries_uint8_2D(data, r1, r2, tdps); + } +} + +void getSnapshotData_uint8_3D(uint8_t** data, size_t r1, size_t r2, size_t r3, TightDataPointStorageI* tdps, int errBoundMode) +{ + size_t i; + 
size_t dataSeriesLength = r1*r2*r3; + if (tdps->allSameData) { + uint8_t value = tdps->exactDataBytes[0]; + *data = (uint8_t*)malloc(sizeof(uint8_t)*dataSeriesLength); + for (i = 0; i < dataSeriesLength; i++) + (*data)[i] = value; + } else { + decompressDataSeries_uint8_3D(data, r1, r2, r3, tdps); + } +} + +void getSnapshotData_uint8_4D(uint8_t** data, size_t r1, size_t r2, size_t r3, size_t r4, TightDataPointStorageI* tdps, int errBoundMode) +{ + size_t i; + size_t dataSeriesLength = r1*r2*r3*r4; + if (tdps->allSameData) { + uint8_t value = tdps->exactDataBytes[0]; + *data = (uint8_t*)malloc(sizeof(uint8_t)*dataSeriesLength); + for (i = 0; i < dataSeriesLength; i++) + (*data)[i] = value; + } else { + decompressDataSeries_uint8_4D(data, r1, r2, r3, r4, tdps); + } +} diff --git a/deps/SZ/sz/src/szf.c b/deps/SZ/sz/src/szf.c new file mode 100644 index 0000000000000000000000000000000000000000..a40dc38f8f6611a36e5908186ab6f6f04bcbe6f1 --- /dev/null +++ b/deps/SZ/sz/src/szf.c @@ -0,0 +1,570 @@ +/** + * @file szf.c + * @author Sheng Di + * @date April, 2015 + * @brief the key C binding file to connect Fortran and C + * (C) 2015 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. + */ + + +#include +#include +#include +#include "sz.h" +#include "szf.h" + +//special notice: all the function names in this file must be lower-cases!! 
+void sz_init_c_(char *configFile,int *len,int *ierr) +{ + int i; + char s2[*len+1]; + for(i=0;i<*len;i++) + s2[i]=configFile[i]; + s2[*len]='\0'; + // printf("sconfigFile=%s\n",configFile); + *ierr = SZ_Init(s2); +} + +void sz_finalize_c_() +{ + SZ_Finalize(); +} + +//compress with config (without args in function) +void sz_compress_d1_float_(float* data, unsigned char *bytes, size_t *outSize, size_t *r1) +{ + unsigned char *tmp_bytes = SZ_compress(SZ_FLOAT, data, outSize, 0, 0, 0, 0, *r1); + memcpy(bytes, tmp_bytes, *outSize); + free(tmp_bytes); +} + +void sz_compress_d1_float_rev_(float* data, float *reservedValue, unsigned char *bytes, size_t *outSize, size_t *r1) +{ + unsigned char *tmp_bytes = SZ_compress_rev(SZ_FLOAT, data, reservedValue, outSize, 0, 0, 0, 0, *r1); + memcpy(bytes, tmp_bytes, *outSize); + free(tmp_bytes); +} + +void sz_compress_d2_float_(float* data, unsigned char *bytes, size_t *outSize, size_t *r1, size_t *r2) +{ + unsigned char *tmp_bytes = SZ_compress(SZ_FLOAT, data, outSize, 0, 0, 0, *r2, *r1); + memcpy(bytes, tmp_bytes, *outSize); + free(tmp_bytes); +} + +void sz_compress_d2_float_rev_(float* data, float *reservedValue, unsigned char *bytes, size_t *outSize, size_t *r1, size_t *r2) +{ + unsigned char *tmp_bytes = SZ_compress_rev(SZ_FLOAT, data, reservedValue, outSize, 0, 0, 0, *r2, *r1); + memcpy(bytes, tmp_bytes, *outSize); + free(tmp_bytes); +} + +void sz_compress_d3_float_(float* data, unsigned char *bytes, size_t *outSize, size_t *r1, size_t *r2, size_t *r3) +{ + unsigned char *tmp_bytes = SZ_compress(SZ_FLOAT, data, outSize, 0, 0, *r3, *r2, *r1); + memcpy(bytes, tmp_bytes, *outSize); + free(tmp_bytes); +} + +void sz_compress_d3_float_rev_(float* data, float *reservedValue, unsigned char *bytes, size_t *outSize, size_t *r1, size_t *r2, size_t *r3) +{ + unsigned char *tmp_bytes = SZ_compress_rev(SZ_FLOAT, data, reservedValue, outSize, 0, 0, *r3, *r2, *r1); + memcpy(bytes, tmp_bytes, *outSize); + free(tmp_bytes); +} + +void 
sz_compress_d4_float_(float* data, unsigned char *bytes, size_t *outSize, size_t *r1, size_t *r2, size_t *r3, size_t *r4) +{ + unsigned char *tmp_bytes = SZ_compress(SZ_FLOAT, data, outSize, 0, *r4, *r3, *r2, *r1); + memcpy(bytes, tmp_bytes, *outSize); + free(tmp_bytes); +} + +void sz_compress_d4_float_rev_(float* data, float *reservedValue, unsigned char *bytes, size_t *outSize, size_t *r1, size_t *r2, size_t *r3, size_t *r4) +{ + unsigned char *tmp_bytes = SZ_compress_rev(SZ_FLOAT, data, reservedValue, outSize, 0, *r4, *r3, *r2, *r1); + memcpy(bytes, tmp_bytes, *outSize); + free(tmp_bytes); +} + +void sz_compress_d5_float_(float* data, unsigned char *bytes, size_t *outSize, size_t *r1, size_t *r2, size_t *r3, size_t *r4, size_t *r5) +{ + unsigned char *tmp_bytes = SZ_compress(SZ_FLOAT, data, outSize, *r5, *r4, *r3, *r2, *r1); + memcpy(bytes, tmp_bytes, *outSize); + free(tmp_bytes); +} + +void sz_compress_d5_float_rev_(float* data, float *reservedValue, unsigned char *bytes, size_t *outSize, size_t *r1, size_t *r2, size_t *r3, size_t *r4, size_t *r5) +{ + unsigned char *tmp_bytes = SZ_compress_rev(SZ_FLOAT, data, reservedValue, outSize, *r5, *r4, *r3, *r2, *r1); + memcpy(bytes, tmp_bytes, *outSize); + free(tmp_bytes); +} + +void sz_compress_d1_double_(double* data, unsigned char *bytes, size_t *outSize, size_t *r1) +{ + unsigned char *tmp_bytes = SZ_compress(SZ_DOUBLE, data, outSize, 0, 0, 0, 0, *r1); + memcpy(bytes, tmp_bytes, *outSize); + free(tmp_bytes); +} + +void sz_compress_d1_double_rev_(double* data, double *reservedValue, unsigned char *bytes, size_t *outSize, size_t *r1) +{ + unsigned char *tmp_bytes = SZ_compress_rev(SZ_DOUBLE, data, reservedValue, outSize, 0, 0, 0, 0, *r1); + memcpy(bytes, tmp_bytes, *outSize); + free(tmp_bytes); +} + +void sz_compress_d2_double_(double* data, unsigned char *bytes, size_t *outSize, size_t *r1, size_t *r2) +{ + unsigned char *tmp_bytes = SZ_compress(SZ_DOUBLE, data, outSize, 0, 0, 0, *r2, *r1); + memcpy(bytes, 
tmp_bytes, *outSize); + free(tmp_bytes); +} + +void sz_compress_d2_double_rev_(double* data, double *reservedValue, unsigned char *bytes, size_t *outSize, size_t *r1, size_t *r2) +{ + unsigned char *tmp_bytes = SZ_compress_rev(SZ_DOUBLE, data, reservedValue, outSize, 0, 0, 0, *r2, *r1); + memcpy(bytes, tmp_bytes, *outSize); + free(tmp_bytes); +} + +void sz_compress_d3_double_(double* data, unsigned char *bytes, size_t *outSize, size_t *r1, size_t *r2, size_t *r3) +{ + unsigned char *tmp_bytes = SZ_compress(SZ_DOUBLE, data, outSize, 0, 0, *r3, *r2, *r1); + memcpy(bytes, tmp_bytes, *outSize); + free(tmp_bytes); +} + +void sz_compress_d3_double_rev_(double* data, double *reservedValue, unsigned char *bytes, size_t *outSize, size_t *r1, size_t *r2, size_t *r3) +{ + unsigned char *tmp_bytes = SZ_compress_rev(SZ_DOUBLE, data, reservedValue, outSize, 0, 0, *r3, *r2, *r1); + memcpy(bytes, tmp_bytes, *outSize); + free(tmp_bytes); +} + +void sz_compress_d4_double_(double* data, unsigned char *bytes, size_t *outSize, size_t *r1, size_t *r2, size_t *r3, size_t *r4) +{ + unsigned char *tmp_bytes = SZ_compress(SZ_DOUBLE, data, outSize, 0, *r4, *r3, *r2, *r1); + memcpy(bytes, tmp_bytes, *outSize); + free(tmp_bytes); +} + +void sz_compress_d4_double_rev_(double* data, double *reservedValue, unsigned char *bytes, size_t *outSize, size_t *r1, size_t *r2, size_t *r3, size_t *r4) +{ + unsigned char *tmp_bytes = SZ_compress_rev(SZ_DOUBLE, data, reservedValue, outSize, 0, *r4, *r3, *r2, *r1); + memcpy(bytes, tmp_bytes, *outSize); + free(tmp_bytes); +} + +void sz_compress_d5_double_(double* data, unsigned char *bytes, size_t *outSize, size_t *r1, size_t *r2, size_t *r3, size_t *r4, size_t *r5) +{ + unsigned char *tmp_bytes = SZ_compress(SZ_DOUBLE, data, outSize, *r5, *r4, *r3, *r2, *r1); + memcpy(bytes, tmp_bytes, *outSize); + free(tmp_bytes); +} + +void sz_compress_d5_double_rev_(double* data, double *reservedValue, unsigned char *bytes, size_t *outSize, size_t *r1, size_t *r2, size_t 
*r3, size_t *r4, size_t *r5) +{ + unsigned char *tmp_bytes = SZ_compress_rev(SZ_DOUBLE, data, reservedValue, outSize, *r5, *r4, *r3, *r2, *r1); + memcpy(bytes, tmp_bytes, *outSize); + free(tmp_bytes); +} + +//compress with args + +void sz_compress_d1_float_args_(float* data, unsigned char *bytes, size_t *outSize, int *errBoundMode, float *absErrBound, float *relBoundRatio, size_t *r1) +{ + unsigned char *tmp_bytes = SZ_compress_args(SZ_FLOAT, data, outSize, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 0, 0, 0, 0, *r1); + memcpy(bytes, tmp_bytes, *outSize); + free(tmp_bytes); +} + +void sz_compress_d2_float_args_(float* data, unsigned char *bytes, size_t *outSize, int *errBoundMode, float *absErrBound, float *relBoundRatio, size_t *r1, size_t *r2) +{ + unsigned char *tmp_bytes = SZ_compress_args(SZ_FLOAT, data, outSize, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 0, 0, 0, *r2, *r1); + memcpy(bytes, tmp_bytes, *outSize); + free(tmp_bytes); +} + +void sz_compress_d3_float_args_(float* data, unsigned char *bytes, size_t *outSize, int *errBoundMode, float *absErrBound, float *relBoundRatio, size_t *r1, size_t *r2, size_t *r3) +{ + unsigned char *tmp_bytes = SZ_compress_args(SZ_FLOAT, data, outSize, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 0, 0, *r3, *r2, *r1); + memcpy(bytes, tmp_bytes, *outSize); + free(tmp_bytes); +} + +void sz_compress_d4_float_args_(float* data, unsigned char *bytes, size_t *outSize, int *errBoundMode, float *absErrBound, float *relBoundRatio, size_t *r1, size_t *r2, size_t *r3, size_t *r4) +{ + unsigned char *tmp_bytes = SZ_compress_args(SZ_FLOAT, data, outSize, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 0, *r4, *r3, *r2, *r1); + memcpy(bytes, tmp_bytes, *outSize); + free(tmp_bytes); +} + +void sz_compress_d5_float_args_(float* data, unsigned char *bytes, size_t *outSize, int *errBoundMode, float *absErrBound, float *relBoundRatio, size_t *r1, size_t *r2, size_t *r3, size_t *r4, size_t *r5) +{ + unsigned char *tmp_bytes = 
SZ_compress_args(SZ_FLOAT, data, outSize, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, *r5, *r4, *r3, *r2, *r1); + memcpy(bytes, tmp_bytes, *outSize); + free(tmp_bytes); +} + +void sz_compress_d1_double_args_(double* data, unsigned char *bytes, size_t *outSize, int *errBoundMode, double *absErrBound, double *relBoundRatio, size_t *r1) +{ + unsigned char *tmp_bytes = SZ_compress_args(SZ_DOUBLE, data, outSize, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 0, 0, 0, 0, *r1); + memcpy(bytes, tmp_bytes, *outSize); + free(tmp_bytes); +} + +void sz_compress_d2_double_args_(double* data, unsigned char *bytes, size_t *outSize, int *errBoundMode, double *absErrBound, double *relBoundRatio, size_t *r1, size_t *r2) +{ + unsigned char *tmp_bytes = SZ_compress_args(SZ_DOUBLE, data, outSize, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 0, 0, 0, *r2, *r1); + memcpy(bytes, tmp_bytes, *outSize); + free(tmp_bytes); +} + +void sz_compress_d3_double_args_(double* data, unsigned char *bytes, size_t *outSize, int *errBoundMode, double *absErrBound, double *relBoundRatio, size_t *r1, size_t *r2, size_t *r3) +{ + unsigned char *tmp_bytes = SZ_compress_args(SZ_DOUBLE, data, outSize, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 0, 0, *r3, *r2, *r1); + memcpy(bytes, tmp_bytes, *outSize); + free(tmp_bytes); +} + +void sz_compress_d4_double_args_(double* data, unsigned char *bytes, size_t *outSize, int *errBoundMode, double *absErrBound, double *relBoundRatio, size_t *r1, size_t *r2, size_t *r3, size_t *r4) +{ + unsigned char *tmp_bytes = SZ_compress_args(SZ_DOUBLE, data, outSize, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 0, *r4, *r3, *r2, *r1); + memcpy(bytes, tmp_bytes, *outSize); + free(tmp_bytes); +} + +void sz_compress_d5_double_args_(double* data, unsigned char *bytes, size_t *outSize, int *errBoundMode, double *absErrBound, double *relBoundRatio, size_t *r1, size_t *r2, size_t *r3, size_t *r4, size_t *r5) +{ + unsigned char *tmp_bytes = 
SZ_compress_args(SZ_DOUBLE, data, outSize, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, *r5, *r4, *r3, *r2, *r1); + memcpy(bytes, tmp_bytes, *outSize); + free(tmp_bytes); +} + +//-------------- + +void sz_compress_d1_float_rev_args_(float* data, float *reservedValue, unsigned char *bytes, size_t *outSize, int *errBoundMode, float *absErrBound, float *relBoundRatio, size_t *r1) +{ + unsigned char *tmp_bytes = SZ_compress_rev_args(SZ_FLOAT, data, reservedValue, outSize, *errBoundMode, *absErrBound, *relBoundRatio, 0, 0, 0, 0, *r1); + memcpy(bytes, tmp_bytes, *outSize); + free(tmp_bytes); +} + +void sz_compress_d2_float_rev_args_(float* data, float *reservedValue, unsigned char *bytes, size_t *outSize, int *errBoundMode, float *absErrBound, float *relBoundRatio, size_t *r1, size_t *r2) +{ + unsigned char *tmp_bytes = SZ_compress_rev_args(SZ_FLOAT, data, reservedValue, outSize, *errBoundMode, *absErrBound, *relBoundRatio, 0, 0, 0, *r2, *r1); + memcpy(bytes, tmp_bytes, *outSize); + free(tmp_bytes); +} + +void sz_compress_d3_float_rev_args_(float* data, float *reservedValue, unsigned char *bytes, size_t *outSize, int *errBoundMode, float *absErrBound, float *relBoundRatio, size_t *r1, size_t *r2, size_t *r3) +{ + unsigned char *tmp_bytes = SZ_compress_rev_args(SZ_FLOAT, data, reservedValue, outSize, *errBoundMode, *absErrBound, *relBoundRatio, 0, 0, *r3, *r2, *r1); + memcpy(bytes, tmp_bytes, *outSize); + free(tmp_bytes); +} + +void sz_compress_d4_float_rev_args_(float* data, float *reservedValue, unsigned char *bytes, size_t *outSize, int *errBoundMode, float *absErrBound, float *relBoundRatio, size_t *r1, size_t *r2, size_t *r3, size_t *r4) +{ + unsigned char *tmp_bytes = SZ_compress_rev_args(SZ_FLOAT, data, reservedValue, outSize, *errBoundMode, *absErrBound, *relBoundRatio, 0, *r4, *r3, *r2, *r1); + memcpy(bytes, tmp_bytes, *outSize); + free(tmp_bytes); +} + +void sz_compress_d5_float_rev_args_(float* data, float *reservedValue, unsigned char *bytes, size_t 
*outSize, int *errBoundMode, float *absErrBound, float *relBoundRatio, size_t *r1, size_t *r2, size_t *r3, size_t *r4, size_t *r5) +{ + unsigned char *tmp_bytes = SZ_compress_rev_args(SZ_FLOAT, data, reservedValue, outSize, *errBoundMode, *absErrBound, *relBoundRatio, *r5, *r4, *r3, *r2, *r1); + memcpy(bytes, tmp_bytes, *outSize); + free(tmp_bytes); +} + +void sz_compress_d1_double_rev_args_(double* data, float *reservedValue, unsigned char *bytes, size_t *outSize, int *errBoundMode, double *absErrBound, double *relBoundRatio, size_t *r1) +{ + unsigned char *tmp_bytes = SZ_compress_rev_args(SZ_DOUBLE, data, reservedValue, outSize, *errBoundMode, *absErrBound, *relBoundRatio, 0, 0, 0, 0, *r1); + memcpy(bytes, tmp_bytes, *outSize); + free(tmp_bytes); +} + +void sz_compress_d2_double_rev_args_(double* data, float *reservedValue, unsigned char *bytes, size_t *outSize, int *errBoundMode, double *absErrBound, double *relBoundRatio, size_t *r1, size_t *r2) +{ + unsigned char *tmp_bytes = SZ_compress_rev_args(SZ_DOUBLE, data, reservedValue, outSize, *errBoundMode, *absErrBound, *relBoundRatio, 0, 0, 0, *r2, *r1); + memcpy(bytes, tmp_bytes, *outSize); + free(tmp_bytes); +} + +/* Fortran binding: compresses a double array described by the sizes r1..r3 through SZ_compress_rev_args, copies *outSize bytes of the result into the caller-supplied `bytes` buffer, then releases the temporary buffer (it was previously leaked here, unlike in every sibling wrapper). NOTE(review): reservedValue is declared float* in the d1-d3 double variants but double* in the d4 variant below - confirm against szf.h before changing. */ +void sz_compress_d3_double_rev_args_(double* data, float *reservedValue, unsigned char *bytes, size_t *outSize, int *errBoundMode, double *absErrBound, double *relBoundRatio, size_t *r1, size_t *r2, size_t *r3) +{ + unsigned char *tmp_bytes = SZ_compress_rev_args(SZ_DOUBLE, data, reservedValue, outSize, *errBoundMode, *absErrBound, *relBoundRatio, 0, 0, *r3, *r2, *r1); + memcpy(bytes, tmp_bytes, *outSize); + free(tmp_bytes); +} + +void sz_compress_d4_double_rev_args_(double* data, double *reservedValue, unsigned char *bytes, size_t *outSize, int *errBoundMode, double *absErrBound, double *relBoundRatio, size_t *r1, size_t *r2, size_t *r3, size_t *r4) +{ + unsigned char *tmp_bytes = SZ_compress_rev_args(SZ_DOUBLE, data, reservedValue, outSize, *errBoundMode, *absErrBound, *relBoundRatio, 0, *r4, *r3, *r2, *r1); + 
memcpy(bytes, tmp_bytes, *outSize); + free(tmp_bytes); +} + +void sz_compress_d5_double_rev_args_(double* data, double *reservedValue, unsigned char *bytes, size_t *outSize, int *errBoundMode, double *absErrBound, double *relBoundRatio, size_t *r1, size_t *r2, size_t *r3, size_t *r4, size_t *r5) +{ + unsigned char *tmp_bytes = SZ_compress_rev_args(SZ_DOUBLE, data, reservedValue, outSize, *errBoundMode, *absErrBound, *relBoundRatio, *r5, *r4, *r3, *r2, *r1); + memcpy(bytes, tmp_bytes, *outSize); + free(tmp_bytes); +} + +//decompress + +void sz_decompress_d1_float_(unsigned char *bytes, size_t *byteLength, float *data, size_t *r1) +{ + float *tmp_data = SZ_decompress(SZ_FLOAT, bytes, *byteLength, 0, 0, 0, 0, *r1); + memcpy(data, tmp_data, (*r1)*sizeof(float)); + free(tmp_data); +} + +void sz_decompress_d2_float_(unsigned char *bytes, size_t *byteLength, float *data, size_t *r1, size_t *r2) +{ + size_t r; + float *tmp_data = SZ_decompress(SZ_FLOAT, bytes, *byteLength, 0, 0, 0, *r2, *r1); + r=(*r1)*(*r2); + memcpy(data, tmp_data, r*sizeof(float)); + free(tmp_data); +} + +void sz_decompress_d3_float_(unsigned char *bytes, size_t *byteLength, float *data, size_t *r1, size_t *r2, size_t *r3) +{ + size_t r; + float *tmp_data = SZ_decompress(SZ_FLOAT, bytes, *byteLength, 0, 0, *r3, *r2, *r1); + r=(*r1)*(*r2)*(*r3); + memcpy(data, tmp_data, r*sizeof(float)); + free(tmp_data); +} + +void sz_decompress_d4_float_(unsigned char *bytes, size_t *byteLength, float *data, size_t *r1, size_t *r2, size_t *r3, size_t *r4) +{ + size_t r; + float *tmp_data = SZ_decompress(SZ_FLOAT, bytes, *byteLength, 0, *r4, *r3, *r2, *r1); + r=(*r1)*(*r2)*(*r3)*(*r4); + memcpy(data, tmp_data, r*sizeof(float)); + free(tmp_data); +} + +void sz_decompress_d5_float_(unsigned char *bytes, size_t *byteLength, float *data, size_t *r1, size_t *r2, size_t *r3, size_t *r4, size_t *r5) +{ + size_t r; + float *tmp_data = SZ_decompress(SZ_FLOAT, bytes, *byteLength, *r5, *r4, *r3, *r2, *r1); + 
r=(*r1)*(*r2)*(*r3)*(*r4)*(*r5); + memcpy(data, tmp_data, r*sizeof(float)); + free(tmp_data); +} + +void sz_decompress_d1_double_(unsigned char *bytes, size_t *byteLength, double *data, size_t *r1) +{ + double *tmp_data = SZ_decompress(SZ_DOUBLE, bytes, *byteLength, 0, 0, 0, 0, *r1); + memcpy(data, tmp_data, (*r1)*sizeof(double)); + free(tmp_data); +} + +void sz_decompress_d2_double_(unsigned char *bytes, size_t *byteLength, double *data, size_t *r1, size_t *r2) +{ + size_t r; + double *tmp_data = SZ_decompress(SZ_DOUBLE, bytes, *byteLength, 0, 0, 0, *r2, *r1); + r=(*r1)*(*r2); + memcpy(data, tmp_data, r*sizeof(double)); + free(tmp_data); +} + +void sz_decompress_d3_double_(unsigned char *bytes, size_t *byteLength, double *data, size_t *r1, size_t *r2, size_t *r3) +{ + size_t r; + double *tmp_data = SZ_decompress(SZ_DOUBLE, bytes, *byteLength, 0, 0, *r3, *r2, *r1); + r=(*r1)*(*r2)*(*r3); + memcpy(data, tmp_data, r*sizeof(double)); + free(tmp_data); +} + +void sz_decompress_d4_double_(unsigned char *bytes, size_t *byteLength, double *data, size_t *r1, size_t *r2, size_t *r3, size_t *r4) +{ + size_t r; + double *tmp_data = SZ_decompress(SZ_DOUBLE, bytes, *byteLength, 0, *r4, *r3, *r2, *r1); + r=(*r1)*(*r2)*(*r3)*(*r4); + memcpy(data, tmp_data, r*sizeof(double)); + free(tmp_data); +} + +void sz_decompress_d5_double_(unsigned char *bytes, size_t *byteLength, double *data, size_t *r1, size_t *r2, size_t *r3, size_t *r4, size_t *r5) +{ + size_t r; + double *tmp_data = SZ_decompress(SZ_DOUBLE, bytes, *byteLength, *r5, *r4, *r3, *r2, *r1); + r=(*r1)*(*r2)*(*r3)*(*r4)*(*r5); + memcpy(data, tmp_data, r*sizeof(double)); + free(tmp_data); +} + +//-----------------TODO: batch mode----------- +void sz_batchaddvar_d1_float_(int var_id, char* varName, int *len, float* data, int *errBoundMode, float *absErrBound, float *relBoundRatio, size_t *r1) +{ + int i; + char s2[*len+1]; + for(i=0;i<*len;i++) + s2[i]=varName[i]; + s2[*len]='\0'; + SZ_batchAddVar(var_id, s2, SZ_FLOAT, data, 
*errBoundMode, *absErrBound, *relBoundRatio, 0.1, 0, 0, 0, 0, *r1); +} +void sz_batchaddvar_d2_float_(int var_id, char* varName, int *len, float* data, int *errBoundMode, float *absErrBound, float *relBoundRatio, size_t *r1, size_t *r2) +{ + int i; + char s2[*len+1]; + for(i=0;i<*len;i++) + s2[i]=varName[i]; + s2[*len]='\0'; + SZ_batchAddVar(var_id, s2, SZ_FLOAT, data, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 0, 0, 0, *r2, *r1); +} +void sz_batchaddvar_d3_float_(int var_id, char* varName, int *len, float* data, int *errBoundMode, float *absErrBound, float *relBoundRatio, size_t *r1, size_t *r2, size_t *r3) +{ + int i; + char s2[*len+1]; + for(i=0;i<*len;i++) + s2[i]=varName[i]; + s2[*len]='\0'; + SZ_batchAddVar(var_id, s2, SZ_FLOAT, data, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 0, 0, *r3, *r2, *r1); +} +void sz_batchaddvar_d4_float_(int var_id, char* varName, int *len, float* data, int *errBoundMode, float *absErrBound, float *relBoundRatio, size_t *r1, size_t *r2, size_t *r3, size_t *r4) +{ + int i; + char s2[*len+1]; + for(i=0;i<*len;i++) + s2[i]=varName[i]; + s2[*len]='\0'; + SZ_batchAddVar(var_id, s2, SZ_FLOAT, data, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 0, *r4, *r3, *r2, *r1); +} +void sz_batchaddvar_d5_float_(int var_id, char* varName, int *len, float* data, int *errBoundMode, float *absErrBound, float *relBoundRatio, size_t *r1, size_t *r2, size_t *r3, size_t *r4, size_t *r5) +{ + int i; + char s2[*len+1]; + for(i=0;i<*len;i++) + s2[i]=varName[i]; + s2[*len]='\0'; + SZ_batchAddVar(var_id, s2, SZ_FLOAT, data, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, *r5, *r4, *r3, *r2, *r1); +} +void sz_batchaddvar_d1_double_(int var_id, char* varName, int *len, double* data, int *errBoundMode, double *absErrBound, double *relBoundRatio, size_t *r1) +{ + int i; + char s2[*len+1]; + for(i=0;i<*len;i++) + s2[i]=varName[i]; + s2[*len]='\0'; + SZ_batchAddVar(var_id, s2, SZ_DOUBLE, data, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 0, 
/*
 * Remove a variable from the SZ batch set by name.
 *
 * varName  - character buffer holding the name (not assumed NUL-terminated)
 * len      - pointer to the number of characters in varName
 * errState - receives the value returned by SZ_batchDelVar
 */
void sz_batchdelvar_c_(char* varName, int *len, int *errState)
{
	const int nameLen = *len;
	char name[nameLen + 1];
	int pos;

	/* Build a NUL-terminated copy of the length-delimited name. */
	for (pos = 0; pos < nameLen; ++pos) {
		name[pos] = varName[pos];
	}
	name[nameLen] = '\0';

	*errState = SZ_batchDelVar(name);
}
/*
 * Copy the data of a named batch variable into a caller-provided double array.
 *
 * varName - character buffer holding the name (not assumed NUL-terminated)
 * len     - pointer to the number of characters in varName
 * data    - destination; must be large enough for every element of the
 *           variable (the element count is derived from the dimensions
 *           reported by SZ_getVarData)
 *
 * The element count is kept in size_t. It used to be stored in an int, which
 * truncates large counts and, when negative, turns the byte count passed to
 * memcpy into a huge unsigned value.
 */
void sz_getvardata_double_(char* varName, int *len, double* data)
{
	int i;
	size_t r1, r2, r3, r4, r5;
	char s2[*len+1];

	/* Build a NUL-terminated copy of the length-delimited name. */
	for(i=0;i<*len;i++)
		s2[i]=varName[i];
	s2[*len]='\0';

	double* tmp_data = (double*)SZ_getVarData(s2, &r5, &r4, &r3, &r2, &r1);
	size_t size = computeDataLength(r5, r4, r3, r2, r1);
	memcpy(data, tmp_data, size*sizeof(double));
	/*
	 * NOTE(review): tmp_data is deliberately not freed here, whereas
	 * sz_getvardata_float_ frees the pointer it gets from SZ_getVarData.
	 * Ownership of that pointer is not visible from this file - confirm which
	 * of the two wrappers is correct before unifying them.
	 */
}
+ */ + +#include +#include +#include +#include +#include "utility.h" +#include "sz.h" +#include "callZlib.h" +#include "zstd.h" + +int compare_struct(const void* obj1, const void* obj2){ + struct sort_ast_particle * srt1 = (struct sort_ast_particle*)obj1; + struct sort_ast_particle * srt2 = (struct sort_ast_particle*)obj2; + return srt1->id - srt2->id; +} + +void reorder_vars(SZ_VarSet* vset){ + SZ_Variable* v[7]; + SZ_Variable* v_tmp; + int i, j; + //v[0] + for (v_tmp = vset->header->next, i = 0; i < 7; i++){ + v[i] = v_tmp; + v_tmp = v_tmp->next; + } + //printf("here"); + size_t dataLen = computeDataLength(v[0]->r5, v[0]->r4, v[0]->r3, v[0]->r2, v[0]->r1); + //sihuan debug + //printf("the data length is (in sorting): %u", dataLen); + struct sort_ast_particle* particle = (struct sort_ast_particle*) malloc(sizeof(struct sort_ast_particle)*dataLen); + + for (i = 0; i < dataLen; i++){ + particle[i].id = ((int64_t*)v[6]->data)[i]; + // printf("%llu ", particle[i].id); + for (j = 0; j < 6; j++) + particle[i].var[j] = ((float*)v[j]->data)[i]; + } + + //sihuan debug + #if 0 + printf("index before sorting: \n"); + for (i = 0; i < 5; i++){ + printf("%llu ", particle[i].id); + printf("%.5f ", ((float*)v[0]->data)[i]); + } + #endif + //printf("\n"); + //sihuan debug + //for (i = 0; i < 5; i++)//{ + //for (j = 0; j < 6; j++) + // printf("%.5f ", particle[i].var[j]); + // printf("%llu ", particle[i].id ); + ///} + //printf("\n\n"); + + + qsort(particle, dataLen, sizeof(struct sort_ast_particle), compare_struct); + for (i = 0; i < dataLen; i++){ + ((int64_t*)v[6]->data)[i] = particle[i].id; + for (j = 0; j < 6; j++) + ((float*)v[j]->data)[i] = particle[i].var[j]; + } + free(particle); + + //sihuan debug + #if 0 + for (i = 0; i < 5; i++){ + printf("%llu ", particle[i].id); + printf("%.5f ", ((float*)v[0]->data)[i]); + } + printf("\n"); + #endif +} + +size_t intersectAndsort(int64_t* preIndex, size_t preLen, SZ_VarSet* curVar, size_t dataLen, unsigned char* bitarray){ + size_t i, 
j, k, m, cnt; + i = j = k = m = cnt = 0; + SZ_Variable* v[7]; + SZ_Variable* v_tmp; + //v[0] + for (v_tmp = curVar->header->next, i = 0; i < 7; i++){ + v[i] = v_tmp; + v_tmp = v_tmp->next; + } + for (i = 0; i < preLen; i++) + bitarray[i] = '0'; + i = 0; + while(i < preLen && j < dataLen){ + if (preIndex[i] == ((int64_t*)v[6]->data)[j]){ + cnt++; + int64_t tmp; + tmp = ((int64_t*)v[6]->data)[k]; + ((int64_t*)v[6]->data)[k] = ((int64_t*)v[6]->data)[j]; + ((int64_t*)v[6]->data)[j] = tmp; + float data_tmp; + for (m = 0; m < 6; m++){ + data_tmp = ((float*)v[m]->data)[k]; + ((float*)v[m]->data)[k] = ((float*)v[m]->data)[j]; + ((float*)v[m]->data)[j] = data_tmp; + } + k++; i++; j++; + } + else if (preIndex[i] < ((int64_t*)v[6]->data)[j]){ + bitarray[i] = '1'; + i++; + } + else j++; + } + printf("intersect count is: %zu, i j k pre curlen is: %zu, %zu, %zu, %zu, %zu\n\n", cnt, i, j, k, preLen, dataLen); + return cnt; +} + +void write_reordered_tofile(SZ_VarSet* curVar, size_t dataLen){ + int var_index; //0 for x, 1 for y...,3 for vx...5 for vz + int i; + char outputfile_name[256]; + SZ_Variable* v[7]; SZ_Variable* v_tmp; + for (v_tmp = curVar->header->next, i = 0; i < 6; i++){ + v[i] = v_tmp; + v_tmp = v_tmp->next; + } + for (var_index = 0; var_index < 6; var_index++){ + sprintf(outputfile_name, "reordered_input_%d_%d.in", sz_tsc->currentStep, var_index); + int status_tmp; + writeFloatData_inBytes((float*)v[var_index]->data, dataLen, outputfile_name, &status_tmp); + } +} + +float calculate_delta_t(size_t size){ + SZ_Variable* v_tmp = sz_varset->header->next; + while(strcmp(v_tmp->varName, "x")) v_tmp = v_tmp->next; + float* x1 = (float*) v_tmp->data; + float* x0 = (float*) v_tmp->multisteps->hist_data; + while(strcmp(v_tmp->varName, "vx")) v_tmp = v_tmp->next; + float* vx0 = (float*) v_tmp->multisteps->hist_data; + int i, j; + double denom = 0.0; + double div = 0.0; + for (i = 0, j = 0; i < size; i++, j++){ + while(sz_tsc->bit_array[j] == '1') j++; + denom += vx0[j] * 
(x1[i] - x0[j]); + div += vx0[j] * vx0[j]; + } + printf("the calculated delta_t is: %.10f\n", denom/div); + return denom/div; +} + +int is_lossless_compressed_data(unsigned char* compressedBytes, size_t cmpSize) +{ +#if ZSTD_VERSION_NUMBER >= 10300 + unsigned long long frameContentSize = ZSTD_getFrameContentSize(compressedBytes, cmpSize); + if(frameContentSize != ZSTD_CONTENTSIZE_ERROR) + return ZSTD_COMPRESSOR; +#else + unsigned long long frameContentSize = ZSTD_getDecompressedSize(compressedBytes, cmpSize); + if(frameContentSize != 0) + return ZSTD_COMPRESSOR; +#endif + int flag = isZlibFormat(compressedBytes[0], compressedBytes[1]); + if(flag) + return GZIP_COMPRESSOR; + + return -1; //fast mode (without GZIP or ZSTD) +} + +unsigned long sz_lossless_compress(int losslessCompressor, int level, unsigned char* data, unsigned long dataLength, unsigned char** compressBytes) +{ + unsigned long outSize = 0; + size_t estimatedCompressedSize = 0; + showme(); + switch(losslessCompressor) + { + case GZIP_COMPRESSOR: + outSize = zlib_compress5(data, dataLength, compressBytes, level); + break; + case ZSTD_COMPRESSOR: + if(dataLength < 100) + estimatedCompressedSize = 200; + else + estimatedCompressedSize = dataLength*1.2; + *compressBytes = (unsigned char*)malloc(estimatedCompressedSize); + outSize = ZSTD_compress(*compressBytes, estimatedCompressedSize, data, dataLength, level); //default setting of level is 3 + break; + default: + printf("Error: Unrecognized lossless compressor in sz_lossless_compress()\n"); + } + return outSize; +} + +unsigned long sz_lossless_decompress(int losslessCompressor, unsigned char* compressBytes, unsigned long cmpSize, unsigned char** oriData, unsigned long targetOriSize) +{ + unsigned long outSize = 0; + switch(losslessCompressor) + { + case GZIP_COMPRESSOR: + outSize = zlib_uncompress5(compressBytes, cmpSize, oriData, targetOriSize); + break; + case ZSTD_COMPRESSOR: + *oriData = (unsigned char*)malloc(targetOriSize); + 
ZSTD_decompress(*oriData, targetOriSize, compressBytes, cmpSize); + outSize = targetOriSize; + break; + default: + printf("Error: Unrecognized lossless compressor in sz_lossless_decompress()\n"); + } + return outSize; +} + +unsigned long sz_lossless_decompress65536bytes(int losslessCompressor, unsigned char* compressBytes, unsigned long cmpSize, unsigned char** oriData) +{ + unsigned long outSize = 0; + switch(losslessCompressor) + { + case GZIP_COMPRESSOR: + outSize = zlib_uncompress65536bytes(compressBytes, cmpSize, oriData); + break; + case ZSTD_COMPRESSOR: + *oriData = (unsigned char*)malloc(65536); + memset(*oriData, 0, 65536); + ZSTD_decompress(*oriData, 65536, compressBytes, cmpSize); //the first 32768 bytes should be exact the same. + outSize = 65536; + break; + default: + printf("Error: Unrecognized lossless compressor\n"); + } + return outSize; +} + +void* detransposeData(void* data, int dataType, size_t r5, size_t r4, size_t r3, size_t r2, size_t r1) +{ + size_t len = computeDataLength(r5, r4, r3, r2, r1); + int dim = computeDimension(r5, r4, r3, r2, r1); + if(dataType == SZ_FLOAT) + { + float* ori_data = data; + float* new_data = (float*)malloc(sizeof(float)*len); + if(dim==1) + { + memcpy(new_data, ori_data, sizeof(float)*len); + return new_data; + } + else if(dim==2) + { + size_t i, j, s = 0; + for(i=0;i + ) + + diff --git a/deps/SZ/zlib/Makefile.am b/deps/SZ/zlib/Makefile.am new file mode 100644 index 0000000000000000000000000000000000000000..1ad34fd9065130bcf5d8abe52bfb8ad193d04457 --- /dev/null +++ b/deps/SZ/zlib/Makefile.am @@ -0,0 +1,6 @@ +AUTOMAKE_OPTIONS=foreign +include_HEADERS=inffixed.h inflate.h inftrees.h trees.h zconf.h crc32.h deflate.h gzguts.h inffast.h zlib.h zutil.h +lib_LTLIBRARIES=libzlib.la +libzlib_la_CFLAGS=-I./ +libzlib_la_SOURCES=adler32.c crc32.c deflate.c gzclose.c gzlib.c gzwrite.c inffast.c zutil.c compress.c \ + gzread.c infback.c inflate.c inftrees.c trees.c uncompr.c diff --git a/deps/SZ/zlib/Makefile.in 
b/deps/SZ/zlib/Makefile.in new file mode 100644 index 0000000000000000000000000000000000000000..a0327365f124057ebfe2bdf851a121fe1933ca5b --- /dev/null +++ b/deps/SZ/zlib/Makefile.in @@ -0,0 +1,864 @@ +# Makefile.in generated by automake 1.16.2 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2020 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +subdir = zlib +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = 
$(srcdir)/Makefile.am $(include_HEADERS) \ + $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! 
-r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(libdir)" "$(DESTDIR)$(includedir)" +LTLIBRARIES = $(lib_LTLIBRARIES) +libzlib_la_LIBADD = +am_libzlib_la_OBJECTS = libzlib_la-adler32.lo libzlib_la-crc32.lo \ + libzlib_la-deflate.lo libzlib_la-gzclose.lo \ + libzlib_la-gzlib.lo libzlib_la-gzwrite.lo \ + libzlib_la-inffast.lo libzlib_la-zutil.lo \ + libzlib_la-compress.lo libzlib_la-gzread.lo \ + libzlib_la-infback.lo libzlib_la-inflate.lo \ + libzlib_la-inftrees.lo libzlib_la-trees.lo \ + libzlib_la-uncompr.lo +libzlib_la_OBJECTS = $(am_libzlib_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +libzlib_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(libzlib_la_CFLAGS) \ + $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__maybe_remake_depfiles = depfiles +am__depfiles_remade = ./$(DEPDIR)/libzlib_la-adler32.Plo \ + ./$(DEPDIR)/libzlib_la-compress.Plo \ + ./$(DEPDIR)/libzlib_la-crc32.Plo \ + ./$(DEPDIR)/libzlib_la-deflate.Plo \ + ./$(DEPDIR)/libzlib_la-gzclose.Plo \ + ./$(DEPDIR)/libzlib_la-gzlib.Plo \ + ./$(DEPDIR)/libzlib_la-gzread.Plo \ + ./$(DEPDIR)/libzlib_la-gzwrite.Plo \ + ./$(DEPDIR)/libzlib_la-infback.Plo \ + ./$(DEPDIR)/libzlib_la-inffast.Plo \ + ./$(DEPDIR)/libzlib_la-inflate.Plo \ + ./$(DEPDIR)/libzlib_la-inftrees.Plo \ + ./$(DEPDIR)/libzlib_la-trees.Plo \ + ./$(DEPDIR)/libzlib_la-uncompr.Plo \ + ./$(DEPDIR)/libzlib_la-zutil.Plo 
+am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(libzlib_la_SOURCES) +DIST_SOURCES = $(libzlib_la_SOURCES) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +HEADERS = $(include_HEADERS) +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. 
+am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/depcomp +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +FC = @FC@ +FCFLAGS = @FCFLAGS@ +FGREP = @FGREP@ +GREP = @GREP@ +GSL_CFLAGS = @GSL_CFLAGS@ +GSL_CONFIG = @GSL_CONFIG@ +GSL_HDR = @GSL_HDR@ +GSL_LIB = @GSL_LIB@ +GSL_LIBS = @GSL_LIBS@ +GSL_STATIC = @GSL_STATIC@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +NM = @NM@ +NMEDIT = @NMEDIT@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENMP_FLAGS = @OPENMP_FLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PASTRI_FLAGS = @PASTRI_FLAGS@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +RANDOMACCESS_FLAGS = @RANDOMACCESS_FLAGS@ 
+RANLIB = @RANLIB@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +STRIP = @STRIP@ +TIMECMPR_FLAGS = @TIMECMPR_FLAGS@ +VERSION = @VERSION@ +WRITESTATS_FLAGS = @WRITESTATS_FLAGS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_FC = @ac_ct_FC@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +AUTOMAKE_OPTIONS = foreign +include_HEADERS = inffixed.h inflate.h inftrees.h trees.h zconf.h crc32.h deflate.h gzguts.h inffast.h zlib.h zutil.h +lib_LTLIBRARIES = libzlib.la +libzlib_la_CFLAGS = -I./ +libzlib_la_SOURCES = adler32.c crc32.c deflate.c gzclose.c gzlib.c gzwrite.c inffast.c zutil.c compress.c \ + gzread.c infback.c inflate.c inftrees.c trees.c uncompr.c + +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj 
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign zlib/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign zlib/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +install-libLTLIBRARIES: $(lib_LTLIBRARIES) + @$(NORMAL_INSTALL) + @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \ + list2=; for p in $$list; do \ + if test -f $$p; then \ + list2="$$list2 $$p"; \ + else :; fi; \ + done; \ + test -z "$$list2" || { \ + echo " $(MKDIR_P) '$(DESTDIR)$(libdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(libdir)" || exit 1; \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(libdir)'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(libdir)"; \ + } + +uninstall-libLTLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || 
list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(libdir)/$$f'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(libdir)/$$f"; \ + done + +clean-libLTLIBRARIES: + -test -z "$(lib_LTLIBRARIES)" || rm -f $(lib_LTLIBRARIES) + @list='$(lib_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } + +libzlib.la: $(libzlib_la_OBJECTS) $(libzlib_la_DEPENDENCIES) $(EXTRA_libzlib_la_DEPENDENCIES) + $(AM_V_CCLD)$(libzlib_la_LINK) -rpath $(libdir) $(libzlib_la_OBJECTS) $(libzlib_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libzlib_la-adler32.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libzlib_la-compress.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libzlib_la-crc32.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libzlib_la-deflate.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libzlib_la-gzclose.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libzlib_la-gzlib.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libzlib_la-gzread.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libzlib_la-gzwrite.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libzlib_la-infback.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libzlib_la-inffast.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libzlib_la-inflate.Plo@am__quote@ # 
am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libzlib_la-inftrees.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libzlib_la-trees.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libzlib_la-uncompr.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libzlib_la-zutil.Plo@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ 
DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +libzlib_la-adler32.lo: adler32.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzlib_la_CFLAGS) $(CFLAGS) -MT libzlib_la-adler32.lo -MD -MP -MF $(DEPDIR)/libzlib_la-adler32.Tpo -c -o libzlib_la-adler32.lo `test -f 'adler32.c' || echo '$(srcdir)/'`adler32.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libzlib_la-adler32.Tpo $(DEPDIR)/libzlib_la-adler32.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='adler32.c' object='libzlib_la-adler32.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzlib_la_CFLAGS) $(CFLAGS) -c -o libzlib_la-adler32.lo `test -f 'adler32.c' || echo '$(srcdir)/'`adler32.c + +libzlib_la-crc32.lo: crc32.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzlib_la_CFLAGS) $(CFLAGS) -MT libzlib_la-crc32.lo -MD -MP -MF $(DEPDIR)/libzlib_la-crc32.Tpo -c -o libzlib_la-crc32.lo `test -f 'crc32.c' || echo '$(srcdir)/'`crc32.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libzlib_la-crc32.Tpo $(DEPDIR)/libzlib_la-crc32.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='crc32.c' object='libzlib_la-crc32.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile 
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzlib_la_CFLAGS) $(CFLAGS) -c -o libzlib_la-crc32.lo `test -f 'crc32.c' || echo '$(srcdir)/'`crc32.c + +libzlib_la-deflate.lo: deflate.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzlib_la_CFLAGS) $(CFLAGS) -MT libzlib_la-deflate.lo -MD -MP -MF $(DEPDIR)/libzlib_la-deflate.Tpo -c -o libzlib_la-deflate.lo `test -f 'deflate.c' || echo '$(srcdir)/'`deflate.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libzlib_la-deflate.Tpo $(DEPDIR)/libzlib_la-deflate.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='deflate.c' object='libzlib_la-deflate.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzlib_la_CFLAGS) $(CFLAGS) -c -o libzlib_la-deflate.lo `test -f 'deflate.c' || echo '$(srcdir)/'`deflate.c + +libzlib_la-gzclose.lo: gzclose.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzlib_la_CFLAGS) $(CFLAGS) -MT libzlib_la-gzclose.lo -MD -MP -MF $(DEPDIR)/libzlib_la-gzclose.Tpo -c -o libzlib_la-gzclose.lo `test -f 'gzclose.c' || echo '$(srcdir)/'`gzclose.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libzlib_la-gzclose.Tpo $(DEPDIR)/libzlib_la-gzclose.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='gzclose.c' object='libzlib_la-gzclose.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) 
$(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzlib_la_CFLAGS) $(CFLAGS) -c -o libzlib_la-gzclose.lo `test -f 'gzclose.c' || echo '$(srcdir)/'`gzclose.c + +libzlib_la-gzlib.lo: gzlib.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzlib_la_CFLAGS) $(CFLAGS) -MT libzlib_la-gzlib.lo -MD -MP -MF $(DEPDIR)/libzlib_la-gzlib.Tpo -c -o libzlib_la-gzlib.lo `test -f 'gzlib.c' || echo '$(srcdir)/'`gzlib.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libzlib_la-gzlib.Tpo $(DEPDIR)/libzlib_la-gzlib.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='gzlib.c' object='libzlib_la-gzlib.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzlib_la_CFLAGS) $(CFLAGS) -c -o libzlib_la-gzlib.lo `test -f 'gzlib.c' || echo '$(srcdir)/'`gzlib.c + +libzlib_la-gzwrite.lo: gzwrite.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzlib_la_CFLAGS) $(CFLAGS) -MT libzlib_la-gzwrite.lo -MD -MP -MF $(DEPDIR)/libzlib_la-gzwrite.Tpo -c -o libzlib_la-gzwrite.lo `test -f 'gzwrite.c' || echo '$(srcdir)/'`gzwrite.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libzlib_la-gzwrite.Tpo $(DEPDIR)/libzlib_la-gzwrite.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='gzwrite.c' object='libzlib_la-gzwrite.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ 
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzlib_la_CFLAGS) $(CFLAGS) -c -o libzlib_la-gzwrite.lo `test -f 'gzwrite.c' || echo '$(srcdir)/'`gzwrite.c + +libzlib_la-inffast.lo: inffast.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzlib_la_CFLAGS) $(CFLAGS) -MT libzlib_la-inffast.lo -MD -MP -MF $(DEPDIR)/libzlib_la-inffast.Tpo -c -o libzlib_la-inffast.lo `test -f 'inffast.c' || echo '$(srcdir)/'`inffast.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libzlib_la-inffast.Tpo $(DEPDIR)/libzlib_la-inffast.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='inffast.c' object='libzlib_la-inffast.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzlib_la_CFLAGS) $(CFLAGS) -c -o libzlib_la-inffast.lo `test -f 'inffast.c' || echo '$(srcdir)/'`inffast.c + +libzlib_la-zutil.lo: zutil.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzlib_la_CFLAGS) $(CFLAGS) -MT libzlib_la-zutil.lo -MD -MP -MF $(DEPDIR)/libzlib_la-zutil.Tpo -c -o libzlib_la-zutil.lo `test -f 'zutil.c' || echo '$(srcdir)/'`zutil.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libzlib_la-zutil.Tpo $(DEPDIR)/libzlib_la-zutil.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='zutil.c' object='libzlib_la-zutil.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ 
DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzlib_la_CFLAGS) $(CFLAGS) -c -o libzlib_la-zutil.lo `test -f 'zutil.c' || echo '$(srcdir)/'`zutil.c + +libzlib_la-compress.lo: compress.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzlib_la_CFLAGS) $(CFLAGS) -MT libzlib_la-compress.lo -MD -MP -MF $(DEPDIR)/libzlib_la-compress.Tpo -c -o libzlib_la-compress.lo `test -f 'compress.c' || echo '$(srcdir)/'`compress.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libzlib_la-compress.Tpo $(DEPDIR)/libzlib_la-compress.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='compress.c' object='libzlib_la-compress.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzlib_la_CFLAGS) $(CFLAGS) -c -o libzlib_la-compress.lo `test -f 'compress.c' || echo '$(srcdir)/'`compress.c + +libzlib_la-gzread.lo: gzread.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzlib_la_CFLAGS) $(CFLAGS) -MT libzlib_la-gzread.lo -MD -MP -MF $(DEPDIR)/libzlib_la-gzread.Tpo -c -o libzlib_la-gzread.lo `test -f 'gzread.c' || echo '$(srcdir)/'`gzread.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libzlib_la-gzread.Tpo $(DEPDIR)/libzlib_la-gzread.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='gzread.c' 
object='libzlib_la-gzread.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzlib_la_CFLAGS) $(CFLAGS) -c -o libzlib_la-gzread.lo `test -f 'gzread.c' || echo '$(srcdir)/'`gzread.c + +libzlib_la-infback.lo: infback.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzlib_la_CFLAGS) $(CFLAGS) -MT libzlib_la-infback.lo -MD -MP -MF $(DEPDIR)/libzlib_la-infback.Tpo -c -o libzlib_la-infback.lo `test -f 'infback.c' || echo '$(srcdir)/'`infback.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libzlib_la-infback.Tpo $(DEPDIR)/libzlib_la-infback.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='infback.c' object='libzlib_la-infback.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzlib_la_CFLAGS) $(CFLAGS) -c -o libzlib_la-infback.lo `test -f 'infback.c' || echo '$(srcdir)/'`infback.c + +libzlib_la-inflate.lo: inflate.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzlib_la_CFLAGS) $(CFLAGS) -MT libzlib_la-inflate.lo -MD -MP -MF $(DEPDIR)/libzlib_la-inflate.Tpo -c -o libzlib_la-inflate.lo `test -f 'inflate.c' || echo '$(srcdir)/'`inflate.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libzlib_la-inflate.Tpo 
$(DEPDIR)/libzlib_la-inflate.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='inflate.c' object='libzlib_la-inflate.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzlib_la_CFLAGS) $(CFLAGS) -c -o libzlib_la-inflate.lo `test -f 'inflate.c' || echo '$(srcdir)/'`inflate.c + +libzlib_la-inftrees.lo: inftrees.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzlib_la_CFLAGS) $(CFLAGS) -MT libzlib_la-inftrees.lo -MD -MP -MF $(DEPDIR)/libzlib_la-inftrees.Tpo -c -o libzlib_la-inftrees.lo `test -f 'inftrees.c' || echo '$(srcdir)/'`inftrees.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libzlib_la-inftrees.Tpo $(DEPDIR)/libzlib_la-inftrees.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='inftrees.c' object='libzlib_la-inftrees.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzlib_la_CFLAGS) $(CFLAGS) -c -o libzlib_la-inftrees.lo `test -f 'inftrees.c' || echo '$(srcdir)/'`inftrees.c + +libzlib_la-trees.lo: trees.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzlib_la_CFLAGS) $(CFLAGS) -MT libzlib_la-trees.lo -MD -MP -MF $(DEPDIR)/libzlib_la-trees.Tpo -c -o libzlib_la-trees.lo `test -f 'trees.c' || echo 
'$(srcdir)/'`trees.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libzlib_la-trees.Tpo $(DEPDIR)/libzlib_la-trees.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='trees.c' object='libzlib_la-trees.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzlib_la_CFLAGS) $(CFLAGS) -c -o libzlib_la-trees.lo `test -f 'trees.c' || echo '$(srcdir)/'`trees.c + +libzlib_la-uncompr.lo: uncompr.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzlib_la_CFLAGS) $(CFLAGS) -MT libzlib_la-uncompr.lo -MD -MP -MF $(DEPDIR)/libzlib_la-uncompr.Tpo -c -o libzlib_la-uncompr.lo `test -f 'uncompr.c' || echo '$(srcdir)/'`uncompr.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libzlib_la-uncompr.Tpo $(DEPDIR)/libzlib_la-uncompr.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='uncompr.c' object='libzlib_la-uncompr.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzlib_la_CFLAGS) $(CFLAGS) -c -o libzlib_la-uncompr.lo `test -f 'uncompr.c' || echo '$(srcdir)/'`uncompr.c + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs +install-includeHEADERS: $(include_HEADERS) + @$(NORMAL_INSTALL) + @list='$(include_HEADERS)'; test -n "$(includedir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(includedir)'"; \ + $(MKDIR_P) 
"$(DESTDIR)$(includedir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(includedir)'"; \ + $(INSTALL_HEADER) $$files "$(DESTDIR)$(includedir)" || exit $$?; \ + done + +uninstall-includeHEADERS: + @$(NORMAL_UNINSTALL) + @list='$(include_HEADERS)'; test -n "$(includedir)" || list=; \ + files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ + dir='$(DESTDIR)$(includedir)'; $(am__uninstall_files_from_dir) + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: 
$(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(LTLIBRARIES) $(HEADERS) +installdirs: + for dir in "$(DESTDIR)$(libdir)" "$(DESTDIR)$(includedir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi 
+mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-am + +clean-am: clean-generic clean-libLTLIBRARIES clean-libtool \ + mostlyclean-am + +distclean: distclean-am + -rm -f ./$(DEPDIR)/libzlib_la-adler32.Plo + -rm -f ./$(DEPDIR)/libzlib_la-compress.Plo + -rm -f ./$(DEPDIR)/libzlib_la-crc32.Plo + -rm -f ./$(DEPDIR)/libzlib_la-deflate.Plo + -rm -f ./$(DEPDIR)/libzlib_la-gzclose.Plo + -rm -f ./$(DEPDIR)/libzlib_la-gzlib.Plo + -rm -f ./$(DEPDIR)/libzlib_la-gzread.Plo + -rm -f ./$(DEPDIR)/libzlib_la-gzwrite.Plo + -rm -f ./$(DEPDIR)/libzlib_la-infback.Plo + -rm -f ./$(DEPDIR)/libzlib_la-inffast.Plo + -rm -f ./$(DEPDIR)/libzlib_la-inflate.Plo + -rm -f ./$(DEPDIR)/libzlib_la-inftrees.Plo + -rm -f ./$(DEPDIR)/libzlib_la-trees.Plo + -rm -f ./$(DEPDIR)/libzlib_la-uncompr.Plo + -rm -f ./$(DEPDIR)/libzlib_la-zutil.Plo + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: install-includeHEADERS + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: install-libLTLIBRARIES + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f ./$(DEPDIR)/libzlib_la-adler32.Plo + -rm -f ./$(DEPDIR)/libzlib_la-compress.Plo + -rm -f ./$(DEPDIR)/libzlib_la-crc32.Plo + -rm -f ./$(DEPDIR)/libzlib_la-deflate.Plo + -rm -f ./$(DEPDIR)/libzlib_la-gzclose.Plo + -rm -f 
./$(DEPDIR)/libzlib_la-gzlib.Plo + -rm -f ./$(DEPDIR)/libzlib_la-gzread.Plo + -rm -f ./$(DEPDIR)/libzlib_la-gzwrite.Plo + -rm -f ./$(DEPDIR)/libzlib_la-infback.Plo + -rm -f ./$(DEPDIR)/libzlib_la-inffast.Plo + -rm -f ./$(DEPDIR)/libzlib_la-inflate.Plo + -rm -f ./$(DEPDIR)/libzlib_la-inftrees.Plo + -rm -f ./$(DEPDIR)/libzlib_la-trees.Plo + -rm -f ./$(DEPDIR)/libzlib_la-uncompr.Plo + -rm -f ./$(DEPDIR)/libzlib_la-zutil.Plo + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-includeHEADERS uninstall-libLTLIBRARIES + +.MAKE: install-am install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am am--depfiles check check-am clean \ + clean-generic clean-libLTLIBRARIES clean-libtool cscopelist-am \ + ctags ctags-am distclean distclean-compile distclean-generic \ + distclean-libtool distclean-tags distdir dvi dvi-am html \ + html-am info info-am install install-am install-data \ + install-data-am install-dvi install-dvi-am install-exec \ + install-exec-am install-html install-html-am \ + install-includeHEADERS install-info install-info-am \ + install-libLTLIBRARIES install-man install-pdf install-pdf-am \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags tags-am uninstall uninstall-am uninstall-includeHEADERS \ + uninstall-libLTLIBRARIES + +.PRECIOUS: Makefile + + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. 
+.NOEXPORT: diff --git a/deps/SZ/zlib/adler32.c b/deps/SZ/zlib/adler32.c new file mode 100644 index 0000000000000000000000000000000000000000..d0be4380a39c9c5bf439b1552c43585b5aafad0a --- /dev/null +++ b/deps/SZ/zlib/adler32.c @@ -0,0 +1,186 @@ +/* adler32.c -- compute the Adler-32 checksum of a data stream + * Copyright (C) 1995-2011, 2016 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* @(#) $Id$ */ + +#include "zutil.h" + +local uLong adler32_combine_ OF((uLong adler1, uLong adler2, z_off64_t len2)); + +#define BASE 65521U /* largest prime smaller than 65536 */ +#define NMAX 5552 +/* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */ + +#define DO1(buf,i) {adler += (buf)[i]; sum2 += adler;} +#define DO2(buf,i) DO1(buf,i); DO1(buf,i+1); +#define DO4(buf,i) DO2(buf,i); DO2(buf,i+2); +#define DO8(buf,i) DO4(buf,i); DO4(buf,i+4); +#define DO16(buf) DO8(buf,0); DO8(buf,8); + +/* use NO_DIVIDE if your processor does not do division in hardware -- + try it both ways to see which is faster */ +#ifdef NO_DIVIDE +/* note that this assumes BASE is 65521, where 65536 % 65521 == 15 + (thank you to John Reiser for pointing this out) */ +# define CHOP(a) \ + do { \ + unsigned long tmp = a >> 16; \ + a &= 0xffffUL; \ + a += (tmp << 4) - tmp; \ + } while (0) +# define MOD28(a) \ + do { \ + CHOP(a); \ + if (a >= BASE) a -= BASE; \ + } while (0) +# define MOD(a) \ + do { \ + CHOP(a); \ + MOD28(a); \ + } while (0) +# define MOD63(a) \ + do { /* this assumes a is not negative */ \ + z_off64_t tmp = a >> 32; \ + a &= 0xffffffffL; \ + a += (tmp << 8) - (tmp << 5) + tmp; \ + tmp = a >> 16; \ + a &= 0xffffL; \ + a += (tmp << 4) - tmp; \ + tmp = a >> 16; \ + a &= 0xffffL; \ + a += (tmp << 4) - tmp; \ + if (a >= BASE) a -= BASE; \ + } while (0) +#else +# define MOD(a) a %= BASE +# define MOD28(a) a %= BASE +# define MOD63(a) a %= BASE +#endif + +/* ========================================================================= 
*/ +uLong ZEXPORT adler32_z(adler, buf, len) + uLong adler; + const Bytef *buf; + z_size_t len; +{ + unsigned long sum2; + unsigned n; + + /* split Adler-32 into component sums */ + sum2 = (adler >> 16) & 0xffff; + adler &= 0xffff; + + /* in case user likes doing a byte at a time, keep it fast */ + if (len == 1) { + adler += buf[0]; + if (adler >= BASE) + adler -= BASE; + sum2 += adler; + if (sum2 >= BASE) + sum2 -= BASE; + return adler | (sum2 << 16); + } + + /* initial Adler-32 value (deferred check for len == 1 speed) */ + if (buf == Z_NULL) + return 1L; + + /* in case short lengths are provided, keep it somewhat fast */ + if (len < 16) { + while (len--) { + adler += *buf++; + sum2 += adler; + } + if (adler >= BASE) + adler -= BASE; + MOD28(sum2); /* only added so many BASE's */ + return adler | (sum2 << 16); + } + + /* do length NMAX blocks -- requires just one modulo operation */ + while (len >= NMAX) { + len -= NMAX; + n = NMAX / 16; /* NMAX is divisible by 16 */ + do { + DO16(buf); /* 16 sums unrolled */ + buf += 16; + } while (--n); + MOD(adler); + MOD(sum2); + } + + /* do remaining bytes (less than NMAX, still just one modulo) */ + if (len) { /* avoid modulos if none remaining */ + while (len >= 16) { + len -= 16; + DO16(buf); + buf += 16; + } + while (len--) { + adler += *buf++; + sum2 += adler; + } + MOD(adler); + MOD(sum2); + } + + /* return recombined sums */ + return adler | (sum2 << 16); +} + +/* ========================================================================= */ +uLong ZEXPORT adler32(adler, buf, len) + uLong adler; + const Bytef *buf; + uInt len; +{ + return adler32_z(adler, buf, len); +} + +/* ========================================================================= */ +local uLong adler32_combine_(adler1, adler2, len2) + uLong adler1; + uLong adler2; + z_off64_t len2; +{ + unsigned long sum1; + unsigned long sum2; + unsigned rem; + + /* for negative len, return invalid adler32 as a clue for debugging */ + if (len2 < 0) + return 
0xffffffffUL; + + /* the derivation of this formula is left as an exercise for the reader */ + MOD63(len2); /* assumes len2 >= 0 */ + rem = (unsigned)len2; + sum1 = adler1 & 0xffff; + sum2 = rem * sum1; + MOD(sum2); + sum1 += (adler2 & 0xffff) + BASE - 1; + sum2 += ((adler1 >> 16) & 0xffff) + ((adler2 >> 16) & 0xffff) + BASE - rem; + if (sum1 >= BASE) sum1 -= BASE; + if (sum1 >= BASE) sum1 -= BASE; + if (sum2 >= ((unsigned long)BASE << 1)) sum2 -= ((unsigned long)BASE << 1); + if (sum2 >= BASE) sum2 -= BASE; + return sum1 | (sum2 << 16); +} + +/* ========================================================================= */ +uLong ZEXPORT adler32_combine(adler1, adler2, len2) + uLong adler1; + uLong adler2; + z_off_t len2; +{ + return adler32_combine_(adler1, adler2, len2); +} + +uLong ZEXPORT adler32_combine64(adler1, adler2, len2) + uLong adler1; + uLong adler2; + z_off64_t len2; +{ + return adler32_combine_(adler1, adler2, len2); +} diff --git a/deps/SZ/zlib/compress.c b/deps/SZ/zlib/compress.c new file mode 100644 index 0000000000000000000000000000000000000000..e2db404abf888bd2c85844985b5ae9784b955c63 --- /dev/null +++ b/deps/SZ/zlib/compress.c @@ -0,0 +1,86 @@ +/* compress.c -- compress a memory buffer + * Copyright (C) 1995-2005, 2014, 2016 Jean-loup Gailly, Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* @(#) $Id$ */ + +#define ZLIB_INTERNAL +#include "zlib.h" + +/* =========================================================================== + Compresses the source buffer into the destination buffer. The level + parameter has the same meaning as in deflateInit. sourceLen is the byte + length of the source buffer. Upon entry, destLen is the total size of the + destination buffer, which must be at least 0.1% larger than sourceLen plus + 12 bytes. Upon exit, destLen is the actual size of the compressed buffer. 
+ + compress2 returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_BUF_ERROR if there was not enough room in the output buffer, + Z_STREAM_ERROR if the level parameter is invalid. +*/ +int ZEXPORT compress2 (dest, destLen, source, sourceLen, level) + Bytef *dest; + uLongf *destLen; + const Bytef *source; + uLong sourceLen; + int level; +{ + z_stream stream; + int err; + const uInt max = (uInt)-1; + uLong left; + + left = *destLen; + *destLen = 0; + + stream.zalloc = (alloc_func)0; + stream.zfree = (free_func)0; + stream.opaque = (voidpf)0; + + err = deflateInit(&stream, level); + if (err != Z_OK) return err; + + stream.next_out = dest; + stream.avail_out = 0; + stream.next_in = (z_const Bytef *)source; + stream.avail_in = 0; + + do { + if (stream.avail_out == 0) { + stream.avail_out = left > (uLong)max ? max : (uInt)left; + left -= stream.avail_out; + } + if (stream.avail_in == 0) { + stream.avail_in = sourceLen > (uLong)max ? max : (uInt)sourceLen; + sourceLen -= stream.avail_in; + } + err = deflate(&stream, sourceLen ? Z_NO_FLUSH : Z_FINISH); + } while (err == Z_OK); + + *destLen = stream.total_out; + deflateEnd(&stream); + return err == Z_STREAM_END ? Z_OK : err; +} + +/* =========================================================================== + */ +int ZEXPORT compress (dest, destLen, source, sourceLen) + Bytef *dest; + uLongf *destLen; + const Bytef *source; + uLong sourceLen; +{ + return compress2(dest, destLen, source, sourceLen, Z_DEFAULT_COMPRESSION); +} + +/* =========================================================================== + If the default memLevel or windowBits for deflateInit() is changed, then + this function needs to be updated. 
+ */ +uLong ZEXPORT compressBound (sourceLen) + uLong sourceLen; +{ + return sourceLen + (sourceLen >> 12) + (sourceLen >> 14) + + (sourceLen >> 25) + 13; +} diff --git a/deps/SZ/zlib/crc32.c b/deps/SZ/zlib/crc32.c new file mode 100644 index 0000000000000000000000000000000000000000..9580440c0e6b673c43e57daab03274ebdca8f77e --- /dev/null +++ b/deps/SZ/zlib/crc32.c @@ -0,0 +1,442 @@ +/* crc32.c -- compute the CRC-32 of a data stream + * Copyright (C) 1995-2006, 2010, 2011, 2012, 2016 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + * + * Thanks to Rodney Brown for his contribution of faster + * CRC methods: exclusive-oring 32 bits of data at a time, and pre-computing + * tables for updating the shift register in one step with three exclusive-ors + * instead of four steps with four exclusive-ors. This results in about a + * factor of two increase in speed on a Power PC G4 (PPC7455) using gcc -O3. + */ + +/* @(#) $Id$ */ + +/* + Note on the use of DYNAMIC_CRC_TABLE: there is no mutex or semaphore + protection on the static variables used to control the first-use generation + of the crc tables. Therefore, if you #define DYNAMIC_CRC_TABLE, you should + first call get_crc_table() to initialize the tables before allowing more than + one thread to use crc32(). + + DYNAMIC_CRC_TABLE and MAKECRCH can be #defined to write out crc32.h. + */ + +#ifdef MAKECRCH +# include +# ifndef DYNAMIC_CRC_TABLE +# define DYNAMIC_CRC_TABLE +# endif /* !DYNAMIC_CRC_TABLE */ +#endif /* MAKECRCH */ + +#include "zutil.h" /* for STDC and FAR definitions */ + +/* Definitions for doing the crc four data bytes at a time. 
*/ +#if !defined(NOBYFOUR) && defined(Z_U4) +# define BYFOUR +#endif +#ifdef BYFOUR + local unsigned long crc32_little OF((unsigned long, + const unsigned char FAR *, z_size_t)); + local unsigned long crc32_big OF((unsigned long, + const unsigned char FAR *, z_size_t)); +# define TBLS 8 /* tables 0-3 are read by crc32_little(), tables 4-7 by crc32_big() */ +#else +# define TBLS 1 /* only the byte-at-a-time table, crc_table[0] */ +#endif /* BYFOUR */ + +/* Local functions for crc concatenation */ +local unsigned long gf2_matrix_times OF((unsigned long *mat, + unsigned long vec)); +local void gf2_matrix_square OF((unsigned long *square, unsigned long *mat)); +local uLong crc32_combine_ OF((uLong crc1, uLong crc2, z_off64_t len2)); + + +#ifdef DYNAMIC_CRC_TABLE + +local volatile int crc_table_empty = 1; /* set to 0 by make_crc_table() once crc_table has been filled in */ +local z_crc_t FAR crc_table[TBLS][256]; +local void make_crc_table OF((void)); +#ifdef MAKECRCH + local void write_table OF((FILE *, const z_crc_t FAR *)); +#endif /* MAKECRCH */ +/* + Generate tables for a byte-wise 32-bit CRC calculation on the polynomial: + x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1. + + Polynomials over GF(2) are represented in binary, one bit per coefficient, + with the lowest powers in the most significant bit. Then adding polynomials + is just exclusive-or, and multiplying a polynomial by x is a right shift by + one. If we call the above polynomial p, and represent a byte as the + polynomial q, also with the lowest power in the most significant bit (so the + byte 0xb1 is the polynomial x^7+x^3+x+1), then the CRC is (q*x^32) mod p, + where a mod b means the remainder after dividing a by b. + + This calculation is done using the shift-register method of multiplying and + taking the remainder. The register is initialized to zero, and for each + incoming bit, x^32 is added mod p to the register if the bit is a one (where + x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by + x (which is shifting right by one and adding x^32 mod p if the bit shifted + out is a one).
We start with the highest power (least significant bit) of + q and repeat for all eight bits of q. + + The first table is simply the CRC of all possible eight bit values. This is + all the information needed to generate CRCs on data a byte at a time for all + combinations of CRC register values and incoming bytes. The remaining tables + allow for word-at-a-time CRC calculation for both big-endian and little- + endian machines, where a word is four bytes. +*/ +local void make_crc_table() +{ + z_crc_t c; + int n, k; + z_crc_t poly; /* polynomial exclusive-or pattern */ + /* terms of polynomial defining this crc (except x^32): */ + static volatile int first = 1; /* flag to limit concurrent making */ + static const unsigned char p[] = {0,1,2,4,5,7,8,10,11,12,16,22,23,26}; + + /* See if another task is already doing this (not thread-safe, but better + than nothing -- significantly reduces duration of vulnerability in + case the advice about DYNAMIC_CRC_TABLE is ignored) */ + if (first) { + first = 0; + + /* make exclusive-or pattern from polynomial (0xedb88320UL) */ + poly = 0; + for (n = 0; n < (int)(sizeof(p)/sizeof(unsigned char)); n++) + poly |= (z_crc_t)1 << (31 - p[n]); /* term x^p[n] occupies bit 31-p[n]: lowest power in the most significant bit */ + + /* generate a crc for every 8-bit value */ + for (n = 0; n < 256; n++) { + c = (z_crc_t)n; + for (k = 0; k < 8; k++) + c = c & 1 ?
poly ^ (c >> 1) : c >> 1; /* shift right one bit, xor-ing in poly when the bit shifted out was a one */ + crc_table[0][n] = c; + } + +#ifdef BYFOUR + /* generate crc for each value followed by one, two, and three zeros, + and then the byte reversal of those as well as the first table */ + for (n = 0; n < 256; n++) { + c = crc_table[0][n]; + crc_table[4][n] = ZSWAP32(c); + for (k = 1; k < 4; k++) { + c = crc_table[0][c & 0xff] ^ (c >> 8); + crc_table[k][n] = c; + crc_table[k + 4][n] = ZSWAP32(c); + } + } +#endif /* BYFOUR */ + + crc_table_empty = 0; + } + else { /* not first */ + /* wait for the other guy to finish (not efficient, but rare) */ + while (crc_table_empty) + ; + } + +#ifdef MAKECRCH + /* write out CRC tables to crc32.h */ + { + FILE *out; + + out = fopen("crc32.h", "w"); + if (out == NULL) return; /* crc32.h could not be opened for writing: skip the dump silently */ + fprintf(out, "/* crc32.h -- tables for rapid CRC calculation\n"); + fprintf(out, " * Generated automatically by crc32.c\n */\n\n"); + fprintf(out, "local const z_crc_t FAR "); + fprintf(out, "crc_table[TBLS][256] =\n{\n {\n"); + write_table(out, crc_table[0]); +# ifdef BYFOUR + fprintf(out, "#ifdef BYFOUR\n"); + for (k = 1; k < 8; k++) { + fprintf(out, " },\n {\n"); + write_table(out, crc_table[k]); + } + fprintf(out, "#endif\n"); +# endif /* BYFOUR */ + fprintf(out, " }\n};\n"); + fclose(out); + } +#endif /* MAKECRCH */ +} + +#ifdef MAKECRCH +local void write_table(out, table) /* print the 256 entries of table to out as hex constants, five per row */ + FILE *out; + const z_crc_t FAR *table; +{ + int n; + + for (n = 0; n < 256; n++) + fprintf(out, "%s0x%08lxUL%s", n % 5 ? "" : " ", + (unsigned long)(table[n]), + n == 255 ? "\n" : (n % 5 == 4 ? ",\n" : ", ")); +} +#endif /* MAKECRCH */ + +#else /* !DYNAMIC_CRC_TABLE */ +/* ======================================================================== + * Tables of CRC-32s of all single-byte values, made by make_crc_table().
+ */ +#include "crc32.h" /* precomputed crc_table[TBLS][256], used when the tables are not built at run time */ +#endif /* DYNAMIC_CRC_TABLE */ + +/* ========================================================================= + * This function can be used by asm versions of crc32() + */ +const z_crc_t FAR * ZEXPORT get_crc_table() +{ +#ifdef DYNAMIC_CRC_TABLE + if (crc_table_empty) + make_crc_table(); +#endif /* DYNAMIC_CRC_TABLE */ + return (const z_crc_t FAR *)crc_table; /* the two-dimensional table is handed out as a pointer to its first entry */ +} + +/* ========================================================================= */ +#define DO1 crc = crc_table[0][((int)crc ^ (*buf++)) & 0xff] ^ (crc >> 8) /* fold one byte from buf into crc using crc_table[0] */ +#define DO8 DO1; DO1; DO1; DO1; DO1; DO1; DO1; DO1 /* eight DO1 steps */ + +/* ========================================================================= */ +unsigned long ZEXPORT crc32_z(crc, buf, len) + unsigned long crc; + const unsigned char FAR *buf; + z_size_t len; +{ + if (buf == Z_NULL) return 0UL; /* a null buffer returns 0 whatever crc holds */ + +#ifdef DYNAMIC_CRC_TABLE + if (crc_table_empty) + make_crc_table(); +#endif /* DYNAMIC_CRC_TABLE */ + +#ifdef BYFOUR + if (sizeof(void *) == sizeof(ptrdiff_t)) { /* crc32_little/crc32_big cast buf to ptrdiff_t to test its alignment */ + z_crc_t endian; + + endian = 1; + if (*((unsigned char *)(&endian))) /* low-order byte stored first: little-endian */ + return crc32_little(crc, buf, len); + else + return crc32_big(crc, buf, len); + } +#endif /* BYFOUR */ + crc = crc ^ 0xffffffffUL; /* the register is inverted here and again in the return below */ + while (len >= 8) { + DO8; + len -= 8; + } + if (len) do { + DO1; + } while (--len); + return crc ^ 0xffffffffUL; +} + +/* ========================================================================= */ +unsigned long ZEXPORT crc32(crc, buf, len) + unsigned long crc; + const unsigned char FAR *buf; + uInt len; +{ + return crc32_z(crc, buf, len); /* same computation; only the type of len differs (uInt here, z_size_t there) */ +} + +#ifdef BYFOUR + +/* + This BYFOUR code accesses the passed unsigned char * buffer with a 32-bit + integer pointer type. This violates the strict aliasing rule, where a + compiler can assume, for optimization purposes, that two pointers to + fundamentally different types won't ever point to the same memory. This can + manifest as a problem only if one of the pointers is written to. This code + only reads from those pointers.
So long as this code remains isolated in + this compilation unit, there won't be a problem. For this reason, this code + should not be copied and pasted into a compilation unit in which other code + writes to the buffer that is passed to these routines. + */ + +/* ========================================================================= */ +#define DOLIT4 c ^= *buf4++; \ + c = crc_table[3][c & 0xff] ^ crc_table[2][(c >> 8) & 0xff] ^ \ + crc_table[1][(c >> 16) & 0xff] ^ crc_table[0][c >> 24] +#define DOLIT32 DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4 + +/* ========================================================================= */ +local unsigned long crc32_little(crc, buf, len) + unsigned long crc; + const unsigned char FAR *buf; + z_size_t len; +{ + register z_crc_t c; + register const z_crc_t FAR *buf4; + + c = (z_crc_t)crc; + c = ~c; /* invert on entry; inverted back before returning */ + while (len && ((ptrdiff_t)buf & 3)) { /* one byte at a time until buf is 4-byte aligned */ + c = crc_table[0][(c ^ *buf++) & 0xff] ^ (c >> 8); + len--; + } + + buf4 = (const z_crc_t FAR *)(const void FAR *)buf; /* from here on the buffer is read through a z_crc_t pointer */ + while (len >= 32) { /* eight DOLIT4 steps per iteration */ + DOLIT32; + len -= 32; + } + while (len >= 4) { + DOLIT4; + len -= 4; + } + buf = (const unsigned char FAR *)buf4; /* back to bytes for the remaining 0-3 */ + + if (len) do { + c = crc_table[0][(c ^ *buf++) & 0xff] ^ (c >> 8); + } while (--len); + c = ~c; + return (unsigned long)c; +} + +/* ========================================================================= */ +#define DOBIG4 c ^= *buf4++; \ + c = crc_table[4][c & 0xff] ^ crc_table[5][(c >> 8) & 0xff] ^ \ + crc_table[6][(c >> 16) & 0xff] ^ crc_table[7][c >> 24] +#define DOBIG32 DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4 + +/* ========================================================================= */ +local unsigned long crc32_big(crc, buf, len) + unsigned long crc; + const unsigned char FAR *buf; + z_size_t len; +{ + register z_crc_t c; + register const z_crc_t FAR *buf4; + + c = ZSWAP32((z_crc_t)crc); /* ZSWAP32 is applied on entry and again to the result on return */ + c = ~c; + while (len && ((ptrdiff_t)buf & 3)) { + c = crc_table[4][(c >> 24) ^ *buf++] ^ (c
<< 8); + len--; + } + + buf4 = (const z_crc_t FAR *)(const void FAR *)buf; + while (len >= 32) { + DOBIG32; + len -= 32; + } + while (len >= 4) { + DOBIG4; + len -= 4; + } + buf = (const unsigned char FAR *)buf4; + + if (len) do { + c = crc_table[4][(c >> 24) ^ *buf++] ^ (c << 8); + } while (--len); + c = ~c; + return (unsigned long)(ZSWAP32(c)); +} + +#endif /* BYFOUR */ + +#define GF2_DIM 32 /* dimension of GF(2) vectors (length of CRC) */ + +/* ========================================================================= */ +local unsigned long gf2_matrix_times(mat, vec) /* returns the xor of mat[i] over every set bit i of vec */ + unsigned long *mat; + unsigned long vec; +{ + unsigned long sum; + + sum = 0; + while (vec) { + if (vec & 1) + sum ^= *mat; + vec >>= 1; + mat++; + } + return sum; +} + +/* ========================================================================= */ +local void gf2_matrix_square(square, mat) /* square[n] = gf2_matrix_times(mat, mat[n]) for each of the GF2_DIM entries */ + unsigned long *square; + unsigned long *mat; +{ + int n; + + for (n = 0; n < GF2_DIM; n++) + square[n] = gf2_matrix_times(mat, mat[n]); +} + +/* ========================================================================= */ +local uLong crc32_combine_(crc1, crc2, len2) + uLong crc1; + uLong crc2; + z_off64_t len2; +{ + int n; + unsigned long row; + unsigned long even[GF2_DIM]; /* even-power-of-two zeros operator */ + unsigned long odd[GF2_DIM]; /* odd-power-of-two zeros operator */ + + /* degenerate case (also disallow negative lengths) */ + if (len2 <= 0) + return crc1; + + /* put operator for one zero bit in odd */ + odd[0] = 0xedb88320UL; /* CRC-32 polynomial */ + row = 1; + for (n = 1; n < GF2_DIM; n++) { + odd[n] = row; + row <<= 1; + } + + /* put operator for two zero bits in even */ + gf2_matrix_square(even, odd); + + /* put operator for four zero bits in odd */ + gf2_matrix_square(odd, even); + + /* apply len2 zeros to crc1 (first square will put the operator for one + zero byte, eight zero bits, in even) */ + do { + /* apply zeros operator for this bit of len2 */ + gf2_matrix_square(even, odd); + if
(len2 & 1) + crc1 = gf2_matrix_times(even, crc1); + len2 >>= 1; + + /* if no more bits set, then done */ + if (len2 == 0) + break; + + /* another iteration of the loop with odd and even swapped */ + gf2_matrix_square(odd, even); + if (len2 & 1) + crc1 = gf2_matrix_times(odd, crc1); + len2 >>= 1; + + /* if no more bits set, then done */ + } while (len2 != 0); + + /* return combined crc */ + crc1 ^= crc2; + return crc1; +} + +/* ========================================================================= */ +uLong ZEXPORT crc32_combine(crc1, crc2, len2) + uLong crc1; + uLong crc2; + z_off_t len2; +{ + return crc32_combine_(crc1, crc2, len2); /* len2 is z_off_t here; crc32_combine_ takes z_off64_t */ +} + +uLong ZEXPORT crc32_combine64(crc1, crc2, len2) + uLong crc1; + uLong crc2; + z_off64_t len2; +{ + return crc32_combine_(crc1, crc2, len2); /* same call as crc32_combine, with len2 already z_off64_t */ +} diff --git a/deps/SZ/zlib/crc32.h b/deps/SZ/zlib/crc32.h new file mode 100644 index 0000000000000000000000000000000000000000..9e0c7781025148380d130d6f7b6e590117ad3a8c --- /dev/null +++ b/deps/SZ/zlib/crc32.h @@ -0,0 +1,441 @@ +/* crc32.h -- tables for rapid CRC calculation + * Generated automatically by crc32.c + */ + +local const z_crc_t FAR crc_table[TBLS][256] = +{ + { + 0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL, + 0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, 0x79dcb8a4UL, + 0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL, + 0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL, + 0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL, + 0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL, + 0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL, + 0xa2677172UL, 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL, + 0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL, + 0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL, + 0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, 0xcfba9599UL, + 0xb8bda50fUL, 0x2802b89eUL,
0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL, + 0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL, 0x76dc4190UL, + 0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL, + 0x9fbfe4a5UL, 0xe8b8d433UL, 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL, + 0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL, + 0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL, + 0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL, + 0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL, + 0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL, + 0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL, + 0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, 0x44042d73UL, 0x33031de5UL, + 0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL, + 0xc90c2086UL, 0x5768b525UL, 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL, + 0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL, + 0x2eb40d81UL, 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL, + 0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL, + 0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL, + 0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, 0xf00f9344UL, + 0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL, + 0x196c3671UL, 0x6e6b06e7UL, 0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL, + 0x67dd4accUL, 0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL, + 0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL, + 0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL, + 0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL, + 0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL, + 0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL, + 0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL, + 0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL, + 
0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 0x72076785UL, 0x05005713UL, + 0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL, + 0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL, + 0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL, + 0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL, + 0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL, + 0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL, + 0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL, + 0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL, + 0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL, + 0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL, + 0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL, + 0x2d02ef8dUL +#ifdef BYFOUR + }, + { + 0x00000000UL, 0x191b3141UL, 0x32366282UL, 0x2b2d53c3UL, 0x646cc504UL, + 0x7d77f445UL, 0x565aa786UL, 0x4f4196c7UL, 0xc8d98a08UL, 0xd1c2bb49UL, + 0xfaefe88aUL, 0xe3f4d9cbUL, 0xacb54f0cUL, 0xb5ae7e4dUL, 0x9e832d8eUL, + 0x87981ccfUL, 0x4ac21251UL, 0x53d92310UL, 0x78f470d3UL, 0x61ef4192UL, + 0x2eaed755UL, 0x37b5e614UL, 0x1c98b5d7UL, 0x05838496UL, 0x821b9859UL, + 0x9b00a918UL, 0xb02dfadbUL, 0xa936cb9aUL, 0xe6775d5dUL, 0xff6c6c1cUL, + 0xd4413fdfUL, 0xcd5a0e9eUL, 0x958424a2UL, 0x8c9f15e3UL, 0xa7b24620UL, + 0xbea97761UL, 0xf1e8e1a6UL, 0xe8f3d0e7UL, 0xc3de8324UL, 0xdac5b265UL, + 0x5d5daeaaUL, 0x44469febUL, 0x6f6bcc28UL, 0x7670fd69UL, 0x39316baeUL, + 0x202a5aefUL, 0x0b07092cUL, 0x121c386dUL, 0xdf4636f3UL, 0xc65d07b2UL, + 0xed705471UL, 0xf46b6530UL, 0xbb2af3f7UL, 0xa231c2b6UL, 0x891c9175UL, + 0x9007a034UL, 0x179fbcfbUL, 0x0e848dbaUL, 0x25a9de79UL, 0x3cb2ef38UL, + 0x73f379ffUL, 0x6ae848beUL, 0x41c51b7dUL, 0x58de2a3cUL, 0xf0794f05UL, + 0xe9627e44UL, 0xc24f2d87UL, 0xdb541cc6UL, 0x94158a01UL, 0x8d0ebb40UL, + 0xa623e883UL, 0xbf38d9c2UL, 0x38a0c50dUL, 0x21bbf44cUL, 0x0a96a78fUL, + 0x138d96ceUL, 
0x5ccc0009UL, 0x45d73148UL, 0x6efa628bUL, 0x77e153caUL, + 0xbabb5d54UL, 0xa3a06c15UL, 0x888d3fd6UL, 0x91960e97UL, 0xded79850UL, + 0xc7cca911UL, 0xece1fad2UL, 0xf5facb93UL, 0x7262d75cUL, 0x6b79e61dUL, + 0x4054b5deUL, 0x594f849fUL, 0x160e1258UL, 0x0f152319UL, 0x243870daUL, + 0x3d23419bUL, 0x65fd6ba7UL, 0x7ce65ae6UL, 0x57cb0925UL, 0x4ed03864UL, + 0x0191aea3UL, 0x188a9fe2UL, 0x33a7cc21UL, 0x2abcfd60UL, 0xad24e1afUL, + 0xb43fd0eeUL, 0x9f12832dUL, 0x8609b26cUL, 0xc94824abUL, 0xd05315eaUL, + 0xfb7e4629UL, 0xe2657768UL, 0x2f3f79f6UL, 0x362448b7UL, 0x1d091b74UL, + 0x04122a35UL, 0x4b53bcf2UL, 0x52488db3UL, 0x7965de70UL, 0x607eef31UL, + 0xe7e6f3feUL, 0xfefdc2bfUL, 0xd5d0917cUL, 0xcccba03dUL, 0x838a36faUL, + 0x9a9107bbUL, 0xb1bc5478UL, 0xa8a76539UL, 0x3b83984bUL, 0x2298a90aUL, + 0x09b5fac9UL, 0x10aecb88UL, 0x5fef5d4fUL, 0x46f46c0eUL, 0x6dd93fcdUL, + 0x74c20e8cUL, 0xf35a1243UL, 0xea412302UL, 0xc16c70c1UL, 0xd8774180UL, + 0x9736d747UL, 0x8e2de606UL, 0xa500b5c5UL, 0xbc1b8484UL, 0x71418a1aUL, + 0x685abb5bUL, 0x4377e898UL, 0x5a6cd9d9UL, 0x152d4f1eUL, 0x0c367e5fUL, + 0x271b2d9cUL, 0x3e001cddUL, 0xb9980012UL, 0xa0833153UL, 0x8bae6290UL, + 0x92b553d1UL, 0xddf4c516UL, 0xc4eff457UL, 0xefc2a794UL, 0xf6d996d5UL, + 0xae07bce9UL, 0xb71c8da8UL, 0x9c31de6bUL, 0x852aef2aUL, 0xca6b79edUL, + 0xd37048acUL, 0xf85d1b6fUL, 0xe1462a2eUL, 0x66de36e1UL, 0x7fc507a0UL, + 0x54e85463UL, 0x4df36522UL, 0x02b2f3e5UL, 0x1ba9c2a4UL, 0x30849167UL, + 0x299fa026UL, 0xe4c5aeb8UL, 0xfdde9ff9UL, 0xd6f3cc3aUL, 0xcfe8fd7bUL, + 0x80a96bbcUL, 0x99b25afdUL, 0xb29f093eUL, 0xab84387fUL, 0x2c1c24b0UL, + 0x350715f1UL, 0x1e2a4632UL, 0x07317773UL, 0x4870e1b4UL, 0x516bd0f5UL, + 0x7a468336UL, 0x635db277UL, 0xcbfad74eUL, 0xd2e1e60fUL, 0xf9ccb5ccUL, + 0xe0d7848dUL, 0xaf96124aUL, 0xb68d230bUL, 0x9da070c8UL, 0x84bb4189UL, + 0x03235d46UL, 0x1a386c07UL, 0x31153fc4UL, 0x280e0e85UL, 0x674f9842UL, + 0x7e54a903UL, 0x5579fac0UL, 0x4c62cb81UL, 0x8138c51fUL, 0x9823f45eUL, + 0xb30ea79dUL, 0xaa1596dcUL, 0xe554001bUL, 0xfc4f315aUL, 0xd7626299UL, 
+ 0xce7953d8UL, 0x49e14f17UL, 0x50fa7e56UL, 0x7bd72d95UL, 0x62cc1cd4UL, + 0x2d8d8a13UL, 0x3496bb52UL, 0x1fbbe891UL, 0x06a0d9d0UL, 0x5e7ef3ecUL, + 0x4765c2adUL, 0x6c48916eUL, 0x7553a02fUL, 0x3a1236e8UL, 0x230907a9UL, + 0x0824546aUL, 0x113f652bUL, 0x96a779e4UL, 0x8fbc48a5UL, 0xa4911b66UL, + 0xbd8a2a27UL, 0xf2cbbce0UL, 0xebd08da1UL, 0xc0fdde62UL, 0xd9e6ef23UL, + 0x14bce1bdUL, 0x0da7d0fcUL, 0x268a833fUL, 0x3f91b27eUL, 0x70d024b9UL, + 0x69cb15f8UL, 0x42e6463bUL, 0x5bfd777aUL, 0xdc656bb5UL, 0xc57e5af4UL, + 0xee530937UL, 0xf7483876UL, 0xb809aeb1UL, 0xa1129ff0UL, 0x8a3fcc33UL, + 0x9324fd72UL + }, + { + 0x00000000UL, 0x01c26a37UL, 0x0384d46eUL, 0x0246be59UL, 0x0709a8dcUL, + 0x06cbc2ebUL, 0x048d7cb2UL, 0x054f1685UL, 0x0e1351b8UL, 0x0fd13b8fUL, + 0x0d9785d6UL, 0x0c55efe1UL, 0x091af964UL, 0x08d89353UL, 0x0a9e2d0aUL, + 0x0b5c473dUL, 0x1c26a370UL, 0x1de4c947UL, 0x1fa2771eUL, 0x1e601d29UL, + 0x1b2f0bacUL, 0x1aed619bUL, 0x18abdfc2UL, 0x1969b5f5UL, 0x1235f2c8UL, + 0x13f798ffUL, 0x11b126a6UL, 0x10734c91UL, 0x153c5a14UL, 0x14fe3023UL, + 0x16b88e7aUL, 0x177ae44dUL, 0x384d46e0UL, 0x398f2cd7UL, 0x3bc9928eUL, + 0x3a0bf8b9UL, 0x3f44ee3cUL, 0x3e86840bUL, 0x3cc03a52UL, 0x3d025065UL, + 0x365e1758UL, 0x379c7d6fUL, 0x35dac336UL, 0x3418a901UL, 0x3157bf84UL, + 0x3095d5b3UL, 0x32d36beaUL, 0x331101ddUL, 0x246be590UL, 0x25a98fa7UL, + 0x27ef31feUL, 0x262d5bc9UL, 0x23624d4cUL, 0x22a0277bUL, 0x20e69922UL, + 0x2124f315UL, 0x2a78b428UL, 0x2bbade1fUL, 0x29fc6046UL, 0x283e0a71UL, + 0x2d711cf4UL, 0x2cb376c3UL, 0x2ef5c89aUL, 0x2f37a2adUL, 0x709a8dc0UL, + 0x7158e7f7UL, 0x731e59aeUL, 0x72dc3399UL, 0x7793251cUL, 0x76514f2bUL, + 0x7417f172UL, 0x75d59b45UL, 0x7e89dc78UL, 0x7f4bb64fUL, 0x7d0d0816UL, + 0x7ccf6221UL, 0x798074a4UL, 0x78421e93UL, 0x7a04a0caUL, 0x7bc6cafdUL, + 0x6cbc2eb0UL, 0x6d7e4487UL, 0x6f38fadeUL, 0x6efa90e9UL, 0x6bb5866cUL, + 0x6a77ec5bUL, 0x68315202UL, 0x69f33835UL, 0x62af7f08UL, 0x636d153fUL, + 0x612bab66UL, 0x60e9c151UL, 0x65a6d7d4UL, 0x6464bde3UL, 0x662203baUL, + 0x67e0698dUL, 0x48d7cb20UL, 
0x4915a117UL, 0x4b531f4eUL, 0x4a917579UL, + 0x4fde63fcUL, 0x4e1c09cbUL, 0x4c5ab792UL, 0x4d98dda5UL, 0x46c49a98UL, + 0x4706f0afUL, 0x45404ef6UL, 0x448224c1UL, 0x41cd3244UL, 0x400f5873UL, + 0x4249e62aUL, 0x438b8c1dUL, 0x54f16850UL, 0x55330267UL, 0x5775bc3eUL, + 0x56b7d609UL, 0x53f8c08cUL, 0x523aaabbUL, 0x507c14e2UL, 0x51be7ed5UL, + 0x5ae239e8UL, 0x5b2053dfUL, 0x5966ed86UL, 0x58a487b1UL, 0x5deb9134UL, + 0x5c29fb03UL, 0x5e6f455aUL, 0x5fad2f6dUL, 0xe1351b80UL, 0xe0f771b7UL, + 0xe2b1cfeeUL, 0xe373a5d9UL, 0xe63cb35cUL, 0xe7fed96bUL, 0xe5b86732UL, + 0xe47a0d05UL, 0xef264a38UL, 0xeee4200fUL, 0xeca29e56UL, 0xed60f461UL, + 0xe82fe2e4UL, 0xe9ed88d3UL, 0xebab368aUL, 0xea695cbdUL, 0xfd13b8f0UL, + 0xfcd1d2c7UL, 0xfe976c9eUL, 0xff5506a9UL, 0xfa1a102cUL, 0xfbd87a1bUL, + 0xf99ec442UL, 0xf85cae75UL, 0xf300e948UL, 0xf2c2837fUL, 0xf0843d26UL, + 0xf1465711UL, 0xf4094194UL, 0xf5cb2ba3UL, 0xf78d95faUL, 0xf64fffcdUL, + 0xd9785d60UL, 0xd8ba3757UL, 0xdafc890eUL, 0xdb3ee339UL, 0xde71f5bcUL, + 0xdfb39f8bUL, 0xddf521d2UL, 0xdc374be5UL, 0xd76b0cd8UL, 0xd6a966efUL, + 0xd4efd8b6UL, 0xd52db281UL, 0xd062a404UL, 0xd1a0ce33UL, 0xd3e6706aUL, + 0xd2241a5dUL, 0xc55efe10UL, 0xc49c9427UL, 0xc6da2a7eUL, 0xc7184049UL, + 0xc25756ccUL, 0xc3953cfbUL, 0xc1d382a2UL, 0xc011e895UL, 0xcb4dafa8UL, + 0xca8fc59fUL, 0xc8c97bc6UL, 0xc90b11f1UL, 0xcc440774UL, 0xcd866d43UL, + 0xcfc0d31aUL, 0xce02b92dUL, 0x91af9640UL, 0x906dfc77UL, 0x922b422eUL, + 0x93e92819UL, 0x96a63e9cUL, 0x976454abUL, 0x9522eaf2UL, 0x94e080c5UL, + 0x9fbcc7f8UL, 0x9e7eadcfUL, 0x9c381396UL, 0x9dfa79a1UL, 0x98b56f24UL, + 0x99770513UL, 0x9b31bb4aUL, 0x9af3d17dUL, 0x8d893530UL, 0x8c4b5f07UL, + 0x8e0de15eUL, 0x8fcf8b69UL, 0x8a809decUL, 0x8b42f7dbUL, 0x89044982UL, + 0x88c623b5UL, 0x839a6488UL, 0x82580ebfUL, 0x801eb0e6UL, 0x81dcdad1UL, + 0x8493cc54UL, 0x8551a663UL, 0x8717183aUL, 0x86d5720dUL, 0xa9e2d0a0UL, + 0xa820ba97UL, 0xaa6604ceUL, 0xaba46ef9UL, 0xaeeb787cUL, 0xaf29124bUL, + 0xad6fac12UL, 0xacadc625UL, 0xa7f18118UL, 0xa633eb2fUL, 0xa4755576UL, + 
0xa5b73f41UL, 0xa0f829c4UL, 0xa13a43f3UL, 0xa37cfdaaUL, 0xa2be979dUL, + 0xb5c473d0UL, 0xb40619e7UL, 0xb640a7beUL, 0xb782cd89UL, 0xb2cddb0cUL, + 0xb30fb13bUL, 0xb1490f62UL, 0xb08b6555UL, 0xbbd72268UL, 0xba15485fUL, + 0xb853f606UL, 0xb9919c31UL, 0xbcde8ab4UL, 0xbd1ce083UL, 0xbf5a5edaUL, + 0xbe9834edUL + }, + { + 0x00000000UL, 0xb8bc6765UL, 0xaa09c88bUL, 0x12b5afeeUL, 0x8f629757UL, + 0x37def032UL, 0x256b5fdcUL, 0x9dd738b9UL, 0xc5b428efUL, 0x7d084f8aUL, + 0x6fbde064UL, 0xd7018701UL, 0x4ad6bfb8UL, 0xf26ad8ddUL, 0xe0df7733UL, + 0x58631056UL, 0x5019579fUL, 0xe8a530faUL, 0xfa109f14UL, 0x42acf871UL, + 0xdf7bc0c8UL, 0x67c7a7adUL, 0x75720843UL, 0xcdce6f26UL, 0x95ad7f70UL, + 0x2d111815UL, 0x3fa4b7fbUL, 0x8718d09eUL, 0x1acfe827UL, 0xa2738f42UL, + 0xb0c620acUL, 0x087a47c9UL, 0xa032af3eUL, 0x188ec85bUL, 0x0a3b67b5UL, + 0xb28700d0UL, 0x2f503869UL, 0x97ec5f0cUL, 0x8559f0e2UL, 0x3de59787UL, + 0x658687d1UL, 0xdd3ae0b4UL, 0xcf8f4f5aUL, 0x7733283fUL, 0xeae41086UL, + 0x525877e3UL, 0x40edd80dUL, 0xf851bf68UL, 0xf02bf8a1UL, 0x48979fc4UL, + 0x5a22302aUL, 0xe29e574fUL, 0x7f496ff6UL, 0xc7f50893UL, 0xd540a77dUL, + 0x6dfcc018UL, 0x359fd04eUL, 0x8d23b72bUL, 0x9f9618c5UL, 0x272a7fa0UL, + 0xbafd4719UL, 0x0241207cUL, 0x10f48f92UL, 0xa848e8f7UL, 0x9b14583dUL, + 0x23a83f58UL, 0x311d90b6UL, 0x89a1f7d3UL, 0x1476cf6aUL, 0xaccaa80fUL, + 0xbe7f07e1UL, 0x06c36084UL, 0x5ea070d2UL, 0xe61c17b7UL, 0xf4a9b859UL, + 0x4c15df3cUL, 0xd1c2e785UL, 0x697e80e0UL, 0x7bcb2f0eUL, 0xc377486bUL, + 0xcb0d0fa2UL, 0x73b168c7UL, 0x6104c729UL, 0xd9b8a04cUL, 0x446f98f5UL, + 0xfcd3ff90UL, 0xee66507eUL, 0x56da371bUL, 0x0eb9274dUL, 0xb6054028UL, + 0xa4b0efc6UL, 0x1c0c88a3UL, 0x81dbb01aUL, 0x3967d77fUL, 0x2bd27891UL, + 0x936e1ff4UL, 0x3b26f703UL, 0x839a9066UL, 0x912f3f88UL, 0x299358edUL, + 0xb4446054UL, 0x0cf80731UL, 0x1e4da8dfUL, 0xa6f1cfbaUL, 0xfe92dfecUL, + 0x462eb889UL, 0x549b1767UL, 0xec277002UL, 0x71f048bbUL, 0xc94c2fdeUL, + 0xdbf98030UL, 0x6345e755UL, 0x6b3fa09cUL, 0xd383c7f9UL, 0xc1366817UL, + 0x798a0f72UL, 0xe45d37cbUL, 
0x5ce150aeUL, 0x4e54ff40UL, 0xf6e89825UL, + 0xae8b8873UL, 0x1637ef16UL, 0x048240f8UL, 0xbc3e279dUL, 0x21e91f24UL, + 0x99557841UL, 0x8be0d7afUL, 0x335cb0caUL, 0xed59b63bUL, 0x55e5d15eUL, + 0x47507eb0UL, 0xffec19d5UL, 0x623b216cUL, 0xda874609UL, 0xc832e9e7UL, + 0x708e8e82UL, 0x28ed9ed4UL, 0x9051f9b1UL, 0x82e4565fUL, 0x3a58313aUL, + 0xa78f0983UL, 0x1f336ee6UL, 0x0d86c108UL, 0xb53aa66dUL, 0xbd40e1a4UL, + 0x05fc86c1UL, 0x1749292fUL, 0xaff54e4aUL, 0x322276f3UL, 0x8a9e1196UL, + 0x982bbe78UL, 0x2097d91dUL, 0x78f4c94bUL, 0xc048ae2eUL, 0xd2fd01c0UL, + 0x6a4166a5UL, 0xf7965e1cUL, 0x4f2a3979UL, 0x5d9f9697UL, 0xe523f1f2UL, + 0x4d6b1905UL, 0xf5d77e60UL, 0xe762d18eUL, 0x5fdeb6ebUL, 0xc2098e52UL, + 0x7ab5e937UL, 0x680046d9UL, 0xd0bc21bcUL, 0x88df31eaUL, 0x3063568fUL, + 0x22d6f961UL, 0x9a6a9e04UL, 0x07bda6bdUL, 0xbf01c1d8UL, 0xadb46e36UL, + 0x15080953UL, 0x1d724e9aUL, 0xa5ce29ffUL, 0xb77b8611UL, 0x0fc7e174UL, + 0x9210d9cdUL, 0x2aacbea8UL, 0x38191146UL, 0x80a57623UL, 0xd8c66675UL, + 0x607a0110UL, 0x72cfaefeUL, 0xca73c99bUL, 0x57a4f122UL, 0xef189647UL, + 0xfdad39a9UL, 0x45115eccUL, 0x764dee06UL, 0xcef18963UL, 0xdc44268dUL, + 0x64f841e8UL, 0xf92f7951UL, 0x41931e34UL, 0x5326b1daUL, 0xeb9ad6bfUL, + 0xb3f9c6e9UL, 0x0b45a18cUL, 0x19f00e62UL, 0xa14c6907UL, 0x3c9b51beUL, + 0x842736dbUL, 0x96929935UL, 0x2e2efe50UL, 0x2654b999UL, 0x9ee8defcUL, + 0x8c5d7112UL, 0x34e11677UL, 0xa9362eceUL, 0x118a49abUL, 0x033fe645UL, + 0xbb838120UL, 0xe3e09176UL, 0x5b5cf613UL, 0x49e959fdUL, 0xf1553e98UL, + 0x6c820621UL, 0xd43e6144UL, 0xc68bceaaUL, 0x7e37a9cfUL, 0xd67f4138UL, + 0x6ec3265dUL, 0x7c7689b3UL, 0xc4caeed6UL, 0x591dd66fUL, 0xe1a1b10aUL, + 0xf3141ee4UL, 0x4ba87981UL, 0x13cb69d7UL, 0xab770eb2UL, 0xb9c2a15cUL, + 0x017ec639UL, 0x9ca9fe80UL, 0x241599e5UL, 0x36a0360bUL, 0x8e1c516eUL, + 0x866616a7UL, 0x3eda71c2UL, 0x2c6fde2cUL, 0x94d3b949UL, 0x090481f0UL, + 0xb1b8e695UL, 0xa30d497bUL, 0x1bb12e1eUL, 0x43d23e48UL, 0xfb6e592dUL, + 0xe9dbf6c3UL, 0x516791a6UL, 0xccb0a91fUL, 0x740cce7aUL, 0x66b96194UL, + 
0xde0506f1UL + }, + { + 0x00000000UL, 0x96300777UL, 0x2c610eeeUL, 0xba510999UL, 0x19c46d07UL, + 0x8ff46a70UL, 0x35a563e9UL, 0xa395649eUL, 0x3288db0eUL, 0xa4b8dc79UL, + 0x1ee9d5e0UL, 0x88d9d297UL, 0x2b4cb609UL, 0xbd7cb17eUL, 0x072db8e7UL, + 0x911dbf90UL, 0x6410b71dUL, 0xf220b06aUL, 0x4871b9f3UL, 0xde41be84UL, + 0x7dd4da1aUL, 0xebe4dd6dUL, 0x51b5d4f4UL, 0xc785d383UL, 0x56986c13UL, + 0xc0a86b64UL, 0x7af962fdUL, 0xecc9658aUL, 0x4f5c0114UL, 0xd96c0663UL, + 0x633d0ffaUL, 0xf50d088dUL, 0xc8206e3bUL, 0x5e10694cUL, 0xe44160d5UL, + 0x727167a2UL, 0xd1e4033cUL, 0x47d4044bUL, 0xfd850dd2UL, 0x6bb50aa5UL, + 0xfaa8b535UL, 0x6c98b242UL, 0xd6c9bbdbUL, 0x40f9bcacUL, 0xe36cd832UL, + 0x755cdf45UL, 0xcf0dd6dcUL, 0x593dd1abUL, 0xac30d926UL, 0x3a00de51UL, + 0x8051d7c8UL, 0x1661d0bfUL, 0xb5f4b421UL, 0x23c4b356UL, 0x9995bacfUL, + 0x0fa5bdb8UL, 0x9eb80228UL, 0x0888055fUL, 0xb2d90cc6UL, 0x24e90bb1UL, + 0x877c6f2fUL, 0x114c6858UL, 0xab1d61c1UL, 0x3d2d66b6UL, 0x9041dc76UL, + 0x0671db01UL, 0xbc20d298UL, 0x2a10d5efUL, 0x8985b171UL, 0x1fb5b606UL, + 0xa5e4bf9fUL, 0x33d4b8e8UL, 0xa2c90778UL, 0x34f9000fUL, 0x8ea80996UL, + 0x18980ee1UL, 0xbb0d6a7fUL, 0x2d3d6d08UL, 0x976c6491UL, 0x015c63e6UL, + 0xf4516b6bUL, 0x62616c1cUL, 0xd8306585UL, 0x4e0062f2UL, 0xed95066cUL, + 0x7ba5011bUL, 0xc1f40882UL, 0x57c40ff5UL, 0xc6d9b065UL, 0x50e9b712UL, + 0xeab8be8bUL, 0x7c88b9fcUL, 0xdf1ddd62UL, 0x492dda15UL, 0xf37cd38cUL, + 0x654cd4fbUL, 0x5861b24dUL, 0xce51b53aUL, 0x7400bca3UL, 0xe230bbd4UL, + 0x41a5df4aUL, 0xd795d83dUL, 0x6dc4d1a4UL, 0xfbf4d6d3UL, 0x6ae96943UL, + 0xfcd96e34UL, 0x468867adUL, 0xd0b860daUL, 0x732d0444UL, 0xe51d0333UL, + 0x5f4c0aaaUL, 0xc97c0dddUL, 0x3c710550UL, 0xaa410227UL, 0x10100bbeUL, + 0x86200cc9UL, 0x25b56857UL, 0xb3856f20UL, 0x09d466b9UL, 0x9fe461ceUL, + 0x0ef9de5eUL, 0x98c9d929UL, 0x2298d0b0UL, 0xb4a8d7c7UL, 0x173db359UL, + 0x810db42eUL, 0x3b5cbdb7UL, 0xad6cbac0UL, 0x2083b8edUL, 0xb6b3bf9aUL, + 0x0ce2b603UL, 0x9ad2b174UL, 0x3947d5eaUL, 0xaf77d29dUL, 0x1526db04UL, + 0x8316dc73UL, 0x120b63e3UL, 
0x843b6494UL, 0x3e6a6d0dUL, 0xa85a6a7aUL, + 0x0bcf0ee4UL, 0x9dff0993UL, 0x27ae000aUL, 0xb19e077dUL, 0x44930ff0UL, + 0xd2a30887UL, 0x68f2011eUL, 0xfec20669UL, 0x5d5762f7UL, 0xcb676580UL, + 0x71366c19UL, 0xe7066b6eUL, 0x761bd4feUL, 0xe02bd389UL, 0x5a7ada10UL, + 0xcc4add67UL, 0x6fdfb9f9UL, 0xf9efbe8eUL, 0x43beb717UL, 0xd58eb060UL, + 0xe8a3d6d6UL, 0x7e93d1a1UL, 0xc4c2d838UL, 0x52f2df4fUL, 0xf167bbd1UL, + 0x6757bca6UL, 0xdd06b53fUL, 0x4b36b248UL, 0xda2b0dd8UL, 0x4c1b0aafUL, + 0xf64a0336UL, 0x607a0441UL, 0xc3ef60dfUL, 0x55df67a8UL, 0xef8e6e31UL, + 0x79be6946UL, 0x8cb361cbUL, 0x1a8366bcUL, 0xa0d26f25UL, 0x36e26852UL, + 0x95770cccUL, 0x03470bbbUL, 0xb9160222UL, 0x2f260555UL, 0xbe3bbac5UL, + 0x280bbdb2UL, 0x925ab42bUL, 0x046ab35cUL, 0xa7ffd7c2UL, 0x31cfd0b5UL, + 0x8b9ed92cUL, 0x1daede5bUL, 0xb0c2649bUL, 0x26f263ecUL, 0x9ca36a75UL, + 0x0a936d02UL, 0xa906099cUL, 0x3f360eebUL, 0x85670772UL, 0x13570005UL, + 0x824abf95UL, 0x147ab8e2UL, 0xae2bb17bUL, 0x381bb60cUL, 0x9b8ed292UL, + 0x0dbed5e5UL, 0xb7efdc7cUL, 0x21dfdb0bUL, 0xd4d2d386UL, 0x42e2d4f1UL, + 0xf8b3dd68UL, 0x6e83da1fUL, 0xcd16be81UL, 0x5b26b9f6UL, 0xe177b06fUL, + 0x7747b718UL, 0xe65a0888UL, 0x706a0fffUL, 0xca3b0666UL, 0x5c0b0111UL, + 0xff9e658fUL, 0x69ae62f8UL, 0xd3ff6b61UL, 0x45cf6c16UL, 0x78e20aa0UL, + 0xeed20dd7UL, 0x5483044eUL, 0xc2b30339UL, 0x612667a7UL, 0xf71660d0UL, + 0x4d476949UL, 0xdb776e3eUL, 0x4a6ad1aeUL, 0xdc5ad6d9UL, 0x660bdf40UL, + 0xf03bd837UL, 0x53aebca9UL, 0xc59ebbdeUL, 0x7fcfb247UL, 0xe9ffb530UL, + 0x1cf2bdbdUL, 0x8ac2bacaUL, 0x3093b353UL, 0xa6a3b424UL, 0x0536d0baUL, + 0x9306d7cdUL, 0x2957de54UL, 0xbf67d923UL, 0x2e7a66b3UL, 0xb84a61c4UL, + 0x021b685dUL, 0x942b6f2aUL, 0x37be0bb4UL, 0xa18e0cc3UL, 0x1bdf055aUL, + 0x8def022dUL + }, + { + 0x00000000UL, 0x41311b19UL, 0x82623632UL, 0xc3532d2bUL, 0x04c56c64UL, + 0x45f4777dUL, 0x86a75a56UL, 0xc796414fUL, 0x088ad9c8UL, 0x49bbc2d1UL, + 0x8ae8effaUL, 0xcbd9f4e3UL, 0x0c4fb5acUL, 0x4d7eaeb5UL, 0x8e2d839eUL, + 0xcf1c9887UL, 0x5112c24aUL, 0x1023d953UL, 0xd370f478UL, 
0x9241ef61UL, + 0x55d7ae2eUL, 0x14e6b537UL, 0xd7b5981cUL, 0x96848305UL, 0x59981b82UL, + 0x18a9009bUL, 0xdbfa2db0UL, 0x9acb36a9UL, 0x5d5d77e6UL, 0x1c6c6cffUL, + 0xdf3f41d4UL, 0x9e0e5acdUL, 0xa2248495UL, 0xe3159f8cUL, 0x2046b2a7UL, + 0x6177a9beUL, 0xa6e1e8f1UL, 0xe7d0f3e8UL, 0x2483dec3UL, 0x65b2c5daUL, + 0xaaae5d5dUL, 0xeb9f4644UL, 0x28cc6b6fUL, 0x69fd7076UL, 0xae6b3139UL, + 0xef5a2a20UL, 0x2c09070bUL, 0x6d381c12UL, 0xf33646dfUL, 0xb2075dc6UL, + 0x715470edUL, 0x30656bf4UL, 0xf7f32abbUL, 0xb6c231a2UL, 0x75911c89UL, + 0x34a00790UL, 0xfbbc9f17UL, 0xba8d840eUL, 0x79dea925UL, 0x38efb23cUL, + 0xff79f373UL, 0xbe48e86aUL, 0x7d1bc541UL, 0x3c2ade58UL, 0x054f79f0UL, + 0x447e62e9UL, 0x872d4fc2UL, 0xc61c54dbUL, 0x018a1594UL, 0x40bb0e8dUL, + 0x83e823a6UL, 0xc2d938bfUL, 0x0dc5a038UL, 0x4cf4bb21UL, 0x8fa7960aUL, + 0xce968d13UL, 0x0900cc5cUL, 0x4831d745UL, 0x8b62fa6eUL, 0xca53e177UL, + 0x545dbbbaUL, 0x156ca0a3UL, 0xd63f8d88UL, 0x970e9691UL, 0x5098d7deUL, + 0x11a9ccc7UL, 0xd2fae1ecUL, 0x93cbfaf5UL, 0x5cd76272UL, 0x1de6796bUL, + 0xdeb55440UL, 0x9f844f59UL, 0x58120e16UL, 0x1923150fUL, 0xda703824UL, + 0x9b41233dUL, 0xa76bfd65UL, 0xe65ae67cUL, 0x2509cb57UL, 0x6438d04eUL, + 0xa3ae9101UL, 0xe29f8a18UL, 0x21cca733UL, 0x60fdbc2aUL, 0xafe124adUL, + 0xeed03fb4UL, 0x2d83129fUL, 0x6cb20986UL, 0xab2448c9UL, 0xea1553d0UL, + 0x29467efbUL, 0x687765e2UL, 0xf6793f2fUL, 0xb7482436UL, 0x741b091dUL, + 0x352a1204UL, 0xf2bc534bUL, 0xb38d4852UL, 0x70de6579UL, 0x31ef7e60UL, + 0xfef3e6e7UL, 0xbfc2fdfeUL, 0x7c91d0d5UL, 0x3da0cbccUL, 0xfa368a83UL, + 0xbb07919aUL, 0x7854bcb1UL, 0x3965a7a8UL, 0x4b98833bUL, 0x0aa99822UL, + 0xc9fab509UL, 0x88cbae10UL, 0x4f5def5fUL, 0x0e6cf446UL, 0xcd3fd96dUL, + 0x8c0ec274UL, 0x43125af3UL, 0x022341eaUL, 0xc1706cc1UL, 0x804177d8UL, + 0x47d73697UL, 0x06e62d8eUL, 0xc5b500a5UL, 0x84841bbcUL, 0x1a8a4171UL, + 0x5bbb5a68UL, 0x98e87743UL, 0xd9d96c5aUL, 0x1e4f2d15UL, 0x5f7e360cUL, + 0x9c2d1b27UL, 0xdd1c003eUL, 0x120098b9UL, 0x533183a0UL, 0x9062ae8bUL, + 0xd153b592UL, 0x16c5f4ddUL, 
0x57f4efc4UL, 0x94a7c2efUL, 0xd596d9f6UL, + 0xe9bc07aeUL, 0xa88d1cb7UL, 0x6bde319cUL, 0x2aef2a85UL, 0xed796bcaUL, + 0xac4870d3UL, 0x6f1b5df8UL, 0x2e2a46e1UL, 0xe136de66UL, 0xa007c57fUL, + 0x6354e854UL, 0x2265f34dUL, 0xe5f3b202UL, 0xa4c2a91bUL, 0x67918430UL, + 0x26a09f29UL, 0xb8aec5e4UL, 0xf99fdefdUL, 0x3accf3d6UL, 0x7bfde8cfUL, + 0xbc6ba980UL, 0xfd5ab299UL, 0x3e099fb2UL, 0x7f3884abUL, 0xb0241c2cUL, + 0xf1150735UL, 0x32462a1eUL, 0x73773107UL, 0xb4e17048UL, 0xf5d06b51UL, + 0x3683467aUL, 0x77b25d63UL, 0x4ed7facbUL, 0x0fe6e1d2UL, 0xccb5ccf9UL, + 0x8d84d7e0UL, 0x4a1296afUL, 0x0b238db6UL, 0xc870a09dUL, 0x8941bb84UL, + 0x465d2303UL, 0x076c381aUL, 0xc43f1531UL, 0x850e0e28UL, 0x42984f67UL, + 0x03a9547eUL, 0xc0fa7955UL, 0x81cb624cUL, 0x1fc53881UL, 0x5ef42398UL, + 0x9da70eb3UL, 0xdc9615aaUL, 0x1b0054e5UL, 0x5a314ffcUL, 0x996262d7UL, + 0xd85379ceUL, 0x174fe149UL, 0x567efa50UL, 0x952dd77bUL, 0xd41ccc62UL, + 0x138a8d2dUL, 0x52bb9634UL, 0x91e8bb1fUL, 0xd0d9a006UL, 0xecf37e5eUL, + 0xadc26547UL, 0x6e91486cUL, 0x2fa05375UL, 0xe836123aUL, 0xa9070923UL, + 0x6a542408UL, 0x2b653f11UL, 0xe479a796UL, 0xa548bc8fUL, 0x661b91a4UL, + 0x272a8abdUL, 0xe0bccbf2UL, 0xa18dd0ebUL, 0x62defdc0UL, 0x23efe6d9UL, + 0xbde1bc14UL, 0xfcd0a70dUL, 0x3f838a26UL, 0x7eb2913fUL, 0xb924d070UL, + 0xf815cb69UL, 0x3b46e642UL, 0x7a77fd5bUL, 0xb56b65dcUL, 0xf45a7ec5UL, + 0x370953eeUL, 0x763848f7UL, 0xb1ae09b8UL, 0xf09f12a1UL, 0x33cc3f8aUL, + 0x72fd2493UL + }, + { + 0x00000000UL, 0x376ac201UL, 0x6ed48403UL, 0x59be4602UL, 0xdca80907UL, + 0xebc2cb06UL, 0xb27c8d04UL, 0x85164f05UL, 0xb851130eUL, 0x8f3bd10fUL, + 0xd685970dUL, 0xe1ef550cUL, 0x64f91a09UL, 0x5393d808UL, 0x0a2d9e0aUL, + 0x3d475c0bUL, 0x70a3261cUL, 0x47c9e41dUL, 0x1e77a21fUL, 0x291d601eUL, + 0xac0b2f1bUL, 0x9b61ed1aUL, 0xc2dfab18UL, 0xf5b56919UL, 0xc8f23512UL, + 0xff98f713UL, 0xa626b111UL, 0x914c7310UL, 0x145a3c15UL, 0x2330fe14UL, + 0x7a8eb816UL, 0x4de47a17UL, 0xe0464d38UL, 0xd72c8f39UL, 0x8e92c93bUL, + 0xb9f80b3aUL, 0x3cee443fUL, 0x0b84863eUL, 0x523ac03cUL, 
0x6550023dUL, + 0x58175e36UL, 0x6f7d9c37UL, 0x36c3da35UL, 0x01a91834UL, 0x84bf5731UL, + 0xb3d59530UL, 0xea6bd332UL, 0xdd011133UL, 0x90e56b24UL, 0xa78fa925UL, + 0xfe31ef27UL, 0xc95b2d26UL, 0x4c4d6223UL, 0x7b27a022UL, 0x2299e620UL, + 0x15f32421UL, 0x28b4782aUL, 0x1fdeba2bUL, 0x4660fc29UL, 0x710a3e28UL, + 0xf41c712dUL, 0xc376b32cUL, 0x9ac8f52eUL, 0xada2372fUL, 0xc08d9a70UL, + 0xf7e75871UL, 0xae591e73UL, 0x9933dc72UL, 0x1c259377UL, 0x2b4f5176UL, + 0x72f11774UL, 0x459bd575UL, 0x78dc897eUL, 0x4fb64b7fUL, 0x16080d7dUL, + 0x2162cf7cUL, 0xa4748079UL, 0x931e4278UL, 0xcaa0047aUL, 0xfdcac67bUL, + 0xb02ebc6cUL, 0x87447e6dUL, 0xdefa386fUL, 0xe990fa6eUL, 0x6c86b56bUL, + 0x5bec776aUL, 0x02523168UL, 0x3538f369UL, 0x087faf62UL, 0x3f156d63UL, + 0x66ab2b61UL, 0x51c1e960UL, 0xd4d7a665UL, 0xe3bd6464UL, 0xba032266UL, + 0x8d69e067UL, 0x20cbd748UL, 0x17a11549UL, 0x4e1f534bUL, 0x7975914aUL, + 0xfc63de4fUL, 0xcb091c4eUL, 0x92b75a4cUL, 0xa5dd984dUL, 0x989ac446UL, + 0xaff00647UL, 0xf64e4045UL, 0xc1248244UL, 0x4432cd41UL, 0x73580f40UL, + 0x2ae64942UL, 0x1d8c8b43UL, 0x5068f154UL, 0x67023355UL, 0x3ebc7557UL, + 0x09d6b756UL, 0x8cc0f853UL, 0xbbaa3a52UL, 0xe2147c50UL, 0xd57ebe51UL, + 0xe839e25aUL, 0xdf53205bUL, 0x86ed6659UL, 0xb187a458UL, 0x3491eb5dUL, + 0x03fb295cUL, 0x5a456f5eUL, 0x6d2fad5fUL, 0x801b35e1UL, 0xb771f7e0UL, + 0xeecfb1e2UL, 0xd9a573e3UL, 0x5cb33ce6UL, 0x6bd9fee7UL, 0x3267b8e5UL, + 0x050d7ae4UL, 0x384a26efUL, 0x0f20e4eeUL, 0x569ea2ecUL, 0x61f460edUL, + 0xe4e22fe8UL, 0xd388ede9UL, 0x8a36abebUL, 0xbd5c69eaUL, 0xf0b813fdUL, + 0xc7d2d1fcUL, 0x9e6c97feUL, 0xa90655ffUL, 0x2c101afaUL, 0x1b7ad8fbUL, + 0x42c49ef9UL, 0x75ae5cf8UL, 0x48e900f3UL, 0x7f83c2f2UL, 0x263d84f0UL, + 0x115746f1UL, 0x944109f4UL, 0xa32bcbf5UL, 0xfa958df7UL, 0xcdff4ff6UL, + 0x605d78d9UL, 0x5737bad8UL, 0x0e89fcdaUL, 0x39e33edbUL, 0xbcf571deUL, + 0x8b9fb3dfUL, 0xd221f5ddUL, 0xe54b37dcUL, 0xd80c6bd7UL, 0xef66a9d6UL, + 0xb6d8efd4UL, 0x81b22dd5UL, 0x04a462d0UL, 0x33cea0d1UL, 0x6a70e6d3UL, + 0x5d1a24d2UL, 0x10fe5ec5UL, 
0x27949cc4UL, 0x7e2adac6UL, 0x494018c7UL, + 0xcc5657c2UL, 0xfb3c95c3UL, 0xa282d3c1UL, 0x95e811c0UL, 0xa8af4dcbUL, + 0x9fc58fcaUL, 0xc67bc9c8UL, 0xf1110bc9UL, 0x740744ccUL, 0x436d86cdUL, + 0x1ad3c0cfUL, 0x2db902ceUL, 0x4096af91UL, 0x77fc6d90UL, 0x2e422b92UL, + 0x1928e993UL, 0x9c3ea696UL, 0xab546497UL, 0xf2ea2295UL, 0xc580e094UL, + 0xf8c7bc9fUL, 0xcfad7e9eUL, 0x9613389cUL, 0xa179fa9dUL, 0x246fb598UL, + 0x13057799UL, 0x4abb319bUL, 0x7dd1f39aUL, 0x3035898dUL, 0x075f4b8cUL, + 0x5ee10d8eUL, 0x698bcf8fUL, 0xec9d808aUL, 0xdbf7428bUL, 0x82490489UL, + 0xb523c688UL, 0x88649a83UL, 0xbf0e5882UL, 0xe6b01e80UL, 0xd1dadc81UL, + 0x54cc9384UL, 0x63a65185UL, 0x3a181787UL, 0x0d72d586UL, 0xa0d0e2a9UL, + 0x97ba20a8UL, 0xce0466aaUL, 0xf96ea4abUL, 0x7c78ebaeUL, 0x4b1229afUL, + 0x12ac6fadUL, 0x25c6adacUL, 0x1881f1a7UL, 0x2feb33a6UL, 0x765575a4UL, + 0x413fb7a5UL, 0xc429f8a0UL, 0xf3433aa1UL, 0xaafd7ca3UL, 0x9d97bea2UL, + 0xd073c4b5UL, 0xe71906b4UL, 0xbea740b6UL, 0x89cd82b7UL, 0x0cdbcdb2UL, + 0x3bb10fb3UL, 0x620f49b1UL, 0x55658bb0UL, 0x6822d7bbUL, 0x5f4815baUL, + 0x06f653b8UL, 0x319c91b9UL, 0xb48adebcUL, 0x83e01cbdUL, 0xda5e5abfUL, + 0xed3498beUL + }, + { + 0x00000000UL, 0x6567bcb8UL, 0x8bc809aaUL, 0xeeafb512UL, 0x5797628fUL, + 0x32f0de37UL, 0xdc5f6b25UL, 0xb938d79dUL, 0xef28b4c5UL, 0x8a4f087dUL, + 0x64e0bd6fUL, 0x018701d7UL, 0xb8bfd64aUL, 0xddd86af2UL, 0x3377dfe0UL, + 0x56106358UL, 0x9f571950UL, 0xfa30a5e8UL, 0x149f10faUL, 0x71f8ac42UL, + 0xc8c07bdfUL, 0xada7c767UL, 0x43087275UL, 0x266fcecdUL, 0x707fad95UL, + 0x1518112dUL, 0xfbb7a43fUL, 0x9ed01887UL, 0x27e8cf1aUL, 0x428f73a2UL, + 0xac20c6b0UL, 0xc9477a08UL, 0x3eaf32a0UL, 0x5bc88e18UL, 0xb5673b0aUL, + 0xd00087b2UL, 0x6938502fUL, 0x0c5fec97UL, 0xe2f05985UL, 0x8797e53dUL, + 0xd1878665UL, 0xb4e03addUL, 0x5a4f8fcfUL, 0x3f283377UL, 0x8610e4eaUL, + 0xe3775852UL, 0x0dd8ed40UL, 0x68bf51f8UL, 0xa1f82bf0UL, 0xc49f9748UL, + 0x2a30225aUL, 0x4f579ee2UL, 0xf66f497fUL, 0x9308f5c7UL, 0x7da740d5UL, + 0x18c0fc6dUL, 0x4ed09f35UL, 0x2bb7238dUL, 0xc518969fUL, 
0xa07f2a27UL, + 0x1947fdbaUL, 0x7c204102UL, 0x928ff410UL, 0xf7e848a8UL, 0x3d58149bUL, + 0x583fa823UL, 0xb6901d31UL, 0xd3f7a189UL, 0x6acf7614UL, 0x0fa8caacUL, + 0xe1077fbeUL, 0x8460c306UL, 0xd270a05eUL, 0xb7171ce6UL, 0x59b8a9f4UL, + 0x3cdf154cUL, 0x85e7c2d1UL, 0xe0807e69UL, 0x0e2fcb7bUL, 0x6b4877c3UL, + 0xa20f0dcbUL, 0xc768b173UL, 0x29c70461UL, 0x4ca0b8d9UL, 0xf5986f44UL, + 0x90ffd3fcUL, 0x7e5066eeUL, 0x1b37da56UL, 0x4d27b90eUL, 0x284005b6UL, + 0xc6efb0a4UL, 0xa3880c1cUL, 0x1ab0db81UL, 0x7fd76739UL, 0x9178d22bUL, + 0xf41f6e93UL, 0x03f7263bUL, 0x66909a83UL, 0x883f2f91UL, 0xed589329UL, + 0x546044b4UL, 0x3107f80cUL, 0xdfa84d1eUL, 0xbacff1a6UL, 0xecdf92feUL, + 0x89b82e46UL, 0x67179b54UL, 0x027027ecUL, 0xbb48f071UL, 0xde2f4cc9UL, + 0x3080f9dbUL, 0x55e74563UL, 0x9ca03f6bUL, 0xf9c783d3UL, 0x176836c1UL, + 0x720f8a79UL, 0xcb375de4UL, 0xae50e15cUL, 0x40ff544eUL, 0x2598e8f6UL, + 0x73888baeUL, 0x16ef3716UL, 0xf8408204UL, 0x9d273ebcUL, 0x241fe921UL, + 0x41785599UL, 0xafd7e08bUL, 0xcab05c33UL, 0x3bb659edUL, 0x5ed1e555UL, + 0xb07e5047UL, 0xd519ecffUL, 0x6c213b62UL, 0x094687daUL, 0xe7e932c8UL, + 0x828e8e70UL, 0xd49eed28UL, 0xb1f95190UL, 0x5f56e482UL, 0x3a31583aUL, + 0x83098fa7UL, 0xe66e331fUL, 0x08c1860dUL, 0x6da63ab5UL, 0xa4e140bdUL, + 0xc186fc05UL, 0x2f294917UL, 0x4a4ef5afUL, 0xf3762232UL, 0x96119e8aUL, + 0x78be2b98UL, 0x1dd99720UL, 0x4bc9f478UL, 0x2eae48c0UL, 0xc001fdd2UL, + 0xa566416aUL, 0x1c5e96f7UL, 0x79392a4fUL, 0x97969f5dUL, 0xf2f123e5UL, + 0x05196b4dUL, 0x607ed7f5UL, 0x8ed162e7UL, 0xebb6de5fUL, 0x528e09c2UL, + 0x37e9b57aUL, 0xd9460068UL, 0xbc21bcd0UL, 0xea31df88UL, 0x8f566330UL, + 0x61f9d622UL, 0x049e6a9aUL, 0xbda6bd07UL, 0xd8c101bfUL, 0x366eb4adUL, + 0x53090815UL, 0x9a4e721dUL, 0xff29cea5UL, 0x11867bb7UL, 0x74e1c70fUL, + 0xcdd91092UL, 0xa8beac2aUL, 0x46111938UL, 0x2376a580UL, 0x7566c6d8UL, + 0x10017a60UL, 0xfeaecf72UL, 0x9bc973caUL, 0x22f1a457UL, 0x479618efUL, + 0xa939adfdUL, 0xcc5e1145UL, 0x06ee4d76UL, 0x6389f1ceUL, 0x8d2644dcUL, + 0xe841f864UL, 0x51792ff9UL, 
0x341e9341UL, 0xdab12653UL, 0xbfd69aebUL, + 0xe9c6f9b3UL, 0x8ca1450bUL, 0x620ef019UL, 0x07694ca1UL, 0xbe519b3cUL, + 0xdb362784UL, 0x35999296UL, 0x50fe2e2eUL, 0x99b95426UL, 0xfcdee89eUL, + 0x12715d8cUL, 0x7716e134UL, 0xce2e36a9UL, 0xab498a11UL, 0x45e63f03UL, + 0x208183bbUL, 0x7691e0e3UL, 0x13f65c5bUL, 0xfd59e949UL, 0x983e55f1UL, + 0x2106826cUL, 0x44613ed4UL, 0xaace8bc6UL, 0xcfa9377eUL, 0x38417fd6UL, + 0x5d26c36eUL, 0xb389767cUL, 0xd6eecac4UL, 0x6fd61d59UL, 0x0ab1a1e1UL, + 0xe41e14f3UL, 0x8179a84bUL, 0xd769cb13UL, 0xb20e77abUL, 0x5ca1c2b9UL, + 0x39c67e01UL, 0x80fea99cUL, 0xe5991524UL, 0x0b36a036UL, 0x6e511c8eUL, + 0xa7166686UL, 0xc271da3eUL, 0x2cde6f2cUL, 0x49b9d394UL, 0xf0810409UL, + 0x95e6b8b1UL, 0x7b490da3UL, 0x1e2eb11bUL, 0x483ed243UL, 0x2d596efbUL, + 0xc3f6dbe9UL, 0xa6916751UL, 0x1fa9b0ccUL, 0x7ace0c74UL, 0x9461b966UL, + 0xf10605deUL +#endif + } +}; diff --git a/deps/SZ/zlib/deflate.c b/deps/SZ/zlib/deflate.c new file mode 100644 index 0000000000000000000000000000000000000000..1ec761448de926724c359256bbff0e8d9e851415 --- /dev/null +++ b/deps/SZ/zlib/deflate.c @@ -0,0 +1,2163 @@ +/* deflate.c -- compress data using the deflation algorithm + * Copyright (C) 1995-2017 Jean-loup Gailly and Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* + * ALGORITHM + * + * The "deflation" process depends on being able to identify portions + * of the input text which are identical to earlier input (within a + * sliding window trailing behind the input currently being processed). + * + * The most straightforward technique turns out to be the fastest for + * most input files: try all possible matches and select the longest. + * The key feature of this algorithm is that insertions into the string + * dictionary are very simple and thus fast, and deletions are avoided + * completely. Insertions are performed at each input character, whereas + * string matches are performed only when the previous match ends. 
So it + * is preferable to spend more time in matches to allow very fast string + * insertions and avoid deletions. The matching algorithm for small + * strings is inspired from that of Rabin & Karp. A brute force approach + * is used to find longer strings when a small match has been found. + * A similar algorithm is used in comic (by Jan-Mark Wams) and freeze + * (by Leonid Broukhis). + * A previous version of this file used a more sophisticated algorithm + * (by Fiala and Greene) which is guaranteed to run in linear amortized + * time, but has a larger average cost, uses more memory and is patented. + * However the F&G algorithm may be faster for some highly redundant + * files if the parameter max_chain_length (described below) is too large. + * + * ACKNOWLEDGEMENTS + * + * The idea of lazy evaluation of matches is due to Jan-Mark Wams, and + * I found it in 'freeze' written by Leonid Broukhis. + * Thanks to many people for bug reports and testing. + * + * REFERENCES + * + * Deutsch, L.P.,"DEFLATE Compressed Data Format Specification". + * Available in http://tools.ietf.org/html/rfc1951 + * + * A description of the Rabin and Karp algorithm is given in the book + * "Algorithms" by R. Sedgewick, Addison-Wesley, p252. + * + * Fiala,E.R., and Greene,D.H. + * Data Compression with Finite Windows, Comm.ACM, 32,4 (1989) 490-595 + * + */ + +/* @(#) $Id$ */ + +#include "deflate.h" + +const char deflate_copyright[] = + " deflate 1.2.11 Copyright 1995-2017 Jean-loup Gailly and Mark Adler "; +/* + If you use the zlib library in a product, an acknowledgment is welcome + in the documentation of your product. If for some reason you cannot + include such an acknowledgment, I would appreciate that you keep this + copyright string in the executable of your product. + */ + +/* =========================================================================== + * Function prototypes. 
+ */ +typedef enum { + need_more, /* block not completed, need more input or more output */ + block_done, /* block flush performed */ + finish_started, /* finish started, need only more output at next deflate */ + finish_done /* finish done, accept no more input or output */ +} block_state; + +typedef block_state (*compress_func) OF((deflate_state *s, int flush)); +/* Compression function. Returns the block state after the call. */ + +local int deflateStateCheck OF((z_streamp strm)); +local void slide_hash OF((deflate_state *s)); +local void fill_window OF((deflate_state *s)); +local block_state deflate_stored OF((deflate_state *s, int flush)); +local block_state deflate_fast OF((deflate_state *s, int flush)); +#ifndef FASTEST +local block_state deflate_slow OF((deflate_state *s, int flush)); +#endif +local block_state deflate_rle OF((deflate_state *s, int flush)); +local block_state deflate_huff OF((deflate_state *s, int flush)); +local void lm_init OF((deflate_state *s)); +local void putShortMSB OF((deflate_state *s, uInt b)); +local void flush_pending OF((z_streamp strm)); +local unsigned read_buf OF((z_streamp strm, Bytef *buf, unsigned size)); +#ifdef ASMV +# pragma message("Assembler code may have bugs -- use at your own risk") + void match_init OF((void)); /* asm code initialization */ + uInt longest_match OF((deflate_state *s, IPos cur_match)); +#else +local uInt longest_match OF((deflate_state *s, IPos cur_match)); +#endif + +#ifdef ZLIB_DEBUG +local void check_match OF((deflate_state *s, IPos start, IPos match, + int length)); +#endif + +/* =========================================================================== + * Local data + */ + +#define NIL 0 +/* Tail of hash chains */ + +#ifndef TOO_FAR +# define TOO_FAR 4096 +#endif +/* Matches of length 3 are discarded if their distance exceeds TOO_FAR */ + +/* Values for max_lazy_match, good_match and max_chain_length, depending on + * the desired pack level (0..9). 
The values given below have been tuned to + * exclude worst case performance for pathological files. Better values may be + * found for specific files. + */ +typedef struct config_s { + ush good_length; /* reduce lazy search above this match length */ + ush max_lazy; /* do not perform lazy search above this match length */ + ush nice_length; /* quit search above this match length */ + ush max_chain; /* hash chain search limit; copied to max_chain_length by deflateParams() */ + compress_func func; /* block compression routine used at this level (deflate_stored/fast/slow) */ +} config; + +#ifdef FASTEST +local const config configuration_table[2] = { +/* good lazy nice chain */ +/* 0 */ {0, 0, 0, 0, deflate_stored}, /* store only */ +/* 1 */ {4, 4, 8, 4, deflate_fast}}; /* max speed, no lazy matches */ +#else +local const config configuration_table[10] = { +/* good lazy nice chain */ +/* 0 */ {0, 0, 0, 0, deflate_stored}, /* store only */ +/* 1 */ {4, 4, 8, 4, deflate_fast}, /* max speed, no lazy matches */ +/* 2 */ {4, 5, 16, 8, deflate_fast}, +/* 3 */ {4, 6, 32, 32, deflate_fast}, + +/* 4 */ {4, 4, 16, 16, deflate_slow}, /* lazy matches */ +/* 5 */ {8, 16, 32, 32, deflate_slow}, +/* 6 */ {8, 16, 128, 128, deflate_slow}, +/* 7 */ {8, 32, 128, 256, deflate_slow}, +/* 8 */ {32, 128, 258, 1024, deflate_slow}, +/* 9 */ {32, 258, 258, 4096, deflate_slow}}; /* max compression */ +#endif + +/* Note: the deflate() code requires max_lazy >= MIN_MATCH and max_chain >= 4 + * For deflate_fast() (levels <= 3) good is ignored and lazy has a different + * meaning. + */ + +/* rank Z_BLOCK between Z_NO_FLUSH and Z_PARTIAL_FLUSH */ +#define RANK(f) (((f) * 2) - ((f) > 4 ? 9 : 0)) + +/* =========================================================================== + * Update a hash value with the given input byte + * IN assertion: all calls to UPDATE_HASH are made with consecutive input + * characters, so that a running hash key can be computed from the previous + * key instead of complete recalculation each time. 
+ */ +#define UPDATE_HASH(s,h,c) (h = (((h)<hash_shift) ^ (c)) & s->hash_mask) + + +/* =========================================================================== + * Insert string str in the dictionary and set match_head to the previous head + * of the hash chain (the most recent string with same hash key). Return + * the previous length of the hash chain. + * If this file is compiled with -DFASTEST, the compression level is forced + * to 1, and no hash chains are maintained. + * IN assertion: all calls to INSERT_STRING are made with consecutive input + * characters and the first MIN_MATCH bytes of str are valid (except for + * the last MIN_MATCH-1 bytes of the input file). + */ +#ifdef FASTEST +#define INSERT_STRING(s, str, match_head) \ + (UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]), \ + match_head = s->head[s->ins_h], \ + s->head[s->ins_h] = (Pos)(str)) +#else +#define INSERT_STRING(s, str, match_head) \ + (UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]), \ + match_head = s->prev[(str) & s->w_mask] = s->head[s->ins_h], \ + s->head[s->ins_h] = (Pos)(str)) +#endif + +/* =========================================================================== + * Initialize the hash table (avoiding 64K overflow for 16 bit systems). + * prev[] will be initialized on the fly. + */ +#define CLEAR_HASH(s) \ + s->head[s->hash_size-1] = NIL; \ + zmemzero((Bytef *)s->head, (unsigned)(s->hash_size-1)*sizeof(*s->head)); + +/* =========================================================================== + * Slide the hash table when sliding the window down (could be avoided with 32 + * bit values at the expense of memory usage). We slide even when level == 0 to + * keep the hash table consistent if we switch back to level > 0 later. + */ +local void slide_hash(s) + deflate_state *s; +{ + unsigned n, m; + Posf *p; + uInt wsize = s->w_size; + + n = s->hash_size; + p = &s->head[n]; + do { + m = *--p; + *p = (Pos)(m >= wsize ? 
m - wsize : NIL); + } while (--n); + n = wsize; +#ifndef FASTEST + p = &s->prev[n]; + do { + m = *--p; + *p = (Pos)(m >= wsize ? m - wsize : NIL); + /* If n is not on any hash chain, prev[n] is garbage but + * its value will never be used. + */ + } while (--n); +#endif +} + +/* ========================================================================= */ +/* Initialize a deflate stream with default settings: forwards to + * deflateInit2_() with method Z_DEFLATED, MAX_WBITS, DEF_MEM_LEVEL and + * Z_DEFAULT_STRATEGY, and returns that call's result unchanged. + */ +int ZEXPORT deflateInit_(strm, level, version, stream_size) + z_streamp strm; + int level; + const char *version; + int stream_size; +{ + return deflateInit2_(strm, level, Z_DEFLATED, MAX_WBITS, DEF_MEM_LEVEL, + Z_DEFAULT_STRATEGY, version, stream_size); + /* To do: ignore strm->next_in if we use it as window */ +} + +/* ========================================================================= */ +/* Allocate and initialize the deflate state for strm. + * Returns Z_VERSION_ERROR if version is Z_NULL, its first character differs + * from ZLIB_VERSION's, or stream_size != sizeof(z_stream); Z_STREAM_ERROR if + * strm is Z_NULL or a parameter is out of range; Z_MEM_ERROR if any buffer + * allocation fails; otherwise the result of deflateReset(). + * A negative windowBits selects raw deflate (wrap = 0); when GZIP is defined, + * windowBits > 15 selects a gzip wrapper (wrap = 2). + */ +int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy, + version, stream_size) + z_streamp strm; + int level; + int method; + int windowBits; + int memLevel; + int strategy; + const char *version; + int stream_size; +{ + deflate_state *s; + int wrap = 1; + static const char my_version[] = ZLIB_VERSION; + + ushf *overlay; + /* We overlay pending_buf and d_buf+l_buf. This works since the average + * output size for (length,distance) codes is <= 24 bits. 
+ */ + + if (version == Z_NULL || version[0] != my_version[0] || + stream_size != sizeof(z_stream)) { + return Z_VERSION_ERROR; + } + if (strm == Z_NULL) return Z_STREAM_ERROR; + + strm->msg = Z_NULL; + if (strm->zalloc == (alloc_func)0) { +#ifdef Z_SOLO + return Z_STREAM_ERROR; +#else + strm->zalloc = zcalloc; + strm->opaque = (voidpf)0; +#endif + } + if (strm->zfree == (free_func)0) +#ifdef Z_SOLO + return Z_STREAM_ERROR; +#else + strm->zfree = zcfree; +#endif + +#ifdef FASTEST + if (level != 0) level = 1; +#else + if (level == Z_DEFAULT_COMPRESSION) level = 6; +#endif + + if (windowBits < 0) { /* suppress zlib wrapper */ + wrap = 0; + windowBits = -windowBits; + } +#ifdef GZIP + else if (windowBits > 15) { + wrap = 2; /* write gzip wrapper instead */ + windowBits -= 16; + } +#endif + if (memLevel < 1 || memLevel > MAX_MEM_LEVEL || method != Z_DEFLATED || + windowBits < 8 || windowBits > 15 || level < 0 || level > 9 || + strategy < 0 || strategy > Z_FIXED || (windowBits == 8 && wrap != 1)) { + return Z_STREAM_ERROR; + } + if (windowBits == 8) windowBits = 9; /* until 256-byte window bug fixed */ + s = (deflate_state *) ZALLOC(strm, 1, sizeof(deflate_state)); + if (s == Z_NULL) return Z_MEM_ERROR; + strm->state = (struct internal_state FAR *)s; + s->strm = strm; + s->status = INIT_STATE; /* to pass state test in deflateReset() */ + + s->wrap = wrap; + s->gzhead = Z_NULL; + s->w_bits = (uInt)windowBits; + s->w_size = 1 << s->w_bits; + s->w_mask = s->w_size - 1; + + s->hash_bits = (uInt)memLevel + 7; + s->hash_size = 1 << s->hash_bits; + s->hash_mask = s->hash_size - 1; + s->hash_shift = ((s->hash_bits+MIN_MATCH-1)/MIN_MATCH); + + s->window = (Bytef *) ZALLOC(strm, s->w_size, 2*sizeof(Byte)); + s->prev = (Posf *) ZALLOC(strm, s->w_size, sizeof(Pos)); + s->head = (Posf *) ZALLOC(strm, s->hash_size, sizeof(Pos)); + + s->high_water = 0; /* nothing written to s->window yet */ + + s->lit_bufsize = 1 << (memLevel + 6); /* 16K elements by default */ + + overlay = (ushf *) 
ZALLOC(strm, s->lit_bufsize, sizeof(ush)+2); + s->pending_buf = (uchf *) overlay; + s->pending_buf_size = (ulg)s->lit_bufsize * (sizeof(ush)+2L); + + if (s->window == Z_NULL || s->prev == Z_NULL || s->head == Z_NULL || + s->pending_buf == Z_NULL) { + s->status = FINISH_STATE; + strm->msg = ERR_MSG(Z_MEM_ERROR); + deflateEnd (strm); + return Z_MEM_ERROR; + } + s->d_buf = overlay + s->lit_bufsize/sizeof(ush); + s->l_buf = s->pending_buf + (1+sizeof(ush))*s->lit_bufsize; + + s->level = level; + s->strategy = strategy; + s->method = (Byte)method; + + return deflateReset(strm); +} + +/* ========================================================================= + * Check for a valid deflate stream state. Return 0 if ok, 1 if not. + */ +local int deflateStateCheck (strm) + z_streamp strm; +{ + deflate_state *s; + if (strm == Z_NULL || + strm->zalloc == (alloc_func)0 || strm->zfree == (free_func)0) + return 1; + s = strm->state; + if (s == Z_NULL || s->strm != strm || (s->status != INIT_STATE && +#ifdef GZIP + s->status != GZIP_STATE && +#endif + s->status != EXTRA_STATE && + s->status != NAME_STATE && + s->status != COMMENT_STATE && + s->status != HCRC_STATE && + s->status != BUSY_STATE && + s->status != FINISH_STATE)) + return 1; + return 0; +} + +/* ========================================================================= */ +/* Preload the window and hash table with a preset dictionary. + * Returns Z_STREAM_ERROR if the stream state is invalid, dictionary is + * Z_NULL, the stream uses a gzip wrapper (wrap == 2), a zlib-wrapped stream + * has left INIT_STATE, or lookahead data is pending; otherwise Z_OK. + * When dictLength >= w_size only the last w_size bytes are used. + * strm->next_in / avail_in are saved and restored around the insertion. + */ +int ZEXPORT deflateSetDictionary (strm, dictionary, dictLength) + z_streamp strm; + const Bytef *dictionary; + uInt dictLength; +{ + deflate_state *s; + uInt str, n; + int wrap; + unsigned avail; + z_const unsigned char *next; + + if (deflateStateCheck(strm) || dictionary == Z_NULL) + return Z_STREAM_ERROR; + s = strm->state; + wrap = s->wrap; + if (wrap == 2 || (wrap == 1 && s->status != INIT_STATE) || s->lookahead) + return Z_STREAM_ERROR; + + /* when using zlib wrappers, compute Adler-32 for provided dictionary */ + if (wrap == 1) + strm->adler = adler32(strm->adler, dictionary, dictLength); + s->wrap = 0; /* avoid computing Adler-32 in read_buf 
*/ + + /* if dictionary would fill window, just replace the history */ + if (dictLength >= s->w_size) { + if (wrap == 0) { /* already empty otherwise */ + CLEAR_HASH(s); + s->strstart = 0; + s->block_start = 0L; + s->insert = 0; + } + dictionary += dictLength - s->w_size; /* use the tail */ + dictLength = s->w_size; + } + + /* insert dictionary into window and hash */ + avail = strm->avail_in; + next = strm->next_in; + strm->avail_in = dictLength; + strm->next_in = (z_const Bytef *)dictionary; + fill_window(s); + while (s->lookahead >= MIN_MATCH) { + str = s->strstart; + n = s->lookahead - (MIN_MATCH-1); + do { + UPDATE_HASH(s, s->ins_h, s->window[str + MIN_MATCH-1]); +#ifndef FASTEST + s->prev[str & s->w_mask] = s->head[s->ins_h]; +#endif + s->head[s->ins_h] = (Pos)str; + str++; + } while (--n); + s->strstart = str; + s->lookahead = MIN_MATCH-1; + fill_window(s); + } + s->strstart += s->lookahead; + s->block_start = (long)s->strstart; + s->insert = s->lookahead; + s->lookahead = 0; + s->match_length = s->prev_length = MIN_MATCH-1; + s->match_available = 0; + strm->next_in = next; + strm->avail_in = avail; + s->wrap = wrap; + return Z_OK; +} + +/* ========================================================================= */ +/* Copy the current sliding-window contents out of the stream. + * Up to w_size bytes ending at strstart + lookahead are copied to dictionary + * (when it is not Z_NULL and the length is non-zero), and the byte count is + * stored in *dictLength (when it is not Z_NULL). + * Returns Z_STREAM_ERROR for an invalid stream state, otherwise Z_OK. + */ +int ZEXPORT deflateGetDictionary (strm, dictionary, dictLength) + z_streamp strm; + Bytef *dictionary; + uInt *dictLength; +{ + deflate_state *s; + uInt len; + + if (deflateStateCheck(strm)) + return Z_STREAM_ERROR; + s = strm->state; + len = s->strstart + s->lookahead; + if (len > s->w_size) + len = s->w_size; + if (dictionary != Z_NULL && len) + zmemcpy(dictionary, s->window + s->strstart + s->lookahead - len, len); + if (dictLength != Z_NULL) + *dictLength = len; + return Z_OK; +} + +/* ========================================================================= */ +/* Reset the stream counters, pending output, wrapper status and checksum, + * then call _tr_init(). Unlike deflateReset() it does not call lm_init(). + * Returns Z_STREAM_ERROR for an invalid stream state, otherwise Z_OK. + */ +int ZEXPORT deflateResetKeep (strm) + z_streamp strm; +{ + deflate_state *s; + + if (deflateStateCheck(strm)) { + return Z_STREAM_ERROR; + } + + strm->total_in = strm->total_out = 0; 
+ strm->msg = Z_NULL; /* use zfree if we ever allocate msg dynamically */ + strm->data_type = Z_UNKNOWN; + + s = (deflate_state *)strm->state; + s->pending = 0; + s->pending_out = s->pending_buf; + + if (s->wrap < 0) { + s->wrap = -s->wrap; /* was made negative by deflate(..., Z_FINISH); */ + } + s->status = +#ifdef GZIP + s->wrap == 2 ? GZIP_STATE : +#endif + s->wrap ? INIT_STATE : BUSY_STATE; + strm->adler = +#ifdef GZIP + s->wrap == 2 ? crc32(0L, Z_NULL, 0) : +#endif + adler32(0L, Z_NULL, 0); + s->last_flush = Z_NO_FLUSH; + + _tr_init(s); + + return Z_OK; +} + +/* ========================================================================= */ +/* Fully reset the stream: deflateResetKeep() followed, on success, by + * lm_init() on the stream state. Returns deflateResetKeep()'s result. + */ +int ZEXPORT deflateReset (strm) + z_streamp strm; +{ + int ret; + + ret = deflateResetKeep(strm); + if (ret == Z_OK) + lm_init(strm->state); + return ret; +} + +/* ========================================================================= */ +/* Attach a caller-supplied gzip header to the stream. Only accepted when the + * stream uses a gzip wrapper (wrap == 2); returns Z_STREAM_ERROR otherwise or + * for an invalid stream state. The pointer is stored, not copied. + */ +int ZEXPORT deflateSetHeader (strm, head) + z_streamp strm; + gz_headerp head; +{ + if (deflateStateCheck(strm) || strm->state->wrap != 2) + return Z_STREAM_ERROR; + strm->state->gzhead = head; + return Z_OK; +} + +/* ========================================================================= */ +/* Report pending output: *pending receives state->pending and *bits receives + * state->bi_valid; either pointer may be Z_NULL to skip that value. + * Note: the K&R declarations below are listed in a different order from the + * parameter list; they bind by name, so the signature is (strm, pending, bits). + */ +int ZEXPORT deflatePending (strm, pending, bits) + unsigned *pending; + int *bits; + z_streamp strm; +{ + if (deflateStateCheck(strm)) return Z_STREAM_ERROR; + if (pending != Z_NULL) + *pending = strm->state->pending; + if (bits != Z_NULL) + *bits = strm->state->bi_valid; + return Z_OK; +} + +/* ========================================================================= */ +/* Insert the low `bits` bits of value into the output bit buffer, calling + * _tr_flush_bits() after each partial insertion. Returns Z_BUF_ERROR when + * d_buf is less than (Buf_size + 7) >> 3 bytes beyond pending_out, and + * Z_STREAM_ERROR for an invalid stream state. + */ +int ZEXPORT deflatePrime (strm, bits, value) + z_streamp strm; + int bits; + int value; +{ + deflate_state *s; + int put; + + if (deflateStateCheck(strm)) return Z_STREAM_ERROR; + s = strm->state; + if ((Bytef *)(s->d_buf) < s->pending_out + ((Buf_size + 7) >> 3)) + return Z_BUF_ERROR; + do { + put = Buf_size - s->bi_valid; + if (put > bits) + put = bits; + s->bi_buf |= (ush)((value & ((1 << put) - 1)) << s->bi_valid); + 
s->bi_valid += put; + _tr_flush_bits(s); + value >>= put; + bits -= put; + } while (bits); + return Z_OK; +} + +/* ========================================================================= */ +/* Change the compression level and strategy of an initialized stream. + * If the strategy or the per-level compression function changes and data has + * already been written to the window (high_water != 0), the current block is + * first flushed with deflate(strm, Z_BLOCK); Z_BUF_ERROR is returned when + * that leaves avail_out == 0. Returns Z_STREAM_ERROR for an invalid stream + * state or an out-of-range level/strategy, otherwise Z_OK. + */ +int ZEXPORT deflateParams(strm, level, strategy) + z_streamp strm; + int level; + int strategy; +{ + deflate_state *s; + compress_func func; + + if (deflateStateCheck(strm)) return Z_STREAM_ERROR; + s = strm->state; + +#ifdef FASTEST + if (level != 0) level = 1; +#else + if (level == Z_DEFAULT_COMPRESSION) level = 6; +#endif + if (level < 0 || level > 9 || strategy < 0 || strategy > Z_FIXED) { + return Z_STREAM_ERROR; + } + func = configuration_table[s->level].func; + + if ((strategy != s->strategy || func != configuration_table[level].func) && + s->high_water) { + /* Flush the last buffer: */ + int err = deflate(strm, Z_BLOCK); + if (err == Z_STREAM_ERROR) + return err; + if (strm->avail_out == 0) + return Z_BUF_ERROR; + } + if (s->level != level) { + if (s->level == 0 && s->matches != 0) { + if (s->matches == 1) + slide_hash(s); + else + CLEAR_HASH(s); + s->matches = 0; + } + s->level = level; + s->max_lazy_match = configuration_table[level].max_lazy; + s->good_match = configuration_table[level].good_length; + s->nice_match = configuration_table[level].nice_length; + s->max_chain_length = configuration_table[level].max_chain; + } + s->strategy = strategy; + return Z_OK; +} + +/* ========================================================================= */ +/* Set the four match-search tuning fields of the state directly instead of + * taking them from configuration_table. The values are stored without any + * range checking. Returns Z_STREAM_ERROR for an invalid stream state, + * otherwise Z_OK. + */ +int ZEXPORT deflateTune(strm, good_length, max_lazy, nice_length, max_chain) + z_streamp strm; + int good_length; + int max_lazy; + int nice_length; + int max_chain; +{ + deflate_state *s; + + if (deflateStateCheck(strm)) return Z_STREAM_ERROR; + s = strm->state; + s->good_match = (uInt)good_length; + s->max_lazy_match = (uInt)max_lazy; + s->nice_match = nice_length; + s->max_chain_length = (uInt)max_chain; + return Z_OK; +} + +/* ========================================================================= + * For the default 
windowBits of 15 and memLevel of 8, this function returns + * a close to exact, as well as small, upper bound on the compressed size. + * They are coded as constants here for a reason--if the #define's are + * changed, then this function needs to be changed as well. The return + * value for 15 and 8 only works for those exact settings. + * + * For any setting other than those defaults for windowBits and memLevel, + * the value returned is a conservative worst case for the maximum expansion + * resulting from using fixed blocks instead of stored blocks, which deflate + * can emit on compressed data for some combinations of the parameters. + * + * This function could be more sophisticated to provide closer upper bounds for + * every combination of windowBits and memLevel. But even the conservative + * upper bound of about 14% expansion does not seem onerous for output buffer + * allocation. + */ +uLong ZEXPORT deflateBound(strm, sourceLen) + z_streamp strm; + uLong sourceLen; +{ + deflate_state *s; + uLong complen, wraplen; + + /* conservative upper bound for compressed data */ + complen = sourceLen + + ((sourceLen + 7) >> 3) + ((sourceLen + 63) >> 6) + 5; + + /* if can't get parameters, return conservative bound plus zlib wrapper */ + if (deflateStateCheck(strm)) + return complen + 6; + + /* compute wrapper length */ + s = strm->state; + switch (s->wrap) { + case 0: /* raw deflate */ + wraplen = 0; + break; + case 1: /* zlib wrapper */ + wraplen = 6 + (s->strstart ? 
4 : 0); + break; +#ifdef GZIP + case 2: /* gzip wrapper */ + wraplen = 18; + if (s->gzhead != Z_NULL) { /* user-supplied gzip header */ + Bytef *str; + if (s->gzhead->extra != Z_NULL) + wraplen += 2 + s->gzhead->extra_len; + str = s->gzhead->name; + if (str != Z_NULL) + do { + wraplen++; + } while (*str++); + str = s->gzhead->comment; + if (str != Z_NULL) + do { + wraplen++; + } while (*str++); + if (s->gzhead->hcrc) + wraplen += 2; + } + break; +#endif + default: /* for compiler happiness */ + wraplen = 6; + } + + /* if not default parameters, return conservative bound */ + if (s->w_bits != 15 || s->hash_bits != 8 + 7) + return complen + wraplen; + + /* default settings: return tight bound for that case */ + return sourceLen + (sourceLen >> 12) + (sourceLen >> 14) + + (sourceLen >> 25) + 13 - 6 + wraplen; +} + +/* ========================================================================= + * Put a short in the pending buffer. The 16-bit value is put in MSB order. + * IN assertion: the stream state is correct and there is enough room in + * pending_buf. + */ +local void putShortMSB (s, b) + deflate_state *s; + uInt b; +{ + put_byte(s, (Byte)(b >> 8)); + put_byte(s, (Byte)(b & 0xff)); +} + +/* ========================================================================= + * Flush as much pending output as possible. All deflate() output, except for + * some deflate_stored() output, goes through this function so some + * applications may wish to modify it to avoid allocating a large + * strm->next_out buffer and copying into it. (See also read_buf()). 
+ */
+local void flush_pending(strm)
+    z_streamp strm;   /* stream whose state->pending bytes are copied to next_out */
+{
+    unsigned len;
+    deflate_state *s = strm->state;
+
+    _tr_flush_bits(s);   /* defined outside this chunk */
+    len = s->pending;
+    if (len > strm->avail_out) len = strm->avail_out;   /* copy no more than the output space */
+    if (len == 0) return;
+
+    zmemcpy(strm->next_out, s->pending_out, len);
+    strm->next_out  += len;
+    s->pending_out  += len;
+    strm->total_out += len;
+    strm->avail_out -= len;
+    s->pending      -= len;
+    if (s->pending == 0) {
+        s->pending_out = s->pending_buf;   /* buffer drained: rewind to its start */
+    }
+}
+
+/* ===========================================================================
+ * Update the header CRC with the bytes s->pending_buf[beg..s->pending - 1].
+ *
+ * The expansion uses the identifiers `s` and `strm` of the function it is
+ * expanded in. It does nothing when s->gzhead->hcrc is zero or when
+ * s->pending <= beg.
+ */
+#define HCRC_UPDATE(beg) \
+    do { \
+        if (s->gzhead->hcrc && s->pending > (beg)) \
+            strm->adler = crc32(strm->adler, s->pending_buf + (beg), \
+                                s->pending - (beg)); \
+    } while (0)
+
+/* =========================================================================
+ * Compress data from strm->next_in into strm->next_out as directed by flush
+ * (accepted range: 0 .. Z_BLOCK).
+ *
+ * Order of work in one call:
+ *   1. validate the stream and the flush value;
+ *   2. drain output left in s->pending from an earlier call;
+ *   3. write the zlib header (INIT_STATE) or, under GZIP, the gzip header
+ *      (GZIP_STATE, EXTRA_STATE, NAME_STATE, COMMENT_STATE, HCRC_STATE),
+ *      returning Z_OK early whenever the pending buffer cannot be emptied;
+ *   4. run the block compressor chosen from s->level and s->strategy;
+ *   5. for Z_FINISH with s->wrap > 0, append the trailer, then negate
+ *      s->wrap so the trailer is written once.
+ *
+ * Returns:
+ *   Z_STREAM_ERROR  deflateStateCheck() failed, flush is out of range,
+ *                   next_out is Z_NULL, next_in is Z_NULL while
+ *                   avail_in != 0, or status is FINISH_STATE and
+ *                   flush != Z_FINISH.
+ *   Z_BUF_ERROR     avail_out == 0 on entry; or nothing pending, no input,
+ *                   RANK(flush) <= RANK(previous flush) and
+ *                   flush != Z_FINISH; or input supplied in FINISH_STATE.
+ *   Z_OK            otherwise, unless the stream is complete.
+ *   Z_STREAM_END    flush == Z_FINISH, the compressor did not report
+ *                   need_more/finish_started, and either s->wrap <= 0 or
+ *                   s->pending is 0 after the trailer.
+ *
+ * Error codes other than the first Z_STREAM_ERROR go through ERR_RETURN,
+ * which is defined outside this chunk.
+ */
+int ZEXPORT deflate (strm, flush)
+    z_streamp strm;
+    int flush;
+{
+    int old_flush; /* value of flush param for previous deflate call */
+    deflate_state *s;
+
+    if (deflateStateCheck(strm) || flush > Z_BLOCK || flush < 0) {
+        return Z_STREAM_ERROR;
+    }
+    s = strm->state;
+
+    if (strm->next_out == Z_NULL ||
+        (strm->avail_in != 0 && strm->next_in == Z_NULL) ||
+        (s->status == FINISH_STATE && flush != Z_FINISH)) {
+        ERR_RETURN(strm, Z_STREAM_ERROR);
+    }
+    if (strm->avail_out == 0) ERR_RETURN(strm, Z_BUF_ERROR);
+
+    old_flush = s->last_flush;
+    s->last_flush = flush;
+
+    /* Flush as much pending output as possible */
+    if (s->pending != 0) {
+        flush_pending(strm);
+        if (strm->avail_out == 0) {
+            /* Since avail_out is 0, deflate will be called again with
+             * more output space, but possibly with both pending and
+             * avail_in equal to zero. There won't be anything to do,
+             * but this is not an error situation so make sure we
+             * return OK instead of BUF_ERROR at next call of deflate:
+             */
+            s->last_flush = -1;
+            return Z_OK;
+        }
+
+    /* Make sure there is something to do and avoid duplicate consecutive
+     * flushes. For repeated and useless calls with Z_FINISH, we keep
+     * returning Z_STREAM_END instead of Z_BUF_ERROR.
+     */
+    } else if (strm->avail_in == 0 && RANK(flush) <= RANK(old_flush) &&
+               flush != Z_FINISH) {
+        ERR_RETURN(strm, Z_BUF_ERROR);
+    }
+
+    /* User must not provide more input after the first FINISH: */
+    if (s->status == FINISH_STATE && strm->avail_in != 0) {
+        ERR_RETURN(strm, Z_BUF_ERROR);
+    }
+
+    /* Write the header */
+    if (s->status == INIT_STATE) {
+        /* zlib header */
+        uInt header = (Z_DEFLATED + ((s->w_bits-8)<<4)) << 8;
+        uInt level_flags;
+
+        if (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2)
+            level_flags = 0;
+        else if (s->level < 6)
+            level_flags = 1;
+        else if (s->level == 6)
+            level_flags = 2;
+        else
+            level_flags = 3;
+        header |= (level_flags << 6);
+        if (s->strstart != 0) header |= PRESET_DICT;
+        header += 31 - (header % 31);   /* afterwards header % 31 == 0 */
+
+        putShortMSB(s, header);
+
+        /* Save the adler32 of the preset dictionary: */
+        if (s->strstart != 0) {
+            putShortMSB(s, (uInt)(strm->adler >> 16));
+            putShortMSB(s, (uInt)(strm->adler & 0xffff));
+        }
+        strm->adler = adler32(0L, Z_NULL, 0);
+        s->status = BUSY_STATE;
+
+        /* Compression must start with an empty pending buffer */
+        flush_pending(strm);
+        if (s->pending != 0) {
+            s->last_flush = -1;
+            return Z_OK;
+        }
+    }
+#ifdef GZIP
+    if (s->status == GZIP_STATE) {
+        /* gzip header */
+        strm->adler = crc32(0L, Z_NULL, 0);
+        put_byte(s, 31);
+        put_byte(s, 139);
+        put_byte(s, 8);
+        if (s->gzhead == Z_NULL) {
+            /* No caller-supplied header: five zero bytes, a level-derived
+             * byte, then OS_CODE.
+             */
+            put_byte(s, 0);
+            put_byte(s, 0);
+            put_byte(s, 0);
+            put_byte(s, 0);
+            put_byte(s, 0);
+            put_byte(s, s->level == 9 ? 2 :
+                     (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2 ?
+                      4 : 0));
+            put_byte(s, OS_CODE);
+            s->status = BUSY_STATE;
+
+            /* Compression must start with an empty pending buffer */
+            flush_pending(strm);
+            if (s->pending != 0) {
+                s->last_flush = -1;
+                return Z_OK;
+            }
+        }
+        else {
+            /* Flag byte: one bit per optional field present in s->gzhead. */
+            put_byte(s, (s->gzhead->text ? 1 : 0) +
+                     (s->gzhead->hcrc ? 2 : 0) +
+                     (s->gzhead->extra == Z_NULL ? 0 : 4) +
+                     (s->gzhead->name == Z_NULL ? 0 : 8) +
+                     (s->gzhead->comment == Z_NULL ? 0 : 16)
+                     );
+            put_byte(s, (Byte)(s->gzhead->time & 0xff));
+            put_byte(s, (Byte)((s->gzhead->time >> 8) & 0xff));
+            put_byte(s, (Byte)((s->gzhead->time >> 16) & 0xff));
+            put_byte(s, (Byte)((s->gzhead->time >> 24) & 0xff));
+            put_byte(s, s->level == 9 ? 2 :
+                     (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2 ?
+                      4 : 0));
+            put_byte(s, s->gzhead->os & 0xff);
+            if (s->gzhead->extra != Z_NULL) {
+                put_byte(s, s->gzhead->extra_len & 0xff);
+                put_byte(s, (s->gzhead->extra_len >> 8) & 0xff);
+            }
+            if (s->gzhead->hcrc)
+                strm->adler = crc32(strm->adler, s->pending_buf,
+                                    s->pending);
+            s->gzindex = 0;   /* progress index into the field being written */
+            s->status = EXTRA_STATE;
+        }
+    }
+    if (s->status == EXTRA_STATE) {
+        if (s->gzhead->extra != Z_NULL) {
+            ulg beg = s->pending;   /* start of bytes to update crc */
+            uInt left = (s->gzhead->extra_len & 0xffff) - s->gzindex;
+            /* Copy in pending-buffer-sized pieces; s->gzindex records how far
+             * we got so a later call can resume after an early return.
+             */
+            while (s->pending + left > s->pending_buf_size) {
+                uInt copy = s->pending_buf_size - s->pending;
+                zmemcpy(s->pending_buf + s->pending,
+                        s->gzhead->extra + s->gzindex, copy);
+                s->pending = s->pending_buf_size;
+                HCRC_UPDATE(beg);
+                s->gzindex += copy;
+                flush_pending(strm);
+                if (s->pending != 0) {
+                    s->last_flush = -1;
+                    return Z_OK;
+                }
+                beg = 0;
+                left -= copy;
+            }
+            zmemcpy(s->pending_buf + s->pending,
+                    s->gzhead->extra + s->gzindex, left);
+            s->pending += left;
+            HCRC_UPDATE(beg);
+            s->gzindex = 0;
+        }
+        s->status = NAME_STATE;
+    }
+    if (s->status == NAME_STATE) {
+        if (s->gzhead->name != Z_NULL) {
+            ulg beg = s->pending;   /* start of bytes to update crc */
+            int val;
+            /* Emit bytes up to and including the terminating zero. */
+            do {
+                if (s->pending == s->pending_buf_size) {
+                    HCRC_UPDATE(beg);
+                    flush_pending(strm);
+                    if (s->pending != 0) {
+                        s->last_flush = -1;
+                        return Z_OK;
+                    }
+                    beg = 0;
+                }
+                val = s->gzhead->name[s->gzindex++];
+                put_byte(s, val);
+            } while (val != 0);
+            HCRC_UPDATE(beg);
+            s->gzindex = 0;
+        }
+        s->status = COMMENT_STATE;
+    }
+    if (s->status == COMMENT_STATE) {
+        if (s->gzhead->comment != Z_NULL) {
+            ulg beg = s->pending;   /* start of bytes to update crc */
+            int val;
+            /* Emit bytes up to and including the terminating zero. */
+            do {
+                if (s->pending == s->pending_buf_size) {
+                    HCRC_UPDATE(beg);
+                    flush_pending(strm);
+                    if (s->pending != 0) {
+                        s->last_flush = -1;
+                        return Z_OK;
+                    }
+                    beg = 0;
+                }
+                val = s->gzhead->comment[s->gzindex++];
+                put_byte(s, val);
+            } while (val != 0);
+            HCRC_UPDATE(beg);
+        }
+        s->status = HCRC_STATE;
+    }
+    if (s->status == HCRC_STATE) {
+        if (s->gzhead->hcrc) {
+            if (s->pending + 2 > s->pending_buf_size) {
+                flush_pending(strm);
+                if (s->pending != 0) {
+                    s->last_flush = -1;
+                    return Z_OK;
+                }
+            }
+            /* Low 16 bits of the running header CRC, low byte first. */
+            put_byte(s, (Byte)(strm->adler & 0xff));
+            put_byte(s, (Byte)((strm->adler >> 8) & 0xff));
+            strm->adler = crc32(0L, Z_NULL, 0);
+        }
+        s->status = BUSY_STATE;
+
+        /* Compression must start with an empty pending buffer */
+        flush_pending(strm);
+        if (s->pending != 0) {
+            s->last_flush = -1;
+            return Z_OK;
+        }
+    }
+#endif
+
+    /* Start a new block or continue the current one.
+     */
+    if (strm->avail_in != 0 || s->lookahead != 0 ||
+        (flush != Z_NO_FLUSH && s->status != FINISH_STATE)) {
+        block_state bstate;
+
+        /* level 0 -> stored; else by strategy; else the per-level function */
+        bstate = s->level == 0 ? deflate_stored(s, flush) :
+                 s->strategy == Z_HUFFMAN_ONLY ? deflate_huff(s, flush) :
+                 s->strategy == Z_RLE ? deflate_rle(s, flush) :
+                 (*(configuration_table[s->level].func))(s, flush);
+
+        if (bstate == finish_started || bstate == finish_done) {
+            s->status = FINISH_STATE;
+        }
+        if (bstate == need_more || bstate == finish_started) {
+            if (strm->avail_out == 0) {
+                s->last_flush = -1; /* avoid BUF_ERROR next call, see above */
+            }
+            return Z_OK;
+            /* If flush != Z_NO_FLUSH && avail_out == 0, the next call
+             * of deflate should use the same flush parameter to make sure
+             * that the flush is complete. So we don't have to output an
+             * empty block here, this will be done at next call. This also
+             * ensures that for a very small output buffer, we emit at most
+             * one empty block.
+             */
+        }
+        if (bstate == block_done) {
+            if (flush == Z_PARTIAL_FLUSH) {
+                _tr_align(s);
+            } else if (flush != Z_BLOCK) { /* FULL_FLUSH or SYNC_FLUSH */
+                _tr_stored_block(s, (char*)0, 0L, 0);
+                /* For a full flush, this empty block will be recognized
+                 * as a special marker by inflate_sync().
+                 */
+                if (flush == Z_FULL_FLUSH) {
+                    CLEAR_HASH(s);             /* forget history */
+                    if (s->lookahead == 0) {
+                        s->strstart = 0;
+                        s->block_start = 0L;
+                        s->insert = 0;
+                    }
+                }
+            }
+            flush_pending(strm);
+            if (strm->avail_out == 0) {
+              s->last_flush = -1; /* avoid BUF_ERROR at next call, see above */
+              return Z_OK;
+            }
+        }
+    }
+
+    if (flush != Z_FINISH) return Z_OK;
+    if (s->wrap <= 0) return Z_STREAM_END;   /* no trailer to write, or already written */
+
+    /* Write the trailer */
+#ifdef GZIP
+    if (s->wrap == 2) {
+        /* strm->adler then strm->total_in, each as 4 bytes, low byte first */
+        put_byte(s, (Byte)(strm->adler & 0xff));
+        put_byte(s, (Byte)((strm->adler >> 8) & 0xff));
+        put_byte(s, (Byte)((strm->adler >> 16) & 0xff));
+        put_byte(s, (Byte)((strm->adler >> 24) & 0xff));
+        put_byte(s, (Byte)(strm->total_in & 0xff));
+        put_byte(s, (Byte)((strm->total_in >> 8) & 0xff));
+        put_byte(s, (Byte)((strm->total_in >> 16) & 0xff));
+        put_byte(s, (Byte)((strm->total_in >> 24) & 0xff));
+    }
+    else
+#endif
+    {
+        putShortMSB(s, (uInt)(strm->adler >> 16));
+        putShortMSB(s, (uInt)(strm->adler & 0xffff));
+    }
+    flush_pending(strm);
+    /* If avail_out is zero, the application will call deflate again
+     * to flush the rest.
+     */
+    if (s->wrap > 0) s->wrap = -s->wrap; /* write the trailer only once! */
+    return s->pending != 0 ? Z_OK : Z_STREAM_END;
+}
+
+/* =========================================================================
+ * Release everything owned by strm->state and set strm->state to Z_NULL.
+ *
+ * Returns Z_STREAM_ERROR when deflateStateCheck() rejects the stream,
+ * Z_DATA_ERROR when the state was BUSY_STATE at the time of the call, and
+ * Z_OK otherwise. The buffers are freed in all cases except the
+ * Z_STREAM_ERROR one.
+ */
+int ZEXPORT deflateEnd (strm)
+    z_streamp strm;
+{
+    int status;
+
+    if (deflateStateCheck(strm)) return Z_STREAM_ERROR;
+
+    status = strm->state->status;   /* saved: the state is freed below */
+
+    /* Deallocate in reverse order of allocations: */
+    TRY_FREE(strm, strm->state->pending_buf);
+    TRY_FREE(strm, strm->state->head);
+    TRY_FREE(strm, strm->state->prev);
+    TRY_FREE(strm, strm->state->window);
+
+    ZFREE(strm, strm->state);
+    strm->state = Z_NULL;
+
+    return status == BUSY_STATE ? Z_DATA_ERROR : Z_OK;
+}
+
+/* =========================================================================
+ * Copy the source state to the destination state.
+ * To simplify the source, this is not supported for 16-bit MSDOS (which
+ * doesn't have enough memory anyway to duplicate compression states).
+ */
+int ZEXPORT deflateCopy (dest, source)
+    z_streamp dest;     /* receives the copy; must not be Z_NULL */
+    z_streamp source;   /* stream to duplicate; must pass deflateStateCheck() */
+{
+#ifdef MAXSEG_64K
+    return Z_STREAM_ERROR;   /* unsupported in this configuration */
+#else
+    deflate_state *ds;   /* destination state (newly allocated) */
+    deflate_state *ss;   /* source state */
+    ushf *overlay;       /* allocation that backs pending_buf, d_buf and l_buf */
+
+
+    if (deflateStateCheck(source) || dest == Z_NULL) {
+        return Z_STREAM_ERROR;
+    }
+
+    ss = source->state;
+
+    zmemcpy((voidpf)dest, (voidpf)source, sizeof(z_stream));   /* shallow copy, state pointer included */
+
+    ds = (deflate_state *) ZALLOC(dest, 1, sizeof(deflate_state));
+    if (ds == Z_NULL) return Z_MEM_ERROR;   /* NOTE: dest->state still holds source's pointer on this path */
+    dest->state = (struct internal_state FAR *) ds;
+    zmemcpy((voidpf)ds, (voidpf)ss, sizeof(deflate_state));
+    ds->strm = dest;   /* point the copied state back at its own stream */
+
+    /* The four buffer pointers below were copied from ss by the zmemcpy
+     * above; each is replaced with a fresh allocation before use.
+     */
+    ds->window = (Bytef *) ZALLOC(dest, ds->w_size, 2*sizeof(Byte));
+    ds->prev   = (Posf *)  ZALLOC(dest, ds->w_size, sizeof(Pos));
+    ds->head   = (Posf *)  ZALLOC(dest, ds->hash_size, sizeof(Pos));
+    overlay = (ushf *) ZALLOC(dest, ds->lit_bufsize, sizeof(ush)+2);
+    ds->pending_buf = (uchf *) overlay;
+
+    if (ds->window == Z_NULL || ds->prev == Z_NULL || ds->head == Z_NULL ||
+        ds->pending_buf == Z_NULL) {
+        deflateEnd (dest);   /* frees whichever allocations succeeded, plus ds */
+        return Z_MEM_ERROR;
+    }
+    /* following zmemcpy do not work for 16-bit MSDOS */
+    zmemcpy(ds->window, ss->window, ds->w_size * 2 * sizeof(Byte));
+    zmemcpy((voidpf)ds->prev, (voidpf)ss->prev, ds->w_size * sizeof(Pos));
+    zmemcpy((voidpf)ds->head, (voidpf)ss->head, ds->hash_size * sizeof(Pos));
+    zmemcpy(ds->pending_buf, ss->pending_buf, (uInt)ds->pending_buf_size);
+
+    /* Re-derive interior pointers so they refer to dest's buffers. */
+    ds->pending_out = ds->pending_buf + (ss->pending_out - ss->pending_buf);
+    ds->d_buf = overlay + ds->lit_bufsize/sizeof(ush);
+    ds->l_buf = ds->pending_buf + (1+sizeof(ush))*ds->lit_bufsize;
+
+    ds->l_desc.dyn_tree = ds->dyn_ltree;
+    ds->d_desc.dyn_tree = ds->dyn_dtree;
+    ds->bl_desc.dyn_tree = ds->bl_tree;
+
+    return Z_OK;
+#endif /* MAXSEG_64K */
+}
+
+/* ===========================================================================
+ * Read a new buffer from the current input stream, update the adler32
+ * and total number of bytes read.
All deflate() input goes through
+ * this function so some applications may wish to modify it to avoid
+ * allocating a large strm->next_in buffer and copying from it.
+ * (See also flush_pending()).
+ */
+local unsigned read_buf(strm, buf, size)
+    z_streamp strm;   /* source of input: next_in / avail_in */
+    Bytef *buf;       /* destination for the copied bytes */
+    unsigned size;    /* upper bound on the number of bytes to copy */
+{
+    unsigned len = strm->avail_in;
+
+    if (len > size) len = size;   /* len = min(avail_in, size) */
+    if (len == 0) return 0;
+
+    strm->avail_in  -= len;
+
+    zmemcpy(buf, strm->next_in, len);
+    /* Fold the copied bytes into the running check value: adler32 when
+     * wrap == 1, crc32 when wrap == 2 (GZIP builds only), nothing otherwise.
+     */
+    if (strm->state->wrap == 1) {
+        strm->adler = adler32(strm->adler, buf, len);
+    }
+#ifdef GZIP
+    else if (strm->state->wrap == 2) {
+        strm->adler = crc32(strm->adler, buf, len);
+    }
+#endif
+    strm->next_in  += len;
+    strm->total_in += len;
+
+    return len;   /* number of bytes actually copied */
+}
+
+/* ===========================================================================
+ * Initialize the "longest match" routines for a new zlib stream
+ *
+ * Sets window_size to twice w_size, clears the hash table, loads the four
+ * tuning parameters for s->level from configuration_table, and resets the
+ * window position and match bookkeeping fields.
+ */
+local void lm_init (s)
+    deflate_state *s;
+{
+    s->window_size = (ulg)2L*s->w_size;
+
+    CLEAR_HASH(s);
+
+    /* Set the default configuration parameters:
+     */
+    s->max_lazy_match   = configuration_table[s->level].max_lazy;
+    s->good_match       = configuration_table[s->level].good_length;
+    s->nice_match       = configuration_table[s->level].nice_length;
+    s->max_chain_length = configuration_table[s->level].max_chain;
+
+    s->strstart = 0;
+    s->block_start = 0L;
+    s->lookahead = 0;
+    s->insert = 0;
+    s->match_length = s->prev_length = MIN_MATCH-1;   /* i.e. no usable match yet */
+    s->match_available = 0;
+    s->ins_h = 0;
+#ifndef FASTEST
+#ifdef ASMV
+    match_init(); /* initialize the asm code */
+#endif
+#endif
+}
+
+#ifndef FASTEST
+/* ===========================================================================
+ * Set match_start to the longest match starting at the given string and
+ * return its length. Matches shorter or equal to prev_length are discarded,
+ * in which case the result is equal to prev_length and match_start is
+ * garbage.
+ * IN assertions: cur_match is the head of the hash chain for the current
+ *   string (strstart) and its distance is <= MAX_DIST, and prev_length >= 1
+ * OUT assertion: the match length is not greater than s->lookahead.
+ */
+#ifndef ASMV
+/* For 80x86 and 680x0, an optimized version will be provided in match.asm or
+ * match.S. The code will be functionally equivalent.
+ *
+ * This C version walks the hash chain through s->prev starting at cur_match,
+ * for at most max_chain_length candidates (a quarter of that when
+ * prev_length >= good_match), and stops early once a match of at least
+ * nice_match bytes is found. s->match_start is written only when a
+ * candidate beats the best length so far. The return value is the best
+ * length, capped at s->lookahead.
+ */
+local uInt longest_match(s, cur_match)
+    deflate_state *s;
+    IPos cur_match;                             /* current match */
+{
+    unsigned chain_length = s->max_chain_length;/* max hash chain length */
+    register Bytef *scan = s->window + s->strstart; /* current string */
+    register Bytef *match;                      /* matched string */
+    register int len;                           /* length of current match */
+    int best_len = (int)s->prev_length;         /* best match length so far */
+    int nice_match = s->nice_match;             /* stop if match long enough */
+    IPos limit = s->strstart > (IPos)MAX_DIST(s) ?
+        s->strstart - (IPos)MAX_DIST(s) : NIL;
+    /* Stop when cur_match becomes <= limit. To simplify the code,
+     * we prevent matches with the string of window index 0.
+     */
+    Posf *prev = s->prev;
+    uInt wmask = s->w_mask;
+
+#ifdef UNALIGNED_OK
+    /* Compare two bytes at a time. Note: this is not always beneficial.
+     * Try with and without -DUNALIGNED_OK to check.
+     */
+    register Bytef *strend = s->window + s->strstart + MAX_MATCH - 1;
+    register ush scan_start = *(ushf*)scan;
+    register ush scan_end   = *(ushf*)(scan+best_len-1);
+#else
+    register Bytef *strend = s->window + s->strstart + MAX_MATCH;
+    register Byte scan_end1  = scan[best_len-1];
+    register Byte scan_end   = scan[best_len];
+#endif
+
+    /* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16.
+     * It is easy to get rid of this optimization if necessary.
+     */
+    Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever");
+
+    /* Do not waste too much time if we already have a good match: */
+    if (s->prev_length >= s->good_match) {
+        chain_length >>= 2;
+    }
+    /* Do not look for matches beyond the end of the input. This is necessary
+     * to make deflate deterministic.
+     */
+    if ((uInt)nice_match > s->lookahead) nice_match = (int)s->lookahead;
+
+    Assert((ulg)s->strstart <= s->window_size-MIN_LOOKAHEAD, "need lookahead");
+
+    do {
+        Assert(cur_match < s->strstart, "no future");
+        match = s->window + cur_match;
+
+        /* Skip to next match if the match length cannot increase
+         * or if the match length is less than 2.  Note that the checks below
+         * for insufficient lookahead only occur occasionally for performance
+         * reasons.  Therefore uninitialized memory will be accessed, and
+         * conditional jumps will be made that depend on those values.
+         * However the length of the match is limited to the lookahead, so
+         * the output of deflate is not affected by the uninitialized values.
+         */
+#if (defined(UNALIGNED_OK) && MAX_MATCH == 258)
+        /* This code assumes sizeof(unsigned short) == 2. Do not use
+         * UNALIGNED_OK if your compiler uses a different size.
+         */
+        if (*(ushf*)(match+best_len-1) != scan_end ||
+            *(ushf*)match != scan_start) continue;
+
+        /* It is not necessary to compare scan[2] and match[2] since they are
+         * always equal when the other bytes match, given that the hash keys
+         * are equal and that HASH_BITS >= 8. Compare 2 bytes at a time at
+         * strstart+3, +5, ... up to strstart+257. We check for insufficient
+         * lookahead only every 4th comparison; the 128th check will be made
+         * at strstart+257. If MAX_MATCH-2 is not a multiple of 8, it is
+         * necessary to put more guard bytes at the end of the window, or
+         * to check more often for insufficient lookahead.
+         */
+        Assert(scan[2] == match[2], "scan[2]?");
+        scan++, match++;
+        do {
+        } while (*(ushf*)(scan+=2) == *(ushf*)(match+=2) &&
+                 *(ushf*)(scan+=2) == *(ushf*)(match+=2) &&
+                 *(ushf*)(scan+=2) == *(ushf*)(match+=2) &&
+                 *(ushf*)(scan+=2) == *(ushf*)(match+=2) &&
+                 scan < strend);
+        /* The funny "do {}" generates better code on most compilers */
+
+        /* Here, scan <= window+strstart+257 */
+        Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan");
+        if (*scan == *match) scan++;
+
+        len = (MAX_MATCH - 1) - (int)(strend-scan);
+        scan = strend - (MAX_MATCH-1);   /* rewind scan to strstart for the next candidate */
+
+#else /* UNALIGNED_OK */
+
+        if (match[best_len]   != scan_end  ||
+            match[best_len-1] != scan_end1 ||
+            *match            != *scan     ||
+            *++match          != scan[1])      continue;
+
+        /* The check at best_len-1 can be removed because it will be made
+         * again later. (This heuristic is not always a win.)
+         * It is not necessary to compare scan[2] and match[2] since they
+         * are always equal when the other bytes match, given that
+         * the hash keys are equal and that HASH_BITS >= 8.
+         */
+        scan += 2, match++;
+        Assert(*scan == *match, "match[2]?");
+
+        /* We check for insufficient lookahead only every 8th comparison;
+         * the 256th check will be made at strstart+258.
+         */
+        do {
+        } while (*++scan == *++match && *++scan == *++match &&
+                 *++scan == *++match && *++scan == *++match &&
+                 *++scan == *++match && *++scan == *++match &&
+                 *++scan == *++match && *++scan == *++match &&
+                 scan < strend);
+
+        Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan");
+
+        len = MAX_MATCH - (int)(strend - scan);
+        scan = strend - MAX_MATCH;   /* rewind scan to strstart for the next candidate */
+
+#endif /* UNALIGNED_OK */
+
+        if (len > best_len) {
+            s->match_start = cur_match;
+            best_len = len;
+            if (len >= nice_match) break;
+            /* Refresh the cached end bytes used by the quick rejection test. */
+#ifdef UNALIGNED_OK
+            scan_end = *(ushf*)(scan+best_len-1);
+#else
+            scan_end1  = scan[best_len-1];
+            scan_end   = scan[best_len];
+#endif
+        }
+    } while ((cur_match = prev[cur_match & wmask]) > limit
+             && --chain_length != 0);
+
+    if ((uInt)best_len <= s->lookahead) return (uInt)best_len;
+    return s->lookahead;
+}
+#endif /* ASMV */
+
+#else /* FASTEST */
+
+/* ---------------------------------------------------------------------------
+ * Optimized version for FASTEST only
+ *
+ * Examines the single candidate cur_match (no hash chain walk). Returns
+ * MIN_MATCH-1 when the first two bytes differ or the match is shorter than
+ * MIN_MATCH; otherwise sets s->match_start and returns the match length
+ * capped at s->lookahead.
+ */
+local uInt longest_match(s, cur_match)
+    deflate_state *s;
+    IPos cur_match;                             /* current match */
+{
+    register Bytef *scan = s->window + s->strstart; /* current string */
+    register Bytef *match;                       /* matched string */
+    register int len;                           /* length of current match */
+    register Bytef *strend = s->window + s->strstart + MAX_MATCH;
+
+    /* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16.
+     * It is easy to get rid of this optimization if necessary.
+     */
+    Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever");
+
+    Assert((ulg)s->strstart <= s->window_size-MIN_LOOKAHEAD, "need lookahead");
+
+    Assert(cur_match < s->strstart, "no future");
+
+    match = s->window + cur_match;
+
+    /* Return failure if the match length is less than 2:
+     */
+    if (match[0] != scan[0] || match[1] != scan[1]) return MIN_MATCH-1;
+
+    /* The check at best_len-1 can be removed because it will be made
+     * again later. (This heuristic is not always a win.)
+     * It is not necessary to compare scan[2] and match[2] since they
+     * are always equal when the other bytes match, given that
+     * the hash keys are equal and that HASH_BITS >= 8.
+     */
+    scan += 2, match += 2;
+    Assert(*scan == *match, "match[2]?");
+
+    /* We check for insufficient lookahead only every 8th comparison;
+     * the 256th check will be made at strstart+258.
+     */
+    do {
+    } while (*++scan == *++match && *++scan == *++match &&
+             *++scan == *++match && *++scan == *++match &&
+             *++scan == *++match && *++scan == *++match &&
+             *++scan == *++match && *++scan == *++match &&
+             scan < strend);
+
+    Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan");
+
+    len = MAX_MATCH - (int)(strend - scan);
+
+    if (len < MIN_MATCH) return MIN_MATCH - 1;
+
+    s->match_start = cur_match;
+    return (uInt)len <= s->lookahead ? (uInt)len : s->lookahead;
+}
+
+#endif /* FASTEST */
+
+#ifdef ZLIB_DEBUG
+
+#define EQUAL 0
+/* result of memcmp for equal strings */
+
+/* ===========================================================================
+ * Check that the match at match_start is indeed a match.
+ *
+ * Debug aid, compiled only under ZLIB_DEBUG; in other builds check_match()
+ * is a macro that expands to nothing. Compares `length` bytes of s->window
+ * at `match` and at `start`. On a mismatch it prints the positions and the
+ * interleaved bytes to stderr and calls z_error("invalid match"). When
+ * z_verbose > 1 it also prints the distance, the length and the matched
+ * bytes to stderr.
+ */
+local void check_match(s, start, match, length)
+    deflate_state *s;
+    IPos start, match;
+    int length;
+{
+    /* check that the match is indeed a match */
+    if (zmemcmp(s->window + match,
+                s->window + start, length) != EQUAL) {
+        fprintf(stderr, " start %u, match %u, length %d\n",
+                start, match, length);
+        do {
+            fprintf(stderr, "%c%c", s->window[match++], s->window[start++]);
+        } while (--length != 0);
+        z_error("invalid match");
+    }
+    if (z_verbose > 1) {
+        fprintf(stderr,"\\[%d,%d]", start-match, length);
+        do { putc(s->window[start++], stderr); } while (--length != 0);
+    }
+}
+#else
+#  define check_match(s, start, match, length)
+#endif /* ZLIB_DEBUG */
+
+/* ===========================================================================
+ * Fill the window when the lookahead becomes insufficient.
+ * Updates strstart and lookahead.
+ *
+ * IN assertion: lookahead < MIN_LOOKAHEAD
+ * OUT assertions: strstart <= window_size-MIN_LOOKAHEAD
+ *    At least one byte has been read, or avail_in == 0; reads are
+ *    performed for at least two bytes (required for the zip translate_eol
+ *    option -- not supported here).
+ */
+local void fill_window(s)
+    deflate_state *s;
+{
+    unsigned n;      /* bytes obtained by the latest read_buf() call */
+    unsigned more;    /* Amount of free space at the end of the window. */
+    uInt wsize = s->w_size;
+
+    Assert(s->lookahead < MIN_LOOKAHEAD, "already enough lookahead");
+
+    do {
+        more = (unsigned)(s->window_size -(ulg)s->lookahead -(ulg)s->strstart);
+
+        /* Deal with !@#$% 64K limit: */
+        if (sizeof(int) <= 2) {
+            if (more == 0 && s->strstart == 0 && s->lookahead == 0) {
+                more = wsize;
+
+            } else if (more == (unsigned)(-1)) {
+                /* Very unlikely, but possible on 16 bit machine if
+                 * strstart == 0 && lookahead == 1 (input done a byte at time)
+                 */
+                more--;
+            }
+        }
+
+        /* If the window is almost full and there is insufficient lookahead,
+         * move the upper half to the lower one to make room in the upper half.
+         */
+        if (s->strstart >= wsize+MAX_DIST(s)) {
+
+            zmemcpy(s->window, s->window+wsize, (unsigned)wsize - more);
+            /* Every window index kept in the state moves down by wsize too. */
+            s->match_start -= wsize;
+            s->strstart    -= wsize; /* we now have strstart >= MAX_DIST */
+            s->block_start -= (long) wsize;
+            slide_hash(s);   /* defined outside this chunk */
+            more += wsize;
+        }
+        if (s->strm->avail_in == 0) break;
+
+        /* If there was no sliding:
+         *    strstart <= WSIZE+MAX_DIST-1 && lookahead <= MIN_LOOKAHEAD - 1 &&
+         *    more == window_size - lookahead - strstart
+         * => more >= window_size - (MIN_LOOKAHEAD-1 + WSIZE + MAX_DIST-1)
+         * => more >= window_size - 2*WSIZE + 2
+         * In the BIG_MEM or MMAP case (not yet supported),
+         *   window_size == input_size + MIN_LOOKAHEAD  &&
+         *   strstart + s->lookahead <= input_size => more >= MIN_LOOKAHEAD.
+         * Otherwise, window_size == 2*WSIZE so more >= 2.
+         * If there was sliding, more >= WSIZE. So in all cases, more >= 2.
+         */
+        Assert(more >= 2, "more < 2");
+
+        n = read_buf(s->strm, s->window + s->strstart + s->lookahead, more);
+        s->lookahead += n;
+
+        /* Initialize the hash value now that we have some input: */
+        if (s->lookahead + s->insert >= MIN_MATCH) {
+            uInt str = s->strstart - s->insert;   /* first position still to be hashed */
+            s->ins_h = s->window[str];
+            UPDATE_HASH(s, s->ins_h, s->window[str + 1]);
+#if MIN_MATCH != 3
+            Call UPDATE_HASH() MIN_MATCH-3 more times
+#endif
+            /* Insert the s->insert deferred positions into the hash table,
+             * stopping early if fewer than MIN_MATCH bytes remain available.
+             */
+            while (s->insert) {
+                UPDATE_HASH(s, s->ins_h, s->window[str + MIN_MATCH-1]);
+#ifndef FASTEST
+                s->prev[str & s->w_mask] = s->head[s->ins_h];
+#endif
+                s->head[s->ins_h] = (Pos)str;
+                str++;
+                s->insert--;
+                if (s->lookahead + s->insert < MIN_MATCH)
+                    break;
+            }
+        }
+        /* If the whole input has less than MIN_MATCH bytes, ins_h is garbage,
+         * but this is not important since only literal bytes will be emitted.
+         */
+
+    } while (s->lookahead < MIN_LOOKAHEAD && s->strm->avail_in != 0);
+
+    /* If the WIN_INIT bytes after the end of the current data have never been
+     * written, then zero those bytes in order to avoid memory check reports of
+     * the use of uninitialized (or uninitialised as Julian writes) bytes by
+     * the longest match routines.  Update the high water mark for the next
+     * time through here.  WIN_INIT is set to MAX_MATCH since the longest match
+     * routines allow scanning to strstart + MAX_MATCH, ignoring lookahead.
+     */
+    if (s->high_water < s->window_size) {
+        ulg curr = s->strstart + (ulg)(s->lookahead);   /* index just past the valid data */
+        ulg init;                                       /* number of bytes to zero */
+
+        if (s->high_water < curr) {
+            /* Previous high water mark below current data -- zero WIN_INIT
+             * bytes or up to end of window, whichever is less.
+             */
+            init = s->window_size - curr;
+            if (init > WIN_INIT)
+                init = WIN_INIT;
+            zmemzero(s->window + curr, (unsigned)init);
+            s->high_water = curr + init;
+        }
+        else if (s->high_water < (ulg)curr + WIN_INIT) {
+            /* High water mark at or above current data, but below current data
+             * plus WIN_INIT -- zero out to current data plus WIN_INIT, or up
+             * to end of window, whichever is less.
+             */
+            init = (ulg)curr + WIN_INIT - s->high_water;
+            if (init > s->window_size - s->high_water)
+                init = s->window_size - s->high_water;
+            zmemzero(s->window + s->high_water, (unsigned)init);
+            s->high_water += init;
+        }
+    }
+
+    Assert((ulg)s->strstart <= s->window_size - MIN_LOOKAHEAD,
+           "not enough room for search");
+}
+
+/* ===========================================================================
+ * Flush the current block, with given end-of-file flag.
+ * IN assertion: strstart is set to the end of the current match.
+ *
+ * Passes window[block_start .. strstart) to _tr_flush_block() (a Z_NULL
+ * buffer when block_start is negative), sets block_start to strstart, then
+ * calls flush_pending(). Both arguments are evaluated more than once.
+ */
+#define FLUSH_BLOCK_ONLY(s, last) { \
+   _tr_flush_block(s, (s->block_start >= 0L ? \
+                   (charf *)&s->window[(unsigned)s->block_start] : \
+                   (charf *)Z_NULL), \
+                (ulg)((long)s->strstart - s->block_start), \
+                (last)); \
+   s->block_start = s->strstart; \
+   flush_pending(s->strm); \
+   Tracev((stderr,"[FLUSH]")); \
+}
+
+/* Same but force premature exit if necessary.
+ * When avail_out is 0 after the flush, this expands to a `return` from the
+ * enclosing function: finish_started if `last` is nonzero, else need_more.
+ */
+#define FLUSH_BLOCK(s, last) { \
+   FLUSH_BLOCK_ONLY(s, last); \
+   if (s->strm->avail_out == 0) return (last) ? finish_started : need_more; \
+}
+
+/* Maximum stored block length in deflate format (not including header). */
+#define MAX_STORED 65535
+
+/* Minimum of a and b. One of the two arguments is evaluated twice. */
+#define MIN(a, b) ((a) > (b) ? (b) : (a))
+
+/* ===========================================================================
+ * Copy without compression as much as possible from the input stream, return
+ * the current block state.
+ *
+ * In case deflateParams() is used to later switch to a non-zero compression
+ * level, s->matches (otherwise unused when storing) keeps track of the number
+ * of hash table slides to perform. If s->matches is 1, then one hash table
+ * slide will be done when switching. If s->matches is 2, the maximum value
+ * allowed here, then the hash table will be cleared, since two or more slides
+ * is the same as a clear.
+ *
+ * deflate_stored() is written to minimize the number of times an input byte is
+ * copied. It is most efficient with large input and output buffers, which
+ * maximizes the opportunites to have a single copy from next_in to next_out.
+ */
+local block_state deflate_stored(s, flush)
+    deflate_state *s;
+    int flush;   /* flush value passed to deflate() */
+{
+    /* Smallest worthy block size when not flushing or finishing. By default
+     * this is 32K. This can be as small as 507 bytes for memLevel == 1. For
+     * large input and output buffers, the stored block size will be larger.
+     */
+    unsigned min_block = MIN(s->pending_buf_size - 5, s->w_size);
+
+    /* Copy as many min_block or larger stored blocks directly to next_out as
+     * possible. If flushing, copy the remaining available input to next_out as
+     * stored blocks, if there is enough space.
+     */
+    unsigned len, left, have, last = 0;
+    unsigned used = s->strm->avail_in;   /* avail_in on entry; becomes bytes consumed below */
+    do {
+        /* Set len to the maximum size block that we can copy directly with the
+         * available input data and output space. Set left to how much of that
+         * would be copied from what's left in the window.
+         */
+        len = MAX_STORED;       /* maximum deflate stored block length */
+        have = (s->bi_valid + 42) >> 3;         /* number of header bytes */
+        if (s->strm->avail_out < have)          /* need room for header */
+            break;
+            /* maximum stored block length that will fit in avail_out: */
+        have = s->strm->avail_out - have;
+        left = s->strstart - s->block_start;    /* bytes left in window */
+        if (len > (ulg)left + s->strm->avail_in)
+            len = left + s->strm->avail_in;     /* limit len to the input */
+        if (len > have)
+            len = have;                         /* limit len to the output */
+
+        /* If the stored block would be less than min_block in length, or if
+         * unable to copy all of the available input when flushing, then try
+         * copying to the window and the pending buffer instead. Also don't
+         * write an empty block when flushing -- deflate() does that.
+         */
+        if (len < min_block && ((len == 0 && flush != Z_FINISH) ||
+                                flush == Z_NO_FLUSH ||
+                                len != left + s->strm->avail_in))
+            break;
+
+        /* Make a dummy stored block in pending to get the header bytes,
+         * including any pending bits. This also updates the debugging counts.
+         */
+        last = flush == Z_FINISH && len == left + s->strm->avail_in ? 1 : 0;
+        _tr_stored_block(s, (char *)0, 0L, last);
+
+        /* Replace the lengths in the dummy stored block with len. */
+        s->pending_buf[s->pending - 4] = len;
+        s->pending_buf[s->pending - 3] = len >> 8;
+        s->pending_buf[s->pending - 2] = ~len;
+        s->pending_buf[s->pending - 1] = ~len >> 8;
+
+        /* Write the stored block header bytes. */
+        flush_pending(s->strm);
+
+#ifdef ZLIB_DEBUG
+        /* Update debugging counts for the data about to be copied. */
+        s->compressed_len += len << 3;
+        s->bits_sent += len << 3;
+#endif
+
+        /* Copy uncompressed bytes from the window to next_out. */
+        if (left) {
+            if (left > len)
+                left = len;
+            zmemcpy(s->strm->next_out, s->window + s->block_start, left);
+            s->strm->next_out += left;
+            s->strm->avail_out -= left;
+            s->strm->total_out += left;
+            s->block_start += left;
+            len -= left;
+        }
+
+        /* Copy uncompressed bytes directly from next_in to next_out, updating
+         * the check value.
+         */
+        if (len) {
+            read_buf(s->strm, s->strm->next_out, len);
+            s->strm->next_out += len;
+            s->strm->avail_out -= len;
+            s->strm->total_out += len;
+        }
+    } while (last == 0);
+
+    /* Update the sliding window with the last s->w_size bytes of the copied
+     * data, or append all of the copied data to the existing window if less
+     * than s->w_size bytes were copied. Also update the number of bytes to
+     * insert in the hash tables, in the event that deflateParams() switches to
+     * a non-zero compression level.
+     */
+    used -= s->strm->avail_in;      /* number of input bytes directly copied */
+    if (used) {
+        /* If any input was used, then no unused input remains in the window,
+         * therefore s->block_start == s->strstart.
+         */
+        if (used >= s->w_size) {    /* supplant the previous history */
+            s->matches = 2;         /* clear hash */
+            zmemcpy(s->window, s->strm->next_in - s->w_size, s->w_size);
+            s->strstart = s->w_size;
+        }
+        else {
+            if (s->window_size - s->strstart <= used) {
+                /* Slide the window down. */
+                s->strstart -= s->w_size;
+                zmemcpy(s->window, s->window + s->w_size, s->strstart);
+                if (s->matches < 2)
+                    s->matches++;   /* add a pending slide_hash() */
+            }
+            zmemcpy(s->window + s->strstart, s->strm->next_in - used, used);
+            s->strstart += used;
+        }
+        s->block_start = s->strstart;
+        s->insert += MIN(used, s->w_size - s->insert);
+    }
+    if (s->high_water < s->strstart)
+        s->high_water = s->strstart;
+
+    /* If the last block was written to next_out, then done. */
+    if (last)
+        return finish_done;
+
+    /* If flushing and all input has been consumed, then done. */
+    if (flush != Z_NO_FLUSH && flush != Z_FINISH &&
+        s->strm->avail_in == 0 && (long)s->strstart == s->block_start)
+        return block_done;
+
+    /* Fill the window with any remaining input. */
+    have = s->window_size - s->strstart - 1;
+    if (s->strm->avail_in > have && s->block_start >= (long)s->w_size) {
+        /* Slide the window down. */
+        s->block_start -= s->w_size;
+        s->strstart -= s->w_size;
+        zmemcpy(s->window, s->window + s->w_size, s->strstart);
+        if (s->matches < 2)
+            s->matches++;           /* add a pending slide_hash() */
+        have += s->w_size;          /* more space now */
+    }
+    if (have > s->strm->avail_in)
+        have = s->strm->avail_in;
+    if (have) {
+        read_buf(s->strm, s->window + s->strstart, have);
+        s->strstart += have;
+    }
+    if (s->high_water < s->strstart)
+        s->high_water = s->strstart;
+
+    /* There was not enough avail_out to write a complete worthy or flushed
+     * stored block to next_out. Write a stored block to pending instead, if we
+     * have enough input for a worthy block, or if flushing and there is enough
+     * room for the remaining input as a stored block in the pending buffer.
+     */
+    have = (s->bi_valid + 42) >> 3;         /* number of header bytes */
+        /* maximum stored block length that will fit in pending: */
+    have = MIN(s->pending_buf_size - have, MAX_STORED);
+    min_block = MIN(have, s->w_size);
+    left = s->strstart - s->block_start;
+    if (left >= min_block ||
+        ((left || flush == Z_FINISH) && flush != Z_NO_FLUSH &&
+         s->strm->avail_in == 0 && left <= have)) {
+        len = MIN(left, have);
+        last = flush == Z_FINISH && s->strm->avail_in == 0 &&
+               len == left ? 1 : 0;
+        _tr_stored_block(s, (charf *)s->window + s->block_start, len, last);
+        s->block_start += len;
+        flush_pending(s->strm);
+    }
+
+    /* We've done all we can with the available input and output. */
+    return last ? finish_started : need_more;
+}
+
+/* ===========================================================================
+ * Compress as much as possible from the input stream, return the current
+ * block state.
+ * This function does not perform lazy evaluation of matches and inserts
+ * new strings in the dictionary only for unmatched strings or for short
+ * matches. It is used only for the fast compression options.
+ *
+ * Returns need_more when the lookahead is below MIN_LOOKAHEAD after
+ * fill_window() and flush == Z_NO_FLUSH; finish_done after the last block
+ * has been flushed for Z_FINISH; block_done otherwise. The FLUSH_BLOCK
+ * macro can also return finish_started or need_more from this function
+ * when avail_out drops to 0.
+ */
+local block_state deflate_fast(s, flush)
+    deflate_state *s;
+    int flush;   /* flush value passed to deflate() */
+{
+    IPos hash_head;       /* head of the hash chain */
+    int bflush;           /* set if current block must be flushed */
+
+    for (;;) {
+        /* Make sure that we always have enough lookahead, except
+         * at the end of the input file. We need MAX_MATCH bytes
+         * for the next match, plus MIN_MATCH bytes to insert the
+         * string following the next match.
+         */
+        if (s->lookahead < MIN_LOOKAHEAD) {
+            fill_window(s);
+            if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) {
+                return need_more;
+            }
+            if (s->lookahead == 0) break; /* flush the current block */
+        }
+
+        /* Insert the string window[strstart .. strstart+2] in the
+         * dictionary, and set hash_head to the head of the hash chain:
+         */
+        hash_head = NIL;
+        if (s->lookahead >= MIN_MATCH) {
+            INSERT_STRING(s, s->strstart, hash_head);
+        }
+
+        /* Find the longest match, discarding those <= prev_length.
+         * At this point we have always match_length < MIN_MATCH
+         */
+        if (hash_head != NIL && s->strstart - hash_head <= MAX_DIST(s)) {
+            /* To simplify the code, we prevent matches with the string
+             * of window index 0 (in particular we have to avoid a match
+             * of the string with itself at the start of the input file).
+             */
+            s->match_length = longest_match (s, hash_head);
+            /* longest_match() sets match_start */
+        }
+        if (s->match_length >= MIN_MATCH) {
+            check_match(s, s->strstart, s->match_start, s->match_length);
+
+            _tr_tally_dist(s, s->strstart - s->match_start,
+                           s->match_length - MIN_MATCH, bflush);
+
+            s->lookahead -= s->match_length;
+
+            /* Insert new strings in the hash table only if the match length
+             * is not too large. This saves time but degrades compression.
+             */
+#ifndef FASTEST
+            if (s->match_length <= s->max_insert_length &&
+                s->lookahead >= MIN_MATCH) {
+                s->match_length--; /* string at strstart already in table */
+                do {
+                    s->strstart++;
+                    INSERT_STRING(s, s->strstart, hash_head);
+                    /* strstart never exceeds WSIZE-MAX_MATCH, so there are
+                     * always MIN_MATCH bytes ahead.
+                     */
+                } while (--s->match_length != 0);
+                s->strstart++;
+            } else
+#endif
+            {
+                /* Skip over the match without hashing its interior, then
+                 * re-seed ins_h from the two bytes at the new strstart.
+                 */
+                s->strstart += s->match_length;
+                s->match_length = 0;
+                s->ins_h = s->window[s->strstart];
+                UPDATE_HASH(s, s->ins_h, s->window[s->strstart+1]);
+#if MIN_MATCH != 3
+                Call UPDATE_HASH() MIN_MATCH-3 more times
+#endif
+                /* If lookahead < MIN_MATCH, ins_h is garbage, but it does not
+                 * matter since it will be recomputed at next deflate call.
+                 */
+            }
+        } else {
+            /* No match, output a literal byte */
+            Tracevv((stderr,"%c", s->window[s->strstart]));
+            _tr_tally_lit (s, s->window[s->strstart], bflush);
+            s->lookahead--;
+            s->strstart++;
+        }
+        if (bflush) FLUSH_BLOCK(s, 0);
+    }
+    /* Reached only through the break above, i.e. with lookahead == 0. */
+    s->insert = s->strstart < MIN_MATCH-1 ? s->strstart : MIN_MATCH-1;
+    if (flush == Z_FINISH) {
+        FLUSH_BLOCK(s, 1);
+        return finish_done;
+    }
+    if (s->last_lit)
+        FLUSH_BLOCK(s, 0);
+    return block_done;
+}
+
+#ifndef FASTEST
+/* ===========================================================================
+ * Same as above, but achieves better compression. We use a lazy
+ * evaluation for matches: a match is finally adopted only if there is
+ * no better match at the next window position.
+ */ +local block_state deflate_slow(s, flush) + deflate_state *s; + int flush; +{ + IPos hash_head; /* head of hash chain */ + int bflush; /* set if current block must be flushed */ + + /* Process the input block. */ + for (;;) { + /* Make sure that we always have enough lookahead, except + * at the end of the input file. We need MAX_MATCH bytes + * for the next match, plus MIN_MATCH bytes to insert the + * string following the next match. + */ + if (s->lookahead < MIN_LOOKAHEAD) { + fill_window(s); + if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) { + return need_more; + } + if (s->lookahead == 0) break; /* flush the current block */ + } + + /* Insert the string window[strstart .. strstart+2] in the + * dictionary, and set hash_head to the head of the hash chain: + */ + hash_head = NIL; + if (s->lookahead >= MIN_MATCH) { + INSERT_STRING(s, s->strstart, hash_head); + } + + /* Find the longest match, discarding those <= prev_length. + */ + s->prev_length = s->match_length, s->prev_match = s->match_start; + s->match_length = MIN_MATCH-1; + + if (hash_head != NIL && s->prev_length < s->max_lazy_match && + s->strstart - hash_head <= MAX_DIST(s)) { + /* To simplify the code, we prevent matches with the string + * of window index 0 (in particular we have to avoid a match + * of the string with itself at the start of the input file). + */ + s->match_length = longest_match (s, hash_head); + /* longest_match() sets match_start */ + + if (s->match_length <= 5 && (s->strategy == Z_FILTERED +#if TOO_FAR <= 32767 + || (s->match_length == MIN_MATCH && + s->strstart - s->match_start > TOO_FAR) +#endif + )) { + + /* If prev_match is also MIN_MATCH, match_start is garbage + * but we will ignore the current match anyway. 
+ */ + s->match_length = MIN_MATCH-1; + } + } + /* If there was a match at the previous step and the current + * match is not better, output the previous match: + */ + if (s->prev_length >= MIN_MATCH && s->match_length <= s->prev_length) { + uInt max_insert = s->strstart + s->lookahead - MIN_MATCH; + /* Do not insert strings in hash table beyond this. */ + + check_match(s, s->strstart-1, s->prev_match, s->prev_length); + + _tr_tally_dist(s, s->strstart -1 - s->prev_match, + s->prev_length - MIN_MATCH, bflush); + + /* Insert in hash table all strings up to the end of the match. + * strstart-1 and strstart are already inserted. If there is not + * enough lookahead, the last two strings are not inserted in + * the hash table. + */ + s->lookahead -= s->prev_length-1; + s->prev_length -= 2; + do { + if (++s->strstart <= max_insert) { + INSERT_STRING(s, s->strstart, hash_head); + } + } while (--s->prev_length != 0); + s->match_available = 0; + s->match_length = MIN_MATCH-1; + s->strstart++; + + if (bflush) FLUSH_BLOCK(s, 0); + + } else if (s->match_available) { + /* If there was no match at the previous position, output a + * single literal. If there was a match but the current match + * is longer, truncate the previous match to a single literal. + */ + Tracevv((stderr,"%c", s->window[s->strstart-1])); + _tr_tally_lit(s, s->window[s->strstart-1], bflush); + if (bflush) { + FLUSH_BLOCK_ONLY(s, 0); + } + s->strstart++; + s->lookahead--; + if (s->strm->avail_out == 0) return need_more; + } else { + /* There is no previous match to compare with, wait for + * the next step to decide. + */ + s->match_available = 1; + s->strstart++; + s->lookahead--; + } + } + Assert (flush != Z_NO_FLUSH, "no flush?"); + if (s->match_available) { + Tracevv((stderr,"%c", s->window[s->strstart-1])); + _tr_tally_lit(s, s->window[s->strstart-1], bflush); + s->match_available = 0; + } + s->insert = s->strstart < MIN_MATCH-1 ? 
s->strstart : MIN_MATCH-1; + if (flush == Z_FINISH) { + FLUSH_BLOCK(s, 1); + return finish_done; + } + if (s->last_lit) + FLUSH_BLOCK(s, 0); + return block_done; +} +#endif /* FASTEST */ + +/* =========================================================================== + * For Z_RLE, simply look for runs of bytes, generate matches only of distance + * one. Do not maintain a hash table. (It will be regenerated if this run of + * deflate switches away from Z_RLE.) + */ +local block_state deflate_rle(s, flush) + deflate_state *s; + int flush; +{ + int bflush; /* set if current block must be flushed */ + uInt prev; /* byte at distance one to match */ + Bytef *scan, *strend; /* scan goes up to strend for length of run */ + + for (;;) { + /* Make sure that we always have enough lookahead, except + * at the end of the input file. We need MAX_MATCH bytes + * for the longest run, plus one for the unrolled loop. + */ + if (s->lookahead <= MAX_MATCH) { + fill_window(s); + if (s->lookahead <= MAX_MATCH && flush == Z_NO_FLUSH) { + return need_more; + } + if (s->lookahead == 0) break; /* flush the current block */ + } + + /* See how many times the previous byte repeats */ + s->match_length = 0; + if (s->lookahead >= MIN_MATCH && s->strstart > 0) { + scan = s->window + s->strstart - 1; + prev = *scan; + if (prev == *++scan && prev == *++scan && prev == *++scan) { + strend = s->window + s->strstart + MAX_MATCH; + do { + } while (prev == *++scan && prev == *++scan && + prev == *++scan && prev == *++scan && + prev == *++scan && prev == *++scan && + prev == *++scan && prev == *++scan && + scan < strend); + s->match_length = MAX_MATCH - (uInt)(strend - scan); + if (s->match_length > s->lookahead) + s->match_length = s->lookahead; + } + Assert(scan <= s->window+(uInt)(s->window_size-1), "wild scan"); + } + + /* Emit match if have run of MIN_MATCH or longer, else emit literal */ + if (s->match_length >= MIN_MATCH) { + check_match(s, s->strstart, s->strstart - 1, s->match_length); + + 
_tr_tally_dist(s, 1, s->match_length - MIN_MATCH, bflush); + + s->lookahead -= s->match_length; + s->strstart += s->match_length; + s->match_length = 0; + } else { + /* No match, output a literal byte */ + Tracevv((stderr,"%c", s->window[s->strstart])); + _tr_tally_lit (s, s->window[s->strstart], bflush); + s->lookahead--; + s->strstart++; + } + if (bflush) FLUSH_BLOCK(s, 0); + } + s->insert = 0; + if (flush == Z_FINISH) { + FLUSH_BLOCK(s, 1); + return finish_done; + } + if (s->last_lit) + FLUSH_BLOCK(s, 0); + return block_done; +} + +/* =========================================================================== + * For Z_HUFFMAN_ONLY, do not look for matches. Do not maintain a hash table. + * (It will be regenerated if this run of deflate switches away from Huffman.) + */ +local block_state deflate_huff(s, flush) + deflate_state *s; + int flush; +{ + int bflush; /* set if current block must be flushed */ + + for (;;) { + /* Make sure that we have a literal to write. */ + if (s->lookahead == 0) { + fill_window(s); + if (s->lookahead == 0) { + if (flush == Z_NO_FLUSH) + return need_more; + break; /* flush the current block */ + } + } + + /* Output a literal byte */ + s->match_length = 0; + Tracevv((stderr,"%c", s->window[s->strstart])); + _tr_tally_lit (s, s->window[s->strstart], bflush); + s->lookahead--; + s->strstart++; + if (bflush) FLUSH_BLOCK(s, 0); + } + s->insert = 0; + if (flush == Z_FINISH) { + FLUSH_BLOCK(s, 1); + return finish_done; + } + if (s->last_lit) + FLUSH_BLOCK(s, 0); + return block_done; +} diff --git a/deps/SZ/zlib/deflate.h b/deps/SZ/zlib/deflate.h new file mode 100644 index 0000000000000000000000000000000000000000..23ecdd312bc06eb41a40dce73358e62dea8772d2 --- /dev/null +++ b/deps/SZ/zlib/deflate.h @@ -0,0 +1,349 @@ +/* deflate.h -- internal compression state + * Copyright (C) 1995-2016 Jean-loup Gailly + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. 
It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. + */ + +/* @(#) $Id$ */ + +#ifndef DEFLATE_H +#define DEFLATE_H + +#include "zutil.h" + +/* define NO_GZIP when compiling if you want to disable gzip header and + trailer creation by deflate(). NO_GZIP would be used to avoid linking in + the crc code when it is not needed. For shared libraries, gzip encoding + should be left enabled. */ +#ifndef NO_GZIP +# define GZIP +#endif + +/* =========================================================================== + * Internal compression state. + */ + +#define LENGTH_CODES 29 +/* number of length codes, not counting the special END_BLOCK code */ + +#define LITERALS 256 +/* number of literal bytes 0..255 */ + +#define L_CODES (LITERALS+1+LENGTH_CODES) +/* number of Literal or Length codes, including the END_BLOCK code */ + +#define D_CODES 30 +/* number of distance codes */ + +#define BL_CODES 19 +/* number of codes used to transfer the bit lengths */ + +#define HEAP_SIZE (2*L_CODES+1) +/* maximum heap size */ + +#define MAX_BITS 15 +/* All codes must not exceed MAX_BITS bits */ + +#define Buf_size 16 +/* size of bit buffer in bi_buf */ + +#define INIT_STATE 42 /* zlib header -> BUSY_STATE */ +#ifdef GZIP +# define GZIP_STATE 57 /* gzip header -> BUSY_STATE | EXTRA_STATE */ +#endif +#define EXTRA_STATE 69 /* gzip extra block -> NAME_STATE */ +#define NAME_STATE 73 /* gzip file name -> COMMENT_STATE */ +#define COMMENT_STATE 91 /* gzip comment -> HCRC_STATE */ +#define HCRC_STATE 103 /* gzip header CRC -> BUSY_STATE */ +#define BUSY_STATE 113 /* deflate -> FINISH_STATE */ +#define FINISH_STATE 666 /* stream complete */ +/* Stream status */ + + +/* Data structure describing a single value and its code string. 
*/ +typedef struct ct_data_s { + union { + ush freq; /* frequency count */ + ush code; /* bit string */ + } fc; + union { + ush dad; /* father node in Huffman tree */ + ush len; /* length of bit string */ + } dl; +} FAR ct_data; + +#define Freq fc.freq +#define Code fc.code +#define Dad dl.dad +#define Len dl.len + +typedef struct static_tree_desc_s static_tree_desc; + +typedef struct tree_desc_s { + ct_data *dyn_tree; /* the dynamic tree */ + int max_code; /* largest code with non zero frequency */ + const static_tree_desc *stat_desc; /* the corresponding static tree */ +} FAR tree_desc; + +typedef ush Pos; +typedef Pos FAR Posf; +typedef unsigned IPos; + +/* A Pos is an index in the character window. We use short instead of int to + * save space in the various tables. IPos is used only for parameter passing. + */ + +typedef struct internal_state { + z_streamp strm; /* pointer back to this zlib stream */ + int status; /* as the name implies */ + Bytef *pending_buf; /* output still pending */ + ulg pending_buf_size; /* size of pending_buf */ + Bytef *pending_out; /* next pending byte to output to the stream */ + ulg pending; /* nb of bytes in the pending buffer */ + int wrap; /* bit 0 true for zlib, bit 1 true for gzip */ + gz_headerp gzhead; /* gzip header information to write */ + ulg gzindex; /* where in extra, name, or comment */ + Byte method; /* can only be DEFLATED */ + int last_flush; /* value of flush param for previous deflate call */ + + /* used by deflate.c: */ + + uInt w_size; /* LZ77 window size (32K by default) */ + uInt w_bits; /* log2(w_size) (8..16) */ + uInt w_mask; /* w_size - 1 */ + + Bytef *window; + /* Sliding window. Input bytes are read into the second half of the window, + * and move to the first half later to keep a dictionary of at least wSize + * bytes. With this organization, matches are limited to a distance of + * wSize-MAX_MATCH bytes, but this ensures that IO is always + * performed with a length multiple of the block size. 
Also, it limits + * the window size to 64K, which is quite useful on MSDOS. + * To do: use the user input buffer as sliding window. + */ + + ulg window_size; + /* Actual size of window: 2*wSize, except when the user input buffer + * is directly used as sliding window. + */ + + Posf *prev; + /* Link to older string with same hash index. To limit the size of this + * array to 64K, this link is maintained only for the last 32K strings. + * An index in this array is thus a window index modulo 32K. + */ + + Posf *head; /* Heads of the hash chains or NIL. */ + + uInt ins_h; /* hash index of string to be inserted */ + uInt hash_size; /* number of elements in hash table */ + uInt hash_bits; /* log2(hash_size) */ + uInt hash_mask; /* hash_size-1 */ + + uInt hash_shift; + /* Number of bits by which ins_h must be shifted at each input + * step. It must be such that after MIN_MATCH steps, the oldest + * byte no longer takes part in the hash key, that is: + * hash_shift * MIN_MATCH >= hash_bits + */ + + long block_start; + /* Window position at the beginning of the current output block. Gets + * negative when the window is moved backwards. + */ + + uInt match_length; /* length of best match */ + IPos prev_match; /* previous match */ + int match_available; /* set if previous match exists */ + uInt strstart; /* start of string to insert */ + uInt match_start; /* start of matching string */ + uInt lookahead; /* number of valid bytes ahead in window */ + + uInt prev_length; + /* Length of the best match at previous step. Matches not greater than this + * are discarded. This is used in the lazy match evaluation. + */ + + uInt max_chain_length; + /* To speed up deflation, hash chains are never searched beyond this + * length. A higher limit improves compression ratio but degrades the + * speed. + */ + + uInt max_lazy_match; + /* Attempt to find a better match only when the current match is strictly + * smaller than this value. 
This mechanism is used only for compression + * levels >= 4. + */ +# define max_insert_length max_lazy_match + /* Insert new strings in the hash table only if the match length is not + * greater than this length. This saves time but degrades compression. + * max_insert_length is used only for compression levels <= 3. + */ + + int level; /* compression level (1..9) */ + int strategy; /* favor or force Huffman coding*/ + + uInt good_match; + /* Use a faster search when the previous match is longer than this */ + + int nice_match; /* Stop searching when current match exceeds this */ + + /* used by trees.c: */ + /* Didn't use ct_data typedef below to suppress compiler warning */ + struct ct_data_s dyn_ltree[HEAP_SIZE]; /* literal and length tree */ + struct ct_data_s dyn_dtree[2*D_CODES+1]; /* distance tree */ + struct ct_data_s bl_tree[2*BL_CODES+1]; /* Huffman tree for bit lengths */ + + struct tree_desc_s l_desc; /* desc. for literal tree */ + struct tree_desc_s d_desc; /* desc. for distance tree */ + struct tree_desc_s bl_desc; /* desc. for bit length tree */ + + ush bl_count[MAX_BITS+1]; + /* number of codes at each bit length for an optimal tree */ + + int heap[2*L_CODES+1]; /* heap used to build the Huffman trees */ + int heap_len; /* number of elements in the heap */ + int heap_max; /* element of largest frequency */ + /* The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used. + * The same heap array is used to build all trees. + */ + + uch depth[2*L_CODES+1]; + /* Depth of each subtree used as tie breaker for trees of equal frequency + */ + + uchf *l_buf; /* buffer for literals or lengths */ + + uInt lit_bufsize; + /* Size of match buffer for literals/lengths. 
There are 4 reasons for + * limiting lit_bufsize to 64K: + * - frequencies can be kept in 16 bit counters + * - if compression is not successful for the first block, all input + * data is still in the window so we can still emit a stored block even + * when input comes from standard input. (This can also be done for + * all blocks if lit_bufsize is not greater than 32K.) + * - if compression is not successful for a file smaller than 64K, we can + * even emit a stored file instead of a stored block (saving 5 bytes). + * This is applicable only for zip (not gzip or zlib). + * - creating new Huffman trees less frequently may not provide fast + * adaptation to changes in the input data statistics. (Take for + * example a binary file with poorly compressible code followed by + * a highly compressible string table.) Smaller buffer sizes give + * fast adaptation but have of course the overhead of transmitting + * trees more frequently. + * - I can't count above 4 + */ + + uInt last_lit; /* running index in l_buf */ + + ushf *d_buf; + /* Buffer for distances. To simplify the code, d_buf and l_buf have + * the same number of elements. To use different lengths, an extra flag + * array would be necessary. + */ + + ulg opt_len; /* bit length of current block with optimal trees */ + ulg static_len; /* bit length of current block with static trees */ + uInt matches; /* number of string matches in current block */ + uInt insert; /* bytes at end of window left to insert */ + +#ifdef ZLIB_DEBUG + ulg compressed_len; /* total bit length of compressed file mod 2^32 */ + ulg bits_sent; /* bit length of compressed data sent mod 2^32 */ +#endif + + ush bi_buf; + /* Output buffer. bits are inserted starting at the bottom (least + * significant bits). + */ + int bi_valid; + /* Number of valid bits in bi_buf. All bits above the last valid bit + * are always zero. 
+ */ + + ulg high_water; + /* High water mark offset in window for initialized bytes -- bytes above + * this are set to zero in order to avoid memory check warnings when + * longest match routines access bytes past the input. This is then + * updated to the new high water mark. + */ + +} FAR deflate_state; + +/* Output a byte on the stream. + * IN assertion: there is enough room in pending_buf. + */ +#define put_byte(s, c) {s->pending_buf[s->pending++] = (Bytef)(c);} + + +#define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1) +/* Minimum amount of lookahead, except at the end of the input file. + * See deflate.c for comments about the MIN_MATCH+1. + */ + +#define MAX_DIST(s) ((s)->w_size-MIN_LOOKAHEAD) +/* In order to simplify the code, particularly on 16 bit machines, match + * distances are limited to MAX_DIST instead of WSIZE. + */ + +#define WIN_INIT MAX_MATCH +/* Number of bytes after end of data in window to initialize in order to avoid + memory checker errors from longest match routines */ + + /* in trees.c */ +void ZLIB_INTERNAL _tr_init OF((deflate_state *s)); +int ZLIB_INTERNAL _tr_tally OF((deflate_state *s, unsigned dist, unsigned lc)); +void ZLIB_INTERNAL _tr_flush_block OF((deflate_state *s, charf *buf, + ulg stored_len, int last)); +void ZLIB_INTERNAL _tr_flush_bits OF((deflate_state *s)); +void ZLIB_INTERNAL _tr_align OF((deflate_state *s)); +void ZLIB_INTERNAL _tr_stored_block OF((deflate_state *s, charf *buf, + ulg stored_len, int last)); + +#define d_code(dist) \ + ((dist) < 256 ? _dist_code[dist] : _dist_code[256+((dist)>>7)]) +/* Mapping from a distance to a distance code. dist is the distance - 1 and + * must not have side effects. _dist_code[256] and _dist_code[257] are never + * used. 
+ */
+
+#ifndef ZLIB_DEBUG
+/* Inline versions of _tr_tally for speed; each appends one d_buf/l_buf entry at last_lit and sets flush when last_lit reaches lit_bufsize-1. NOTE(review): this last_lit/l_buf/d_buf bookkeeping looks like the pre-1.2.12 zlib layout that upstream reworked for CVE-2018-25032 -- confirm the vendored version. */
+
+#if defined(GEN_TREES_H) || !defined(STDC)
+  extern uch ZLIB_INTERNAL _length_code[];
+  extern uch ZLIB_INTERNAL _dist_code[];
+#else
+  extern const uch ZLIB_INTERNAL _length_code[];
+  extern const uch ZLIB_INTERNAL _dist_code[];
+#endif
+
+# define _tr_tally_lit(s, c, flush) \
+  { uch cc = (c); \
+    s->d_buf[s->last_lit] = 0; \
+    s->l_buf[s->last_lit++] = cc; \
+    s->dyn_ltree[cc].Freq++; \
+    flush = (s->last_lit == s->lit_bufsize-1); \
+   }
+# define _tr_tally_dist(s, distance, length, flush) \
+  { uch len = (uch)(length); \
+    ush dist = (ush)(distance); \
+    s->d_buf[s->last_lit] = dist; \
+    s->l_buf[s->last_lit++] = len; \
+    dist--; \
+    s->dyn_ltree[_length_code[len]+LITERALS+1].Freq++; \
+    s->dyn_dtree[d_code(dist)].Freq++; \
+    flush = (s->last_lit == s->lit_bufsize-1); \
+  }
+#else
+# define _tr_tally_lit(s, c, flush) flush = _tr_tally(s, 0, c)
+# define _tr_tally_dist(s, distance, length, flush) \
+              flush = _tr_tally(s, distance, length)
+#endif
+
+#endif /* DEFLATE_H */
diff --git a/deps/SZ/zlib/gzclose.c b/deps/SZ/zlib/gzclose.c
new file mode 100644
index 0000000000000000000000000000000000000000..caeb99a3177f477d622870255a00ac2b72f10cad
--- /dev/null
+++ b/deps/SZ/zlib/gzclose.c
@@ -0,0 +1,25 @@
+/* gzclose.c -- zlib gzclose() function
+ * Copyright (C) 2004, 2010 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#include "gzguts.h"
+
+/* gzclose() lives in its own file so that it is linked in only when used.
+   Callers can use the other gzclose functions instead to avoid linking in
+   unneeded compression or decompression routines. */
+int ZEXPORT gzclose(file)
+    gzFile file;
+{
+#ifndef NO_GZCOMPRESS
+    gz_statep state;
+
+    if (file == NULL)
+        return Z_STREAM_ERROR;
+    state = (gz_statep)file;
+
+    return state->mode == GZ_READ ?
gzclose_r(file) : gzclose_w(file);
+#else
+    return gzclose_r(file);
+#endif
+}
diff --git a/deps/SZ/zlib/gzguts.h b/deps/SZ/zlib/gzguts.h
new file mode 100644
index 0000000000000000000000000000000000000000..990a4d2514933709883a7d949ed52146675fe2c1
--- /dev/null
+++ b/deps/SZ/zlib/gzguts.h
@@ -0,0 +1,218 @@
+/* gzguts.h -- zlib internal header definitions for gz* operations
+ * Copyright (C) 2004, 2005, 2010, 2011, 2012, 2013, 2016 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#ifdef _LARGEFILE64_SOURCE
+#  ifndef _LARGEFILE_SOURCE
+#    define _LARGEFILE_SOURCE 1
+#  endif
+#  ifdef _FILE_OFFSET_BITS
+#    undef _FILE_OFFSET_BITS
+#  endif
+#endif
+
+#ifdef HAVE_HIDDEN
+#  define ZLIB_INTERNAL __attribute__((visibility ("hidden")))
+#else
+#  define ZLIB_INTERNAL
+#endif
+
+#include <stdio.h>
+#include "zlib.h"
+#ifdef STDC
+#  include <string.h>
+#  include <stdlib.h>
+#  include <limits.h>
+#endif
+
+#ifndef _POSIX_SOURCE
+#  define _POSIX_SOURCE
+#endif
+#include <fcntl.h>
+
+#ifdef _WIN32
+#  include <stddef.h>
+#endif
+
+#if defined(__TURBOC__) || defined(_MSC_VER) || defined(_WIN32)
+#  include <io.h>
+#endif
+
+#if defined(_WIN32) || defined(__CYGWIN__)
+#  define WIDECHAR
+#endif
+
+#ifdef WINAPI_FAMILY
+#  define open _open
+#  define read _read
+#  define write _write
+#  define close _close
+#endif
+
+#ifdef NO_DEFLATE       /* for compatibility with old definition */
+#  define NO_GZCOMPRESS
+#endif
+
+#if defined(STDC99) || (defined(__TURBOC__) && __TURBOC__ >= 0x550)
+#  ifndef HAVE_VSNPRINTF
+#    define HAVE_VSNPRINTF
+#  endif
+#endif
+
+#if defined(__CYGWIN__)
+#  ifndef HAVE_VSNPRINTF
+#    define HAVE_VSNPRINTF
+#  endif
+#endif
+
+#if defined(MSDOS) && defined(__BORLANDC__) && (BORLANDC > 0x410)
+#  ifndef HAVE_VSNPRINTF
+#    define HAVE_VSNPRINTF
+#  endif
+#endif
+
+#ifndef HAVE_VSNPRINTF
+#  ifdef MSDOS
+/* vsnprintf may exist on some MS-DOS compilers (DJGPP?),
+   but for now we just assume it doesn't.
*/
+#    define NO_vsnprintf
+#  endif
+#  ifdef __TURBOC__
+#    define NO_vsnprintf
+#  endif
+#  ifdef WIN32
+/* In Win32, vsnprintf is available as the "non-ANSI" _vsnprintf. */
+#    if !defined(vsnprintf) && !defined(NO_vsnprintf)
+#      if !defined(_MSC_VER) || ( defined(_MSC_VER) && _MSC_VER < 1500 )
+#         define vsnprintf _vsnprintf
+#      endif
+#    endif
+#  endif
+#  ifdef __SASC
+#    define NO_vsnprintf
+#  endif
+#  ifdef VMS
+#    define NO_vsnprintf
+#  endif
+#  ifdef __OS400__
+#    define NO_vsnprintf
+#  endif
+#  ifdef __MVS__
+#    define NO_vsnprintf
+#  endif
+#endif
+
+/* unlike snprintf (which is required in C99), _snprintf does not guarantee
+   null termination of the result -- however this is only used in gzlib.c where
+   the result is assured to fit in the space provided */
+#if defined(_MSC_VER) && _MSC_VER < 1900
+#  define snprintf _snprintf
+#endif
+
+#ifndef local
+#  define local static
+#endif
+/* since "static" is used to mean two completely different things in C, we
+   define "local" for the non-static meaning of "static", for readability
+   (compile with -Dlocal if your debugger can't find static symbols) */
+
+/* gz* functions always use library allocation functions */
+#ifndef STDC
+  extern voidp  malloc OF((uInt size));
+  extern void   free   OF((voidpf ptr));
+#endif
+
+/* get errno and strerror definition: zstrerror() yields the message text */
+#if defined UNDER_CE
+#  include <windows.h>
+#  define zstrerror() gz_strwinerror((DWORD)GetLastError())
+#else
+#  ifndef NO_STRERROR
+#    include <errno.h>
+#    define zstrerror() strerror(errno)
+#  else
+#    define zstrerror() "stdio error (consult errno)"
+#  endif
+#endif
+
+/* provide prototypes for these when building zlib without LFS */
+#if !defined(_LARGEFILE64_SOURCE) || _LFS64_LARGEFILE-0 == 0
+    ZEXTERN gzFile ZEXPORT gzopen64 OF((const char *, const char *));
+    ZEXTERN z_off64_t ZEXPORT gzseek64 OF((gzFile, z_off64_t, int));
+    ZEXTERN z_off64_t ZEXPORT gztell64 OF((gzFile));
+    ZEXTERN z_off64_t ZEXPORT gzoffset64 OF((gzFile));
+#endif
+
+/* default memLevel */
+#if MAX_MEM_LEVEL >= 8
+#
define DEF_MEM_LEVEL 8 +#else +# define DEF_MEM_LEVEL MAX_MEM_LEVEL +#endif + +/* default i/o buffer size -- double this for output when reading (this and + twice this must be able to fit in an unsigned type) */ +#define GZBUFSIZE 8192 + +/* gzip modes, also provide a little integrity check on the passed structure */ +#define GZ_NONE 0 +#define GZ_READ 7247 +#define GZ_WRITE 31153 +#define GZ_APPEND 1 /* mode set to GZ_WRITE after the file is opened */ + +/* values for gz_state how */ +#define LOOK 0 /* look for a gzip header */ +#define COPY 1 /* copy input directly */ +#define GZIP 2 /* decompress a gzip stream */ + +/* internal gzip file state data structure */ +typedef struct { + /* exposed contents for gzgetc() macro */ + struct gzFile_s x; /* "x" for exposed */ + /* x.have: number of bytes available at x.next */ + /* x.next: next output data to deliver or write */ + /* x.pos: current position in uncompressed data */ + /* used for both reading and writing */ + int mode; /* see gzip modes above */ + int fd; /* file descriptor */ + char *path; /* path or fd for error messages */ + unsigned size; /* buffer size, zero if not allocated yet */ + unsigned want; /* requested buffer size, default is GZBUFSIZE */ + unsigned char *in; /* input buffer (double-sized when writing) */ + unsigned char *out; /* output buffer (double-sized when reading) */ + int direct; /* 0 if processing gzip, 1 if transparent */ + /* just for reading */ + int how; /* 0: get header, 1: copy, 2: decompress */ + z_off64_t start; /* where the gzip data started, for rewinding */ + int eof; /* true if end of input file reached */ + int past; /* true if read requested past end */ + /* just for writing */ + int level; /* compression level */ + int strategy; /* compression strategy */ + /* seek request */ + z_off64_t skip; /* amount to skip (already rewound if backwards) */ + int seek; /* true if seek request pending */ + /* error information */ + int err; /* error code */ + char *msg; /* error 
message */ + /* zlib inflate or deflate stream */ + z_stream strm; /* stream structure in-place (not a pointer) */ +} gz_state; +typedef gz_state FAR *gz_statep; + +/* shared functions */ +void ZLIB_INTERNAL gz_error OF((gz_statep, int, const char *)); +#if defined UNDER_CE +char ZLIB_INTERNAL *gz_strwinerror OF((DWORD error)); +#endif + +/* GT_OFF(x), where x is an unsigned value, is true if x > maximum z_off64_t + value -- needed when comparing unsigned to z_off64_t, which is signed + (possible z_off64_t types off_t, off64_t, and long are all signed) */ +#ifdef INT_MAX +# define GT_OFF(x) (sizeof(int) == sizeof(z_off64_t) && (x) > INT_MAX) +#else +unsigned ZLIB_INTERNAL gz_intmax OF((void)); +# define GT_OFF(x) (sizeof(int) == sizeof(z_off64_t) && (x) > gz_intmax()) +#endif diff --git a/deps/SZ/zlib/gzlib.c b/deps/SZ/zlib/gzlib.c new file mode 100644 index 0000000000000000000000000000000000000000..4105e6aff92594fb9cfa557aa8349cea5a5d4a2b --- /dev/null +++ b/deps/SZ/zlib/gzlib.c @@ -0,0 +1,637 @@ +/* gzlib.c -- zlib functions common to reading and writing gzip files + * Copyright (C) 2004-2017 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#include "gzguts.h" + +#if defined(_WIN32) && !defined(__BORLANDC__) && !defined(__MINGW32__) +# define LSEEK _lseeki64 +#else +#if defined(_LARGEFILE64_SOURCE) && _LFS64_LARGEFILE-0 +# define LSEEK lseek64 +#else +# define LSEEK lseek +#endif +#endif + +/* Local functions */ +local void gz_reset OF((gz_statep)); +local gzFile gz_open OF((const void *, int, const char *)); + +#if defined UNDER_CE + +/* Map the Windows error number in ERROR to a locale-dependent error message + string and return a pointer to it. Typically, the values for ERROR come + from GetLastError. + + The string pointed to shall not be modified by the application, but may be + overwritten by a subsequent call to gz_strwinerror + + The gz_strwinerror function does not change the current setting of + GetLastError. 
*/
+char ZLIB_INTERNAL *gz_strwinerror (error)
+     DWORD error;
+{
+    static char buf[1024];      /* returned to the caller; reused by every call */
+
+    wchar_t *msgbuf;
+    DWORD lasterr = GetLastError();     /* saved so it can be restored on exit */
+    DWORD chars = FormatMessage(FORMAT_MESSAGE_FROM_SYSTEM
+        | FORMAT_MESSAGE_ALLOCATE_BUFFER,
+        NULL,
+        error,
+        0, /* Default language */
+        (LPVOID)&msgbuf,
+        0,
+        NULL);
+    if (chars != 0) {
+        /* If there is an \r\n appended, zap it. */
+        if (chars >= 2
+            && msgbuf[chars - 2] == '\r' && msgbuf[chars - 1] == '\n') {
+            chars -= 2;
+            msgbuf[chars] = 0;
+        }
+
+        if (chars > sizeof (buf) - 1) {         /* clip to what buf can hold */
+            chars = sizeof (buf) - 1;
+            msgbuf[chars] = 0;
+        }
+
+        wcstombs(buf, msgbuf, chars + 1);   /* NOTE(review): limit is a wide-char count; multibyte output may be cut before the NUL -- confirm */
+        LocalFree(msgbuf);
+    }
+    else {
+        sprintf(buf, "unknown win32 error (%lu)", (unsigned long)error);    /* DWORD is unsigned: %ld showed codes >= 0x80000000 as negative */
+    }
+
+    SetLastError(lasterr);
+    return buf;
+}
+
+#endif /* UNDER_CE */
+
+/* Reset gzip file state: clears buffered output, any pending seek, the error, the position and pending input; read-only flags are reset only in GZ_READ mode */
+local void gz_reset(state)
+    gz_statep state;
+{
+    state->x.have = 0;              /* no output data available */
+    if (state->mode == GZ_READ) {   /* for reading ... */
+        state->eof = 0;             /* not at end of file */
+        state->past = 0;            /* have not read past end yet */
+        state->how = LOOK;          /* look for gzip header */
+    }
+    state->seek = 0;                /* no seek request pending */
+    gz_error(state, Z_OK, NULL);    /* clear error */
+    state->x.pos = 0;               /* no uncompressed data yet */
+    state->strm.avail_in = 0;       /* no input data yet */
+}
+
+/* Open a gzip file either by name or file descriptor.
*/ +local gzFile gz_open(path, fd, mode) + const void *path; + int fd; + const char *mode; +{ + gz_statep state; + z_size_t len; + int oflag; +#ifdef O_CLOEXEC + int cloexec = 0; +#endif +#ifdef O_EXCL + int exclusive = 0; +#endif + + /* check input */ + if (path == NULL) + return NULL; + + /* allocate gzFile structure to return */ + state = (gz_statep)malloc(sizeof(gz_state)); + if (state == NULL) + return NULL; + state->size = 0; /* no buffers allocated yet */ + state->want = GZBUFSIZE; /* requested buffer size */ + state->msg = NULL; /* no error message yet */ + + /* interpret mode */ + state->mode = GZ_NONE; + state->level = Z_DEFAULT_COMPRESSION; + state->strategy = Z_DEFAULT_STRATEGY; + state->direct = 0; + while (*mode) { + if (*mode >= '0' && *mode <= '9') + state->level = *mode - '0'; + else + switch (*mode) { + case 'r': + state->mode = GZ_READ; + break; +#ifndef NO_GZCOMPRESS + case 'w': + state->mode = GZ_WRITE; + break; + case 'a': + state->mode = GZ_APPEND; + break; +#endif + case '+': /* can't read and write at the same time */ + free(state); + return NULL; + case 'b': /* ignore -- will request binary anyway */ + break; +#ifdef O_CLOEXEC + case 'e': + cloexec = 1; + break; +#endif +#ifdef O_EXCL + case 'x': + exclusive = 1; + break; +#endif + case 'f': + state->strategy = Z_FILTERED; + break; + case 'h': + state->strategy = Z_HUFFMAN_ONLY; + break; + case 'R': + state->strategy = Z_RLE; + break; + case 'F': + state->strategy = Z_FIXED; + break; + case 'T': + state->direct = 1; + break; + default: /* could consider as an error, but just ignore */ + ; + } + mode++; + } + + /* must provide an "r", "w", or "a" */ + if (state->mode == GZ_NONE) { + free(state); + return NULL; + } + + /* can't force transparent read */ + if (state->mode == GZ_READ) { + if (state->direct) { + free(state); + return NULL; + } + state->direct = 1; /* for empty file */ + } + + /* save the path name for error messages */ +#ifdef WIDECHAR + if (fd == -2) { + len = wcstombs(NULL, 
path, 0); + if (len == (z_size_t)-1) + len = 0; + } + else +#endif + len = strlen((const char *)path); + state->path = (char *)malloc(len + 1); + if (state->path == NULL) { + free(state); + return NULL; + } +#ifdef WIDECHAR + if (fd == -2) + if (len) + wcstombs(state->path, path, len + 1); + else + *(state->path) = 0; + else +#endif +#if !defined(NO_snprintf) && !defined(NO_vsnprintf) + (void)snprintf(state->path, len + 1, "%s", (const char *)path); +#else + strcpy(state->path, path); +#endif + + /* compute the flags for open() */ + oflag = +#ifdef O_LARGEFILE + O_LARGEFILE | +#endif +#ifdef O_BINARY + O_BINARY | +#endif +#ifdef O_CLOEXEC + (cloexec ? O_CLOEXEC : 0) | +#endif + (state->mode == GZ_READ ? + O_RDONLY : + (O_WRONLY | O_CREAT | +#ifdef O_EXCL + (exclusive ? O_EXCL : 0) | +#endif + (state->mode == GZ_WRITE ? + O_TRUNC : + O_APPEND))); + + /* open the file with the appropriate flags (or just use fd) */ + state->fd = fd > -1 ? fd : ( +#ifdef WIDECHAR + fd == -2 ? _wopen(path, oflag, 0666) : +#endif + open((const char *)path, oflag, 0666)); + if (state->fd == -1) { + free(state->path); + free(state); + return NULL; + } + if (state->mode == GZ_APPEND) { + LSEEK(state->fd, 0, SEEK_END); /* so gzoffset() is correct */ + state->mode = GZ_WRITE; /* simplify later checks */ + } + + /* save the current position for rewinding (only if reading) */ + if (state->mode == GZ_READ) { + state->start = LSEEK(state->fd, 0, SEEK_CUR); + if (state->start == -1) state->start = 0; + } + + /* initialize stream */ + gz_reset(state); + + /* return stream */ + return (gzFile)state; +} + +/* -- see zlib.h -- */ +gzFile ZEXPORT gzopen(path, mode) + const char *path; + const char *mode; +{ + return gz_open(path, -1, mode); +} + +/* -- see zlib.h -- */ +gzFile ZEXPORT gzopen64(path, mode) + const char *path; + const char *mode; +{ + return gz_open(path, -1, mode); +} + +/* -- see zlib.h -- */ +gzFile ZEXPORT gzdopen(fd, mode) + int fd; + const char *mode; +{ + char *path; /* identifier 
for error messages */ + gzFile gz; + + if (fd == -1 || (path = (char *)malloc(7 + 3 * sizeof(int))) == NULL) + return NULL; +#if !defined(NO_snprintf) && !defined(NO_vsnprintf) + (void)snprintf(path, 7 + 3 * sizeof(int), "<fd:%d>", fd); +#else + sprintf(path, "<fd:%d>", fd); /* for debugging */ +#endif + gz = gz_open(path, fd, mode); + free(path); + return gz; +} + +/* -- see zlib.h -- */ +#ifdef WIDECHAR +gzFile ZEXPORT gzopen_w(path, mode) + const wchar_t *path; + const char *mode; +{ + return gz_open(path, -2, mode); +} +#endif + +/* -- see zlib.h -- */ +int ZEXPORT gzbuffer(file, size) + gzFile file; + unsigned size; +{ + gz_statep state; + + /* get internal structure and check integrity */ + if (file == NULL) + return -1; + state = (gz_statep)file; + if (state->mode != GZ_READ && state->mode != GZ_WRITE) + return -1; + + /* make sure we haven't already allocated memory */ + if (state->size != 0) + return -1; + + /* check and set requested size */ + if ((size << 1) < size) + return -1; /* need to be able to double it */ + if (size < 2) + size = 2; /* need two bytes to check magic header */ + state->want = size; + return 0; +} + +/* -- see zlib.h -- */ +int ZEXPORT gzrewind(file) + gzFile file; +{ + gz_statep state; + + /* get internal structure */ + if (file == NULL) + return -1; + state = (gz_statep)file; + + /* check that we're reading and that there's no error */ + if (state->mode != GZ_READ || + (state->err != Z_OK && state->err != Z_BUF_ERROR)) + return -1; + + /* back up and start over */ + if (LSEEK(state->fd, state->start, SEEK_SET) == -1) + return -1; + gz_reset(state); + return 0; +} + +/* -- see zlib.h -- */ +z_off64_t ZEXPORT gzseek64(file, offset, whence) + gzFile file; + z_off64_t offset; + int whence; +{ + unsigned n; + z_off64_t ret; + gz_statep state; + + /* get internal structure and check integrity */ + if (file == NULL) + return -1; + state = (gz_statep)file; + if (state->mode != GZ_READ && state->mode != GZ_WRITE) + return -1; + + /* check that there's
no error */ + if (state->err != Z_OK && state->err != Z_BUF_ERROR) + return -1; + + /* can only seek from start or relative to current position */ + if (whence != SEEK_SET && whence != SEEK_CUR) + return -1; + + /* normalize offset to a SEEK_CUR specification */ + if (whence == SEEK_SET) + offset -= state->x.pos; + else if (state->seek) + offset += state->skip; + state->seek = 0; + + /* if within raw area while reading, just go there */ + if (state->mode == GZ_READ && state->how == COPY && + state->x.pos + offset >= 0) { + ret = LSEEK(state->fd, offset - state->x.have, SEEK_CUR); + if (ret == -1) + return -1; + state->x.have = 0; + state->eof = 0; + state->past = 0; + state->seek = 0; + gz_error(state, Z_OK, NULL); + state->strm.avail_in = 0; + state->x.pos += offset; + return state->x.pos; + } + + /* calculate skip amount, rewinding if needed for back seek when reading */ + if (offset < 0) { + if (state->mode != GZ_READ) /* writing -- can't go backwards */ + return -1; + offset += state->x.pos; + if (offset < 0) /* before start of file! */ + return -1; + if (gzrewind(file) == -1) /* rewind, then skip to offset */ + return -1; + } + + /* if reading, skip what's in output buffer (one less gzgetc() check) */ + if (state->mode == GZ_READ) { + n = GT_OFF(state->x.have) || (z_off64_t)state->x.have > offset ? + (unsigned)offset : state->x.have; + state->x.have -= n; + state->x.next += n; + state->x.pos += n; + offset -= n; + } + + /* request skip (if not zero) */ + if (offset) { + state->seek = 1; + state->skip = offset; + } + return state->x.pos + offset; +} + +/* -- see zlib.h -- */ +z_off_t ZEXPORT gzseek(file, offset, whence) + gzFile file; + z_off_t offset; + int whence; +{ + z_off64_t ret; + + ret = gzseek64(file, (z_off64_t)offset, whence); + return ret == (z_off_t)ret ? 
(z_off_t)ret : -1; +} + +/* -- see zlib.h -- */ +z_off64_t ZEXPORT gztell64(file) + gzFile file; +{ + gz_statep state; + + /* get internal structure and check integrity */ + if (file == NULL) + return -1; + state = (gz_statep)file; + if (state->mode != GZ_READ && state->mode != GZ_WRITE) + return -1; + + /* return position */ + return state->x.pos + (state->seek ? state->skip : 0); +} + +/* -- see zlib.h -- */ +z_off_t ZEXPORT gztell(file) + gzFile file; +{ + z_off64_t ret; + + ret = gztell64(file); + return ret == (z_off_t)ret ? (z_off_t)ret : -1; +} + +/* -- see zlib.h -- */ +z_off64_t ZEXPORT gzoffset64(file) + gzFile file; +{ + z_off64_t offset; + gz_statep state; + + /* get internal structure and check integrity */ + if (file == NULL) + return -1; + state = (gz_statep)file; + if (state->mode != GZ_READ && state->mode != GZ_WRITE) + return -1; + + /* compute and return effective offset in file */ + offset = LSEEK(state->fd, 0, SEEK_CUR); + if (offset == -1) + return -1; + if (state->mode == GZ_READ) /* reading */ + offset -= state->strm.avail_in; /* don't count buffered input */ + return offset; +} + +/* -- see zlib.h -- */ +z_off_t ZEXPORT gzoffset(file) + gzFile file; +{ + z_off64_t ret; + + ret = gzoffset64(file); + return ret == (z_off_t)ret ? (z_off_t)ret : -1; +} + +/* -- see zlib.h -- */ +int ZEXPORT gzeof(file) + gzFile file; +{ + gz_statep state; + + /* get internal structure and check integrity */ + if (file == NULL) + return 0; + state = (gz_statep)file; + if (state->mode != GZ_READ && state->mode != GZ_WRITE) + return 0; + + /* return end-of-file state */ + return state->mode == GZ_READ ? 
state->past : 0; +} + +/* -- see zlib.h -- */ +const char * ZEXPORT gzerror(file, errnum) + gzFile file; + int *errnum; +{ + gz_statep state; + + /* get internal structure and check integrity */ + if (file == NULL) + return NULL; + state = (gz_statep)file; + if (state->mode != GZ_READ && state->mode != GZ_WRITE) + return NULL; + + /* return error information */ + if (errnum != NULL) + *errnum = state->err; + return state->err == Z_MEM_ERROR ? "out of memory" : + (state->msg == NULL ? "" : state->msg); +} + +/* -- see zlib.h -- */ +void ZEXPORT gzclearerr(file) + gzFile file; +{ + gz_statep state; + + /* get internal structure and check integrity */ + if (file == NULL) + return; + state = (gz_statep)file; + if (state->mode != GZ_READ && state->mode != GZ_WRITE) + return; + + /* clear error and end-of-file */ + if (state->mode == GZ_READ) { + state->eof = 0; + state->past = 0; + } + gz_error(state, Z_OK, NULL); +} + +/* Create an error message in allocated memory and set state->err and + state->msg accordingly. Free any previous error message already there. Do + not try to free or allocate space if the error is Z_MEM_ERROR (out of + memory). Simply save the error message as a static string. If there is an + allocation failure constructing the error message, then convert the error to + out of memory. 
*/ +void ZLIB_INTERNAL gz_error(state, err, msg) + gz_statep state; + int err; + const char *msg; +{ + /* free previously allocated message and clear */ + if (state->msg != NULL) { + if (state->err != Z_MEM_ERROR) + free(state->msg); + state->msg = NULL; + } + + /* if fatal, set state->x.have to 0 so that the gzgetc() macro fails */ + if (err != Z_OK && err != Z_BUF_ERROR) + state->x.have = 0; + + /* set error code, and if no message, then done */ + state->err = err; + if (msg == NULL) + return; + + /* for an out of memory error, return literal string when requested */ + if (err == Z_MEM_ERROR) + return; + + /* construct error message with path */ + if ((state->msg = (char *)malloc(strlen(state->path) + strlen(msg) + 3)) == + NULL) { + state->err = Z_MEM_ERROR; + return; + } +#if !defined(NO_snprintf) && !defined(NO_vsnprintf) + (void)snprintf(state->msg, strlen(state->path) + strlen(msg) + 3, + "%s%s%s", state->path, ": ", msg); +#else + strcpy(state->msg, state->path); + strcat(state->msg, ": "); + strcat(state->msg, msg); +#endif +} + +#ifndef INT_MAX +/* portably return maximum value for an int (when limits.h presumed not + available) -- we need to do this to cover cases where 2's complement not + used, since C standard permits 1's complement and sign-bit representations, + otherwise we could just use ((unsigned)-1) >> 1 */ +unsigned ZLIB_INTERNAL gz_intmax() +{ + unsigned p, q; + + p = 1; + do { + q = p; + p <<= 1; + p++; + } while (p > q); + return q >> 1; +} +#endif diff --git a/deps/SZ/zlib/gzread.c b/deps/SZ/zlib/gzread.c new file mode 100644 index 0000000000000000000000000000000000000000..956b91ea7d9e2a7cd554f7d6561142509b655244 --- /dev/null +++ b/deps/SZ/zlib/gzread.c @@ -0,0 +1,654 @@ +/* gzread.c -- zlib functions for reading gzip files + * Copyright (C) 2004, 2005, 2010, 2011, 2012, 2013, 2016 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#include "gzguts.h" + +/* Local functions */ +local int gz_load 
OF((gz_statep, unsigned char *, unsigned, unsigned *)); +local int gz_avail OF((gz_statep)); +local int gz_look OF((gz_statep)); +local int gz_decomp OF((gz_statep)); +local int gz_fetch OF((gz_statep)); +local int gz_skip OF((gz_statep, z_off64_t)); +local z_size_t gz_read OF((gz_statep, voidp, z_size_t)); + +/* Use read() to load a buffer -- return -1 on error, otherwise 0. Read from + state->fd, and update state->eof, state->err, and state->msg as appropriate. + This function needs to loop on read(), since read() is not guaranteed to + read the number of bytes requested, depending on the type of descriptor. */ +local int gz_load(state, buf, len, have) + gz_statep state; + unsigned char *buf; + unsigned len; + unsigned *have; +{ + int ret; + unsigned get, max = ((unsigned)-1 >> 2) + 1; + + *have = 0; + do { + get = len - *have; + if (get > max) + get = max; + ret = read(state->fd, buf + *have, get); + if (ret <= 0) + break; + *have += (unsigned)ret; + } while (*have < len); + if (ret < 0) { + gz_error(state, Z_ERRNO, zstrerror()); + return -1; + } + if (ret == 0) + state->eof = 1; + return 0; +} + +/* Load up input buffer and set eof flag if last data loaded -- return -1 on + error, 0 otherwise. Note that the eof flag is set when the end of the input + file is reached, even though there may be unused data in the buffer. Once + that data has been used, no more attempts will be made to read the file. + If strm->avail_in != 0, then the current data is moved to the beginning of + the input buffer, and then the remainder of the buffer is loaded with the + available data from the input file. 
*/ +local int gz_avail(state) + gz_statep state; +{ + unsigned got; + z_streamp strm = &(state->strm); + + if (state->err != Z_OK && state->err != Z_BUF_ERROR) + return -1; + if (state->eof == 0) { + if (strm->avail_in) { /* copy what's there to the start */ + unsigned char *p = state->in; + unsigned const char *q = strm->next_in; + unsigned n = strm->avail_in; + do { + *p++ = *q++; + } while (--n); + } + if (gz_load(state, state->in + strm->avail_in, + state->size - strm->avail_in, &got) == -1) + return -1; + strm->avail_in += got; + strm->next_in = state->in; + } + return 0; +} + +/* Look for gzip header, set up for inflate or copy. state->x.have must be 0. + If this is the first time in, allocate required memory. state->how will be + left unchanged if there is no more input data available, will be set to COPY + if there is no gzip header and direct copying will be performed, or it will + be set to GZIP for decompression. If direct copying, then leftover input + data from the input buffer will be copied to the output buffer. In that + case, all further file reads will be directly to either the output buffer or + a user buffer. If decompressing, the inflate state will be initialized. + gz_look() will return 0 on success or -1 on failure. 
*/ +local int gz_look(state) + gz_statep state; +{ + z_streamp strm = &(state->strm); + + /* allocate read buffers and inflate memory */ + if (state->size == 0) { + /* allocate buffers */ + state->in = (unsigned char *)malloc(state->want); + state->out = (unsigned char *)malloc(state->want << 1); + if (state->in == NULL || state->out == NULL) { + free(state->out); + free(state->in); + gz_error(state, Z_MEM_ERROR, "out of memory"); + return -1; + } + state->size = state->want; + + /* allocate inflate memory */ + state->strm.zalloc = Z_NULL; + state->strm.zfree = Z_NULL; + state->strm.opaque = Z_NULL; + state->strm.avail_in = 0; + state->strm.next_in = Z_NULL; + if (inflateInit2(&(state->strm), 15 + 16) != Z_OK) { /* gunzip */ + free(state->out); + free(state->in); + state->size = 0; + gz_error(state, Z_MEM_ERROR, "out of memory"); + return -1; + } + } + + /* get at least the magic bytes in the input buffer */ + if (strm->avail_in < 2) { + if (gz_avail(state) == -1) + return -1; + if (strm->avail_in == 0) + return 0; + } + + /* look for gzip magic bytes -- if there, do gzip decoding (note: there is + a logical dilemma here when considering the case of a partially written + gzip file, to wit, if a single 31 byte is written, then we cannot tell + whether this is a single-byte file, or just a partially written gzip + file -- for here we assume that if a gzip file is being written, then + the header will be written in a single operation, so that reading a + single byte is sufficient indication that it is not a gzip file) */ + if (strm->avail_in > 1 && + strm->next_in[0] == 31 && strm->next_in[1] == 139) { + inflateReset(strm); + state->how = GZIP; + state->direct = 0; + return 0; + } + + /* no gzip header -- if we were decoding gzip before, then this is trailing + garbage. Ignore the trailing garbage and finish. 
*/ + if (state->direct == 0) { + strm->avail_in = 0; + state->eof = 1; + state->x.have = 0; + return 0; + } + + /* doing raw i/o, copy any leftover input to output -- this assumes that + the output buffer is larger than the input buffer, which also assures + space for gzungetc() */ + state->x.next = state->out; + if (strm->avail_in) { + memcpy(state->x.next, strm->next_in, strm->avail_in); + state->x.have = strm->avail_in; + strm->avail_in = 0; + } + state->how = COPY; + state->direct = 1; + return 0; +} + +/* Decompress from input to the provided next_out and avail_out in the state. + On return, state->x.have and state->x.next point to the just decompressed + data. If the gzip stream completes, state->how is reset to LOOK to look for + the next gzip stream or raw data, once state->x.have is depleted. Returns 0 + on success, -1 on failure. */ +local int gz_decomp(state) + gz_statep state; +{ + int ret = Z_OK; + unsigned had; + z_streamp strm = &(state->strm); + + /* fill output buffer up to end of deflate stream */ + had = strm->avail_out; + do { + /* get more input for inflate() */ + if (strm->avail_in == 0 && gz_avail(state) == -1) + return -1; + if (strm->avail_in == 0) { + gz_error(state, Z_BUF_ERROR, "unexpected end of file"); + break; + } + + /* decompress and handle errors */ + ret = inflate(strm, Z_NO_FLUSH); + if (ret == Z_STREAM_ERROR || ret == Z_NEED_DICT) { + gz_error(state, Z_STREAM_ERROR, + "internal error: inflate stream corrupt"); + return -1; + } + if (ret == Z_MEM_ERROR) { + gz_error(state, Z_MEM_ERROR, "out of memory"); + return -1; + } + if (ret == Z_DATA_ERROR) { /* deflate stream invalid */ + gz_error(state, Z_DATA_ERROR, + strm->msg == NULL ? 
"compressed data error" : strm->msg); + return -1; + } + } while (strm->avail_out && ret != Z_STREAM_END); + + /* update available output */ + state->x.have = had - strm->avail_out; + state->x.next = strm->next_out - state->x.have; + + /* if the gzip stream completed successfully, look for another */ + if (ret == Z_STREAM_END) + state->how = LOOK; + + /* good decompression */ + return 0; +} + +/* Fetch data and put it in the output buffer. Assumes state->x.have is 0. + Data is either copied from the input file or decompressed from the input + file depending on state->how. If state->how is LOOK, then a gzip header is + looked for to determine whether to copy or decompress. Returns -1 on error, + otherwise 0. gz_fetch() will leave state->how as COPY or GZIP unless the + end of the input file has been reached and all data has been processed. */ +local int gz_fetch(state) + gz_statep state; +{ + z_streamp strm = &(state->strm); + + do { + switch(state->how) { + case LOOK: /* -> LOOK, COPY (only if never GZIP), or GZIP */ + if (gz_look(state) == -1) + return -1; + if (state->how == LOOK) + return 0; + break; + case COPY: /* -> COPY */ + if (gz_load(state, state->out, state->size << 1, &(state->x.have)) + == -1) + return -1; + state->x.next = state->out; + return 0; + case GZIP: /* -> GZIP or LOOK (if end of gzip stream) */ + strm->avail_out = state->size << 1; + strm->next_out = state->out; + if (gz_decomp(state) == -1) + return -1; + } + } while (state->x.have == 0 && (!state->eof || strm->avail_in)); + return 0; +} + +/* Skip len uncompressed bytes of output. Return -1 on error, 0 on success. */ +local int gz_skip(state, len) + gz_statep state; + z_off64_t len; +{ + unsigned n; + + /* skip over len bytes or reach end-of-file, whichever comes first */ + while (len) + /* skip over whatever is in output buffer */ + if (state->x.have) { + n = GT_OFF(state->x.have) || (z_off64_t)state->x.have > len ? 
+ (unsigned)len : state->x.have; + state->x.have -= n; + state->x.next += n; + state->x.pos += n; + len -= n; + } + + /* output buffer empty -- return if we're at the end of the input */ + else if (state->eof && state->strm.avail_in == 0) + break; + + /* need more data to skip -- load up output buffer */ + else { + /* get more output, looking for header if required */ + if (gz_fetch(state) == -1) + return -1; + } + return 0; +} + +/* Read len bytes into buf from file, or less than len up to the end of the + input. Return the number of bytes read. If zero is returned, either the + end of file was reached, or there was an error. state->err must be + consulted in that case to determine which. */ +local z_size_t gz_read(state, buf, len) + gz_statep state; + voidp buf; + z_size_t len; +{ + z_size_t got; + unsigned n; + + /* if len is zero, avoid unnecessary operations */ + if (len == 0) + return 0; + + /* process a skip request */ + if (state->seek) { + state->seek = 0; + if (gz_skip(state, state->skip) == -1) + return 0; + } + + /* get len bytes to buf, or less than len if at the end */ + got = 0; + do { + /* set n to the maximum amount of len that fits in an unsigned int */ + n = -1; + if (n > len) + n = len; + + /* first just try copying data from the output buffer */ + if (state->x.have) { + if (state->x.have < n) + n = state->x.have; + memcpy(buf, state->x.next, n); + state->x.next += n; + state->x.have -= n; + } + + /* output buffer empty -- return if we're at the end of the input */ + else if (state->eof && state->strm.avail_in == 0) { + state->past = 1; /* tried to read past end */ + break; + } + + /* need output data -- for small len or new stream load up our output + buffer */ + else if (state->how == LOOK || n < (state->size << 1)) { + /* get more output, looking for header if required */ + if (gz_fetch(state) == -1) + return 0; + continue; /* no progress yet -- go back to copy above */ + /* the copy above assures that we will leave with space in the + output 
buffer, allowing at least one gzungetc() to succeed */ + } + + /* large len -- read directly into user buffer */ + else if (state->how == COPY) { /* read directly */ + if (gz_load(state, (unsigned char *)buf, n, &n) == -1) + return 0; + } + + /* large len -- decompress directly into user buffer */ + else { /* state->how == GZIP */ + state->strm.avail_out = n; + state->strm.next_out = (unsigned char *)buf; + if (gz_decomp(state) == -1) + return 0; + n = state->x.have; + state->x.have = 0; + } + + /* update progress */ + len -= n; + buf = (char *)buf + n; + got += n; + state->x.pos += n; + } while (len); + + /* return number of bytes read into user buffer */ + return got; +} + +/* -- see zlib.h -- */ +int ZEXPORT gzread(file, buf, len) + gzFile file; + voidp buf; + unsigned len; +{ + gz_statep state; + + /* get internal structure */ + if (file == NULL) + return -1; + state = (gz_statep)file; + + /* check that we're reading and that there's no (serious) error */ + if (state->mode != GZ_READ || + (state->err != Z_OK && state->err != Z_BUF_ERROR)) + return -1; + + /* since an int is returned, make sure len fits in one, otherwise return + with an error (this avoids a flaw in the interface) */ + if ((int)len < 0) { + gz_error(state, Z_STREAM_ERROR, "request does not fit in an int"); + return -1; + } + + /* read len or fewer bytes to buf */ + len = gz_read(state, buf, len); + + /* check for an error */ + if (len == 0 && state->err != Z_OK && state->err != Z_BUF_ERROR) + return -1; + + /* return the number of bytes read (this is assured to fit in an int) */ + return (int)len; +} + +/* -- see zlib.h -- */ +z_size_t ZEXPORT gzfread(buf, size, nitems, file) + voidp buf; + z_size_t size; + z_size_t nitems; + gzFile file; +{ + z_size_t len; + gz_statep state; + + /* get internal structure */ + if (file == NULL) + return 0; + state = (gz_statep)file; + + /* check that we're reading and that there's no (serious) error */ + if (state->mode != GZ_READ || + (state->err != Z_OK && 
state->err != Z_BUF_ERROR)) + return 0; + + /* compute bytes to read -- error on overflow */ + len = nitems * size; + if (size && len / size != nitems) { + gz_error(state, Z_STREAM_ERROR, "request does not fit in a size_t"); + return 0; + } + + /* read len or fewer bytes to buf, return the number of full items read */ + return len ? gz_read(state, buf, len) / size : 0; +} + +/* -- see zlib.h -- */ +#ifdef Z_PREFIX_SET +# undef z_gzgetc +#else +# undef gzgetc +#endif +int ZEXPORT gzgetc(file) + gzFile file; +{ + int ret; + unsigned char buf[1]; + gz_statep state; + + /* get internal structure */ + if (file == NULL) + return -1; + state = (gz_statep)file; + + /* check that we're reading and that there's no (serious) error */ + if (state->mode != GZ_READ || + (state->err != Z_OK && state->err != Z_BUF_ERROR)) + return -1; + + /* try output buffer (no need to check for skip request) */ + if (state->x.have) { + state->x.have--; + state->x.pos++; + return *(state->x.next)++; + } + + /* nothing there -- try gz_read() */ + ret = gz_read(state, buf, 1); + return ret < 1 ? 
-1 : buf[0]; +} + +int ZEXPORT gzgetc_(file) +gzFile file; +{ + return gzgetc(file); +} + +/* -- see zlib.h -- */ +int ZEXPORT gzungetc(c, file) + int c; + gzFile file; +{ + gz_statep state; + + /* get internal structure */ + if (file == NULL) + return -1; + state = (gz_statep)file; + + /* check that we're reading and that there's no (serious) error */ + if (state->mode != GZ_READ || + (state->err != Z_OK && state->err != Z_BUF_ERROR)) + return -1; + + /* process a skip request */ + if (state->seek) { + state->seek = 0; + if (gz_skip(state, state->skip) == -1) + return -1; + } + + /* can't push EOF */ + if (c < 0) + return -1; + + /* if output buffer empty, put byte at end (allows more pushing) */ + if (state->x.have == 0) { + state->x.have = 1; + state->x.next = state->out + (state->size << 1) - 1; + state->x.next[0] = (unsigned char)c; + state->x.pos--; + state->past = 0; + return c; + } + + /* if no room, give up (must have already done a gzungetc()) */ + if (state->x.have == (state->size << 1)) { + gz_error(state, Z_DATA_ERROR, "out of room to push characters"); + return -1; + } + + /* slide output data if needed and insert byte before existing data */ + if (state->x.next == state->out) { + unsigned char *src = state->out + state->x.have; + unsigned char *dest = state->out + (state->size << 1); + while (src > state->out) + *--dest = *--src; + state->x.next = dest; + } + state->x.have++; + state->x.next--; + state->x.next[0] = (unsigned char)c; + state->x.pos--; + state->past = 0; + return c; +} + +/* -- see zlib.h -- */ +char * ZEXPORT gzgets(file, buf, len) + gzFile file; + char *buf; + int len; +{ + unsigned left, n; + char *str; + unsigned char *eol; + gz_statep state; + + /* check parameters and get internal structure */ + if (file == NULL || buf == NULL || len < 1) + return NULL; + state = (gz_statep)file; + + /* check that we're reading and that there's no (serious) error */ + if (state->mode != GZ_READ || + (state->err != Z_OK && state->err != 
Z_BUF_ERROR)) + return NULL; + + /* process a skip request */ + if (state->seek) { + state->seek = 0; + if (gz_skip(state, state->skip) == -1) + return NULL; + } + + /* copy output bytes up to new line or len - 1, whichever comes first -- + append a terminating zero to the string (we don't check for a zero in + the contents, let the user worry about that) */ + str = buf; + left = (unsigned)len - 1; + if (left) do { + /* assure that something is in the output buffer */ + if (state->x.have == 0 && gz_fetch(state) == -1) + return NULL; /* error */ + if (state->x.have == 0) { /* end of file */ + state->past = 1; /* read past end */ + break; /* return what we have */ + } + + /* look for end-of-line in current output buffer */ + n = state->x.have > left ? left : state->x.have; + eol = (unsigned char *)memchr(state->x.next, '\n', n); + if (eol != NULL) + n = (unsigned)(eol - state->x.next) + 1; + + /* copy through end-of-line, or remainder if not found */ + memcpy(buf, state->x.next, n); + state->x.have -= n; + state->x.next += n; + state->x.pos += n; + left -= n; + buf += n; + } while (left && eol == NULL); + + /* return terminated string, or if nothing, end of file */ + if (buf == str) + return NULL; + buf[0] = 0; + return str; +} + +/* -- see zlib.h -- */ +int ZEXPORT gzdirect(file) + gzFile file; +{ + gz_statep state; + + /* get internal structure */ + if (file == NULL) + return 0; + state = (gz_statep)file; + + /* if the state is not known, but we can find out, then do so (this is + mainly for right after a gzopen() or gzdopen()) */ + if (state->mode == GZ_READ && state->how == LOOK && state->x.have == 0) + (void)gz_look(state); + + /* return 1 if transparent, 0 if processing a gzip stream */ + return state->direct; +} + +/* -- see zlib.h -- */ +int ZEXPORT gzclose_r(file) + gzFile file; +{ + int ret, err; + gz_statep state; + + /* get internal structure */ + if (file == NULL) + return Z_STREAM_ERROR; + state = (gz_statep)file; + + /* check that we're reading */ + if 
(state->mode != GZ_READ) + return Z_STREAM_ERROR; + + /* free memory and close file */ + if (state->size) { + inflateEnd(&(state->strm)); + free(state->out); + free(state->in); + } + err = state->err == Z_BUF_ERROR ? Z_BUF_ERROR : Z_OK; + gz_error(state, Z_OK, NULL); + free(state->path); + ret = close(state->fd); + free(state); + return ret ? Z_ERRNO : err; +} diff --git a/deps/SZ/zlib/gzwrite.c b/deps/SZ/zlib/gzwrite.c new file mode 100644 index 0000000000000000000000000000000000000000..c7b5651d70b994e20222a734c620f68e11e0dc84 --- /dev/null +++ b/deps/SZ/zlib/gzwrite.c @@ -0,0 +1,665 @@ +/* gzwrite.c -- zlib functions for writing gzip files + * Copyright (C) 2004-2017 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#include "gzguts.h" + +/* Local functions */ +local int gz_init OF((gz_statep)); +local int gz_comp OF((gz_statep, int)); +local int gz_zero OF((gz_statep, z_off64_t)); +local z_size_t gz_write OF((gz_statep, voidpc, z_size_t)); + +/* Initialize state for writing a gzip file. Mark initialization by setting + state->size to non-zero. Return -1 on a memory allocation failure, or 0 on + success. 
*/ +local int gz_init(state) + gz_statep state; +{ + int ret; + z_streamp strm = &(state->strm); + + /* allocate input buffer (double size for gzprintf) */ + state->in = (unsigned char *)malloc(state->want << 1); + if (state->in == NULL) { + gz_error(state, Z_MEM_ERROR, "out of memory"); + return -1; + } + + /* only need output buffer and deflate state if compressing */ + if (!state->direct) { + /* allocate output buffer */ + state->out = (unsigned char *)malloc(state->want); + if (state->out == NULL) { + free(state->in); + gz_error(state, Z_MEM_ERROR, "out of memory"); + return -1; + } + + /* allocate deflate memory, set up for gzip compression */ + strm->zalloc = Z_NULL; + strm->zfree = Z_NULL; + strm->opaque = Z_NULL; + ret = deflateInit2(strm, state->level, Z_DEFLATED, + MAX_WBITS + 16, DEF_MEM_LEVEL, state->strategy); + if (ret != Z_OK) { + free(state->out); + free(state->in); + gz_error(state, Z_MEM_ERROR, "out of memory"); + return -1; + } + strm->next_in = NULL; + } + + /* mark state as initialized */ + state->size = state->want; + + /* initialize write buffer if compressing */ + if (!state->direct) { + strm->avail_out = state->size; + strm->next_out = state->out; + state->x.next = strm->next_out; + } + return 0; +} + +/* Compress whatever is at avail_in and next_in and write to the output file. + Return -1 if there is an error writing to the output file or if gz_init() + fails to allocate memory, otherwise 0. flush is assumed to be a valid + deflate() flush value. If flush is Z_FINISH, then the deflate() state is + reset to start a new gzip stream. If gz->direct is true, then simply write + to the output file without compressing, and ignore flush. 
*/ +local int gz_comp(state, flush) + gz_statep state; + int flush; +{ + int ret, writ; + unsigned have, put, max = ((unsigned)-1 >> 2) + 1; + z_streamp strm = &(state->strm); + + /* allocate memory if this is the first time through */ + if (state->size == 0 && gz_init(state) == -1) + return -1; + + /* write directly if requested */ + if (state->direct) { + while (strm->avail_in) { + put = strm->avail_in > max ? max : strm->avail_in; + writ = write(state->fd, strm->next_in, put); + if (writ < 0) { + gz_error(state, Z_ERRNO, zstrerror()); + return -1; + } + strm->avail_in -= (unsigned)writ; + strm->next_in += writ; + } + return 0; + } + + /* run deflate() on provided input until it produces no more output */ + ret = Z_OK; + do { + /* write out current buffer contents if full, or if flushing, but if + doing Z_FINISH then don't write until we get to Z_STREAM_END */ + if (strm->avail_out == 0 || (flush != Z_NO_FLUSH && + (flush != Z_FINISH || ret == Z_STREAM_END))) { + while (strm->next_out > state->x.next) { + put = strm->next_out - state->x.next > (int)max ? max : + (unsigned)(strm->next_out - state->x.next); + writ = write(state->fd, state->x.next, put); + if (writ < 0) { + gz_error(state, Z_ERRNO, zstrerror()); + return -1; + } + state->x.next += writ; + } + if (strm->avail_out == 0) { + strm->avail_out = state->size; + strm->next_out = state->out; + state->x.next = state->out; + } + } + + /* compress */ + have = strm->avail_out; + ret = deflate(strm, flush); + if (ret == Z_STREAM_ERROR) { + gz_error(state, Z_STREAM_ERROR, + "internal error: deflate stream corrupt"); + return -1; + } + have -= strm->avail_out; + } while (have); + + /* if that completed a deflate stream, allow another to start */ + if (flush == Z_FINISH) + deflateReset(strm); + + /* all done, no errors */ + return 0; +} + +/* Compress len zeros to output. Return -1 on a write error or memory + allocation failure by gz_comp(), or 0 on success. 
*/ +local int gz_zero(state, len) + gz_statep state; + z_off64_t len; +{ + int first; + unsigned n; + z_streamp strm = &(state->strm); + + /* consume whatever's left in the input buffer */ + if (strm->avail_in && gz_comp(state, Z_NO_FLUSH) == -1) + return -1; + + /* compress len zeros (len guaranteed > 0) */ + first = 1; + while (len) { + n = GT_OFF(state->size) || (z_off64_t)state->size > len ? + (unsigned)len : state->size; + if (first) { + memset(state->in, 0, n); + first = 0; + } + strm->avail_in = n; + strm->next_in = state->in; + state->x.pos += n; + if (gz_comp(state, Z_NO_FLUSH) == -1) + return -1; + len -= n; + } + return 0; +} + +/* Write len bytes from buf to file. Return the number of bytes written. If + the returned value is less than len, then there was an error. */ +local z_size_t gz_write(state, buf, len) + gz_statep state; + voidpc buf; + z_size_t len; +{ + z_size_t put = len; + + /* if len is zero, avoid unnecessary operations */ + if (len == 0) + return 0; + + /* allocate memory if this is the first time through */ + if (state->size == 0 && gz_init(state) == -1) + return 0; + + /* check for seek request */ + if (state->seek) { + state->seek = 0; + if (gz_zero(state, state->skip) == -1) + return 0; + } + + /* for small len, copy to input buffer, otherwise compress directly */ + if (len < state->size) { + /* copy to input buffer, compress when full */ + do { + unsigned have, copy; + + if (state->strm.avail_in == 0) + state->strm.next_in = state->in; + have = (unsigned)((state->strm.next_in + state->strm.avail_in) - + state->in); + copy = state->size - have; + if (copy > len) + copy = len; + memcpy(state->in + have, buf, copy); + state->strm.avail_in += copy; + state->x.pos += copy; + buf = (const char *)buf + copy; + len -= copy; + if (len && gz_comp(state, Z_NO_FLUSH) == -1) + return 0; + } while (len); + } + else { + /* consume whatever's left in the input buffer */ + if (state->strm.avail_in && gz_comp(state, Z_NO_FLUSH) == -1) + return 0; + + 
/* directly compress user buffer to file */ + state->strm.next_in = (z_const Bytef *)buf; + do { + unsigned n = (unsigned)-1; + if (n > len) + n = len; + state->strm.avail_in = n; + state->x.pos += n; + if (gz_comp(state, Z_NO_FLUSH) == -1) + return 0; + len -= n; + } while (len); + } + + /* input was all buffered or compressed */ + return put; +} + +/* -- see zlib.h -- */ +int ZEXPORT gzwrite(file, buf, len) + gzFile file; + voidpc buf; + unsigned len; +{ + gz_statep state; + + /* get internal structure */ + if (file == NULL) + return 0; + state = (gz_statep)file; + + /* check that we're writing and that there's no error */ + if (state->mode != GZ_WRITE || state->err != Z_OK) + return 0; + + /* since an int is returned, make sure len fits in one, otherwise return + with an error (this avoids a flaw in the interface) */ + if ((int)len < 0) { + gz_error(state, Z_DATA_ERROR, "requested length does not fit in int"); + return 0; + } + + /* write len bytes from buf (the return value will fit in an int) */ + return (int)gz_write(state, buf, len); +} + +/* -- see zlib.h -- */ +z_size_t ZEXPORT gzfwrite(buf, size, nitems, file) + voidpc buf; + z_size_t size; + z_size_t nitems; + gzFile file; +{ + z_size_t len; + gz_statep state; + + /* get internal structure */ + if (file == NULL) + return 0; + state = (gz_statep)file; + + /* check that we're writing and that there's no error */ + if (state->mode != GZ_WRITE || state->err != Z_OK) + return 0; + + /* compute bytes to read -- error on overflow */ + len = nitems * size; + if (size && len / size != nitems) { + gz_error(state, Z_STREAM_ERROR, "request does not fit in a size_t"); + return 0; + } + + /* write len bytes to buf, return the number of full items written */ + return len ? 
gz_write(state, buf, len) / size : 0; +} + +/* -- see zlib.h -- */ +int ZEXPORT gzputc(file, c) + gzFile file; + int c; +{ + unsigned have; + unsigned char buf[1]; + gz_statep state; + z_streamp strm; + + /* get internal structure */ + if (file == NULL) + return -1; + state = (gz_statep)file; + strm = &(state->strm); + + /* check that we're writing and that there's no error */ + if (state->mode != GZ_WRITE || state->err != Z_OK) + return -1; + + /* check for seek request */ + if (state->seek) { + state->seek = 0; + if (gz_zero(state, state->skip) == -1) + return -1; + } + + /* try writing to input buffer for speed (state->size == 0 if buffer not + initialized) */ + if (state->size) { + if (strm->avail_in == 0) + strm->next_in = state->in; + have = (unsigned)((strm->next_in + strm->avail_in) - state->in); + if (have < state->size) { + state->in[have] = (unsigned char)c; + strm->avail_in++; + state->x.pos++; + return c & 0xff; + } + } + + /* no room in buffer or not initialized, use gz_write() */ + buf[0] = (unsigned char)c; + if (gz_write(state, buf, 1) != 1) + return -1; + return c & 0xff; +} + +/* -- see zlib.h -- */ +int ZEXPORT gzputs(file, str) + gzFile file; + const char *str; +{ + int ret; + z_size_t len; + gz_statep state; + + /* get internal structure */ + if (file == NULL) + return -1; + state = (gz_statep)file; + + /* check that we're writing and that there's no error */ + if (state->mode != GZ_WRITE || state->err != Z_OK) + return -1; + + /* write string */ + len = strlen(str); + ret = gz_write(state, str, len); + return ret == 0 && len != 0 ? 
-1 : ret; +} + +#if defined(STDC) || defined(Z_HAVE_STDARG_H) +#include + +/* -- see zlib.h -- */ +int ZEXPORTVA gzvprintf(gzFile file, const char *format, va_list va) +{ + int len; + unsigned left; + char *next; + gz_statep state; + z_streamp strm; + + /* get internal structure */ + if (file == NULL) + return Z_STREAM_ERROR; + state = (gz_statep)file; + strm = &(state->strm); + + /* check that we're writing and that there's no error */ + if (state->mode != GZ_WRITE || state->err != Z_OK) + return Z_STREAM_ERROR; + + /* make sure we have some buffer space */ + if (state->size == 0 && gz_init(state) == -1) + return state->err; + + /* check for seek request */ + if (state->seek) { + state->seek = 0; + if (gz_zero(state, state->skip) == -1) + return state->err; + } + + /* do the printf() into the input buffer, put length in len -- the input + buffer is double-sized just for this function, so there is guaranteed to + be state->size bytes available after the current contents */ + if (strm->avail_in == 0) + strm->next_in = state->in; + next = (char *)(state->in + (strm->next_in - state->in) + strm->avail_in); + next[state->size - 1] = 0; +#ifdef NO_vsnprintf +# ifdef HAS_vsprintf_void + (void)vsprintf(next, format, va); + for (len = 0; len < state->size; len++) + if (next[len] == 0) break; +# else + len = vsprintf(next, format, va); +# endif +#else +# ifdef HAS_vsnprintf_void + (void)vsnprintf(next, state->size, format, va); + len = strlen(next); +# else + len = vsnprintf(next, state->size, format, va); +# endif +#endif + + /* check that printf() results fit in buffer */ + if (len == 0 || (unsigned)len >= state->size || next[state->size - 1] != 0) + return 0; + + /* update buffer and position, compress first half if past that */ + strm->avail_in += (unsigned)len; + state->x.pos += len; + if (strm->avail_in >= state->size) { + left = strm->avail_in - state->size; + strm->avail_in = state->size; + if (gz_comp(state, Z_NO_FLUSH) == -1) + return state->err; + 
memcpy(state->in, state->in + state->size, left); + strm->next_in = state->in; + strm->avail_in = left; + } + return len; +} + +int ZEXPORTVA gzprintf(gzFile file, const char *format, ...) +{ + va_list va; + int ret; + + va_start(va, format); + ret = gzvprintf(file, format, va); + va_end(va); + return ret; +} + +#else /* !STDC && !Z_HAVE_STDARG_H */ + +/* -- see zlib.h -- */ +int ZEXPORTVA gzprintf (file, format, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, + a11, a12, a13, a14, a15, a16, a17, a18, a19, a20) + gzFile file; + const char *format; + int a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, + a11, a12, a13, a14, a15, a16, a17, a18, a19, a20; +{ + unsigned len, left; + char *next; + gz_statep state; + z_streamp strm; + + /* get internal structure */ + if (file == NULL) + return Z_STREAM_ERROR; + state = (gz_statep)file; + strm = &(state->strm); + + /* check that can really pass pointer in ints */ + if (sizeof(int) != sizeof(void *)) + return Z_STREAM_ERROR; + + /* check that we're writing and that there's no error */ + if (state->mode != GZ_WRITE || state->err != Z_OK) + return Z_STREAM_ERROR; + + /* make sure we have some buffer space */ + if (state->size == 0 && gz_init(state) == -1) + return state->error; + + /* check for seek request */ + if (state->seek) { + state->seek = 0; + if (gz_zero(state, state->skip) == -1) + return state->error; + } + + /* do the printf() into the input buffer, put length in len -- the input + buffer is double-sized just for this function, so there is guaranteed to + be state->size bytes available after the current contents */ + if (strm->avail_in == 0) + strm->next_in = state->in; + next = (char *)(strm->next_in + strm->avail_in); + next[state->size - 1] = 0; +#ifdef NO_snprintf +# ifdef HAS_sprintf_void + sprintf(next, format, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, + a13, a14, a15, a16, a17, a18, a19, a20); + for (len = 0; len < size; len++) + if (next[len] == 0) + break; +# else + len = sprintf(next, format, a1, a2, a3, a4, 
a5, a6, a7, a8, a9, a10, a11, + a12, a13, a14, a15, a16, a17, a18, a19, a20); +# endif +#else +# ifdef HAS_snprintf_void + snprintf(next, state->size, format, a1, a2, a3, a4, a5, a6, a7, a8, a9, + a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20); + len = strlen(next); +# else + len = snprintf(next, state->size, format, a1, a2, a3, a4, a5, a6, a7, a8, + a9, a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20); +# endif +#endif + + /* check that printf() results fit in buffer */ + if (len == 0 || len >= state->size || next[state->size - 1] != 0) + return 0; + + /* update buffer and position, compress first half if past that */ + strm->avail_in += len; + state->x.pos += len; + if (strm->avail_in >= state->size) { + left = strm->avail_in - state->size; + strm->avail_in = state->size; + if (gz_comp(state, Z_NO_FLUSH) == -1) + return state->err; + memcpy(state->in, state->in + state->size, left); + strm->next_in = state->in; + strm->avail_in = left; + } + return (int)len; +} + +#endif + +/* -- see zlib.h -- */ +int ZEXPORT gzflush(file, flush) + gzFile file; + int flush; +{ + gz_statep state; + + /* get internal structure */ + if (file == NULL) + return Z_STREAM_ERROR; + state = (gz_statep)file; + + /* check that we're writing and that there's no error */ + if (state->mode != GZ_WRITE || state->err != Z_OK) + return Z_STREAM_ERROR; + + /* check flush parameter */ + if (flush < 0 || flush > Z_FINISH) + return Z_STREAM_ERROR; + + /* check for seek request */ + if (state->seek) { + state->seek = 0; + if (gz_zero(state, state->skip) == -1) + return state->err; + } + + /* compress remaining data with requested flush */ + (void)gz_comp(state, flush); + return state->err; +} + +/* -- see zlib.h -- */ +int ZEXPORT gzsetparams(file, level, strategy) + gzFile file; + int level; + int strategy; +{ + gz_statep state; + z_streamp strm; + + /* get internal structure */ + if (file == NULL) + return Z_STREAM_ERROR; + state = (gz_statep)file; + strm = &(state->strm); + + /* check 
that we're writing and that there's no error */ + if (state->mode != GZ_WRITE || state->err != Z_OK) + return Z_STREAM_ERROR; + + /* if no change is requested, then do nothing */ + if (level == state->level && strategy == state->strategy) + return Z_OK; + + /* check for seek request */ + if (state->seek) { + state->seek = 0; + if (gz_zero(state, state->skip) == -1) + return state->err; + } + + /* change compression parameters for subsequent input */ + if (state->size) { + /* flush previous input with previous parameters before changing */ + if (strm->avail_in && gz_comp(state, Z_BLOCK) == -1) + return state->err; + deflateParams(strm, level, strategy); + } + state->level = level; + state->strategy = strategy; + return Z_OK; +} + +/* -- see zlib.h -- */ +int ZEXPORT gzclose_w(file) + gzFile file; +{ + int ret = Z_OK; + gz_statep state; + + /* get internal structure */ + if (file == NULL) + return Z_STREAM_ERROR; + state = (gz_statep)file; + + /* check that we're writing */ + if (state->mode != GZ_WRITE) + return Z_STREAM_ERROR; + + /* check for seek request */ + if (state->seek) { + state->seek = 0; + if (gz_zero(state, state->skip) == -1) + ret = state->err; + } + + /* flush, free memory, and close file */ + if (gz_comp(state, Z_FINISH) == -1) + ret = state->err; + if (state->size) { + if (!state->direct) { + (void)deflateEnd(&(state->strm)); + free(state->out); + } + free(state->in); + } + gz_error(state, Z_OK, NULL); + free(state->path); + if (close(state->fd) == -1) + ret = Z_ERRNO; + free(state); + return ret; +} diff --git a/deps/SZ/zlib/infback.c b/deps/SZ/zlib/infback.c new file mode 100644 index 0000000000000000000000000000000000000000..59679ecbfc5d778ca85d9ced87565f69bcb4635c --- /dev/null +++ b/deps/SZ/zlib/infback.c @@ -0,0 +1,640 @@ +/* infback.c -- inflate using a call-back interface + * Copyright (C) 1995-2016 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* + This code is largely copied from inflate.c. 
Normally either infback.o or
+   inflate.o would be linked into an application--not both. The interface
+   with inffast.c is retained so that optimized assembler-coded versions of
+   inflate_fast() can be used with either inflate.c or infback.c.
+ */
+
+#include "zutil.h"
+#include "inftrees.h"
+#include "inflate.h"
+#include "inffast.h"
+
+/* function prototypes */
+local void fixedtables OF((struct inflate_state FAR *state));
+
+/*
+   strm provides memory allocation functions in zalloc and zfree, or
+   Z_NULL to use the library memory allocation functions.
+
+   windowBits is in the range 8..15, and window is a user-supplied
+   window and output buffer that is 2**windowBits bytes.
+
+   Returns Z_VERSION_ERROR if version is Z_NULL, its first character differs
+   from that of ZLIB_VERSION, or stream_size is not sizeof(z_stream);
+   Z_STREAM_ERROR if strm or window is Z_NULL, windowBits is out of range, or
+   (with Z_SOLO) an allocation function is missing; Z_MEM_ERROR if the state
+   cannot be allocated; and Z_OK otherwise.
+ */
+int ZEXPORT inflateBackInit_(strm, windowBits, window, version, stream_size)
+z_streamp strm;
+int windowBits;
+unsigned char FAR *window;
+const char *version;
+int stream_size;
+{
+    struct inflate_state FAR *state;
+
+    if (version == Z_NULL || version[0] != ZLIB_VERSION[0] ||
+        stream_size != (int)(sizeof(z_stream)))
+        return Z_VERSION_ERROR;
+    if (strm == Z_NULL || window == Z_NULL ||
+        windowBits < 8 || windowBits > 15)
+        return Z_STREAM_ERROR;
+    strm->msg = Z_NULL;                 /* in case we return an error */
+    if (strm->zalloc == (alloc_func)0) {
+#ifdef Z_SOLO
+        return Z_STREAM_ERROR;
+#else
+        strm->zalloc = zcalloc;
+        strm->opaque = (voidpf)0;
+#endif
+    }
+    if (strm->zfree == (free_func)0)    /* the body of this if is picked by the preprocessor */
+#ifdef Z_SOLO
+        return Z_STREAM_ERROR;
+#else
+    strm->zfree = zcfree;
+#endif
+    state = (struct inflate_state FAR *)ZALLOC(strm, 1,
+                                               sizeof(struct inflate_state));
+    if (state == Z_NULL) return Z_MEM_ERROR;
+    Tracev((stderr, "inflate: allocated\n"));
+    strm->state = (struct internal_state FAR *)state;
+    state->dmax = 32768U;
+    state->wbits = (uInt)windowBits;
+    state->wsize = 1U << windowBits;    /* window size in bytes, 2**windowBits */
+    state->window = window;             /* caller-owned buffer; nothing is allocated for it here */
+    state->wnext = 0;
+    state->whave = 0;
+    return Z_OK;
+}
+
+/*
+   Return state with length and distance decoding tables and index sizes set to
+   fixed code decoding.
Normally this returns fixed tables from inffixed.h.
+   If BUILDFIXED is defined, then instead this routine builds the tables the
+   first time it's called, and returns those tables the first time and
+   thereafter. This reduces the size of the code by about 2K bytes, in
+   exchange for a little execution time. However, BUILDFIXED should not be
+   used for threaded applications, since the rewriting of the tables and virgin
+   may not be thread-safe.
+
+   On return state->lencode/state->distcode point at the tables and
+   state->lenbits/state->distbits are set to 9 and 5.
+ */
+local void fixedtables(state)
+struct inflate_state FAR *state;
+{
+#ifdef BUILDFIXED
+    static int virgin = 1;
+    static code *lenfix, *distfix;
+    static code fixed[544];     /* storage for the length table, then the distance table */
+
+    /* build fixed huffman tables if first call (may not be thread safe) */
+    if (virgin) {
+        unsigned sym, bits;
+        static code *next;
+
+        /* literal/length table */
+        sym = 0;
+        while (sym < 144) state->lens[sym++] = 8;
+        while (sym < 256) state->lens[sym++] = 9;
+        while (sym < 280) state->lens[sym++] = 7;
+        while (sym < 288) state->lens[sym++] = 8;
+        next = fixed;
+        lenfix = next;
+        bits = 9;
+        inflate_table(LENS, state->lens, 288, &(next), &(bits), state->work);
+
+        /* distance table -- starts wherever the call above left next */
+        sym = 0;
+        while (sym < 32) state->lens[sym++] = 5;
+        distfix = next;
+        bits = 5;
+        inflate_table(DISTS, state->lens, 32, &(next), &(bits), state->work);
+
+        /* do this just once */
+        virgin = 0;
+    }
+#else /* !BUILDFIXED */
+#   include "inffixed.h"
+#endif /* BUILDFIXED */
+    state->lencode = lenfix;    /* lenfix/distfix come from the statics above or, presumably, from inffixed.h */
+    state->lenbits = 9;
+    state->distcode = distfix;
+    state->distbits = 5;
+}
+
+/* Macros for inflateBack(): these read and write inflateBack()'s local
+   variables (put, left, next, have, hold, bits, ret, state, strm, in, out,
+   in_desc, out_desc) by name, and some jump to its inf_leave label */
+
+/* Load returned state from inflate_fast() */
+#define LOAD() \
+    do { \
+        put = strm->next_out; \
+        left = strm->avail_out; \
+        next = strm->next_in; \
+        have = strm->avail_in; \
+        hold = state->hold; \
+        bits = state->bits; \
+    } while (0)
+
+/* Set state from registers for inflate_fast() */
+#define RESTORE() \
+    do { \
+        strm->next_out = put; \
+        strm->avail_out = left; \
+        strm->next_in = next; \
+        strm->avail_in = have; \
+        state->hold = hold; \
+        state->bits = bits; \
+    } while (0)
+
+/* Clear the input bit accumulator */
+#define INITBITS() \
+    do { \
+        hold = 0; \
+        bits = 0; \
+    } while (0)
+
+/* Assure that some input is available. If input is requested, but denied,
+   then return a Z_BUF_ERROR from inflateBack(). next is set to Z_NULL in
+   that case, which is how a failure of in() is told apart from one of
+   out(). */
+#define PULL() \
+    do { \
+        if (have == 0) { \
+            have = in(in_desc, &next); \
+            if (have == 0) { \
+                next = Z_NULL; \
+                ret = Z_BUF_ERROR; \
+                goto inf_leave; \
+            } \
+        } \
+    } while (0)
+
+/* Get a byte of input into the bit accumulator, or return from inflateBack()
+   with an error if there is no input available. */
+#define PULLBYTE() \
+    do { \
+        PULL(); \
+        have--; \
+        hold += (unsigned long)(*next++) << bits; \
+        bits += 8; \
+    } while (0)
+
+/* Assure that there are at least n bits in the bit accumulator. If there is
+   not enough available input to do that, then return from inflateBack() with
+   an error. */
+#define NEEDBITS(n) \
+    do { \
+        while (bits < (unsigned)(n)) \
+            PULLBYTE(); \
+    } while (0)
+
+/* Return the low n bits of the bit accumulator (n < 16) */
+#define BITS(n) \
+    ((unsigned)hold & ((1U << (n)) - 1))
+
+/* Remove n bits from the bit accumulator */
+#define DROPBITS(n) \
+    do { \
+        hold >>= (n); \
+        bits -= (unsigned)(n); \
+    } while (0)
+
+/* Remove zero to seven bits as needed to go to a byte boundary */
+#define BYTEBITS() \
+    do { \
+        hold >>= bits & 7; \
+        bits -= bits & 7; \
+    } while (0)
+
+/* Assure that some output space is available, by writing out the window
+   if it's full. If the write fails, return from inflateBack() with a
+   Z_BUF_ERROR. put and left are reset to describe the whole window, and
+   state->whave is set to the window size, before out() is called. */
+#define ROOM() \
+    do { \
+        if (left == 0) { \
+            put = state->window; \
+            left = state->wsize; \
+            state->whave = left; \
+            if (out(out_desc, put, left)) { \
+                ret = Z_BUF_ERROR; \
+                goto inf_leave; \
+            } \
+        } \
+    } while (0)
+
+/*
+   strm provides the memory allocation functions and window buffer on input,
+   and provides information on the unused input on return.
For Z_DATA_ERROR + returns, strm will also provide an error message. + + in() and out() are the call-back input and output functions. When + inflateBack() needs more input, it calls in(). When inflateBack() has + filled the window with output, or when it completes with data in the + window, it calls out() to write out the data. The application must not + change the provided input until in() is called again or inflateBack() + returns. The application must not change the window/output buffer until + inflateBack() returns. + + in() and out() are called with a descriptor parameter provided in the + inflateBack() call. This parameter can be a structure that provides the + information required to do the read or write, as well as accumulated + information on the input and output such as totals and check values. + + in() should return zero on failure. out() should return non-zero on + failure. If either in() or out() fails, then inflateBack() returns a + Z_BUF_ERROR. strm->next_in can be checked for Z_NULL to see whether it + was in() or out() that caused the error. Otherwise, inflateBack() + returns Z_STREAM_END on success, Z_DATA_ERROR for a deflate format + error, or Z_MEM_ERROR if it could not allocate memory for the state. + inflateBack() can also return Z_STREAM_ERROR if the input parameters + are not correct, i.e. strm is Z_NULL or the state was not initialized.
+ */
+int ZEXPORT inflateBack(strm, in, in_desc, out, out_desc)
+z_streamp strm;
+in_func in;
+void FAR *in_desc;
+out_func out;
+void FAR *out_desc;
+{
+    struct inflate_state FAR *state;
+    z_const unsigned char FAR *next;    /* next input */
+    unsigned char FAR *put;     /* next output */
+    unsigned have, left;        /* available input and output */
+    unsigned long hold;         /* bit buffer */
+    unsigned bits;              /* bits in bit buffer */
+    unsigned copy;              /* number of stored or match bytes to copy */
+    unsigned char FAR *from;    /* where to copy match bytes from */
+    code here;                  /* current decoding table entry */
+    code last;                  /* parent table entry */
+    unsigned len;               /* length to copy for repeats, bits to drop */
+    int ret;                    /* return code */
+    static const unsigned short order[19] = /* permutation of code lengths */
+        {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
+
+    /* Check that the strm exists and that the state was initialized */
+    if (strm == Z_NULL || strm->state == Z_NULL)
+        return Z_STREAM_ERROR;
+    state = (struct inflate_state FAR *)strm->state;
+
+    /* Reset the state -- decoding always starts from an empty bit buffer and
+       an empty window, whatever a previous call left behind */
+    strm->msg = Z_NULL;
+    state->mode = TYPE;
+    state->last = 0;
+    state->whave = 0;
+    next = strm->next_in;
+    have = next != Z_NULL ? strm->avail_in : 0;     /* avail_in is ignored without an input pointer */
+    hold = 0;
+    bits = 0;
+    put = state->window;
+    left = state->wsize;
+
+    /* Inflate until end of block marked as last -- each pass of the loop
+       dispatches on state->mode; a break re-enters the switch and the
+       terminal modes leave through inf_leave */
+    for (;;)
+        switch (state->mode) {
+        case TYPE:
+            /* determine and dispatch block type */
+            if (state->last) {
+                BYTEBITS();
+                state->mode = DONE;
+                break;
+            }
+            NEEDBITS(3);
+            state->last = BITS(1);
+            DROPBITS(1);
+            switch (BITS(2)) {
+            case 0:                             /* stored block */
+                Tracev((stderr, "inflate:     stored block%s\n",
+                        state->last ? " (last)" : ""));
+                state->mode = STORED;
+                break;
+            case 1:                             /* fixed block */
+                fixedtables(state);
+                Tracev((stderr, "inflate:     fixed codes block%s\n",
+                        state->last ? " (last)" : ""));
+                state->mode = LEN;              /* decode codes */
+                break;
+            case 2:                             /* dynamic block */
+                Tracev((stderr, "inflate:     dynamic codes block%s\n",
+                        state->last ? " (last)" : ""));
+                state->mode = TABLE;
+                break;
+            case 3:
+                strm->msg = (char *)"invalid block type";
+                state->mode = BAD;
+            }
+            DROPBITS(2);
+            break;
+
+        case STORED:
+            /* get and verify stored block length -- the high 16 bits must be
+               the one's complement of the low 16 bits */
+            BYTEBITS();                         /* go to byte boundary */
+            NEEDBITS(32);
+            if ((hold & 0xffff) != ((hold >> 16) ^ 0xffff)) {
+                strm->msg = (char *)"invalid stored block lengths";
+                state->mode = BAD;
+                break;
+            }
+            state->length = (unsigned)hold & 0xffff;
+            Tracev((stderr, "inflate:       stored length %u\n",
+                    state->length));
+            INITBITS();
+
+            /* copy stored block from input to output, limited on each pass
+               by the input available and the room left in the window */
+            while (state->length != 0) {
+                copy = state->length;
+                PULL();
+                ROOM();
+                if (copy > have) copy = have;
+                if (copy > left) copy = left;
+                zmemcpy(put, next, copy);
+                have -= copy;
+                next += copy;
+                left -= copy;
+                put += copy;
+                state->length -= copy;
+            }
+            Tracev((stderr, "inflate:       stored end\n"));
+            state->mode = TYPE;
+            break;
+
+        case TABLE:
+            /* get dynamic table entries descriptor */
+            NEEDBITS(14);
+            state->nlen = BITS(5) + 257;
+            DROPBITS(5);
+            state->ndist = BITS(5) + 1;
+            DROPBITS(5);
+            state->ncode = BITS(4) + 4;
+            DROPBITS(4);
+#ifndef PKZIP_BUG_WORKAROUND
+            if (state->nlen > 286 || state->ndist > 30) {
+                strm->msg = (char *)"too many length or distance symbols";
+                state->mode = BAD;
+                break;
+            }
+#endif
+            Tracev((stderr, "inflate:       table sizes ok\n"));
+
+            /* get code length code lengths (not a typo) -- they arrive in
+               the permuted order[] sequence; entries not sent are zero */
+            state->have = 0;
+            while (state->have < state->ncode) {
+                NEEDBITS(3);
+                state->lens[order[state->have++]] = (unsigned short)BITS(3);
+                DROPBITS(3);
+            }
+            while (state->have < 19)
+                state->lens[order[state->have++]] = 0;
+            state->next = state->codes;
+            state->lencode = (code const FAR *)(state->next);
+            state->lenbits = 7;
+            ret = inflate_table(CODES, state->lens, 19, &(state->next),
+                                &(state->lenbits), state->work);
+            if (ret) {
+                strm->msg = (char *)"invalid code lengths set";
+                state->mode = BAD;
+                break;
+            }
+            Tracev((stderr, "inflate:       code lengths ok\n"));
+
+            /* get length and distance code code lengths -- values below 16
+               are literal lengths, 16 repeats the previous length, 17 and
+               18 repeat zero */
+            state->have = 0;
+            while (state->have < state->nlen + state->ndist) {
+                for (;;) {
+                    here = state->lencode[BITS(state->lenbits)];
+                    if ((unsigned)(here.bits) <= bits) break;
+                    PULLBYTE();
+                }
+                if (here.val < 16) {
+                    DROPBITS(here.bits);
+                    state->lens[state->have++] = here.val;
+                }
+                else {
+                    if (here.val == 16) {
+                        NEEDBITS(here.bits + 2);
+                        DROPBITS(here.bits);
+                        if (state->have == 0) {
+                            strm->msg = (char *)"invalid bit length repeat";
+                            state->mode = BAD;
+                            break;
+                        }
+                        len = (unsigned)(state->lens[state->have - 1]);
+                        copy = 3 + BITS(2);
+                        DROPBITS(2);
+                    }
+                    else if (here.val == 17) {
+                        NEEDBITS(here.bits + 3);
+                        DROPBITS(here.bits);
+                        len = 0;
+                        copy = 3 + BITS(3);
+                        DROPBITS(3);
+                    }
+                    else {
+                        NEEDBITS(here.bits + 7);
+                        DROPBITS(here.bits);
+                        len = 0;
+                        copy = 11 + BITS(7);
+                        DROPBITS(7);
+                    }
+                    if (state->have + copy > state->nlen + state->ndist) {
+                        strm->msg = (char *)"invalid bit length repeat";
+                        state->mode = BAD;
+                        break;
+                    }
+                    while (copy--)
+                        state->lens[state->have++] = (unsigned short)len;
+                }
+            }
+
+            /* handle error breaks in while */
+            if (state->mode == BAD) break;
+
+            /* check for end-of-block code (better have one) */
+            if (state->lens[256] == 0) {
+                strm->msg = (char *)"invalid code -- missing end-of-block";
+                state->mode = BAD;
+                break;
+            }
+
+            /* build code tables -- note: do not change the lenbits or distbits
+               values here (9 and 6) without reading the comments in inftrees.h
+               concerning the ENOUGH constants, which depend on those values */
+            state->next = state->codes;
+            state->lencode = (code const FAR *)(state->next);
+            state->lenbits = 9;
+            ret = inflate_table(LENS, state->lens, state->nlen, &(state->next),
+                                &(state->lenbits), state->work);
+            if (ret) {
+                strm->msg = (char *)"invalid literal/lengths set";
+                state->mode = BAD;
+                break;
+            }
+            state->distcode = (code const FAR *)(state->next);
+            state->distbits = 6;
+            ret = inflate_table(DISTS, state->lens + state->nlen, state->ndist,
+                            &(state->next), &(state->distbits), state->work);
+            if (ret) {
+                strm->msg = (char *)"invalid distances set";
+                state->mode = BAD;
+                break;
+            }
+            Tracev((stderr, "inflate:       codes ok\n"));
+            state->mode = LEN;                  /* no break: execution continues into case LEN */
+
+        case LEN:
+            /* use inflate_fast() if we have enough input and output */
+            if (have >= 6 && left >= 258) {
+                RESTORE();
+                if (state->whave < state->wsize)
+                    state->whave = state->wsize - left;
+                inflate_fast(strm, state->wsize);
+                LOAD();
+                break;                          /* dispatch again on whatever mode is now set */
+            }
+
+            /* get a literal, length, or end-of-block code */
+            for (;;) {
+                here = state->lencode[BITS(state->lenbits)];
+                if ((unsigned)(here.bits) <= bits) break;
+                PULLBYTE();
+            }
+            if (here.op && (here.op & 0xf0) == 0) {
+                last = here;
+                for (;;) {
+                    here = state->lencode[last.val +
+                            (BITS(last.bits + last.op) >> last.bits)];
+                    if ((unsigned)(last.bits + here.bits) <= bits) break;
+                    PULLBYTE();
+                }
+                DROPBITS(last.bits);
+            }
+            DROPBITS(here.bits);
+            state->length = (unsigned)here.val;
+
+            /* process literal */
+            if (here.op == 0) {
+                Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ?
+                        "inflate:         literal '%c'\n" :
+                        "inflate:         literal 0x%02x\n", here.val));
+                ROOM();
+                *put++ = (unsigned char)(state->length);
+                left--;
+                state->mode = LEN;
+                break;
+            }
+
+            /* process end of block */
+            if (here.op & 32) {
+                Tracevv((stderr, "inflate:         end of block\n"));
+                state->mode = TYPE;
+                break;
+            }
+
+            /* invalid code */
+            if (here.op & 64) {
+                strm->msg = (char *)"invalid literal/length code";
+                state->mode = BAD;
+                break;
+            }
+
+            /* length code -- get extra bits, if any */
+            state->extra = (unsigned)(here.op) & 15;
+            if (state->extra != 0) {
+                NEEDBITS(state->extra);
+                state->length += BITS(state->extra);
+                DROPBITS(state->extra);
+            }
+            Tracevv((stderr, "inflate:         length %u\n", state->length));
+
+            /* get distance code */
+            for (;;) {
+                here = state->distcode[BITS(state->distbits)];
+                if ((unsigned)(here.bits) <= bits) break;
+                PULLBYTE();
+            }
+            if ((here.op & 0xf0) == 0) {
+                last = here;
+                for (;;) {
+                    here = state->distcode[last.val +
+                            (BITS(last.bits + last.op) >> last.bits)];
+                    if ((unsigned)(last.bits + here.bits) <= bits) break;
+                    PULLBYTE();
+                }
+                DROPBITS(last.bits);
+            }
+            DROPBITS(here.bits);
+            if (here.op & 64) {
+                strm->msg = (char *)"invalid distance code";
+                state->mode = BAD;
+                break;
+            }
+            state->offset = (unsigned)here.val;
+
+            /* get distance extra bits, if any */
+            state->extra = (unsigned)(here.op) & 15;
+            if (state->extra != 0) {
+                NEEDBITS(state->extra);
+                state->offset += BITS(state->extra);
+                DROPBITS(state->extra);
+            }
+            if (state->offset > state->wsize - (state->whave < state->wsize ?
+                                                left : 0)) {
+                strm->msg = (char *)"invalid distance too far back";
+                state->mode = BAD;
+                break;
+            }
+            Tracevv((stderr, "inflate:         distance %u\n", state->offset));
+
+            /* copy match from window to output -- the source is taken from
+               ahead of put (put + copy) when the match reaches back past the
+               start of the window buffer, otherwise from put - offset */
+            do {
+                ROOM();
+                copy = state->wsize - state->offset;
+                if (copy < left) {
+                    from = put + copy;
+                    copy = left - copy;
+                }
+                else {
+                    from = put - state->offset;
+                    copy = left;
+                }
+                if (copy > state->length) copy = state->length;
+                state->length -= copy;
+                left -= copy;
+                do {
+                    *put++ = *from++;
+                } while (--copy);
+            } while (state->length != 0);
+            break;
+
+        case DONE:
+            /* inflate stream terminated properly -- write leftover output,
+               i.e. the wsize - left bytes at the start of the window */
+            ret = Z_STREAM_END;
+            if (left < state->wsize) {
+                if (out(out_desc, state->window, state->wsize - left))
+                    ret = Z_BUF_ERROR;
+            }
+            goto inf_leave;
+
+        case BAD:
+            ret = Z_DATA_ERROR;
+            goto inf_leave;
+
+        default:                /* can't happen, but makes compilers happy */
+            ret = Z_STREAM_ERROR;
+            goto inf_leave;
+        }
+
+    /* Return unused input */
+  inf_leave:
+    strm->next_in = next;
+    strm->avail_in = have;
+    return ret;
+}
+
+int ZEXPORT inflateBackEnd(strm)        /* frees strm->state; Z_STREAM_ERROR if strm, its state or zfree is missing, else Z_OK */
+z_streamp strm;
+{
+    if (strm == Z_NULL || strm->state == Z_NULL || strm->zfree == (free_func)0)
+        return Z_STREAM_ERROR;
+    ZFREE(strm, strm->state);
+    strm->state = Z_NULL;       /* a later inflateBack() or inflateBackEnd() then fails its state check */
+    Tracev((stderr, "inflate: end\n"));
+    return Z_OK;
+}
diff --git a/deps/SZ/zlib/inffast.c b/deps/SZ/zlib/inffast.c
new file mode 100644
index 0000000000000000000000000000000000000000..0dbd1dbc09f2f69425405863bfe1080e3ca2b3f5
--- /dev/null
+++ b/deps/SZ/zlib/inffast.c
@@ -0,0 +1,323 @@
+/* inffast.c -- fast decoding
+ * Copyright (C) 1995-2017 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#include "zutil.h"
+#include "inftrees.h"
+#include "inflate.h"
+#include "inffast.h"
+
+#ifdef ASMINF
+#  pragma message("Assembler code may have bugs -- use at your own risk")
+#else
+
+/*
+   Decode literal, length, and distance codes and write out the resulting
+   literal
and match bytes until either not enough input or output is + available, an end-of-block is encountered, or a data error is encountered. + When large enough input and output buffers are supplied to inflate(), for + example, a 16K input buffer and a 64K output buffer, more than 95% of the + inflate execution time is spent in this routine. + + Entry assumptions: + + state->mode == LEN + strm->avail_in >= 6 + strm->avail_out >= 258 + start >= strm->avail_out + state->bits < 8 + + On return, state->mode is one of: + + LEN -- ran out of enough output space or enough available input + TYPE -- reached end of block code, inflate() to interpret next block + BAD -- error in block data + + Notes: + + - The maximum input bits used by a length/distance pair is 15 bits for the + length code, 5 bits for the length extra, 15 bits for the distance code, + and 13 bits for the distance extra. This totals 48 bits, or six bytes. + Therefore if strm->avail_in >= 6, then there is enough input to avoid + checking for available input while decoding. + + - The maximum bytes that a single length/distance pair can output is 258 + bytes, which is the maximum length that can be coded. inflate_fast() + requires strm->avail_out >= 258 for each loop to avoid checking for + output space. 
+ */
+void ZLIB_INTERNAL inflate_fast(strm, start)
+z_streamp strm;         /* stream to decode; strm->state is the inflate_state */
+unsigned start;         /* inflate()'s starting value for strm->avail_out */
+{
+    struct inflate_state FAR *state;
+    z_const unsigned char FAR *in;      /* local strm->next_in */
+    z_const unsigned char FAR *last;    /* have enough input while in < last */
+    unsigned char FAR *out;     /* local strm->next_out */
+    unsigned char FAR *beg;     /* inflate()'s initial strm->next_out */
+    unsigned char FAR *end;     /* while out < end, enough space available */
+#ifdef INFLATE_STRICT
+    unsigned dmax;              /* maximum distance from zlib header */
+#endif
+    unsigned wsize;             /* window size or zero if not using window */
+    unsigned whave;             /* valid bytes in the window */
+    unsigned wnext;             /* window write index */
+    unsigned char FAR *window;  /* allocated sliding window, if wsize != 0 */
+    unsigned long hold;         /* local state->hold (bit accumulator) */
+    unsigned bits;              /* local state->bits (bits held in hold) */
+    code const FAR *lcode;      /* local state->lencode */
+    code const FAR *dcode;      /* local state->distcode */
+    unsigned lmask;             /* mask for first level of length codes */
+    unsigned dmask;             /* mask for first level of distance codes */
+    code here;                  /* retrieved table entry */
+    unsigned op;                /* code bits, operation, extra bits, or */
+                                /*  window position, window bytes to copy */
+    unsigned len;               /* match length, unused bytes */
+    unsigned dist;              /* match distance */
+    unsigned char FAR *from;    /* where to copy match from */
+
+    /* copy state to local variables */
+    state = (struct inflate_state FAR *)strm->state;
+    in = strm->next_in;
+    last = in + (strm->avail_in - 5);   /* in < last means six or more input
+                                           bytes are still unread */
+    out = strm->next_out;
+    beg = out - (start - strm->avail_out);  /* step back over the output
+                                               already produced since start */
+    end = out + (strm->avail_out - 257);    /* out < end means 258 or more
+                                               output bytes are still free */
+#ifdef INFLATE_STRICT
+    dmax = state->dmax;
+#endif
+    wsize = state->wsize;
+    whave = state->whave;
+    wnext = state->wnext;
+    window = state->window;
+    hold = state->hold;
+    bits = state->bits;
+    lcode = state->lencode;
+    dcode = state->distcode;
+    lmask = (1U << state->lenbits) - 1;
+    dmask = (1U << state->distbits) - 1;
+
+    /* decode literals and length/distances until end-of-block or not enough
+       input data or output space */
+    do {
+        if (bits < 15) {
+            hold += (unsigned long)(*in++) << bits;
+            bits += 8;
+            hold += (unsigned long)(*in++) << bits;
+            bits += 8;
+        }
+        here = lcode[hold & lmask];
+      dolen:
+        op = (unsigned)(here.bits);
+        hold >>= op;
+        bits -= op;
+        op = (unsigned)(here.op);
+        if (op == 0) {                          /* literal */
+            Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ?
+                    "inflate: literal '%c'\n" :
+                    "inflate: literal 0x%02x\n", here.val));
+            *out++ = (unsigned char)(here.val);
+        }
+        else if (op & 16) {                     /* length base */
+            len = (unsigned)(here.val);
+            op &= 15;                           /* number of extra bits */
+            if (op) {
+                if (bits < op) {
+                    hold += (unsigned long)(*in++) << bits;
+                    bits += 8;
+                }
+                len += (unsigned)hold & ((1U << op) - 1);
+                hold >>= op;
+                bits -= op;
+            }
+            Tracevv((stderr, "inflate: length %u\n", len));
+            if (bits < 15) {
+                hold += (unsigned long)(*in++) << bits;
+                bits += 8;
+                hold += (unsigned long)(*in++) << bits;
+                bits += 8;
+            }
+            here = dcode[hold & dmask];
+          dodist:
+            op = (unsigned)(here.bits);
+            hold >>= op;
+            bits -= op;
+            op = (unsigned)(here.op);
+            if (op & 16) {                      /* distance base */
+                dist = (unsigned)(here.val);
+                op &= 15;                       /* number of extra bits */
+                if (bits < op) {
+                    hold += (unsigned long)(*in++) << bits;
+                    bits += 8;
+                    if (bits < op) {
+                        hold += (unsigned long)(*in++) << bits;
+                        bits += 8;
+                    }
+                }
+                dist += (unsigned)hold & ((1U << op) - 1);
+#ifdef INFLATE_STRICT
+                if (dist > dmax) {
+                    strm->msg = (char *)"invalid distance too far back";
+                    state->mode = BAD;
+                    break;
+                }
+#endif
+                hold >>= op;
+                bits -= op;
+                Tracevv((stderr, "inflate: distance %u\n", dist));
+                op = (unsigned)(out - beg);     /* max distance in output */
+                if (dist > op) {                /* see if copy from window */
+                    op = dist - op;             /* distance back in window */
+                    if (op > whave) {
+                        if (state->sane) {
+                            strm->msg =
+                                (char *)"invalid distance too far back";
+                            state->mode = BAD;
+                            break;
+                        }
+#ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR
+                        if (len <= op - whave) {
+                            do {
+                                *out++ = 0;
+                            } while (--len);
+                            continue;
+                        }
+                        len -= op - whave;
+                        do {
+                            *out++ = 0;
+                        } while (--op > whave);
+                        if (op == 0) {
+                            from = out - dist;
+                            do {
+                                *out++ = *from++;
+                            } while (--len);
+                            continue;
+                        }
+#endif
+                    }
+                    from = window;
+                    if (wnext == 0) {           /* very common case */
+                        from += wsize - op;
+                        if (op < len) {         /* some from window */
+                            len -= op;
+                            do {
+                                *out++ = *from++;
+                            } while (--op);
+                            from = out - dist;  /* rest from output */
+                        }
+                    }
+                    else if (wnext < op) {      /* wrap around window */
+                        from += wsize + wnext - op;
+                        op -= wnext;
+                        if (op < len) {         /* some from end of window */
+                            len -= op;
+                            do {
+                                *out++ = *from++;
+                            } while (--op);
+                            from = window;
+                            if (wnext < len) {  /* some from start of window */
+                                op = wnext;
+                                len -= op;
+                                do {
+                                    *out++ = *from++;
+                                } while (--op);
+                                from = out - dist;      /* rest from output */
+                            }
+                        }
+                    }
+                    else {                      /* contiguous in window */
+                        from += wnext - op;
+                        if (op < len) {         /* some from window */
+                            len -= op;
+                            do {
+                                *out++ = *from++;
+                            } while (--op);
+                            from = out - dist;  /* rest from output */
+                        }
+                    }
+                    while (len > 2) {
+                        *out++ = *from++;
+                        *out++ = *from++;
+                        *out++ = *from++;
+                        len -= 3;
+                    }
+                    if (len) {
+                        *out++ = *from++;
+                        if (len > 1)
+                            *out++ = *from++;
+                    }
+                }
+                else {
+                    from = out - dist;          /* copy direct from output */
+                    do {                        /* minimum length is three */
+                        *out++ = *from++;
+                        *out++ = *from++;
+                        *out++ = *from++;
+                        len -= 3;
+                    } while (len > 2);
+                    if (len) {
+                        *out++ = *from++;
+                        if (len > 1)
+                            *out++ = *from++;
+                    }
+                }
+            }
+            else if ((op & 64) == 0) {          /* 2nd level distance code */
+                here = dcode[here.val + (hold & ((1U << op) - 1))];
+                goto dodist;
+            }
+            else {
+                strm->msg = (char *)"invalid distance code";
+                state->mode = BAD;
+                break;
+            }
+        }
+        else if ((op & 64) == 0) {              /* 2nd level length code */
+            here = lcode[here.val + (hold & ((1U << op) - 1))];
+            goto dolen;
+        }
+        else if (op & 32) {                     /* end-of-block */
+            Tracevv((stderr, "inflate: end of block\n"));
+            state->mode = TYPE;
+            break;
+        }
+        else {
+            strm->msg = (char *)"invalid literal/length code";
+            state->mode = BAD;
+            break;
+        }
+    } while (in < last && out < end);
+
+    /* return unused bytes (on entry, bits < 8, so in won't go too far back) */
+    len = bits >> 3;
+    in -= len;
+    bits -= len << 3;
+    hold &= (1U << bits) - 1;   /* keep only the bits that are still held */
+
+    /* update state and return */
+    strm->next_in = in;
+    strm->next_out = out;
+    strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last));
+                                /* counts are rebuilt from the sentinel
+                                   pointers; each branch subtracts the smaller
+                                   pointer from the larger one */
+    strm->avail_out = (unsigned)(out < end ?
+                                 257 + (end - out) : 257 - (out - end));
+    state->hold = hold;
+    state->bits = bits;
+    return;
+}
+
+/*
+   inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe):
+   - Using bit fields for code structure
+   - Different op definition to avoid & for extra bits (do & for table bits)
+   - Three separate decoding do-loops for direct, window, and wnext == 0
+   - Special case for distance > 1 copies to do overlapped load and store copy
+   - Explicit branch predictions (based on measured branch probabilities)
+   - Deferring match copy and interspersed it with decoding subsequent codes
+   - Swapping literal/length else
+   - Swapping window/direct else
+   - Larger unrolled copy loops (three is about right)
+   - Moving len -= 3 statement into middle of loop
+ */
+
+#endif /* !ASMINF */
diff --git a/deps/SZ/zlib/inffast.h b/deps/SZ/zlib/inffast.h
new file mode 100644
index 0000000000000000000000000000000000000000..e5c1aa4ca8cd5244423680865609c71ab68f9ab6
--- /dev/null
+++ b/deps/SZ/zlib/inffast.h
@@ -0,0 +1,11 @@
+/* inffast.h -- header to use inffast.c
+ * Copyright (C) 1995-2003, 2010 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* WARNING: this file should *not* be used by applications. It is
+   part of the implementation of the compression library and is
+   subject to change. Applications should only use zlib.h.
+ */ + +void ZLIB_INTERNAL inflate_fast OF((z_streamp strm, unsigned start)); diff --git a/deps/SZ/zlib/inffixed.h b/deps/SZ/zlib/inffixed.h new file mode 100644 index 0000000000000000000000000000000000000000..d6283277694802ce7938f537f12990d6eead4924 --- /dev/null +++ b/deps/SZ/zlib/inffixed.h @@ -0,0 +1,94 @@ + /* inffixed.h -- table for decoding fixed codes + * Generated automatically by makefixed(). + */ + + /* WARNING: this file should *not* be used by applications. + It is part of the implementation of this library and is + subject to change. Applications should only use zlib.h. + */ + + static const code lenfix[512] = { + {96,7,0},{0,8,80},{0,8,16},{20,8,115},{18,7,31},{0,8,112},{0,8,48}, + {0,9,192},{16,7,10},{0,8,96},{0,8,32},{0,9,160},{0,8,0},{0,8,128}, + {0,8,64},{0,9,224},{16,7,6},{0,8,88},{0,8,24},{0,9,144},{19,7,59}, + {0,8,120},{0,8,56},{0,9,208},{17,7,17},{0,8,104},{0,8,40},{0,9,176}, + {0,8,8},{0,8,136},{0,8,72},{0,9,240},{16,7,4},{0,8,84},{0,8,20}, + {21,8,227},{19,7,43},{0,8,116},{0,8,52},{0,9,200},{17,7,13},{0,8,100}, + {0,8,36},{0,9,168},{0,8,4},{0,8,132},{0,8,68},{0,9,232},{16,7,8}, + {0,8,92},{0,8,28},{0,9,152},{20,7,83},{0,8,124},{0,8,60},{0,9,216}, + {18,7,23},{0,8,108},{0,8,44},{0,9,184},{0,8,12},{0,8,140},{0,8,76}, + {0,9,248},{16,7,3},{0,8,82},{0,8,18},{21,8,163},{19,7,35},{0,8,114}, + {0,8,50},{0,9,196},{17,7,11},{0,8,98},{0,8,34},{0,9,164},{0,8,2}, + {0,8,130},{0,8,66},{0,9,228},{16,7,7},{0,8,90},{0,8,26},{0,9,148}, + {20,7,67},{0,8,122},{0,8,58},{0,9,212},{18,7,19},{0,8,106},{0,8,42}, + {0,9,180},{0,8,10},{0,8,138},{0,8,74},{0,9,244},{16,7,5},{0,8,86}, + {0,8,22},{64,8,0},{19,7,51},{0,8,118},{0,8,54},{0,9,204},{17,7,15}, + {0,8,102},{0,8,38},{0,9,172},{0,8,6},{0,8,134},{0,8,70},{0,9,236}, + {16,7,9},{0,8,94},{0,8,30},{0,9,156},{20,7,99},{0,8,126},{0,8,62}, + {0,9,220},{18,7,27},{0,8,110},{0,8,46},{0,9,188},{0,8,14},{0,8,142}, + {0,8,78},{0,9,252},{96,7,0},{0,8,81},{0,8,17},{21,8,131},{18,7,31}, + 
{0,8,113},{0,8,49},{0,9,194},{16,7,10},{0,8,97},{0,8,33},{0,9,162}, + {0,8,1},{0,8,129},{0,8,65},{0,9,226},{16,7,6},{0,8,89},{0,8,25}, + {0,9,146},{19,7,59},{0,8,121},{0,8,57},{0,9,210},{17,7,17},{0,8,105}, + {0,8,41},{0,9,178},{0,8,9},{0,8,137},{0,8,73},{0,9,242},{16,7,4}, + {0,8,85},{0,8,21},{16,8,258},{19,7,43},{0,8,117},{0,8,53},{0,9,202}, + {17,7,13},{0,8,101},{0,8,37},{0,9,170},{0,8,5},{0,8,133},{0,8,69}, + {0,9,234},{16,7,8},{0,8,93},{0,8,29},{0,9,154},{20,7,83},{0,8,125}, + {0,8,61},{0,9,218},{18,7,23},{0,8,109},{0,8,45},{0,9,186},{0,8,13}, + {0,8,141},{0,8,77},{0,9,250},{16,7,3},{0,8,83},{0,8,19},{21,8,195}, + {19,7,35},{0,8,115},{0,8,51},{0,9,198},{17,7,11},{0,8,99},{0,8,35}, + {0,9,166},{0,8,3},{0,8,131},{0,8,67},{0,9,230},{16,7,7},{0,8,91}, + {0,8,27},{0,9,150},{20,7,67},{0,8,123},{0,8,59},{0,9,214},{18,7,19}, + {0,8,107},{0,8,43},{0,9,182},{0,8,11},{0,8,139},{0,8,75},{0,9,246}, + {16,7,5},{0,8,87},{0,8,23},{64,8,0},{19,7,51},{0,8,119},{0,8,55}, + {0,9,206},{17,7,15},{0,8,103},{0,8,39},{0,9,174},{0,8,7},{0,8,135}, + {0,8,71},{0,9,238},{16,7,9},{0,8,95},{0,8,31},{0,9,158},{20,7,99}, + {0,8,127},{0,8,63},{0,9,222},{18,7,27},{0,8,111},{0,8,47},{0,9,190}, + {0,8,15},{0,8,143},{0,8,79},{0,9,254},{96,7,0},{0,8,80},{0,8,16}, + {20,8,115},{18,7,31},{0,8,112},{0,8,48},{0,9,193},{16,7,10},{0,8,96}, + {0,8,32},{0,9,161},{0,8,0},{0,8,128},{0,8,64},{0,9,225},{16,7,6}, + {0,8,88},{0,8,24},{0,9,145},{19,7,59},{0,8,120},{0,8,56},{0,9,209}, + {17,7,17},{0,8,104},{0,8,40},{0,9,177},{0,8,8},{0,8,136},{0,8,72}, + {0,9,241},{16,7,4},{0,8,84},{0,8,20},{21,8,227},{19,7,43},{0,8,116}, + {0,8,52},{0,9,201},{17,7,13},{0,8,100},{0,8,36},{0,9,169},{0,8,4}, + {0,8,132},{0,8,68},{0,9,233},{16,7,8},{0,8,92},{0,8,28},{0,9,153}, + {20,7,83},{0,8,124},{0,8,60},{0,9,217},{18,7,23},{0,8,108},{0,8,44}, + {0,9,185},{0,8,12},{0,8,140},{0,8,76},{0,9,249},{16,7,3},{0,8,82}, + {0,8,18},{21,8,163},{19,7,35},{0,8,114},{0,8,50},{0,9,197},{17,7,11}, + 
{0,8,98},{0,8,34},{0,9,165},{0,8,2},{0,8,130},{0,8,66},{0,9,229}, + {16,7,7},{0,8,90},{0,8,26},{0,9,149},{20,7,67},{0,8,122},{0,8,58}, + {0,9,213},{18,7,19},{0,8,106},{0,8,42},{0,9,181},{0,8,10},{0,8,138}, + {0,8,74},{0,9,245},{16,7,5},{0,8,86},{0,8,22},{64,8,0},{19,7,51}, + {0,8,118},{0,8,54},{0,9,205},{17,7,15},{0,8,102},{0,8,38},{0,9,173}, + {0,8,6},{0,8,134},{0,8,70},{0,9,237},{16,7,9},{0,8,94},{0,8,30}, + {0,9,157},{20,7,99},{0,8,126},{0,8,62},{0,9,221},{18,7,27},{0,8,110}, + {0,8,46},{0,9,189},{0,8,14},{0,8,142},{0,8,78},{0,9,253},{96,7,0}, + {0,8,81},{0,8,17},{21,8,131},{18,7,31},{0,8,113},{0,8,49},{0,9,195}, + {16,7,10},{0,8,97},{0,8,33},{0,9,163},{0,8,1},{0,8,129},{0,8,65}, + {0,9,227},{16,7,6},{0,8,89},{0,8,25},{0,9,147},{19,7,59},{0,8,121}, + {0,8,57},{0,9,211},{17,7,17},{0,8,105},{0,8,41},{0,9,179},{0,8,9}, + {0,8,137},{0,8,73},{0,9,243},{16,7,4},{0,8,85},{0,8,21},{16,8,258}, + {19,7,43},{0,8,117},{0,8,53},{0,9,203},{17,7,13},{0,8,101},{0,8,37}, + {0,9,171},{0,8,5},{0,8,133},{0,8,69},{0,9,235},{16,7,8},{0,8,93}, + {0,8,29},{0,9,155},{20,7,83},{0,8,125},{0,8,61},{0,9,219},{18,7,23}, + {0,8,109},{0,8,45},{0,9,187},{0,8,13},{0,8,141},{0,8,77},{0,9,251}, + {16,7,3},{0,8,83},{0,8,19},{21,8,195},{19,7,35},{0,8,115},{0,8,51}, + {0,9,199},{17,7,11},{0,8,99},{0,8,35},{0,9,167},{0,8,3},{0,8,131}, + {0,8,67},{0,9,231},{16,7,7},{0,8,91},{0,8,27},{0,9,151},{20,7,67}, + {0,8,123},{0,8,59},{0,9,215},{18,7,19},{0,8,107},{0,8,43},{0,9,183}, + {0,8,11},{0,8,139},{0,8,75},{0,9,247},{16,7,5},{0,8,87},{0,8,23}, + {64,8,0},{19,7,51},{0,8,119},{0,8,55},{0,9,207},{17,7,15},{0,8,103}, + {0,8,39},{0,9,175},{0,8,7},{0,8,135},{0,8,71},{0,9,239},{16,7,9}, + {0,8,95},{0,8,31},{0,9,159},{20,7,99},{0,8,127},{0,8,63},{0,9,223}, + {18,7,27},{0,8,111},{0,8,47},{0,9,191},{0,8,15},{0,8,143},{0,8,79}, + {0,9,255} + }; + + static const code distfix[32] = { + {16,5,1},{23,5,257},{19,5,17},{27,5,4097},{17,5,5},{25,5,1025}, + {21,5,65},{29,5,16385},{16,5,3},{24,5,513},{20,5,33},{28,5,8193}, + 
{18,5,9},{26,5,2049},{22,5,129},{64,5,0},{16,5,2},{23,5,385}, + {19,5,25},{27,5,6145},{17,5,7},{25,5,1537},{21,5,97},{29,5,24577}, + {16,5,4},{24,5,769},{20,5,49},{28,5,12289},{18,5,13},{26,5,3073}, + {22,5,193},{64,5,0} + }; diff --git a/deps/SZ/zlib/inflate.c b/deps/SZ/zlib/inflate.c new file mode 100644 index 0000000000000000000000000000000000000000..ac333e8c2edae90ec1145d06d9852002dd5d0617 --- /dev/null +++ b/deps/SZ/zlib/inflate.c @@ -0,0 +1,1561 @@ +/* inflate.c -- zlib decompression + * Copyright (C) 1995-2016 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* + * Change history: + * + * 1.2.beta0 24 Nov 2002 + * - First version -- complete rewrite of inflate to simplify code, avoid + * creation of window when not needed, minimize use of window when it is + * needed, make inffast.c even faster, implement gzip decoding, and to + * improve code readability and style over the previous zlib inflate code + * + * 1.2.beta1 25 Nov 2002 + * - Use pointers for available input and output checking in inffast.c + * - Remove input and output counters in inffast.c + * - Change inffast.c entry and loop from avail_in >= 7 to >= 6 + * - Remove unnecessary second byte pull from length extra in inffast.c + * - Unroll direct copy to three copies per loop in inffast.c + * + * 1.2.beta2 4 Dec 2002 + * - Change external routine names to reduce potential conflicts + * - Correct filename to inffixed.h for fixed tables in inflate.c + * - Make hbuf[] unsigned char to match parameter type in inflate.c + * - Change strm->next_out[-state->offset] to *(strm->next_out - state->offset) + * to avoid negation problem on Alphas (64 bit) in inflate.c + * + * 1.2.beta3 22 Dec 2002 + * - Add comments on state->bits assertion in inffast.c + * - Add comments on op field in inftrees.h + * - Fix bug in reuse of allocated window after inflateReset() + * - Remove bit fields--back to byte structure for speed + * - Remove distance extra == 0 check in 
inflate_fast()--only helps for lengths + * - Change post-increments to pre-increments in inflate_fast(), PPC biased? + * - Add compile time option, POSTINC, to use post-increments instead (Intel?) + * - Make MATCH copy in inflate() much faster for when inflate_fast() not used + * - Use local copies of stream next and avail values, as well as local bit + * buffer and bit count in inflate()--for speed when inflate_fast() not used + * + * 1.2.beta4 1 Jan 2003 + * - Split ptr - 257 statements in inflate_table() to avoid compiler warnings + * - Move a comment on output buffer sizes from inffast.c to inflate.c + * - Add comments in inffast.c to introduce the inflate_fast() routine + * - Rearrange window copies in inflate_fast() for speed and simplification + * - Unroll last copy for window match in inflate_fast() + * - Use local copies of window variables in inflate_fast() for speed + * - Pull out common wnext == 0 case for speed in inflate_fast() + * - Make op and len in inflate_fast() unsigned for consistency + * - Add FAR to lcode and dcode declarations in inflate_fast() + * - Simplified bad distance check in inflate_fast() + * - Added inflateBackInit(), inflateBack(), and inflateBackEnd() in new + * source file infback.c to provide a call-back interface to inflate for + * programs like gzip and unzip -- uses window as output buffer to avoid + * window copying + * + * 1.2.beta5 1 Jan 2003 + * - Improved inflateBack() interface to allow the caller to provide initial + * input in strm. 
+ * - Fixed stored blocks bug in inflateBack()
+ *
+ * 1.2.beta6 4 Jan 2003
+ * - Added comments in inffast.c on effectiveness of POSTINC
+ * - Typecasting all around to reduce compiler warnings
+ * - Changed loops from while (1) or do {} while (1) to for (;;), again to
+ * make compilers happy
+ * - Changed type of window in inflateBackInit() to unsigned char *
+ *
+ * 1.2.beta7 27 Jan 2003
+ * - Changed many types to unsigned or unsigned short to avoid warnings
+ * - Added inflateCopy() function
+ *
+ * 1.2.0 9 Mar 2003
+ * - Changed inflateBack() interface to provide separate opaque descriptors
+ * for the in() and out() functions
+ * - Changed inflateBack() argument and in_func typedef to swap the length
+ * and buffer address return values for the input function
+ * - Check next_in and next_out for Z_NULL on entry to inflate()
+ *
+ * The history for versions after 1.2.0 are in ChangeLog in zlib distribution.
+ */
+
+#include "zutil.h"
+#include "inftrees.h"
+#include "inflate.h"
+#include "inffast.h"
+
+#ifdef MAKEFIXED
+#  ifndef BUILDFIXED
+#    define BUILDFIXED
+#  endif
+#endif
+
+/* function prototypes */
+local int inflateStateCheck OF((z_streamp strm));
+local void fixedtables OF((struct inflate_state FAR *state));
+local int updatewindow OF((z_streamp strm, const unsigned char FAR *end,
+                           unsigned copy));
+#ifdef BUILDFIXED
+   void makefixed OF((void));
+#endif
+local unsigned syncsearch OF((unsigned FAR *have, const unsigned char FAR *buf,
+                              unsigned len));
+/*
+   inflateStateCheck() -- sanity-check a stream before it is used.
+
+   Returns 1 (unusable) when strm is Z_NULL, when either allocator hook
+   (zalloc, zfree) is unset, when there is no state, when the state does not
+   point back at this very stream, or when state->mode lies outside the
+   HEAD..SYNC range.  Returns 0 when every check passes.
+ */
+local int inflateStateCheck(strm)
+z_streamp strm;
+{
+    struct inflate_state FAR *state;
+    if (strm == Z_NULL ||
+        strm->zalloc == (alloc_func)0 || strm->zfree == (free_func)0)
+        return 1;
+    state = (struct inflate_state FAR *)strm->state;
+    if (state == Z_NULL || state->strm != strm ||
+        state->mode < HEAD || state->mode > SYNC)
+        return 1;
+    return 0;
+}
+/*
+   inflateResetKeep() -- put the decoder back at the start of a stream while
+   leaving the window bookkeeping (wsize, whave, wnext) untouched.
+
+   Clears the byte totals and the message, returns the mode to HEAD, empties
+   the bit buffer and points the code tables back at state->codes.
+   Returns Z_STREAM_ERROR if inflateStateCheck() rejects strm, else Z_OK.
+ */
+int ZEXPORT inflateResetKeep(strm)
+z_streamp strm;
+{
+    struct inflate_state FAR *state;
+
+    if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
+    state = (struct inflate_state FAR *)strm->state;
+    strm->total_in = strm->total_out = state->total = 0;
+    strm->msg = Z_NULL;
+    if (state->wrap)        /* to support ill-conceived Java test suite */
+        strm->adler = state->wrap & 1;
+    state->mode = HEAD;
+    state->last = 0;
+    state->havedict = 0;
+    state->dmax = 32768U;
+    state->head = Z_NULL;
+    state->hold = 0;
+    state->bits = 0;
+    state->lencode = state->distcode = state->next = state->codes;
+    state->sane = 1;
+    state->back = -1;
+    Tracev((stderr, "inflate: reset\n"));
+    return Z_OK;
+}
+/*
+   inflateReset() -- full reset: zero the window bookkeeping (wsize, whave,
+   wnext) and then do everything inflateResetKeep() does.  The window buffer
+   itself is not freed here.
+   Returns Z_STREAM_ERROR if inflateStateCheck() rejects strm, otherwise the
+   result of inflateResetKeep().
+ */
+int ZEXPORT inflateReset(strm)
+z_streamp strm;
+{
+    struct inflate_state FAR *state;
+
+    if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
+    state = (struct inflate_state FAR *)strm->state;
+    state->wsize = 0;
+    state->whave = 0;
+    state->wnext = 0;
+    return inflateResetKeep(strm);
+}
+/*
+   inflateReset2() -- reset the stream and change its window size / wrapping.
+
+   windowBits < 0 selects wrap = 0 and uses the magnitude as the window bits.
+   Otherwise wrap is derived from the bits above the low four, and (GUNZIP
+   builds, values below 48) only the low four bits are kept as window bits.
+   The resulting window bits must be 0 or within 8..15, else Z_STREAM_ERROR.
+   An existing window is released when the window bits change, so a window of
+   the old size is never reused.  Finishes by calling inflateReset().
+ */
+int ZEXPORT inflateReset2(strm, windowBits)
+z_streamp strm;
+int windowBits;
+{
+    int wrap;
+    struct inflate_state FAR *state;
+
+    /* get the state */
+    if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
+    state = (struct inflate_state FAR *)strm->state;
+
+    /* extract wrap request from windowBits parameter */
+    if (windowBits < 0) {
+        wrap = 0;
+        windowBits = -windowBits;
+    }
+    else {
+        wrap = (windowBits >> 4) + 5;
+#ifdef GUNZIP
+        if (windowBits < 48)
+            windowBits &= 15;
+#endif
+    }
+
+    /* set number of window bits, free window if different */
+    if (windowBits && (windowBits < 8 || windowBits > 15))
+        return Z_STREAM_ERROR;
+    if (state->window != Z_NULL && state->wbits != (unsigned)windowBits) {
+        ZFREE(strm, state->window);
+        state->window = Z_NULL;
+    }
+
+    /* update state and reset the rest of it */
+    state->wrap = wrap;
+    state->wbits = (unsigned)windowBits;
+    return inflateReset(strm);
+}
+/*
+   inflateInit2_() -- allocate and initialise the inflate state for strm.
+
+   Returns Z_VERSION_ERROR when version is Z_NULL, when its first character
+   differs from ZLIB_VERSION[0], or when stream_size is not sizeof(z_stream);
+   Z_STREAM_ERROR when strm is Z_NULL (or, in Z_SOLO builds, when an allocator
+   hook is missing); Z_MEM_ERROR when the state cannot be allocated.
+   Outside Z_SOLO builds, missing hooks default to zcalloc / zcfree.
+   If inflateReset2() rejects windowBits, the freshly allocated state is freed
+   again and strm->state is left as Z_NULL.
+ */
+int ZEXPORT inflateInit2_(strm, windowBits, version, stream_size)
+z_streamp strm;
+int windowBits;
+const char *version;
+int stream_size;
+{
+    int ret;
+    struct inflate_state FAR *state;
+
+    if (version == Z_NULL || version[0] != ZLIB_VERSION[0] ||
+        stream_size != (int)(sizeof(z_stream)))
+        return Z_VERSION_ERROR;
+    if (strm == Z_NULL) return Z_STREAM_ERROR;
+    strm->msg = Z_NULL;                 /* in case we return an error */
+    if (strm->zalloc == (alloc_func)0) {
+#ifdef Z_SOLO
+        return Z_STREAM_ERROR;
+#else
+        strm->zalloc = zcalloc;
+        strm->opaque = (voidpf)0;
+#endif
+    }
+    if (strm->zfree == (free_func)0)
+#ifdef Z_SOLO
+        return Z_STREAM_ERROR;
+#else
+        strm->zfree = zcfree;
+#endif
+    state = (struct inflate_state FAR *)
+            ZALLOC(strm, 1, sizeof(struct inflate_state));
+    if (state == Z_NULL) return Z_MEM_ERROR;
+    Tracev((stderr, "inflate: allocated\n"));
+    strm->state = (struct internal_state FAR *)state;
+    state->strm = strm;
+    state->window = Z_NULL;
+    state->mode = HEAD;     /* to pass state test in inflateReset2() */
+    ret = inflateReset2(strm, windowBits);
+    if (ret != Z_OK) {
+        ZFREE(strm, state);
+        strm->state = Z_NULL;
+    }
+    return ret;
+}
+/*
+   inflateInit_() -- inflateInit2_() with the default window bits DEF_WBITS.
+ */
+int ZEXPORT inflateInit_(strm, version, stream_size)
+z_streamp strm;
+const char *version;
+int stream_size;
+{
+    return inflateInit2_(strm, DEF_WBITS, version, stream_size);
+}
+/*
+   inflatePrime() -- push bits into the bit buffer ahead of the input.
+
+   bits < 0 simply empties the bit buffer.  Otherwise the low `bits` bits of
+   value are placed above the bits already held.  Returns Z_STREAM_ERROR when
+   the stream fails inflateStateCheck(), when bits exceeds 16, or when the
+   buffer would end up holding more than 32 bits; Z_OK otherwise.
+ */
+int ZEXPORT inflatePrime(strm, bits, value)
+z_streamp strm;
+int bits;
+int value;
+{
+    struct inflate_state FAR *state;
+
+    if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
+    state = (struct inflate_state FAR *)strm->state;
+    if (bits < 0) {
+        state->hold = 0;
+        state->bits = 0;
+        return Z_OK;
+    }
+    if (bits > 16 || state->bits + (uInt)bits > 32) return Z_STREAM_ERROR;
+    value &= (1L << bits) - 1;
+    state->hold += (unsigned)value << state->bits;
+    state->bits += (uInt)bits;
+    return Z_OK;
+}
+
+/*
+ Return state with length and distance decoding tables and index sizes set to
+ fixed code decoding. Normally this returns fixed tables from inffixed.h.
+ If BUILDFIXED is defined, then instead this routine builds the tables the
+ first time it's called, and returns those tables the first time and
+ thereafter.
This reduces the size of the code by about 2K bytes, in + exchange for a little execution time. However, BUILDFIXED should not be + used for threaded applications, since the rewriting of the tables and virgin + may not be thread-safe. + */ +local void fixedtables(state) +struct inflate_state FAR *state; +{ +#ifdef BUILDFIXED + static int virgin = 1; + static code *lenfix, *distfix; + static code fixed[544]; + + /* build fixed huffman tables if first call (may not be thread safe) */ + if (virgin) { + unsigned sym, bits; + static code *next; + + /* literal/length table */ + sym = 0; + while (sym < 144) state->lens[sym++] = 8; + while (sym < 256) state->lens[sym++] = 9; + while (sym < 280) state->lens[sym++] = 7; + while (sym < 288) state->lens[sym++] = 8; + next = fixed; + lenfix = next; + bits = 9; + inflate_table(LENS, state->lens, 288, &(next), &(bits), state->work); + + /* distance table */ + sym = 0; + while (sym < 32) state->lens[sym++] = 5; + distfix = next; + bits = 5; + inflate_table(DISTS, state->lens, 32, &(next), &(bits), state->work); + + /* do this just once */ + virgin = 0; + } +#else /* !BUILDFIXED */ +# include "inffixed.h" +#endif /* BUILDFIXED */ + state->lencode = lenfix; + state->lenbits = 9; + state->distcode = distfix; + state->distbits = 5; +} + +#ifdef MAKEFIXED +#include + +/* + Write out the inffixed.h that is #include'd above. Defining MAKEFIXED also + defines BUILDFIXED, so the tables are built on the fly. makefixed() writes + those tables to stdout, which would be piped to inffixed.h. 
A small program
+ can simply call makefixed to do this:
+
+ void makefixed(void);
+
+ int main(void)
+ {
+ makefixed();
+ return 0;
+ }
+
+ Then that can be linked with zlib built with MAKEFIXED defined and run:
+
+ a.out > inffixed.h
+ */
+void makefixed()
+{
+    unsigned low, size;
+    struct inflate_state state;
+
+    fixedtables(&state);    /* sets state.lencode and state.distcode */
+    /* NOTE(review): the leading spaces inside the string literals below look
+       collapsed to a single space; compare with upstream zlib before using
+       this routine to regenerate inffixed.h. */
+    puts(" /* inffixed.h -- table for decoding fixed codes");
+    puts(" * Generated automatically by makefixed().");
+    puts(" */");
+    puts("");
+    puts(" /* WARNING: this file should *not* be used by applications.");
+    puts(" It is part of the implementation of this library and is");
+    puts(" subject to change. Applications should only use zlib.h.");
+    puts(" */");
+    puts("");
+    size = 1U << 9;         /* 512 length/literal entries (9 root bits) */
+    printf(" static const code lenfix[%u] = {", size);
+    low = 0;
+    for (;;) {
+        if ((low % 7) == 0) printf("\n ");     /* seven entries per line */
+        printf("{%u,%u,%d}", (low & 127) == 99 ? 64 : state.lencode[low].op,
+               /* entries whose low seven bits equal 99 are written with op 64
+                  in place of the computed op; inflate_fast() reports an op
+                  with bit 64 set and bit 32 clear as an invalid
+                  literal/length code */
+               state.lencode[low].bits, state.lencode[low].val);
+        if (++low == size) break;               /* no comma after the last */
+        putchar(',');
+    }
+    puts("\n };");
+    size = 1U << 5;         /* 32 distance entries (5 root bits) */
+    printf("\n static const code distfix[%u] = {", size);
+    low = 0;
+    for (;;) {
+        if ((low % 6) == 0) printf("\n ");     /* six entries per line */
+        printf("{%u,%u,%d}", state.distcode[low].op, state.distcode[low].bits,
+               state.distcode[low].val);
+        if (++low == size) break;
+        putchar(',');
+    }
+    puts("\n };");
+}
+#endif /* MAKEFIXED */
+
+/*
+ Update the window with the last wsize (normally 32K) bytes written before
+ returning. If window does not exist yet, create it. This is only called
+ when a window is already in use, or when output has been written during this
+ inflate call, but the end of the deflate stream has not been reached yet.
+ It is also called to create a window for dictionary data when a dictionary
+ is loaded.
+
+ Providing output buffers larger than 32K to inflate() should provide a speed
+ advantage, since only the last 32K of output is copied to the sliding window
+ upon return from inflate(), and since all distances after the first 32K of
+ output will fall in the output data, making match copies simpler and faster.
+ The advantage may be dependent on the size of the processor's data caches.
+ */
+local int updatewindow(strm, end, copy)
+z_streamp strm;         /* stream whose state owns the sliding window */
+const Bytef *end;       /* one past the newest byte; data is read backwards
+                           from here (end - copy, end - wsize) */
+unsigned copy;          /* number of bytes ending at end to put in the window */
+{
+    struct inflate_state FAR *state;
+    unsigned dist;
+
+    state = (struct inflate_state FAR *)strm->state;
+
+    /* if it hasn't been done already, allocate space for the window */
+    if (state->window == Z_NULL) {
+        state->window = (unsigned char FAR *)
+                        ZALLOC(strm, 1U << state->wbits,
+                               sizeof(unsigned char));
+        if (state->window == Z_NULL) return 1;  /* allocation failed */
+    }
+
+    /* if window not in use yet, initialize */
+    if (state->wsize == 0) {
+        state->wsize = 1U << state->wbits;
+        state->wnext = 0;
+        state->whave = 0;
+    }
+
+    /* copy state->wsize or less output bytes into the circular window */
+    if (copy >= state->wsize) {
+        /* only the newest wsize bytes matter; they fill the whole window */
+        zmemcpy(state->window, end - state->wsize, state->wsize);
+        state->wnext = 0;
+        state->whave = state->wsize;
+    }
+    else {
+        dist = state->wsize - state->wnext;     /* room before the end of the
+                                                   window buffer */
+        if (dist > copy) dist = copy;
+        zmemcpy(state->window + state->wnext, end - copy, dist);
+        copy -= dist;
+        if (copy) {             /* remainder wraps to the start of the window */
+            zmemcpy(state->window, end - copy, copy);
+            state->wnext = copy;
+            state->whave = state->wsize;
+        }
+        else {
+            state->wnext += dist;
+            if (state->wnext == state->wsize) state->wnext = 0;
+            if (state->whave < state->wsize) state->whave += dist;
+        }
+    }
+    return 0;                   /* window is up to date */
+}
+
+/* Macros for inflate(): they name inflate()'s local variables directly
+   (strm, state, put, left, next, have, hold, bits and, in GUNZIP builds,
+   hbuf), and PULLBYTE() jumps to the inf_leave label, so they can only be
+   expanded where those names are in scope. */
+
+/* check function to use adler32() for zlib or crc32() for gzip */
+#ifdef GUNZIP
+#  define UPDATE(check, buf, len) \
+    (state->flags ? crc32(check, buf, len) : adler32(check, buf, len))
+#else
+#  define UPDATE(check, buf, len) adler32(check, buf, len)
+#endif
+
+/* check macros for header crc: feed the low two (CRC2) or four (CRC4) bytes
+   of word, least significant byte first, into the running crc32 in check */
+#ifdef GUNZIP
+#  define CRC2(check, word) \
+    do { \
+        hbuf[0] = (unsigned char)(word); \
+        hbuf[1] = (unsigned char)((word) >> 8); \
+        check = crc32(check, hbuf, 2); \
+    } while (0)
+
+#  define CRC4(check, word) \
+    do { \
+        hbuf[0] = (unsigned char)(word); \
+        hbuf[1] = (unsigned char)((word) >> 8); \
+        hbuf[2] = (unsigned char)((word) >> 16); \
+        hbuf[3] = (unsigned char)((word) >> 24); \
+        check = crc32(check, hbuf, 4); \
+    } while (0)
+#endif
+
+/* Load registers with state in inflate() for speed */
+#define LOAD() \
+    do { \
+        put = strm->next_out; \
+        left = strm->avail_out; \
+        next = strm->next_in; \
+        have = strm->avail_in; \
+        hold = state->hold; \
+        bits = state->bits; \
+    } while (0)
+
+/* Restore state from registers in inflate() */
+#define RESTORE() \
+    do { \
+        strm->next_out = put; \
+        strm->avail_out = left; \
+        strm->next_in = next; \
+        strm->avail_in = have; \
+        state->hold = hold; \
+        state->bits = bits; \
+    } while (0)
+
+/* Clear the input bit accumulator */
+#define INITBITS() \
+    do { \
+        hold = 0; \
+        bits = 0; \
+    } while (0)
+
+/* Get a byte of input into the bit accumulator, or return from inflate()
+   (by jumping to inf_leave) if there is no input available. */
+#define PULLBYTE() \
+    do { \
+        if (have == 0) goto inf_leave; \
+        have--; \
+        hold += (unsigned long)(*next++) << bits; \
+        bits += 8; \
+    } while (0)
+
+/* Assure that there are at least n bits in the bit accumulator.  If there is
+   not enough available input to do that, then return from inflate(). */
+#define NEEDBITS(n) \
+    do { \
+        while (bits < (unsigned)(n)) \
+            PULLBYTE(); \
+    } while (0)
+
+/* Return the low n bits of the bit accumulator (n < 16) */
+#define BITS(n) \
+    ((unsigned)hold & ((1U << (n)) - 1))
+
+/* Remove n bits from the bit accumulator */
+#define DROPBITS(n) \
+    do { \
+        hold >>= (n); \
+        bits -= (unsigned)(n); \
+    } while (0)
+
+/* Remove zero to seven bits as needed to go to a byte boundary */
+#define BYTEBITS() \
+    do { \
+        hold >>= bits & 7; \
+        bits -= bits & 7; \
+    } while (0)
+
+/*
+ inflate() uses a state machine to process as much input data and generate as
+ much output data as possible before returning. The state machine is
+ structured roughly as follows:
+
+ for (;;) switch (state) {
+ ...
+ case STATEn:
+ if (not enough input data or output space to make progress)
+ return;
+ ... make progress ...
+ state = STATEm;
+ break;
+ ...
+ }
+
+ so when inflate() is called again, the same case is attempted again, and
+ if the appropriate resources are provided, the machine proceeds to the
+ next state. The NEEDBITS() macro is usually the way the state evaluates
+ whether it can proceed or should return. NEEDBITS() does the return if
+ the requested bits are not available. The typical use of the BITS macros
+ is:
+
+ NEEDBITS(n);
+ ... do something with BITS(n) ...
+ DROPBITS(n);
+
+ where NEEDBITS(n) either returns from inflate() if there isn't enough
+ input left to load n bits into the accumulator, or it continues. BITS(n)
+ gives the low n bits in the accumulator. When done, DROPBITS(n) drops
+ the low n bits off the accumulator. INITBITS() clears the accumulator
+ and sets the number of available bits to zero. BYTEBITS() discards just
+ enough bits to put the accumulator on a byte boundary. After BYTEBITS()
+ and a NEEDBITS(8), then BITS(8) would return the next byte in the stream.
+
+ NEEDBITS(n) uses PULLBYTE() to get an available byte of input, or to return
+ if there is no input available.
The decoding of variable length codes uses + PULLBYTE() directly in order to pull just enough bytes to decode the next + code, and no more. + + Some states loop until they get enough input, making sure that enough + state information is maintained to continue the loop where it left off + if NEEDBITS() returns in the loop. For example, want, need, and keep + would all have to actually be part of the saved state in case NEEDBITS() + returns: + + case STATEw: + while (want < need) { + NEEDBITS(n); + keep[want++] = BITS(n); + DROPBITS(n); + } + state = STATEx; + case STATEx: + + As shown above, if the next state is also the next case, then the break + is omitted. + + A state may also return if there is not enough output space available to + complete that state. Those states are copying stored data, writing a + literal byte, and copying a matching string. + + When returning, a "goto inf_leave" is used to update the total counters, + update the check value, and determine whether any progress has been made + during that inflate() call in order to return the proper return code. + Progress is defined as a change in either strm->avail_in or strm->avail_out. + When there is a window, goto inf_leave will update the window with the last + output written. If a goto inf_leave occurs in the middle of decompression + and there is no window currently, goto inf_leave will create one and copy + output to the window for the next call of inflate(). + + In this implementation, the flush parameter of inflate() only affects the + return code (per zlib.h). inflate() always writes as much as possible to + strm->next_out, given the space available and the provided input--the effect + documented in zlib.h of Z_SYNC_FLUSH. Furthermore, inflate() always defers + the allocation of and copying into a sliding window until necessary, which + provides the effect documented in zlib.h for Z_FINISH when the entire input + stream is available. 
So the only thing the flush parameter actually does is: + when flush is set to Z_FINISH, inflate() cannot return Z_OK. Instead it + will return Z_BUF_ERROR if it has not reached the end of the stream. + */ + +int ZEXPORT inflate(strm, flush) +z_streamp strm; +int flush; +{ + struct inflate_state FAR *state; + z_const unsigned char FAR *next; /* next input */ + unsigned char FAR *put; /* next output */ + unsigned have, left; /* available input and output */ + unsigned long hold; /* bit buffer */ + unsigned bits; /* bits in bit buffer */ + unsigned in, out; /* save starting available input and output */ + unsigned copy; /* number of stored or match bytes to copy */ + unsigned char FAR *from; /* where to copy match bytes from */ + code here; /* current decoding table entry */ + code last; /* parent table entry */ + unsigned len; /* length to copy for repeats, bits to drop */ + int ret; /* return code */ +#ifdef GUNZIP + unsigned char hbuf[4]; /* buffer for gzip header crc calculation */ +#endif + static const unsigned short order[19] = /* permutation of code lengths */ + {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; + + if (inflateStateCheck(strm) || strm->next_out == Z_NULL || + (strm->next_in == Z_NULL && strm->avail_in != 0)) + return Z_STREAM_ERROR; + + state = (struct inflate_state FAR *)strm->state; + if (state->mode == TYPE) state->mode = TYPEDO; /* skip check */ + LOAD(); + in = have; + out = left; + ret = Z_OK; + for (;;) + switch (state->mode) { + case HEAD: + if (state->wrap == 0) { + state->mode = TYPEDO; + break; + } + NEEDBITS(16); +#ifdef GUNZIP + if ((state->wrap & 2) && hold == 0x8b1f) { /* gzip header */ + if (state->wbits == 0) + state->wbits = 15; + state->check = crc32(0L, Z_NULL, 0); + CRC2(state->check, hold); + INITBITS(); + state->mode = FLAGS; + break; + } + state->flags = 0; /* expect zlib header */ + if (state->head != Z_NULL) + state->head->done = -1; + if (!(state->wrap & 1) || /* check if zlib header allowed */ 
+#else + if ( +#endif + ((BITS(8) << 8) + (hold >> 8)) % 31) { + strm->msg = (char *)"incorrect header check"; + state->mode = BAD; + break; + } + if (BITS(4) != Z_DEFLATED) { + strm->msg = (char *)"unknown compression method"; + state->mode = BAD; + break; + } + DROPBITS(4); + len = BITS(4) + 8; + if (state->wbits == 0) + state->wbits = len; + if (len > 15 || len > state->wbits) { + strm->msg = (char *)"invalid window size"; + state->mode = BAD; + break; + } + state->dmax = 1U << len; + Tracev((stderr, "inflate: zlib header ok\n")); + strm->adler = state->check = adler32(0L, Z_NULL, 0); + state->mode = hold & 0x200 ? DICTID : TYPE; + INITBITS(); + break; +#ifdef GUNZIP + case FLAGS: + NEEDBITS(16); + state->flags = (int)(hold); + if ((state->flags & 0xff) != Z_DEFLATED) { + strm->msg = (char *)"unknown compression method"; + state->mode = BAD; + break; + } + if (state->flags & 0xe000) { + strm->msg = (char *)"unknown header flags set"; + state->mode = BAD; + break; + } + if (state->head != Z_NULL) + state->head->text = (int)((hold >> 8) & 1); + if ((state->flags & 0x0200) && (state->wrap & 4)) + CRC2(state->check, hold); + INITBITS(); + state->mode = TIME; + case TIME: + NEEDBITS(32); + if (state->head != Z_NULL) + state->head->time = hold; + if ((state->flags & 0x0200) && (state->wrap & 4)) + CRC4(state->check, hold); + INITBITS(); + state->mode = OS; + case OS: + NEEDBITS(16); + if (state->head != Z_NULL) { + state->head->xflags = (int)(hold & 0xff); + state->head->os = (int)(hold >> 8); + } + if ((state->flags & 0x0200) && (state->wrap & 4)) + CRC2(state->check, hold); + INITBITS(); + state->mode = EXLEN; + case EXLEN: + if (state->flags & 0x0400) { + NEEDBITS(16); + state->length = (unsigned)(hold); + if (state->head != Z_NULL) + state->head->extra_len = (unsigned)hold; + if ((state->flags & 0x0200) && (state->wrap & 4)) + CRC2(state->check, hold); + INITBITS(); + } + else if (state->head != Z_NULL) + state->head->extra = Z_NULL; + state->mode = EXTRA; + case 
EXTRA: + if (state->flags & 0x0400) { + copy = state->length; + if (copy > have) copy = have; + if (copy) { + if (state->head != Z_NULL && + state->head->extra != Z_NULL && + (len = state->head->extra_len - state->length) < state->head->extra_max) { /* bounds check (CVE-2022-37434): copy only while the write offset len is still inside the caller-supplied extra buffer, otherwise extra_max - len would wrap around */ + zmemcpy(state->head->extra + len, next, + len + copy > state->head->extra_max ? + state->head->extra_max - len : copy); + } + if ((state->flags & 0x0200) && (state->wrap & 4)) + state->check = crc32(state->check, next, copy); + have -= copy; + next += copy; + state->length -= copy; + } + if (state->length) goto inf_leave; + } + state->length = 0; + state->mode = NAME; + case NAME: + if (state->flags & 0x0800) { + if (have == 0) goto inf_leave; + copy = 0; + do { + len = (unsigned)(next[copy++]); + if (state->head != Z_NULL && + state->head->name != Z_NULL && + state->length < state->head->name_max) + state->head->name[state->length++] = (Bytef)len; + } while (len && copy < have); + if ((state->flags & 0x0200) && (state->wrap & 4)) + state->check = crc32(state->check, next, copy); + have -= copy; + next += copy; + if (len) goto inf_leave; + } + else if (state->head != Z_NULL) + state->head->name = Z_NULL; + state->length = 0; + state->mode = COMMENT; + case COMMENT: + if (state->flags & 0x1000) { + if (have == 0) goto inf_leave; + copy = 0; + do { + len = (unsigned)(next[copy++]); + if (state->head != Z_NULL && + state->head->comment != Z_NULL && + state->length < state->head->comm_max) + state->head->comment[state->length++] = (Bytef)len; + } while (len && copy < have); + if ((state->flags & 0x0200) && (state->wrap & 4)) + state->check = crc32(state->check, next, copy); + have -= copy; + next += copy; + if (len) goto inf_leave; + } + else if (state->head != Z_NULL) + state->head->comment = Z_NULL; + state->mode = HCRC; + case HCRC: + if (state->flags & 0x0200) { + NEEDBITS(16); + if ((state->wrap & 4) && hold != (state->check & 0xffff)) { + strm->msg = (char *)"header crc mismatch"; + state->mode = BAD; + break; + } + INITBITS(); + } + if 
(state->head != Z_NULL) { + state->head->hcrc = (int)((state->flags >> 9) & 1); + state->head->done = 1; + } + strm->adler = state->check = crc32(0L, Z_NULL, 0); + state->mode = TYPE; + break; +#endif + case DICTID: + NEEDBITS(32); + strm->adler = state->check = ZSWAP32(hold); + INITBITS(); + state->mode = DICT; + case DICT: + if (state->havedict == 0) { + RESTORE(); + return Z_NEED_DICT; + } + strm->adler = state->check = adler32(0L, Z_NULL, 0); + state->mode = TYPE; + case TYPE: + if (flush == Z_BLOCK || flush == Z_TREES) goto inf_leave; + case TYPEDO: + if (state->last) { + BYTEBITS(); + state->mode = CHECK; + break; + } + NEEDBITS(3); + state->last = BITS(1); + DROPBITS(1); + switch (BITS(2)) { + case 0: /* stored block */ + Tracev((stderr, "inflate: stored block%s\n", + state->last ? " (last)" : "")); + state->mode = STORED; + break; + case 1: /* fixed block */ + fixedtables(state); + Tracev((stderr, "inflate: fixed codes block%s\n", + state->last ? " (last)" : "")); + state->mode = LEN_; /* decode codes */ + if (flush == Z_TREES) { + DROPBITS(2); + goto inf_leave; + } + break; + case 2: /* dynamic block */ + Tracev((stderr, "inflate: dynamic codes block%s\n", + state->last ? 
" (last)" : "")); + state->mode = TABLE; + break; + case 3: + strm->msg = (char *)"invalid block type"; + state->mode = BAD; + } + DROPBITS(2); + break; + case STORED: + BYTEBITS(); /* go to byte boundary */ + NEEDBITS(32); + if ((hold & 0xffff) != ((hold >> 16) ^ 0xffff)) { + strm->msg = (char *)"invalid stored block lengths"; + state->mode = BAD; + break; + } + state->length = (unsigned)hold & 0xffff; + Tracev((stderr, "inflate: stored length %u\n", + state->length)); + INITBITS(); + state->mode = COPY_; + if (flush == Z_TREES) goto inf_leave; + case COPY_: + state->mode = COPY; + case COPY: + copy = state->length; + if (copy) { + if (copy > have) copy = have; + if (copy > left) copy = left; + if (copy == 0) goto inf_leave; + zmemcpy(put, next, copy); + have -= copy; + next += copy; + left -= copy; + put += copy; + state->length -= copy; + break; + } + Tracev((stderr, "inflate: stored end\n")); + state->mode = TYPE; + break; + case TABLE: + NEEDBITS(14); + state->nlen = BITS(5) + 257; + DROPBITS(5); + state->ndist = BITS(5) + 1; + DROPBITS(5); + state->ncode = BITS(4) + 4; + DROPBITS(4); +#ifndef PKZIP_BUG_WORKAROUND + if (state->nlen > 286 || state->ndist > 30) { + strm->msg = (char *)"too many length or distance symbols"; + state->mode = BAD; + break; + } +#endif + Tracev((stderr, "inflate: table sizes ok\n")); + state->have = 0; + state->mode = LENLENS; + case LENLENS: + while (state->have < state->ncode) { + NEEDBITS(3); + state->lens[order[state->have++]] = (unsigned short)BITS(3); + DROPBITS(3); + } + while (state->have < 19) + state->lens[order[state->have++]] = 0; + state->next = state->codes; + state->lencode = (const code FAR *)(state->next); + state->lenbits = 7; + ret = inflate_table(CODES, state->lens, 19, &(state->next), + &(state->lenbits), state->work); + if (ret) { + strm->msg = (char *)"invalid code lengths set"; + state->mode = BAD; + break; + } + Tracev((stderr, "inflate: code lengths ok\n")); + state->have = 0; + state->mode = CODELENS; + 
case CODELENS: + while (state->have < state->nlen + state->ndist) { + for (;;) { + here = state->lencode[BITS(state->lenbits)]; + if ((unsigned)(here.bits) <= bits) break; + PULLBYTE(); + } + if (here.val < 16) { + DROPBITS(here.bits); + state->lens[state->have++] = here.val; + } + else { + if (here.val == 16) { + NEEDBITS(here.bits + 2); + DROPBITS(here.bits); + if (state->have == 0) { + strm->msg = (char *)"invalid bit length repeat"; + state->mode = BAD; + break; + } + len = state->lens[state->have - 1]; + copy = 3 + BITS(2); + DROPBITS(2); + } + else if (here.val == 17) { + NEEDBITS(here.bits + 3); + DROPBITS(here.bits); + len = 0; + copy = 3 + BITS(3); + DROPBITS(3); + } + else { + NEEDBITS(here.bits + 7); + DROPBITS(here.bits); + len = 0; + copy = 11 + BITS(7); + DROPBITS(7); + } + if (state->have + copy > state->nlen + state->ndist) { + strm->msg = (char *)"invalid bit length repeat"; + state->mode = BAD; + break; + } + while (copy--) + state->lens[state->have++] = (unsigned short)len; + } + } + + /* handle error breaks in while */ + if (state->mode == BAD) break; + + /* check for end-of-block code (better have one) */ + if (state->lens[256] == 0) { + strm->msg = (char *)"invalid code -- missing end-of-block"; + state->mode = BAD; + break; + } + + /* build code tables -- note: do not change the lenbits or distbits + values here (9 and 6) without reading the comments in inftrees.h + concerning the ENOUGH constants, which depend on those values */ + state->next = state->codes; + state->lencode = (const code FAR *)(state->next); + state->lenbits = 9; + ret = inflate_table(LENS, state->lens, state->nlen, &(state->next), + &(state->lenbits), state->work); + if (ret) { + strm->msg = (char *)"invalid literal/lengths set"; + state->mode = BAD; + break; + } + state->distcode = (const code FAR *)(state->next); + state->distbits = 6; + ret = inflate_table(DISTS, state->lens + state->nlen, state->ndist, + &(state->next), &(state->distbits), state->work); + if (ret) { + 
strm->msg = (char *)"invalid distances set"; + state->mode = BAD; + break; + } + Tracev((stderr, "inflate: codes ok\n")); + state->mode = LEN_; + if (flush == Z_TREES) goto inf_leave; + case LEN_: + state->mode = LEN; + case LEN: + if (have >= 6 && left >= 258) { + RESTORE(); + inflate_fast(strm, out); + LOAD(); + if (state->mode == TYPE) + state->back = -1; + break; + } + state->back = 0; + for (;;) { + here = state->lencode[BITS(state->lenbits)]; + if ((unsigned)(here.bits) <= bits) break; + PULLBYTE(); + } + if (here.op && (here.op & 0xf0) == 0) { + last = here; + for (;;) { + here = state->lencode[last.val + + (BITS(last.bits + last.op) >> last.bits)]; + if ((unsigned)(last.bits + here.bits) <= bits) break; + PULLBYTE(); + } + DROPBITS(last.bits); + state->back += last.bits; + } + DROPBITS(here.bits); + state->back += here.bits; + state->length = (unsigned)here.val; + if ((int)(here.op) == 0) { + Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ? + "inflate: literal '%c'\n" : + "inflate: literal 0x%02x\n", here.val)); + state->mode = LIT; + break; + } + if (here.op & 32) { + Tracevv((stderr, "inflate: end of block\n")); + state->back = -1; + state->mode = TYPE; + break; + } + if (here.op & 64) { + strm->msg = (char *)"invalid literal/length code"; + state->mode = BAD; + break; + } + state->extra = (unsigned)(here.op) & 15; + state->mode = LENEXT; + case LENEXT: + if (state->extra) { + NEEDBITS(state->extra); + state->length += BITS(state->extra); + DROPBITS(state->extra); + state->back += state->extra; + } + Tracevv((stderr, "inflate: length %u\n", state->length)); + state->was = state->length; + state->mode = DIST; + case DIST: + for (;;) { + here = state->distcode[BITS(state->distbits)]; + if ((unsigned)(here.bits) <= bits) break; + PULLBYTE(); + } + if ((here.op & 0xf0) == 0) { + last = here; + for (;;) { + here = state->distcode[last.val + + (BITS(last.bits + last.op) >> last.bits)]; + if ((unsigned)(last.bits + here.bits) <= bits) break; + PULLBYTE(); 
+ } + DROPBITS(last.bits); + state->back += last.bits; + } + DROPBITS(here.bits); + state->back += here.bits; + if (here.op & 64) { + strm->msg = (char *)"invalid distance code"; + state->mode = BAD; + break; + } + state->offset = (unsigned)here.val; + state->extra = (unsigned)(here.op) & 15; + state->mode = DISTEXT; + case DISTEXT: + if (state->extra) { + NEEDBITS(state->extra); + state->offset += BITS(state->extra); + DROPBITS(state->extra); + state->back += state->extra; + } +#ifdef INFLATE_STRICT + if (state->offset > state->dmax) { + strm->msg = (char *)"invalid distance too far back"; + state->mode = BAD; + break; + } +#endif + Tracevv((stderr, "inflate: distance %u\n", state->offset)); + state->mode = MATCH; + case MATCH: + if (left == 0) goto inf_leave; + copy = out - left; + if (state->offset > copy) { /* copy from window */ + copy = state->offset - copy; + if (copy > state->whave) { + if (state->sane) { + strm->msg = (char *)"invalid distance too far back"; + state->mode = BAD; + break; + } +#ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR + Trace((stderr, "inflate.c too far\n")); + copy -= state->whave; + if (copy > state->length) copy = state->length; + if (copy > left) copy = left; + left -= copy; + state->length -= copy; + do { + *put++ = 0; + } while (--copy); + if (state->length == 0) state->mode = LEN; + break; +#endif + } + if (copy > state->wnext) { + copy -= state->wnext; + from = state->window + (state->wsize - copy); + } + else + from = state->window + (state->wnext - copy); + if (copy > state->length) copy = state->length; + } + else { /* copy from output */ + from = put - state->offset; + copy = state->length; + } + if (copy > left) copy = left; + left -= copy; + state->length -= copy; + do { + *put++ = *from++; + } while (--copy); + if (state->length == 0) state->mode = LEN; + break; + case LIT: + if (left == 0) goto inf_leave; + *put++ = (unsigned char)(state->length); + left--; + state->mode = LEN; + break; + case CHECK: + if 
(state->wrap) { + NEEDBITS(32); + out -= left; + strm->total_out += out; + state->total += out; + if ((state->wrap & 4) && out) + strm->adler = state->check = + UPDATE(state->check, put - out, out); + out = left; + if ((state->wrap & 4) && ( +#ifdef GUNZIP + state->flags ? hold : +#endif + ZSWAP32(hold)) != state->check) { + strm->msg = (char *)"incorrect data check"; + state->mode = BAD; + break; + } + INITBITS(); + Tracev((stderr, "inflate: check matches trailer\n")); + } +#ifdef GUNZIP + state->mode = LENGTH; + case LENGTH: + if (state->wrap && state->flags) { + NEEDBITS(32); + if (hold != (state->total & 0xffffffffUL)) { + strm->msg = (char *)"incorrect length check"; + state->mode = BAD; + break; + } + INITBITS(); + Tracev((stderr, "inflate: length matches trailer\n")); + } +#endif + state->mode = DONE; + case DONE: + ret = Z_STREAM_END; + goto inf_leave; + case BAD: + ret = Z_DATA_ERROR; + goto inf_leave; + case MEM: + return Z_MEM_ERROR; + case SYNC: + default: + return Z_STREAM_ERROR; + } + + /* + Return from inflate(), updating the total counts and the check value. + If there was no progress during the inflate() call, return a buffer + error. Call updatewindow() to create and/or update the window state. + Note: a memory error from inflate() is non-recoverable. + */ + inf_leave: + RESTORE(); + if (state->wsize || (out != strm->avail_out && state->mode < BAD && + (state->mode < CHECK || flush != Z_FINISH))) + if (updatewindow(strm, strm->next_out, out - strm->avail_out)) { + state->mode = MEM; + return Z_MEM_ERROR; + } + in -= strm->avail_in; + out -= strm->avail_out; + strm->total_in += in; + strm->total_out += out; + state->total += out; + if ((state->wrap & 4) && out) + strm->adler = state->check = + UPDATE(state->check, strm->next_out - out, out); + strm->data_type = (int)state->bits + (state->last ? 64 : 0) + + (state->mode == TYPE ? 128 : 0) + + (state->mode == LEN_ || state->mode == COPY_ ? 
256 : 0); + if (((in == 0 && out == 0) || flush == Z_FINISH) && ret == Z_OK) + ret = Z_BUF_ERROR; + return ret; +} + +int ZEXPORT inflateEnd(strm) +z_streamp strm; +{ + struct inflate_state FAR *state; + if (inflateStateCheck(strm)) + return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + if (state->window != Z_NULL) ZFREE(strm, state->window); + ZFREE(strm, strm->state); + strm->state = Z_NULL; + Tracev((stderr, "inflate: end\n")); + return Z_OK; +} + +int ZEXPORT inflateGetDictionary(strm, dictionary, dictLength) +z_streamp strm; +Bytef *dictionary; +uInt *dictLength; +{ + struct inflate_state FAR *state; + + /* check state */ + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + + /* copy dictionary */ + if (state->whave && dictionary != Z_NULL) { + zmemcpy(dictionary, state->window + state->wnext, + state->whave - state->wnext); + zmemcpy(dictionary + state->whave - state->wnext, + state->window, state->wnext); + } + if (dictLength != Z_NULL) + *dictLength = state->whave; + return Z_OK; +} + +int ZEXPORT inflateSetDictionary(strm, dictionary, dictLength) +z_streamp strm; +const Bytef *dictionary; +uInt dictLength; +{ + struct inflate_state FAR *state; + unsigned long dictid; + int ret; + + /* check state */ + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + if (state->wrap != 0 && state->mode != DICT) + return Z_STREAM_ERROR; + + /* check for correct dictionary identifier */ + if (state->mode == DICT) { + dictid = adler32(0L, Z_NULL, 0); + dictid = adler32(dictid, dictionary, dictLength); + if (dictid != state->check) + return Z_DATA_ERROR; + } + + /* copy dictionary to window using updatewindow(), which will amend the + existing dictionary if appropriate */ + ret = updatewindow(strm, dictionary + dictLength, dictLength); + if (ret) { + state->mode = MEM; + return Z_MEM_ERROR; + } + state->havedict = 1; + Tracev((stderr, "inflate: 
dictionary set\n")); + return Z_OK; +} + +int ZEXPORT inflateGetHeader(strm, head) +z_streamp strm; +gz_headerp head; +{ + struct inflate_state FAR *state; + + /* check state */ + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + if ((state->wrap & 2) == 0) return Z_STREAM_ERROR; + + /* save header structure */ + state->head = head; + head->done = 0; + return Z_OK; +} + +/* + Search buf[0..len-1] for the pattern: 0, 0, 0xff, 0xff. Return when found + or when out of input. When called, *have is the number of pattern bytes + found in order so far, in 0..3. On return *have is updated to the new + state. If on return *have equals four, then the pattern was found and the + return value is how many bytes were read including the last byte of the + pattern. If *have is less than four, then the pattern has not been found + yet and the return value is len. In the latter case, syncsearch() can be + called again with more data and the *have state. *have is initialized to + zero for the first call. + */ +local unsigned syncsearch(have, buf, len) +unsigned FAR *have; +const unsigned char FAR *buf; +unsigned len; +{ + unsigned got; + unsigned next; + + got = *have; + next = 0; + while (next < len && got < 4) { + if ((int)(buf[next]) == (got < 2 ? 
0 : 0xff)) + got++; + else if (buf[next]) + got = 0; + else + got = 4 - got; + next++; + } + *have = got; + return next; +} + +int ZEXPORT inflateSync(strm) +z_streamp strm; +{ + unsigned len; /* number of bytes to look at or looked at */ + unsigned long in, out; /* temporary to save total_in and total_out */ + unsigned char buf[4]; /* to restore bit buffer to byte string */ + struct inflate_state FAR *state; + + /* check parameters; Z_BUF_ERROR means there is nothing to search: no input and less than one whole byte in the bit buffer */ + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + if (strm->avail_in == 0 && state->bits < 8) return Z_BUF_ERROR; + + /* if first time, start search in bit buffer; the partial byte sits in the low bits of hold, so drop it with a right shift (same as BYTEBITS()) before peeling off whole bytes */ + if (state->mode != SYNC) { + state->mode = SYNC; + state->hold >>= state->bits & 7; + state->bits -= state->bits & 7; + len = 0; + while (state->bits >= 8) { + buf[len++] = (unsigned char)(state->hold); + state->hold >>= 8; + state->bits -= 8; + } + state->have = 0; + syncsearch(&(state->have), buf, len); + } + + /* search available input */ + len = syncsearch(&(state->have), strm->next_in, strm->avail_in); + strm->avail_in -= len; + strm->next_in += len; + strm->total_in += len; + + /* return no joy or set up to restart inflate() on a new block */ + if (state->have != 4) return Z_DATA_ERROR; + in = strm->total_in; out = strm->total_out; + inflateReset(strm); + strm->total_in = in; strm->total_out = out; + state->mode = TYPE; + return Z_OK; +} + +/* + Returns true if inflate is currently at the end of a block generated by + Z_SYNC_FLUSH or Z_FULL_FLUSH. This function is used by one PPP + implementation to provide an additional safety check. PPP uses + Z_SYNC_FLUSH but removes the length bytes of the resulting empty stored + block. When decompressing, PPP checks that at the end of input packet, + inflate is waiting for these length bytes. 
+ */ +int ZEXPORT inflateSyncPoint(strm) +z_streamp strm; +{ + struct inflate_state FAR *state; + + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + return state->mode == STORED && state->bits == 0; +} + +int ZEXPORT inflateCopy(dest, source) +z_streamp dest; +z_streamp source; +{ + struct inflate_state FAR *state; + struct inflate_state FAR *copy; + unsigned char FAR *window; + unsigned wsize; + + /* check input */ + if (inflateStateCheck(source) || dest == Z_NULL) + return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)source->state; + + /* allocate space */ + copy = (struct inflate_state FAR *) + ZALLOC(source, 1, sizeof(struct inflate_state)); + if (copy == Z_NULL) return Z_MEM_ERROR; + window = Z_NULL; + if (state->window != Z_NULL) { + window = (unsigned char FAR *) + ZALLOC(source, 1U << state->wbits, sizeof(unsigned char)); + if (window == Z_NULL) { + ZFREE(source, copy); + return Z_MEM_ERROR; + } + } + + /* copy state */ + zmemcpy((voidpf)dest, (voidpf)source, sizeof(z_stream)); + zmemcpy((voidpf)copy, (voidpf)state, sizeof(struct inflate_state)); + copy->strm = dest; + if (state->lencode >= state->codes && + state->lencode <= state->codes + ENOUGH - 1) { + copy->lencode = copy->codes + (state->lencode - state->codes); + copy->distcode = copy->codes + (state->distcode - state->codes); + } + copy->next = copy->codes + (state->next - state->codes); + if (window != Z_NULL) { + wsize = 1U << state->wbits; + zmemcpy(window, state->window, wsize); + } + copy->window = window; + dest->state = (struct internal_state FAR *)copy; + return Z_OK; +} + +int ZEXPORT inflateUndermine(strm, subvert) +z_streamp strm; +int subvert; +{ + struct inflate_state FAR *state; + + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; +#ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR + state->sane = !subvert; + return Z_OK; +#else + (void)subvert; + state->sane = 1; + 
return Z_DATA_ERROR; +#endif +} + +int ZEXPORT inflateValidate(strm, check) +z_streamp strm; +int check; +{ + struct inflate_state FAR *state; + + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + if (check && state->wrap) /* bit 2 of wrap enables check-value validation; a raw stream (wrap == 0) has no trailer, and wrap must stay 0 so inflate() keeps taking the raw path in HEAD */ + state->wrap |= 4; + else + state->wrap &= ~4; + return Z_OK; +} + +long ZEXPORT inflateMark(strm) +z_streamp strm; +{ + struct inflate_state FAR *state; + + if (inflateStateCheck(strm)) + return -(1L << 16); + state = (struct inflate_state FAR *)strm->state; + return (long)(((unsigned long)((long)state->back)) << 16) + + (state->mode == COPY ? state->length : + (state->mode == MATCH ? state->was - state->length : 0)); +} + +unsigned long ZEXPORT inflateCodesUsed(strm) +z_streamp strm; +{ + struct inflate_state FAR *state; + if (inflateStateCheck(strm)) return (unsigned long)-1; + state = (struct inflate_state FAR *)strm->state; + return (unsigned long)(state->next - state->codes); +} diff --git a/deps/SZ/zlib/inflate.h b/deps/SZ/zlib/inflate.h new file mode 100644 index 0000000000000000000000000000000000000000..a46cce6b6d05ef994d2a386257cf09068f0aa298 --- /dev/null +++ b/deps/SZ/zlib/inflate.h @@ -0,0 +1,125 @@ +/* inflate.h -- internal inflate state definition + * Copyright (C) 1995-2016 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. + */ + +/* define NO_GZIP when compiling if you want to disable gzip header and + trailer decoding by inflate(). NO_GZIP would be used to avoid linking in + the crc code when it is not needed. For shared libraries, gzip decoding + should be left enabled. 
*/ +#ifndef NO_GZIP +# define GUNZIP +#endif + +/* Possible inflate modes between inflate() calls */ +typedef enum { + HEAD = 16180, /* i: waiting for magic header */ + FLAGS, /* i: waiting for method and flags (gzip) */ + TIME, /* i: waiting for modification time (gzip) */ + OS, /* i: waiting for extra flags and operating system (gzip) */ + EXLEN, /* i: waiting for extra length (gzip) */ + EXTRA, /* i: waiting for extra bytes (gzip) */ + NAME, /* i: waiting for end of file name (gzip) */ + COMMENT, /* i: waiting for end of comment (gzip) */ + HCRC, /* i: waiting for header crc (gzip) */ + DICTID, /* i: waiting for dictionary check value */ + DICT, /* waiting for inflateSetDictionary() call */ + TYPE, /* i: waiting for type bits, including last-flag bit */ + TYPEDO, /* i: same, but skip check to exit inflate on new block */ + STORED, /* i: waiting for stored size (length and complement) */ + COPY_, /* i/o: same as COPY below, but only first time in */ + COPY, /* i/o: waiting for input or output to copy stored block */ + TABLE, /* i: waiting for dynamic block table lengths */ + LENLENS, /* i: waiting for code length code lengths */ + CODELENS, /* i: waiting for length/lit and distance code lengths */ + LEN_, /* i: same as LEN below, but only first time in */ + LEN, /* i: waiting for length/lit/eob code */ + LENEXT, /* i: waiting for length extra bits */ + DIST, /* i: waiting for distance code */ + DISTEXT, /* i: waiting for distance extra bits */ + MATCH, /* o: waiting for output space to copy string */ + LIT, /* o: waiting for output space to write literal */ + CHECK, /* i: waiting for 32-bit check value */ + LENGTH, /* i: waiting for 32-bit length (gzip) */ + DONE, /* finished check, done -- remain here until reset */ + BAD, /* got a data error -- remain here until reset */ + MEM, /* got an inflate() memory error -- remain here until reset */ + SYNC /* looking for synchronization bytes to restart inflate() */ +} inflate_mode; + +/* + State transitions between 
above modes - + + (most modes can go to BAD or MEM on error -- not shown for clarity) + + Process header: + HEAD -> (gzip) or (zlib) or (raw) + (gzip) -> FLAGS -> TIME -> OS -> EXLEN -> EXTRA -> NAME -> COMMENT -> + HCRC -> TYPE + (zlib) -> DICTID or TYPE + DICTID -> DICT -> TYPE + (raw) -> TYPEDO + Read deflate blocks: + TYPE -> TYPEDO -> STORED or TABLE or LEN_ or CHECK + STORED -> COPY_ -> COPY -> TYPE + TABLE -> LENLENS -> CODELENS -> LEN_ + LEN_ -> LEN + Read deflate codes in fixed or dynamic block: + LEN -> LENEXT or LIT or TYPE + LENEXT -> DIST -> DISTEXT -> MATCH -> LEN + LIT -> LEN + Process trailer: + CHECK -> LENGTH -> DONE + */ + +/* State maintained between inflate() calls -- approximately 7K bytes, not + including the allocated sliding window, which is up to 32K bytes. */ +struct inflate_state { + z_streamp strm; /* pointer back to this zlib stream */ + inflate_mode mode; /* current inflate mode */ + int last; /* true if processing last block */ + int wrap; /* bit 0 true for zlib, bit 1 true for gzip, + bit 2 true to validate check value */ + int havedict; /* true if dictionary provided */ + int flags; /* gzip header method and flags (0 if zlib) */ + unsigned dmax; /* zlib header max distance (INFLATE_STRICT) */ + unsigned long check; /* protected copy of check value */ + unsigned long total; /* protected copy of output count */ + gz_headerp head; /* where to save gzip header information */ + /* sliding window */ + unsigned wbits; /* log base 2 of requested window size */ + unsigned wsize; /* window size or zero if not using window */ + unsigned whave; /* valid bytes in the window */ + unsigned wnext; /* window write index */ + unsigned char FAR *window; /* allocated sliding window, if needed */ + /* bit accumulator */ + unsigned long hold; /* input bit accumulator */ + unsigned bits; /* number of bits in "in" */ + /* for string and stored block copying */ + unsigned length; /* literal or length of data to copy */ + unsigned offset; /* distance back 
to copy string from */ + /* for table and code decoding */ + unsigned extra; /* extra bits needed */ + /* fixed and dynamic code tables */ + code const FAR *lencode; /* starting table for length/literal codes */ + code const FAR *distcode; /* starting table for distance codes */ + unsigned lenbits; /* index bits for lencode */ + unsigned distbits; /* index bits for distcode */ + /* dynamic table building */ + unsigned ncode; /* number of code length code lengths */ + unsigned nlen; /* number of length code lengths */ + unsigned ndist; /* number of distance code lengths */ + unsigned have; /* number of code lengths in lens[] */ + code FAR *next; /* next available space in codes[] */ + unsigned short lens[320]; /* temporary storage for code lengths */ + unsigned short work[288]; /* work area for code table building */ + code codes[ENOUGH]; /* space for code tables */ + int sane; /* if false, allow invalid distance too far */ + int back; /* bits back of last unprocessed length/lit */ + unsigned was; /* initial length of match */ +}; diff --git a/deps/SZ/zlib/inftrees.c b/deps/SZ/zlib/inftrees.c new file mode 100644 index 0000000000000000000000000000000000000000..2ea08fc13ea8ec50fad1f7574fa287aa6362abc4 --- /dev/null +++ b/deps/SZ/zlib/inftrees.c @@ -0,0 +1,304 @@ +/* inftrees.c -- generate Huffman trees for efficient decoding + * Copyright (C) 1995-2017 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#include "zutil.h" +#include "inftrees.h" + +#define MAXBITS 15 + +const char inflate_copyright[] = + " inflate 1.2.11 Copyright 1995-2017 Mark Adler "; +/* + If you use the zlib library in a product, an acknowledgment is welcome + in the documentation of your product. If for some reason you cannot + include such an acknowledgment, I would appreciate that you keep this + copyright string in the executable of your product. + */ + +/* + Build a set of tables to decode the provided canonical Huffman code. 
+ The code lengths are lens[0..codes-1]. The result starts at *table, + whose indices are 0..2^bits-1. work is a writable array of at least + lens shorts, which is used as a work area. type is the type of code + to be generated, CODES, LENS, or DISTS. On return, zero is success, + -1 is an invalid code, and +1 means that ENOUGH isn't enough. table + on return points to the next available entry's address. bits is the + requested root table index bits, and on return it is the actual root + table index bits. It will differ if the request is greater than the + longest code or if it is less than the shortest code. + */ +int ZLIB_INTERNAL inflate_table(type, lens, codes, table, bits, work) +codetype type; +unsigned short FAR *lens; +unsigned codes; +code FAR * FAR *table; +unsigned FAR *bits; +unsigned short FAR *work; +{ + unsigned len; /* a code's length in bits */ + unsigned sym; /* index of code symbols */ + unsigned min, max; /* minimum and maximum code lengths */ + unsigned root; /* number of index bits for root table */ + unsigned curr; /* number of index bits for current table */ + unsigned drop; /* code bits to drop for sub-table */ + int left; /* number of prefix codes available */ + unsigned used; /* code entries in table used */ + unsigned huff; /* Huffman code */ + unsigned incr; /* for incrementing code, index */ + unsigned fill; /* index for replicating entries */ + unsigned low; /* low bits for current root entry */ + unsigned mask; /* mask for low root bits */ + code here; /* table entry for duplication */ + code FAR *next; /* next available space in table */ + const unsigned short FAR *base; /* base value table to use */ + const unsigned short FAR *extra; /* extra bits table to use */ + unsigned match; /* use base and extra for symbol >= match */ + unsigned short count[MAXBITS+1]; /* number of codes of each length */ + unsigned short offs[MAXBITS+1]; /* offsets in table for each length */ + static const unsigned short lbase[31] = { /* Length codes 
257..285 base */ + 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, + 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0}; + static const unsigned short lext[31] = { /* Length codes 257..285 extra */ + 16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18, + 19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 77, 202}; + static const unsigned short dbase[32] = { /* Distance codes 0..29 base */ + 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, + 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, + 8193, 12289, 16385, 24577, 0, 0}; + static const unsigned short dext[32] = { /* Distance codes 0..29 extra */ + 16, 16, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22, + 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, + 28, 28, 29, 29, 64, 64}; + + /* + Process a set of code lengths to create a canonical Huffman code. The + code lengths are lens[0..codes-1]. Each length corresponds to the + symbols 0..codes-1. The Huffman code is generated by first sorting the + symbols by length from short to long, and retaining the symbol order + for codes with equal lengths. Then the code starts with all zero bits + for the first code of the shortest length, and the codes are integer + increments for the same length, and zeros are appended as the length + increases. For the deflate format, these bits are stored backwards + from their more natural integer increment ordering, and so when the + decoding tables are built in the large loop below, the integer codes + are incremented backwards. + + This routine assumes, but does not check, that all of the entries in + lens[] are in the range 0..MAXBITS. The caller must assure this. + 1..MAXBITS is interpreted as that code length. zero means that that + symbol does not occur in this code. 
+ + The codes are sorted by computing a count of codes for each length, + creating from that a table of starting indices for each length in the + sorted table, and then entering the symbols in order in the sorted + table. The sorted table is work[], with that space being provided by + the caller. + + The length counts are used for other purposes as well, i.e. finding + the minimum and maximum length codes, determining if there are any + codes at all, checking for a valid set of lengths, and looking ahead + at length counts to determine sub-table sizes when building the + decoding tables. + */ + + /* accumulate lengths for codes (assumes lens[] all in 0..MAXBITS) */ + for (len = 0; len <= MAXBITS; len++) + count[len] = 0; + for (sym = 0; sym < codes; sym++) + count[lens[sym]]++; + + /* bound code lengths, force root to be within code lengths */ + root = *bits; + for (max = MAXBITS; max >= 1; max--) + if (count[max] != 0) break; + if (root > max) root = max; + if (max == 0) { /* no symbols to code at all */ + here.op = (unsigned char)64; /* invalid code marker */ + here.bits = (unsigned char)1; + here.val = (unsigned short)0; + *(*table)++ = here; /* make a table to force an error */ + *(*table)++ = here; + *bits = 1; + return 0; /* no symbols, but wait for decoding to report error */ + } + for (min = 1; min < max; min++) + if (count[min] != 0) break; + if (root < min) root = min; + + /* check for an over-subscribed or incomplete set of lengths */ + left = 1; + for (len = 1; len <= MAXBITS; len++) { + left <<= 1; + left -= count[len]; + if (left < 0) return -1; /* over-subscribed */ + } + if (left > 0 && (type == CODES || max != 1)) + return -1; /* incomplete set */ + + /* generate offsets into symbol table for each length for sorting */ + offs[1] = 0; + for (len = 1; len < MAXBITS; len++) + offs[len + 1] = offs[len] + count[len]; + + /* sort symbols by length, by symbol order within each length */ + for (sym = 0; sym < codes; sym++) + if (lens[sym] != 0) 
work[offs[lens[sym]]++] = (unsigned short)sym; + + /* + Create and fill in decoding tables. In this loop, the table being + filled is at next and has curr index bits. The code being used is huff + with length len. That code is converted to an index by dropping drop + bits off of the bottom. For codes where len is less than drop + curr, + those top drop + curr - len bits are incremented through all values to + fill the table with replicated entries. + + root is the number of index bits for the root table. When len exceeds + root, sub-tables are created pointed to by the root entry with an index + of the low root bits of huff. This is saved in low to check for when a + new sub-table should be started. drop is zero when the root table is + being filled, and drop is root when sub-tables are being filled. + + When a new sub-table is needed, it is necessary to look ahead in the + code lengths to determine what size sub-table is needed. The length + counts are used for this, and so count[] is decremented as codes are + entered in the tables. + + used keeps track of how many table entries have been allocated from the + provided *table space. It is checked for LENS and DIST tables against + the constants ENOUGH_LENS and ENOUGH_DISTS to guard against changes in + the initial root table size constants. See the comments in inftrees.h + for more information. + + sym increments through all symbols, and the loop terminates when + all codes of length max, i.e. all codes, have been processed. This + routine permits incomplete codes, so another loop after this one fills + in the rest of the decoding tables with invalid code markers. 
+ */ + + /* set up for code type */ + switch (type) { + case CODES: + base = extra = work; /* dummy value--not used */ + match = 20; + break; + case LENS: + base = lbase; + extra = lext; + match = 257; + break; + default: /* DISTS */ + base = dbase; + extra = dext; + match = 0; + } + + /* initialize state for loop */ + huff = 0; /* starting code */ + sym = 0; /* starting code symbol */ + len = min; /* starting code length */ + next = *table; /* current table to fill in */ + curr = root; /* current table index bits */ + drop = 0; /* current bits to drop from code for index */ + low = (unsigned)(-1); /* trigger new sub-table when len > root */ + used = 1U << root; /* use root table entries */ + mask = used - 1; /* mask for comparing low */ + + /* check available table space */ + if ((type == LENS && used > ENOUGH_LENS) || + (type == DISTS && used > ENOUGH_DISTS)) + return 1; + + /* process all codes and make table entries */ + for (;;) { + /* create table entry */ + here.bits = (unsigned char)(len - drop); + if (work[sym] + 1U < match) { + here.op = (unsigned char)0; + here.val = work[sym]; + } + else if (work[sym] >= match) { + here.op = (unsigned char)(extra[work[sym] - match]); + here.val = base[work[sym] - match]; + } + else { + here.op = (unsigned char)(32 + 64); /* end of block */ + here.val = 0; + } + + /* replicate for those indices with low len bits equal to huff */ + incr = 1U << (len - drop); + fill = 1U << curr; + min = fill; /* save offset to next table */ + do { + fill -= incr; + next[(huff >> drop) + fill] = here; + } while (fill != 0); + + /* backwards increment the len-bit code huff */ + incr = 1U << (len - 1); + while (huff & incr) + incr >>= 1; + if (incr != 0) { + huff &= incr - 1; + huff += incr; + } + else + huff = 0; + + /* go to next symbol, update count, len */ + sym++; + if (--(count[len]) == 0) { + if (len == max) break; + len = lens[work[sym]]; + } + + /* create new sub-table if needed */ + if (len > root && (huff & mask) != low) { + /* if 
first time, transition to sub-tables */ + if (drop == 0) + drop = root; + + /* increment past last table */ + next += min; /* here min is 1 << curr */ + + /* determine length of next table */ + curr = len - drop; + left = (int)(1 << curr); + while (curr + drop < max) { + left -= count[curr + drop]; + if (left <= 0) break; + curr++; + left <<= 1; + } + + /* check for enough space */ + used += 1U << curr; + if ((type == LENS && used > ENOUGH_LENS) || + (type == DISTS && used > ENOUGH_DISTS)) + return 1; + + /* point entry in root table to sub-table */ + low = huff & mask; + (*table)[low].op = (unsigned char)curr; + (*table)[low].bits = (unsigned char)root; + (*table)[low].val = (unsigned short)(next - *table); + } + } + + /* fill in remaining table entry if code is incomplete (guaranteed to have + at most one remaining entry, since if the code is incomplete, the + maximum code length that was allowed to get this far is one bit) */ + if (huff != 0) { + here.op = (unsigned char)64; /* invalid code marker */ + here.bits = (unsigned char)(len - drop); + here.val = (unsigned short)0; + next[huff] = here; + } + + /* set return parameters */ + *table += used; + *bits = root; + return 0; +} diff --git a/deps/SZ/zlib/inftrees.h b/deps/SZ/zlib/inftrees.h new file mode 100644 index 0000000000000000000000000000000000000000..baa53a0b1a199ce6ea4c3f99d0306502ab4fab2c --- /dev/null +++ b/deps/SZ/zlib/inftrees.h @@ -0,0 +1,62 @@ +/* inftrees.h -- header to use inftrees.c + * Copyright (C) 1995-2005, 2010 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. + */ + +/* Structure for decoding tables. 
Each entry provides either the + information needed to do the operation requested by the code that + indexed that table entry, or it provides a pointer to another + table that indexes more bits of the code. op indicates whether + the entry is a pointer to another table, a literal, a length or + distance, an end-of-block, or an invalid code. For a table + pointer, the low four bits of op is the number of index bits of + that table. For a length or distance, the low four bits of op + is the number of extra bits to get after the code. bits is + the number of bits in this code or part of the code to drop off + of the bit buffer. val is the actual byte to output in the case + of a literal, the base length or distance, or the offset from + the current table to the next table. Each entry is four bytes. */ +typedef struct { + unsigned char op; /* operation, extra bits, table bits */ + unsigned char bits; /* bits in this part of the code */ + unsigned short val; /* offset in table or code value */ +} code; + +/* op values as set by inflate_table(): + 00000000 - literal + 0000tttt - table link, tttt != 0 is the number of table index bits + 0001eeee - length or distance, eeee is the number of extra bits + 01100000 - end of block + 01000000 - invalid code + */ + +/* Maximum size of the dynamic table. The maximum number of code structures is + 1444, which is the sum of 852 for literal/length codes and 592 for distance + codes. These values were found by exhaustive searches using the program + examples/enough.c found in the zlib distribtution. The arguments to that + program are the number of symbols, the initial root table size, and the + maximum bit length of a code. "enough 286 9 15" for literal/length codes + returns returns 852, and "enough 30 6 15" for distance codes returns 592. + The initial root table size (9 or 6) is found in the fifth argument of the + inflate_table() calls in inflate.c and infback.c. 
If the root table size is + changed, then these maximum sizes would be need to be recalculated and + updated. */ +#define ENOUGH_LENS 852 +#define ENOUGH_DISTS 592 +#define ENOUGH (ENOUGH_LENS+ENOUGH_DISTS) + +/* Type of code to build for inflate_table() */ +typedef enum { + CODES, + LENS, + DISTS +} codetype; + +int ZLIB_INTERNAL inflate_table OF((codetype type, unsigned short FAR *lens, + unsigned codes, code FAR * FAR *table, + unsigned FAR *bits, unsigned short FAR *work)); diff --git a/deps/SZ/zlib/trees.c b/deps/SZ/zlib/trees.c new file mode 100644 index 0000000000000000000000000000000000000000..50cf4b4571cfec347ce5891b76fcb6675fcb580d --- /dev/null +++ b/deps/SZ/zlib/trees.c @@ -0,0 +1,1203 @@ +/* trees.c -- output deflated data using Huffman coding + * Copyright (C) 1995-2017 Jean-loup Gailly + * detect_data_type() function provided freely by Cosmin Truta, 2006 + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* + * ALGORITHM + * + * The "deflation" process uses several Huffman trees. The more + * common source values are represented by shorter bit sequences. + * + * Each code tree is stored in a compressed form which is itself + * a Huffman encoding of the lengths of all the code strings (in + * ascending order by source values). The actual code strings are + * reconstructed from the lengths in the inflate process, as described + * in the deflate specification. + * + * REFERENCES + * + * Deutsch, L.P.,"'Deflate' Compressed Data Format Specification". + * Available in ftp.uu.net:/pub/archiving/zip/doc/deflate-1.1.doc + * + * Storer, James A. + * Data Compression: Methods and Theory, pp. 49-50. + * Computer Science Press, 1988. ISBN 0-7167-8156-5. + * + * Sedgewick, R. + * Algorithms, p290. + * Addison-Wesley, 1983. ISBN 0-201-06672-6. 
+ */ + +/* @(#) $Id$ */ + +/* #define GEN_TREES_H */ + +#include "deflate.h" + +#ifdef ZLIB_DEBUG +# include +#endif + +/* =========================================================================== + * Constants + */ + +#define MAX_BL_BITS 7 +/* Bit length codes must not exceed MAX_BL_BITS bits */ + +#define END_BLOCK 256 +/* end of block literal code */ + +#define REP_3_6 16 +/* repeat previous bit length 3-6 times (2 bits of repeat count) */ + +#define REPZ_3_10 17 +/* repeat a zero length 3-10 times (3 bits of repeat count) */ + +#define REPZ_11_138 18 +/* repeat a zero length 11-138 times (7 bits of repeat count) */ + +local const int extra_lbits[LENGTH_CODES] /* extra bits for each length code */ + = {0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0}; + +local const int extra_dbits[D_CODES] /* extra bits for each distance code */ + = {0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13}; + +local const int extra_blbits[BL_CODES]/* extra bits for each bit length code */ + = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3,7}; + +local const uch bl_order[BL_CODES] + = {16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15}; +/* The lengths of the bit length codes are sent in order of decreasing + * probability, to avoid transmitting the lengths for unused bit length codes. + */ + +/* =========================================================================== + * Local data. These are initialized only once. + */ + +#define DIST_CODE_LEN 512 /* see definition of array dist_code below */ + +#if defined(GEN_TREES_H) || !defined(STDC) +/* non ANSI compilers may not accept trees.h */ + +local ct_data static_ltree[L_CODES+2]; +/* The static literal tree. Since the bit lengths are imposed, there is no + * need for the L_CODES extra codes used during heap construction. However + * The codes 286 and 287 are needed to build a canonical tree (see _tr_init + * below). + */ + +local ct_data static_dtree[D_CODES]; +/* The static distance tree. 
(Actually a trivial tree since all codes use + * 5 bits.) + */ + +uch _dist_code[DIST_CODE_LEN]; +/* Distance codes. The first 256 values correspond to the distances + * 3 .. 258, the last 256 values correspond to the top 8 bits of + * the 15 bit distances. + */ + +uch _length_code[MAX_MATCH-MIN_MATCH+1]; +/* length code for each normalized match length (0 == MIN_MATCH) */ + +local int base_length[LENGTH_CODES]; +/* First normalized length for each code (0 = MIN_MATCH) */ + +local int base_dist[D_CODES]; +/* First normalized distance for each code (0 = distance of 1) */ + +#else +# include "trees.h" +#endif /* GEN_TREES_H */ + +struct static_tree_desc_s { + const ct_data *static_tree; /* static tree or NULL */ + const intf *extra_bits; /* extra bits for each code or NULL */ + int extra_base; /* base index for extra_bits */ + int elems; /* max number of elements in the tree */ + int max_length; /* max bit length for the codes */ +}; + +local const static_tree_desc static_l_desc = +{static_ltree, extra_lbits, LITERALS+1, L_CODES, MAX_BITS}; + +local const static_tree_desc static_d_desc = +{static_dtree, extra_dbits, 0, D_CODES, MAX_BITS}; + +local const static_tree_desc static_bl_desc = +{(const ct_data *)0, extra_blbits, 0, BL_CODES, MAX_BL_BITS}; + +/* =========================================================================== + * Local (static) routines in this file. 
+ */ + +local void tr_static_init OF((void)); +local void init_block OF((deflate_state *s)); +local void pqdownheap OF((deflate_state *s, ct_data *tree, int k)); +local void gen_bitlen OF((deflate_state *s, tree_desc *desc)); +local void gen_codes OF((ct_data *tree, int max_code, ushf *bl_count)); +local void build_tree OF((deflate_state *s, tree_desc *desc)); +local void scan_tree OF((deflate_state *s, ct_data *tree, int max_code)); +local void send_tree OF((deflate_state *s, ct_data *tree, int max_code)); +local int build_bl_tree OF((deflate_state *s)); +local void send_all_trees OF((deflate_state *s, int lcodes, int dcodes, + int blcodes)); +local void compress_block OF((deflate_state *s, const ct_data *ltree, + const ct_data *dtree)); +local int detect_data_type OF((deflate_state *s)); +local unsigned bi_reverse OF((unsigned value, int length)); +local void bi_windup OF((deflate_state *s)); +local void bi_flush OF((deflate_state *s)); + +#ifdef GEN_TREES_H +local void gen_trees_header OF((void)); +#endif + +#ifndef ZLIB_DEBUG +# define send_code(s, c, tree) send_bits(s, tree[c].Code, tree[c].Len) + /* Send a code of the given tree. c and tree must not have side effects */ + +#else /* !ZLIB_DEBUG */ +# define send_code(s, c, tree) \ + { if (z_verbose>2) fprintf(stderr,"\ncd %3d ",(c)); \ + send_bits(s, tree[c].Code, tree[c].Len); } +#endif + +/* =========================================================================== + * Output a short LSB first on the stream. + * IN assertion: there is enough room in pendingBuf. + */ +#define put_short(s, w) { \ + put_byte(s, (uch)((w) & 0xff)); \ + put_byte(s, (uch)((ush)(w) >> 8)); \ +} + +/* =========================================================================== + * Send a value on a given number of bits. + * IN assertion: length <= 16 and value fits in length bits. 
+ */ +#ifdef ZLIB_DEBUG +local void send_bits OF((deflate_state *s, int value, int length)); + +local void send_bits(s, value, length) + deflate_state *s; + int value; /* value to send */ + int length; /* number of bits */ +{ + Tracevv((stderr," l %2d v %4x ", length, value)); + Assert(length > 0 && length <= 15, "invalid length"); + s->bits_sent += (ulg)length; + + /* If not enough room in bi_buf, use (valid) bits from bi_buf and + * (16 - bi_valid) bits from value, leaving (width - (16-bi_valid)) + * unused bits in value. + */ + if (s->bi_valid > (int)Buf_size - length) { + s->bi_buf |= (ush)value << s->bi_valid; + put_short(s, s->bi_buf); + s->bi_buf = (ush)value >> (Buf_size - s->bi_valid); + s->bi_valid += length - Buf_size; + } else { + s->bi_buf |= (ush)value << s->bi_valid; + s->bi_valid += length; + } +} +#else /* !ZLIB_DEBUG */ + +#define send_bits(s, value, length) \ +{ int len = length;\ + if (s->bi_valid > (int)Buf_size - len) {\ + int val = (int)value;\ + s->bi_buf |= (ush)val << s->bi_valid;\ + put_short(s, s->bi_buf);\ + s->bi_buf = (ush)val >> (Buf_size - s->bi_valid);\ + s->bi_valid += len - Buf_size;\ + } else {\ + s->bi_buf |= (ush)(value) << s->bi_valid;\ + s->bi_valid += len;\ + }\ +} +#endif /* ZLIB_DEBUG */ + + +/* the arguments must not have side effects */ + +/* =========================================================================== + * Initialize the various 'constant' tables. 
+ */ +local void tr_static_init() +{ +#if defined(GEN_TREES_H) || !defined(STDC) + static int static_init_done = 0; + int n; /* iterates over tree elements */ + int bits; /* bit counter */ + int length; /* length value */ + int code; /* code value */ + int dist; /* distance index */ + ush bl_count[MAX_BITS+1]; + /* number of codes at each bit length for an optimal tree */ + + if (static_init_done) return; + + /* For some embedded targets, global variables are not initialized: */ +#ifdef NO_INIT_GLOBAL_POINTERS + static_l_desc.static_tree = static_ltree; + static_l_desc.extra_bits = extra_lbits; + static_d_desc.static_tree = static_dtree; + static_d_desc.extra_bits = extra_dbits; + static_bl_desc.extra_bits = extra_blbits; +#endif + + /* Initialize the mapping length (0..255) -> length code (0..28) */ + length = 0; + for (code = 0; code < LENGTH_CODES-1; code++) { + base_length[code] = length; + for (n = 0; n < (1< dist code (0..29) */ + dist = 0; + for (code = 0 ; code < 16; code++) { + base_dist[code] = dist; + for (n = 0; n < (1<>= 7; /* from now on, all distances are divided by 128 */ + for ( ; code < D_CODES; code++) { + base_dist[code] = dist << 7; + for (n = 0; n < (1<<(extra_dbits[code]-7)); n++) { + _dist_code[256 + dist++] = (uch)code; + } + } + Assert (dist == 256, "tr_static_init: 256+dist != 512"); + + /* Construct the codes of the static literal tree */ + for (bits = 0; bits <= MAX_BITS; bits++) bl_count[bits] = 0; + n = 0; + while (n <= 143) static_ltree[n++].Len = 8, bl_count[8]++; + while (n <= 255) static_ltree[n++].Len = 9, bl_count[9]++; + while (n <= 279) static_ltree[n++].Len = 7, bl_count[7]++; + while (n <= 287) static_ltree[n++].Len = 8, bl_count[8]++; + /* Codes 286 and 287 do not exist, but we must include them in the + * tree construction to get a canonical Huffman tree (longest code + * all ones) + */ + gen_codes((ct_data *)static_ltree, L_CODES+1, bl_count); + + /* The static distance tree is trivial: */ + for (n = 0; n < D_CODES; n++) 
{ + static_dtree[n].Len = 5; + static_dtree[n].Code = bi_reverse((unsigned)n, 5); + } + static_init_done = 1; + +# ifdef GEN_TREES_H + gen_trees_header(); +# endif +#endif /* defined(GEN_TREES_H) || !defined(STDC) */ +} + +/* =========================================================================== + * Genererate the file trees.h describing the static trees. + */ +#ifdef GEN_TREES_H +# ifndef ZLIB_DEBUG +# include +# endif + +# define SEPARATOR(i, last, width) \ + ((i) == (last)? "\n};\n\n" : \ + ((i) % (width) == (width)-1 ? ",\n" : ", ")) + +void gen_trees_header() +{ + FILE *header = fopen("trees.h", "w"); + int i; + + Assert (header != NULL, "Can't open trees.h"); + fprintf(header, + "/* header created automatically with -DGEN_TREES_H */\n\n"); + + fprintf(header, "local const ct_data static_ltree[L_CODES+2] = {\n"); + for (i = 0; i < L_CODES+2; i++) { + fprintf(header, "{{%3u},{%3u}}%s", static_ltree[i].Code, + static_ltree[i].Len, SEPARATOR(i, L_CODES+1, 5)); + } + + fprintf(header, "local const ct_data static_dtree[D_CODES] = {\n"); + for (i = 0; i < D_CODES; i++) { + fprintf(header, "{{%2u},{%2u}}%s", static_dtree[i].Code, + static_dtree[i].Len, SEPARATOR(i, D_CODES-1, 5)); + } + + fprintf(header, "const uch ZLIB_INTERNAL _dist_code[DIST_CODE_LEN] = {\n"); + for (i = 0; i < DIST_CODE_LEN; i++) { + fprintf(header, "%2u%s", _dist_code[i], + SEPARATOR(i, DIST_CODE_LEN-1, 20)); + } + + fprintf(header, + "const uch ZLIB_INTERNAL _length_code[MAX_MATCH-MIN_MATCH+1]= {\n"); + for (i = 0; i < MAX_MATCH-MIN_MATCH+1; i++) { + fprintf(header, "%2u%s", _length_code[i], + SEPARATOR(i, MAX_MATCH-MIN_MATCH, 20)); + } + + fprintf(header, "local const int base_length[LENGTH_CODES] = {\n"); + for (i = 0; i < LENGTH_CODES; i++) { + fprintf(header, "%1u%s", base_length[i], + SEPARATOR(i, LENGTH_CODES-1, 20)); + } + + fprintf(header, "local const int base_dist[D_CODES] = {\n"); + for (i = 0; i < D_CODES; i++) { + fprintf(header, "%5u%s", base_dist[i], + SEPARATOR(i, 
D_CODES-1, 10)); + } + + fclose(header); +} +#endif /* GEN_TREES_H */ + +/* =========================================================================== + * Initialize the tree data structures for a new zlib stream. + */ +void ZLIB_INTERNAL _tr_init(s) + deflate_state *s; +{ + tr_static_init(); + + s->l_desc.dyn_tree = s->dyn_ltree; + s->l_desc.stat_desc = &static_l_desc; + + s->d_desc.dyn_tree = s->dyn_dtree; + s->d_desc.stat_desc = &static_d_desc; + + s->bl_desc.dyn_tree = s->bl_tree; + s->bl_desc.stat_desc = &static_bl_desc; + + s->bi_buf = 0; + s->bi_valid = 0; +#ifdef ZLIB_DEBUG + s->compressed_len = 0L; + s->bits_sent = 0L; +#endif + + /* Initialize the first block of the first file: */ + init_block(s); +} + +/* =========================================================================== + * Initialize a new block. + */ +local void init_block(s) + deflate_state *s; +{ + int n; /* iterates over tree elements */ + + /* Initialize the trees. */ + for (n = 0; n < L_CODES; n++) s->dyn_ltree[n].Freq = 0; + for (n = 0; n < D_CODES; n++) s->dyn_dtree[n].Freq = 0; + for (n = 0; n < BL_CODES; n++) s->bl_tree[n].Freq = 0; + + s->dyn_ltree[END_BLOCK].Freq = 1; + s->opt_len = s->static_len = 0L; + s->last_lit = s->matches = 0; +} + +#define SMALLEST 1 +/* Index within the heap array of least frequent node in the Huffman tree */ + + +/* =========================================================================== + * Remove the smallest element from the heap and recreate the heap with + * one less element. Updates heap and heap_len. + */ +#define pqremove(s, tree, top) \ +{\ + top = s->heap[SMALLEST]; \ + s->heap[SMALLEST] = s->heap[s->heap_len--]; \ + pqdownheap(s, tree, SMALLEST); \ +} + +/* =========================================================================== + * Compares to subtrees, using the tree depth as tie breaker when + * the subtrees have equal frequency. This minimizes the worst case length. 
+ */ +#define smaller(tree, n, m, depth) \ + (tree[n].Freq < tree[m].Freq || \ + (tree[n].Freq == tree[m].Freq && depth[n] <= depth[m])) + +/* =========================================================================== + * Restore the heap property by moving down the tree starting at node k, + * exchanging a node with the smallest of its two sons if necessary, stopping + * when the heap property is re-established (each father smaller than its + * two sons). + */ +local void pqdownheap(s, tree, k) + deflate_state *s; + ct_data *tree; /* the tree to restore */ + int k; /* node to move down */ +{ + int v = s->heap[k]; + int j = k << 1; /* left son of k */ + while (j <= s->heap_len) { + /* Set j to the smallest of the two sons: */ + if (j < s->heap_len && + smaller(tree, s->heap[j+1], s->heap[j], s->depth)) { + j++; + } + /* Exit if v is smaller than both sons */ + if (smaller(tree, v, s->heap[j], s->depth)) break; + + /* Exchange v with the smallest son */ + s->heap[k] = s->heap[j]; k = j; + + /* And continue down the tree, setting j to the left son of k */ + j <<= 1; + } + s->heap[k] = v; +} + +/* =========================================================================== + * Compute the optimal bit lengths for a tree and update the total bit length + * for the current block. + * IN assertion: the fields freq and dad are set, heap[heap_max] and + * above are the tree nodes sorted by increasing frequency. + * OUT assertions: the field len is set to the optimal bit length, the + * array bl_count contains the frequencies for each bit length. + * The length opt_len is updated; static_len is also updated if stree is + * not null. 
+ */ +local void gen_bitlen(s, desc) + deflate_state *s; + tree_desc *desc; /* the tree descriptor */ +{ + ct_data *tree = desc->dyn_tree; + int max_code = desc->max_code; + const ct_data *stree = desc->stat_desc->static_tree; + const intf *extra = desc->stat_desc->extra_bits; + int base = desc->stat_desc->extra_base; + int max_length = desc->stat_desc->max_length; + int h; /* heap index */ + int n, m; /* iterate over the tree elements */ + int bits; /* bit length */ + int xbits; /* extra bits */ + ush f; /* frequency */ + int overflow = 0; /* number of elements with bit length too large */ + + for (bits = 0; bits <= MAX_BITS; bits++) s->bl_count[bits] = 0; + + /* In a first pass, compute the optimal bit lengths (which may + * overflow in the case of the bit length tree). + */ + tree[s->heap[s->heap_max]].Len = 0; /* root of the heap */ + + for (h = s->heap_max+1; h < HEAP_SIZE; h++) { + n = s->heap[h]; + bits = tree[tree[n].Dad].Len + 1; + if (bits > max_length) bits = max_length, overflow++; + tree[n].Len = (ush)bits; + /* We overwrite tree[n].Dad which is no longer needed */ + + if (n > max_code) continue; /* not a leaf node */ + + s->bl_count[bits]++; + xbits = 0; + if (n >= base) xbits = extra[n-base]; + f = tree[n].Freq; + s->opt_len += (ulg)f * (unsigned)(bits + xbits); + if (stree) s->static_len += (ulg)f * (unsigned)(stree[n].Len + xbits); + } + if (overflow == 0) return; + + Tracev((stderr,"\nbit length overflow\n")); + /* This happens for example on obj2 and pic of the Calgary corpus */ + + /* Find the first bit length which could increase: */ + do { + bits = max_length-1; + while (s->bl_count[bits] == 0) bits--; + s->bl_count[bits]--; /* move one leaf down the tree */ + s->bl_count[bits+1] += 2; /* move one overflow item as its brother */ + s->bl_count[max_length]--; + /* The brother of the overflow item also moves one step up, + * but this does not affect bl_count[max_length] + */ + overflow -= 2; + } while (overflow > 0); + + /* Now recompute all bit 
lengths, scanning in increasing frequency. + * h is still equal to HEAP_SIZE. (It is simpler to reconstruct all + * lengths instead of fixing only the wrong ones. This idea is taken + * from 'ar' written by Haruhiko Okumura.) + */ + for (bits = max_length; bits != 0; bits--) { + n = s->bl_count[bits]; + while (n != 0) { + m = s->heap[--h]; + if (m > max_code) continue; + if ((unsigned) tree[m].Len != (unsigned) bits) { + Tracev((stderr,"code %d bits %d->%d\n", m, tree[m].Len, bits)); + s->opt_len += ((ulg)bits - tree[m].Len) * tree[m].Freq; + tree[m].Len = (ush)bits; + } + n--; + } + } +} + +/* =========================================================================== + * Generate the codes for a given tree and bit counts (which need not be + * optimal). + * IN assertion: the array bl_count contains the bit length statistics for + * the given tree and the field len is set for all tree elements. + * OUT assertion: the field code is set for all tree elements of non + * zero code length. + */ +local void gen_codes (tree, max_code, bl_count) + ct_data *tree; /* the tree to decorate */ + int max_code; /* largest code with non zero frequency */ + ushf *bl_count; /* number of codes at each bit length */ +{ + ush next_code[MAX_BITS+1]; /* next code value for each bit length */ + unsigned code = 0; /* running code value */ + int bits; /* bit index */ + int n; /* code index */ + + /* The distribution counts are first used to generate the code values + * without bit reversal. + */ + for (bits = 1; bits <= MAX_BITS; bits++) { + code = (code + bl_count[bits-1]) << 1; + next_code[bits] = (ush)code; + } + /* Check that the bit counts in bl_count are consistent. The last code + * must be all ones. 
+ */ + Assert (code + bl_count[MAX_BITS]-1 == (1<dyn_tree; + const ct_data *stree = desc->stat_desc->static_tree; + int elems = desc->stat_desc->elems; + int n, m; /* iterate over heap elements */ + int max_code = -1; /* largest code with non zero frequency */ + int node; /* new node being created */ + + /* Construct the initial heap, with least frequent element in + * heap[SMALLEST]. The sons of heap[n] are heap[2*n] and heap[2*n+1]. + * heap[0] is not used. + */ + s->heap_len = 0, s->heap_max = HEAP_SIZE; + + for (n = 0; n < elems; n++) { + if (tree[n].Freq != 0) { + s->heap[++(s->heap_len)] = max_code = n; + s->depth[n] = 0; + } else { + tree[n].Len = 0; + } + } + + /* The pkzip format requires that at least one distance code exists, + * and that at least one bit should be sent even if there is only one + * possible code. So to avoid special checks later on we force at least + * two codes of non zero frequency. + */ + while (s->heap_len < 2) { + node = s->heap[++(s->heap_len)] = (max_code < 2 ? ++max_code : 0); + tree[node].Freq = 1; + s->depth[node] = 0; + s->opt_len--; if (stree) s->static_len -= stree[node].Len; + /* node is 0 or 1 so it does not have extra bits */ + } + desc->max_code = max_code; + + /* The elements heap[heap_len/2+1 .. heap_len] are leaves of the tree, + * establish sub-heaps of increasing lengths: + */ + for (n = s->heap_len/2; n >= 1; n--) pqdownheap(s, tree, n); + + /* Construct the Huffman tree by repeatedly combining the least two + * frequent nodes. + */ + node = elems; /* next internal node of the tree */ + do { + pqremove(s, tree, n); /* n = node of least frequency */ + m = s->heap[SMALLEST]; /* m = node of next least frequency */ + + s->heap[--(s->heap_max)] = n; /* keep the nodes sorted by frequency */ + s->heap[--(s->heap_max)] = m; + + /* Create a new node father of n and m */ + tree[node].Freq = tree[n].Freq + tree[m].Freq; + s->depth[node] = (uch)((s->depth[n] >= s->depth[m] ? 
+ s->depth[n] : s->depth[m]) + 1); + tree[n].Dad = tree[m].Dad = (ush)node; +#ifdef DUMP_BL_TREE + if (tree == s->bl_tree) { + fprintf(stderr,"\nnode %d(%d), sons %d(%d) %d(%d)", + node, tree[node].Freq, n, tree[n].Freq, m, tree[m].Freq); + } +#endif + /* and insert the new node in the heap */ + s->heap[SMALLEST] = node++; + pqdownheap(s, tree, SMALLEST); + + } while (s->heap_len >= 2); + + s->heap[--(s->heap_max)] = s->heap[SMALLEST]; + + /* At this point, the fields freq and dad are set. We can now + * generate the bit lengths. + */ + gen_bitlen(s, (tree_desc *)desc); + + /* The field len is now set, we can generate the bit codes */ + gen_codes ((ct_data *)tree, max_code, s->bl_count); +} + +/* =========================================================================== + * Scan a literal or distance tree to determine the frequencies of the codes + * in the bit length tree. + */ +local void scan_tree (s, tree, max_code) + deflate_state *s; + ct_data *tree; /* the tree to be scanned */ + int max_code; /* and its largest code of non zero frequency */ +{ + int n; /* iterates over all tree elements */ + int prevlen = -1; /* last emitted length */ + int curlen; /* length of current code */ + int nextlen = tree[0].Len; /* length of next code */ + int count = 0; /* repeat count of the current code */ + int max_count = 7; /* max repeat count */ + int min_count = 4; /* min repeat count */ + + if (nextlen == 0) max_count = 138, min_count = 3; + tree[max_code+1].Len = (ush)0xffff; /* guard */ + + for (n = 0; n <= max_code; n++) { + curlen = nextlen; nextlen = tree[n+1].Len; + if (++count < max_count && curlen == nextlen) { + continue; + } else if (count < min_count) { + s->bl_tree[curlen].Freq += count; + } else if (curlen != 0) { + if (curlen != prevlen) s->bl_tree[curlen].Freq++; + s->bl_tree[REP_3_6].Freq++; + } else if (count <= 10) { + s->bl_tree[REPZ_3_10].Freq++; + } else { + s->bl_tree[REPZ_11_138].Freq++; + } + count = 0; prevlen = curlen; + if (nextlen == 0) { + 
max_count = 138, min_count = 3; + } else if (curlen == nextlen) { + max_count = 6, min_count = 3; + } else { + max_count = 7, min_count = 4; + } + } +} + +/* =========================================================================== + * Send a literal or distance tree in compressed form, using the codes in + * bl_tree. + */ +local void send_tree (s, tree, max_code) + deflate_state *s; + ct_data *tree; /* the tree to be scanned */ + int max_code; /* and its largest code of non zero frequency */ +{ + int n; /* iterates over all tree elements */ + int prevlen = -1; /* last emitted length */ + int curlen; /* length of current code */ + int nextlen = tree[0].Len; /* length of next code */ + int count = 0; /* repeat count of the current code */ + int max_count = 7; /* max repeat count */ + int min_count = 4; /* min repeat count */ + + /* tree[max_code+1].Len = -1; */ /* guard already set */ + if (nextlen == 0) max_count = 138, min_count = 3; + + for (n = 0; n <= max_code; n++) { + curlen = nextlen; nextlen = tree[n+1].Len; + if (++count < max_count && curlen == nextlen) { + continue; + } else if (count < min_count) { + do { send_code(s, curlen, s->bl_tree); } while (--count != 0); + + } else if (curlen != 0) { + if (curlen != prevlen) { + send_code(s, curlen, s->bl_tree); count--; + } + Assert(count >= 3 && count <= 6, " 3_6?"); + send_code(s, REP_3_6, s->bl_tree); send_bits(s, count-3, 2); + + } else if (count <= 10) { + send_code(s, REPZ_3_10, s->bl_tree); send_bits(s, count-3, 3); + + } else { + send_code(s, REPZ_11_138, s->bl_tree); send_bits(s, count-11, 7); + } + count = 0; prevlen = curlen; + if (nextlen == 0) { + max_count = 138, min_count = 3; + } else if (curlen == nextlen) { + max_count = 6, min_count = 3; + } else { + max_count = 7, min_count = 4; + } + } +} + +/* =========================================================================== + * Construct the Huffman tree for the bit lengths and return the index in + * bl_order of the last bit length code to 
send. + */ +local int build_bl_tree(s) + deflate_state *s; +{ + int max_blindex; /* index of last bit length code of non zero freq */ + + /* Determine the bit length frequencies for literal and distance trees */ + scan_tree(s, (ct_data *)s->dyn_ltree, s->l_desc.max_code); + scan_tree(s, (ct_data *)s->dyn_dtree, s->d_desc.max_code); + + /* Build the bit length tree: */ + build_tree(s, (tree_desc *)(&(s->bl_desc))); + /* opt_len now includes the length of the tree representations, except + * the lengths of the bit lengths codes and the 5+5+4 bits for the counts. + */ + + /* Determine the number of bit length codes to send. The pkzip format + * requires that at least 4 bit length codes be sent. (appnote.txt says + * 3 but the actual value used is 4.) + */ + for (max_blindex = BL_CODES-1; max_blindex >= 3; max_blindex--) { + if (s->bl_tree[bl_order[max_blindex]].Len != 0) break; + } + /* Update opt_len to include the bit length tree and counts */ + s->opt_len += 3*((ulg)max_blindex+1) + 5+5+4; + Tracev((stderr, "\ndyn trees: dyn %ld, stat %ld", + s->opt_len, s->static_len)); + + return max_blindex; +} + +/* =========================================================================== + * Send the header for a block using dynamic Huffman trees: the counts, the + * lengths of the bit length codes, the literal tree and the distance tree. + * IN assertion: lcodes >= 257, dcodes >= 1, blcodes >= 4. 
+ */ +local void send_all_trees(s, lcodes, dcodes, blcodes) + deflate_state *s; + int lcodes, dcodes, blcodes; /* number of codes for each tree */ +{ + int rank; /* index in bl_order */ + + Assert (lcodes >= 257 && dcodes >= 1 && blcodes >= 4, "not enough codes"); + Assert (lcodes <= L_CODES && dcodes <= D_CODES && blcodes <= BL_CODES, + "too many codes"); + Tracev((stderr, "\nbl counts: ")); + send_bits(s, lcodes-257, 5); /* not +255 as stated in appnote.txt */ + send_bits(s, dcodes-1, 5); + send_bits(s, blcodes-4, 4); /* not -3 as stated in appnote.txt */ + for (rank = 0; rank < blcodes; rank++) { + Tracev((stderr, "\nbl code %2d ", bl_order[rank])); + send_bits(s, s->bl_tree[bl_order[rank]].Len, 3); + } + Tracev((stderr, "\nbl tree: sent %ld", s->bits_sent)); + + send_tree(s, (ct_data *)s->dyn_ltree, lcodes-1); /* literal tree */ + Tracev((stderr, "\nlit tree: sent %ld", s->bits_sent)); + + send_tree(s, (ct_data *)s->dyn_dtree, dcodes-1); /* distance tree */ + Tracev((stderr, "\ndist tree: sent %ld", s->bits_sent)); +} + +/* =========================================================================== + * Send a stored block + */ +void ZLIB_INTERNAL _tr_stored_block(s, buf, stored_len, last) + deflate_state *s; + charf *buf; /* input block */ + ulg stored_len; /* length of input block */ + int last; /* one if this is the last block for a file */ +{ + send_bits(s, (STORED_BLOCK<<1)+last, 3); /* send block type */ + bi_windup(s); /* align on byte boundary */ + put_short(s, (ush)stored_len); + put_short(s, (ush)~stored_len); + zmemcpy(s->pending_buf + s->pending, (Bytef *)buf, stored_len); + s->pending += stored_len; +#ifdef ZLIB_DEBUG + s->compressed_len = (s->compressed_len + 3 + 7) & (ulg)~7L; + s->compressed_len += (stored_len + 4) << 3; + s->bits_sent += 2*16; + s->bits_sent += stored_len<<3; +#endif +} + +/* =========================================================================== + * Flush the bits in the bit buffer to pending output (leaves at most 7 bits) 
+ */ +void ZLIB_INTERNAL _tr_flush_bits(s) + deflate_state *s; +{ + bi_flush(s); +} + +/* =========================================================================== + * Send one empty static block to give enough lookahead for inflate. + * This takes 10 bits, of which 7 may remain in the bit buffer. + */ +void ZLIB_INTERNAL _tr_align(s) + deflate_state *s; +{ + send_bits(s, STATIC_TREES<<1, 3); + send_code(s, END_BLOCK, static_ltree); +#ifdef ZLIB_DEBUG + s->compressed_len += 10L; /* 3 for block type, 7 for EOB */ +#endif + bi_flush(s); +} + +/* =========================================================================== + * Determine the best encoding for the current block: dynamic trees, static + * trees or store, and write out the encoded block. + */ +void ZLIB_INTERNAL _tr_flush_block(s, buf, stored_len, last) + deflate_state *s; + charf *buf; /* input block, or NULL if too old */ + ulg stored_len; /* length of input block */ + int last; /* one if this is the last block for a file */ +{ + ulg opt_lenb, static_lenb; /* opt_len and static_len in bytes */ + int max_blindex = 0; /* index of last bit length code of non zero freq */ + + /* Build the Huffman trees unless a stored block is forced */ + if (s->level > 0) { + + /* Check if the file is binary or text */ + if (s->strm->data_type == Z_UNKNOWN) + s->strm->data_type = detect_data_type(s); + + /* Construct the literal and distance trees */ + build_tree(s, (tree_desc *)(&(s->l_desc))); + Tracev((stderr, "\nlit data: dyn %ld, stat %ld", s->opt_len, + s->static_len)); + + build_tree(s, (tree_desc *)(&(s->d_desc))); + Tracev((stderr, "\ndist data: dyn %ld, stat %ld", s->opt_len, + s->static_len)); + /* At this point, opt_len and static_len are the total bit lengths of + * the compressed block data, excluding the tree representations. + */ + + /* Build the bit length tree for the above two trees, and get the index + * in bl_order of the last bit length code to send. 
+ */ + max_blindex = build_bl_tree(s); + + /* Determine the best encoding. Compute the block lengths in bytes. */ + opt_lenb = (s->opt_len+3+7)>>3; + static_lenb = (s->static_len+3+7)>>3; + + Tracev((stderr, "\nopt %lu(%lu) stat %lu(%lu) stored %lu lit %u ", + opt_lenb, s->opt_len, static_lenb, s->static_len, stored_len, + s->last_lit)); + + if (static_lenb <= opt_lenb) opt_lenb = static_lenb; + + } else { + Assert(buf != (char*)0, "lost buf"); + opt_lenb = static_lenb = stored_len + 5; /* force a stored block */ + } + +#ifdef FORCE_STORED + if (buf != (char*)0) { /* force stored block */ +#else + if (stored_len+4 <= opt_lenb && buf != (char*)0) { + /* 4: two words for the lengths */ +#endif + /* The test buf != NULL is only necessary if LIT_BUFSIZE > WSIZE. + * Otherwise we can't have processed more than WSIZE input bytes since + * the last block flush, because compression would have been + * successful. If LIT_BUFSIZE <= WSIZE, it is never too late to + * transform a block into a stored block. + */ + _tr_stored_block(s, buf, stored_len, last); + +#ifdef FORCE_STATIC + } else if (static_lenb >= 0) { /* force static trees */ +#else + } else if (s->strategy == Z_FIXED || static_lenb == opt_lenb) { +#endif + send_bits(s, (STATIC_TREES<<1)+last, 3); + compress_block(s, (const ct_data *)static_ltree, + (const ct_data *)static_dtree); +#ifdef ZLIB_DEBUG + s->compressed_len += 3 + s->static_len; +#endif + } else { + send_bits(s, (DYN_TREES<<1)+last, 3); + send_all_trees(s, s->l_desc.max_code+1, s->d_desc.max_code+1, + max_blindex+1); + compress_block(s, (const ct_data *)s->dyn_ltree, + (const ct_data *)s->dyn_dtree); +#ifdef ZLIB_DEBUG + s->compressed_len += 3 + s->opt_len; +#endif + } + Assert (s->compressed_len == s->bits_sent, "bad compressed size"); + /* The above check is made mod 2^32, for files larger than 512 MB + * and uLong implemented on 32 bits. 
+ */ + init_block(s); + + if (last) { + bi_windup(s); +#ifdef ZLIB_DEBUG + s->compressed_len += 7; /* align on byte boundary */ +#endif + } + Tracev((stderr,"\ncomprlen %lu(%lu) ", s->compressed_len>>3, + s->compressed_len-7*last)); +} + +/* =========================================================================== + * Save the match info and tally the frequency counts. Return true if + * the current block must be flushed. + */ +int ZLIB_INTERNAL _tr_tally (s, dist, lc) + deflate_state *s; + unsigned dist; /* distance of matched string */ + unsigned lc; /* match length-MIN_MATCH or unmatched char (if dist==0) */ +{ + s->d_buf[s->last_lit] = (ush)dist; + s->l_buf[s->last_lit++] = (uch)lc; + if (dist == 0) { + /* lc is the unmatched char */ + s->dyn_ltree[lc].Freq++; + } else { + s->matches++; + /* Here, lc is the match length - MIN_MATCH */ + dist--; /* dist = match distance - 1 */ + Assert((ush)dist < (ush)MAX_DIST(s) && + (ush)lc <= (ush)(MAX_MATCH-MIN_MATCH) && + (ush)d_code(dist) < (ush)D_CODES, "_tr_tally: bad match"); + + s->dyn_ltree[_length_code[lc]+LITERALS+1].Freq++; + s->dyn_dtree[d_code(dist)].Freq++; + } + +#ifdef TRUNCATE_BLOCK + /* Try to guess if it is profitable to stop the current block here */ + if ((s->last_lit & 0x1fff) == 0 && s->level > 2) { + /* Compute an upper bound for the compressed length */ + ulg out_length = (ulg)s->last_lit*8L; + ulg in_length = (ulg)((long)s->strstart - s->block_start); + int dcode; + for (dcode = 0; dcode < D_CODES; dcode++) { + out_length += (ulg)s->dyn_dtree[dcode].Freq * + (5L+extra_dbits[dcode]); + } + out_length >>= 3; + Tracev((stderr,"\nlast_lit %u, in %ld, out ~%ld(%ld%%) ", + s->last_lit, in_length, out_length, + 100L - out_length*100L/in_length)); + if (s->matches < s->last_lit/2 && out_length < in_length/2) return 1; + } +#endif + return (s->last_lit == s->lit_bufsize-1); + /* We avoid equality with lit_bufsize because of wraparound at 64K + * on 16 bit machines and because stored blocks are restricted 
to + * 64K-1 bytes. + */ +} + +/* =========================================================================== + * Send the block data compressed using the given Huffman trees + */ +local void compress_block(s, ltree, dtree) + deflate_state *s; + const ct_data *ltree; /* literal tree */ + const ct_data *dtree; /* distance tree */ +{ + unsigned dist; /* distance of matched string */ + int lc; /* match length or unmatched char (if dist == 0) */ + unsigned lx = 0; /* running index in l_buf */ + unsigned code; /* the code to send */ + int extra; /* number of extra bits to send */ + + if (s->last_lit != 0) do { + dist = s->d_buf[lx]; + lc = s->l_buf[lx++]; + if (dist == 0) { + send_code(s, lc, ltree); /* send a literal byte */ + Tracecv(isgraph(lc), (stderr," '%c' ", lc)); + } else { + /* Here, lc is the match length - MIN_MATCH */ + code = _length_code[lc]; + send_code(s, code+LITERALS+1, ltree); /* send the length code */ + extra = extra_lbits[code]; + if (extra != 0) { + lc -= base_length[code]; + send_bits(s, lc, extra); /* send the extra length bits */ + } + dist--; /* dist is now the match distance - 1 */ + code = d_code(dist); + Assert (code < D_CODES, "bad d_code"); + + send_code(s, code, dtree); /* send the distance code */ + extra = extra_dbits[code]; + if (extra != 0) { + dist -= (unsigned)base_dist[code]; + send_bits(s, dist, extra); /* send the extra distance bits */ + } + } /* literal or match pair ? */ + + /* Check that the overlay between pending_buf and d_buf+l_buf is ok: */ + Assert((uInt)(s->pending) < s->lit_bufsize + 2*lx, + "pendingBuf overflow"); + + } while (lx < s->last_lit); + + send_code(s, END_BLOCK, ltree); +} + +/* =========================================================================== + * Check if the data type is TEXT or BINARY, using the following algorithm: + * - TEXT if the two conditions below are satisfied: + * a) There are no non-portable control characters belonging to the + * "black list" (0..6, 14..25, 28..31). 
+ * b) There is at least one printable character belonging to the + * "white list" (9 {TAB}, 10 {LF}, 13 {CR}, 32..255). + * - BINARY otherwise. + * - The following partially-portable control characters form a + * "gray list" that is ignored in this detection algorithm: + * (7 {BEL}, 8 {BS}, 11 {VT}, 12 {FF}, 26 {SUB}, 27 {ESC}). + * IN assertion: the fields Freq of dyn_ltree are set. + */ +local int detect_data_type(s) + deflate_state *s; +{ + /* black_mask is the bit mask of black-listed bytes + * set bits 0..6, 14..25, and 28..31 + * 0xf3ffc07f = binary 11110011111111111100000001111111 + */ + unsigned long black_mask = 0xf3ffc07fUL; + int n; + + /* Check for non-textual ("black-listed") bytes. */ + for (n = 0; n <= 31; n++, black_mask >>= 1) + if ((black_mask & 1) && (s->dyn_ltree[n].Freq != 0)) + return Z_BINARY; + + /* Check for textual ("white-listed") bytes. */ + if (s->dyn_ltree[9].Freq != 0 || s->dyn_ltree[10].Freq != 0 + || s->dyn_ltree[13].Freq != 0) + return Z_TEXT; + for (n = 32; n < LITERALS; n++) + if (s->dyn_ltree[n].Freq != 0) + return Z_TEXT; + + /* There are no "black-listed" or "white-listed" bytes: + * this stream either is empty or has tolerated ("gray-listed") bytes only. + */ + return Z_BINARY; +} + +/* =========================================================================== + * Reverse the first len bits of a code, using straightforward code (a faster + * method would use a table) + * IN assertion: 1 <= len <= 15 + */ +local unsigned bi_reverse(code, len) + unsigned code; /* the value to invert */ + int len; /* its bit length */ +{ + register unsigned res = 0; + do { + res |= code & 1; + code >>= 1, res <<= 1; + } while (--len > 0); + return res >> 1; +} + +/* =========================================================================== + * Flush the bit buffer, keeping at most 7 bits in it. 
+ */ +local void bi_flush(s) + deflate_state *s; +{ + if (s->bi_valid == 16) { + put_short(s, s->bi_buf); + s->bi_buf = 0; + s->bi_valid = 0; + } else if (s->bi_valid >= 8) { + put_byte(s, (Byte)s->bi_buf); + s->bi_buf >>= 8; + s->bi_valid -= 8; + } +} + +/* =========================================================================== + * Flush the bit buffer and align the output on a byte boundary + */ +local void bi_windup(s) + deflate_state *s; +{ + if (s->bi_valid > 8) { + put_short(s, s->bi_buf); + } else if (s->bi_valid > 0) { + put_byte(s, (Byte)s->bi_buf); + } + s->bi_buf = 0; + s->bi_valid = 0; +#ifdef ZLIB_DEBUG + s->bits_sent = (s->bits_sent+7) & ~7; +#endif +} diff --git a/deps/SZ/zlib/trees.h b/deps/SZ/zlib/trees.h new file mode 100644 index 0000000000000000000000000000000000000000..d35639d82a27807e49ea35c334f8bbcf64720f82 --- /dev/null +++ b/deps/SZ/zlib/trees.h @@ -0,0 +1,128 @@ +/* header created automatically with -DGEN_TREES_H */ + +local const ct_data static_ltree[L_CODES+2] = { +{{ 12},{ 8}}, {{140},{ 8}}, {{ 76},{ 8}}, {{204},{ 8}}, {{ 44},{ 8}}, +{{172},{ 8}}, {{108},{ 8}}, {{236},{ 8}}, {{ 28},{ 8}}, {{156},{ 8}}, +{{ 92},{ 8}}, {{220},{ 8}}, {{ 60},{ 8}}, {{188},{ 8}}, {{124},{ 8}}, +{{252},{ 8}}, {{ 2},{ 8}}, {{130},{ 8}}, {{ 66},{ 8}}, {{194},{ 8}}, +{{ 34},{ 8}}, {{162},{ 8}}, {{ 98},{ 8}}, {{226},{ 8}}, {{ 18},{ 8}}, +{{146},{ 8}}, {{ 82},{ 8}}, {{210},{ 8}}, {{ 50},{ 8}}, {{178},{ 8}}, +{{114},{ 8}}, {{242},{ 8}}, {{ 10},{ 8}}, {{138},{ 8}}, {{ 74},{ 8}}, +{{202},{ 8}}, {{ 42},{ 8}}, {{170},{ 8}}, {{106},{ 8}}, {{234},{ 8}}, +{{ 26},{ 8}}, {{154},{ 8}}, {{ 90},{ 8}}, {{218},{ 8}}, {{ 58},{ 8}}, +{{186},{ 8}}, {{122},{ 8}}, {{250},{ 8}}, {{ 6},{ 8}}, {{134},{ 8}}, +{{ 70},{ 8}}, {{198},{ 8}}, {{ 38},{ 8}}, {{166},{ 8}}, {{102},{ 8}}, +{{230},{ 8}}, {{ 22},{ 8}}, {{150},{ 8}}, {{ 86},{ 8}}, {{214},{ 8}}, +{{ 54},{ 8}}, {{182},{ 8}}, {{118},{ 8}}, {{246},{ 8}}, {{ 14},{ 8}}, +{{142},{ 8}}, {{ 78},{ 8}}, {{206},{ 8}}, {{ 46},{ 8}}, {{174},{ 
8}}, +{{110},{ 8}}, {{238},{ 8}}, {{ 30},{ 8}}, {{158},{ 8}}, {{ 94},{ 8}}, +{{222},{ 8}}, {{ 62},{ 8}}, {{190},{ 8}}, {{126},{ 8}}, {{254},{ 8}}, +{{ 1},{ 8}}, {{129},{ 8}}, {{ 65},{ 8}}, {{193},{ 8}}, {{ 33},{ 8}}, +{{161},{ 8}}, {{ 97},{ 8}}, {{225},{ 8}}, {{ 17},{ 8}}, {{145},{ 8}}, +{{ 81},{ 8}}, {{209},{ 8}}, {{ 49},{ 8}}, {{177},{ 8}}, {{113},{ 8}}, +{{241},{ 8}}, {{ 9},{ 8}}, {{137},{ 8}}, {{ 73},{ 8}}, {{201},{ 8}}, +{{ 41},{ 8}}, {{169},{ 8}}, {{105},{ 8}}, {{233},{ 8}}, {{ 25},{ 8}}, +{{153},{ 8}}, {{ 89},{ 8}}, {{217},{ 8}}, {{ 57},{ 8}}, {{185},{ 8}}, +{{121},{ 8}}, {{249},{ 8}}, {{ 5},{ 8}}, {{133},{ 8}}, {{ 69},{ 8}}, +{{197},{ 8}}, {{ 37},{ 8}}, {{165},{ 8}}, {{101},{ 8}}, {{229},{ 8}}, +{{ 21},{ 8}}, {{149},{ 8}}, {{ 85},{ 8}}, {{213},{ 8}}, {{ 53},{ 8}}, +{{181},{ 8}}, {{117},{ 8}}, {{245},{ 8}}, {{ 13},{ 8}}, {{141},{ 8}}, +{{ 77},{ 8}}, {{205},{ 8}}, {{ 45},{ 8}}, {{173},{ 8}}, {{109},{ 8}}, +{{237},{ 8}}, {{ 29},{ 8}}, {{157},{ 8}}, {{ 93},{ 8}}, {{221},{ 8}}, +{{ 61},{ 8}}, {{189},{ 8}}, {{125},{ 8}}, {{253},{ 8}}, {{ 19},{ 9}}, +{{275},{ 9}}, {{147},{ 9}}, {{403},{ 9}}, {{ 83},{ 9}}, {{339},{ 9}}, +{{211},{ 9}}, {{467},{ 9}}, {{ 51},{ 9}}, {{307},{ 9}}, {{179},{ 9}}, +{{435},{ 9}}, {{115},{ 9}}, {{371},{ 9}}, {{243},{ 9}}, {{499},{ 9}}, +{{ 11},{ 9}}, {{267},{ 9}}, {{139},{ 9}}, {{395},{ 9}}, {{ 75},{ 9}}, +{{331},{ 9}}, {{203},{ 9}}, {{459},{ 9}}, {{ 43},{ 9}}, {{299},{ 9}}, +{{171},{ 9}}, {{427},{ 9}}, {{107},{ 9}}, {{363},{ 9}}, {{235},{ 9}}, +{{491},{ 9}}, {{ 27},{ 9}}, {{283},{ 9}}, {{155},{ 9}}, {{411},{ 9}}, +{{ 91},{ 9}}, {{347},{ 9}}, {{219},{ 9}}, {{475},{ 9}}, {{ 59},{ 9}}, +{{315},{ 9}}, {{187},{ 9}}, {{443},{ 9}}, {{123},{ 9}}, {{379},{ 9}}, +{{251},{ 9}}, {{507},{ 9}}, {{ 7},{ 9}}, {{263},{ 9}}, {{135},{ 9}}, +{{391},{ 9}}, {{ 71},{ 9}}, {{327},{ 9}}, {{199},{ 9}}, {{455},{ 9}}, +{{ 39},{ 9}}, {{295},{ 9}}, {{167},{ 9}}, {{423},{ 9}}, {{103},{ 9}}, +{{359},{ 9}}, {{231},{ 9}}, {{487},{ 9}}, {{ 23},{ 9}}, {{279},{ 9}}, +{{151},{ 
9}}, {{407},{ 9}}, {{ 87},{ 9}}, {{343},{ 9}}, {{215},{ 9}}, +{{471},{ 9}}, {{ 55},{ 9}}, {{311},{ 9}}, {{183},{ 9}}, {{439},{ 9}}, +{{119},{ 9}}, {{375},{ 9}}, {{247},{ 9}}, {{503},{ 9}}, {{ 15},{ 9}}, +{{271},{ 9}}, {{143},{ 9}}, {{399},{ 9}}, {{ 79},{ 9}}, {{335},{ 9}}, +{{207},{ 9}}, {{463},{ 9}}, {{ 47},{ 9}}, {{303},{ 9}}, {{175},{ 9}}, +{{431},{ 9}}, {{111},{ 9}}, {{367},{ 9}}, {{239},{ 9}}, {{495},{ 9}}, +{{ 31},{ 9}}, {{287},{ 9}}, {{159},{ 9}}, {{415},{ 9}}, {{ 95},{ 9}}, +{{351},{ 9}}, {{223},{ 9}}, {{479},{ 9}}, {{ 63},{ 9}}, {{319},{ 9}}, +{{191},{ 9}}, {{447},{ 9}}, {{127},{ 9}}, {{383},{ 9}}, {{255},{ 9}}, +{{511},{ 9}}, {{ 0},{ 7}}, {{ 64},{ 7}}, {{ 32},{ 7}}, {{ 96},{ 7}}, +{{ 16},{ 7}}, {{ 80},{ 7}}, {{ 48},{ 7}}, {{112},{ 7}}, {{ 8},{ 7}}, +{{ 72},{ 7}}, {{ 40},{ 7}}, {{104},{ 7}}, {{ 24},{ 7}}, {{ 88},{ 7}}, +{{ 56},{ 7}}, {{120},{ 7}}, {{ 4},{ 7}}, {{ 68},{ 7}}, {{ 36},{ 7}}, +{{100},{ 7}}, {{ 20},{ 7}}, {{ 84},{ 7}}, {{ 52},{ 7}}, {{116},{ 7}}, +{{ 3},{ 8}}, {{131},{ 8}}, {{ 67},{ 8}}, {{195},{ 8}}, {{ 35},{ 8}}, +{{163},{ 8}}, {{ 99},{ 8}}, {{227},{ 8}} +}; + +local const ct_data static_dtree[D_CODES] = { +{{ 0},{ 5}}, {{16},{ 5}}, {{ 8},{ 5}}, {{24},{ 5}}, {{ 4},{ 5}}, +{{20},{ 5}}, {{12},{ 5}}, {{28},{ 5}}, {{ 2},{ 5}}, {{18},{ 5}}, +{{10},{ 5}}, {{26},{ 5}}, {{ 6},{ 5}}, {{22},{ 5}}, {{14},{ 5}}, +{{30},{ 5}}, {{ 1},{ 5}}, {{17},{ 5}}, {{ 9},{ 5}}, {{25},{ 5}}, +{{ 5},{ 5}}, {{21},{ 5}}, {{13},{ 5}}, {{29},{ 5}}, {{ 3},{ 5}}, +{{19},{ 5}}, {{11},{ 5}}, {{27},{ 5}}, {{ 7},{ 5}}, {{23},{ 5}} +}; + +const uch ZLIB_INTERNAL _dist_code[DIST_CODE_LEN] = { + 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, + 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, +10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, +11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, +12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, +13, 13, 13, 13, 13, 
13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, +13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, +14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, +14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, +14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, +15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, +15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, +15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 0, 0, 16, 17, +18, 18, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, +23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, +24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, +26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, +26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, +27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, +27, 27, 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, +28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, +28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, +28, 28, 28, 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, +29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, +29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, +29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29 +}; + +const uch ZLIB_INTERNAL _length_code[MAX_MATCH-MIN_MATCH+1]= { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 12, 12, +13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, +17, 17, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, +19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 20, 20, 20, 
20, 20, 20, 20, 20, 20, 20, +21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 22, 22, 22, 22, +22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, +23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, +24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, +25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, +25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, 26, 26, 26, 26, 26, 26, +26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, +26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, +27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 28 +}; + +local const int base_length[LENGTH_CODES] = { +0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, +64, 80, 96, 112, 128, 160, 192, 224, 0 +}; + +local const int base_dist[D_CODES] = { + 0, 1, 2, 3, 4, 6, 8, 12, 16, 24, + 32, 48, 64, 96, 128, 192, 256, 384, 512, 768, + 1024, 1536, 2048, 3072, 4096, 6144, 8192, 12288, 16384, 24576 +}; + diff --git a/deps/SZ/zlib/uncompr.c b/deps/SZ/zlib/uncompr.c new file mode 100644 index 0000000000000000000000000000000000000000..f03a1a865e347d10ac16f6a70b2bc2fdc5235f9c --- /dev/null +++ b/deps/SZ/zlib/uncompr.c @@ -0,0 +1,93 @@ +/* uncompr.c -- decompress a memory buffer + * Copyright (C) 1995-2003, 2010, 2014, 2016 Jean-loup Gailly, Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* @(#) $Id$ */ + +#define ZLIB_INTERNAL +#include "zlib.h" + +/* =========================================================================== + Decompresses the source buffer into the destination buffer. *sourceLen is + the byte length of the source buffer. Upon entry, *destLen is the total size + of the destination buffer, which must be large enough to hold the entire + uncompressed data. 
(The size of the uncompressed data must have been saved + previously by the compressor and transmitted to the decompressor by some + mechanism outside the scope of this compression library.) Upon exit, + *destLen is the size of the decompressed data and *sourceLen is the number + of source bytes consumed. Upon return, source + *sourceLen points to the + first unused input byte. + + uncompress returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_BUF_ERROR if there was not enough room in the output buffer, or + Z_DATA_ERROR if the input data was corrupted, including if the input data is + an incomplete zlib stream. +*/ +int ZEXPORT uncompress2 (dest, destLen, source, sourceLen) + Bytef *dest; + uLongf *destLen; + const Bytef *source; + uLong *sourceLen; +{ + z_stream stream; + int err; + const uInt max = (uInt)-1; + uLong len, left; + Byte buf[1]; /* for detection of incomplete stream when *destLen == 0 */ + + len = *sourceLen; + if (*destLen) { + left = *destLen; + *destLen = 0; + } + else { + left = 1; + dest = buf; + } + + stream.next_in = (z_const Bytef *)source; + stream.avail_in = 0; + stream.zalloc = (alloc_func)0; + stream.zfree = (free_func)0; + stream.opaque = (voidpf)0; + + err = inflateInit(&stream); + if (err != Z_OK) return err; + + stream.next_out = dest; + stream.avail_out = 0; + + do { + if (stream.avail_out == 0) { + stream.avail_out = left > (uLong)max ? max : (uInt)left; + left -= stream.avail_out; + } + if (stream.avail_in == 0) { + stream.avail_in = len > (uLong)max ? max : (uInt)len; + len -= stream.avail_in; + } + err = inflate(&stream, Z_NO_FLUSH); + } while (err == Z_OK); + + *sourceLen -= len + stream.avail_in; + if (dest != buf) + *destLen = stream.total_out; + else if (stream.total_out && err == Z_BUF_ERROR) + left = 1; + + inflateEnd(&stream); + return err == Z_STREAM_END ? Z_OK : + err == Z_NEED_DICT ? Z_DATA_ERROR : + err == Z_BUF_ERROR && left + stream.avail_out ? 
Z_DATA_ERROR : + err; +} + +int ZEXPORT uncompress (dest, destLen, source, sourceLen) + Bytef *dest; + uLongf *destLen; + const Bytef *source; + uLong sourceLen; +{ + return uncompress2(dest, destLen, source, &sourceLen); +} diff --git a/deps/SZ/zlib/zconf.h b/deps/SZ/zlib/zconf.h new file mode 100644 index 0000000000000000000000000000000000000000..77398c11a1e2c5cd2262559cc4c9d033d56b22eb --- /dev/null +++ b/deps/SZ/zlib/zconf.h @@ -0,0 +1,534 @@ +/* zconf.h -- configuration of the zlib compression library + * Copyright (C) 1995-2016 Jean-loup Gailly, Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* @(#) $Id$ */ + +#ifndef ZCONF_H +#define ZCONF_H + +/* + * If you *really* need a unique prefix for all types and library functions, + * compile with -DZ_PREFIX. The "standard" zlib should be compiled without it. + * Even better than compiling with -DZ_PREFIX would be to use configure to set + * this permanently in zconf.h using "./configure --zprefix". 
+ */ +#ifdef Z_PREFIX /* may be set to #if 1 by ./configure */ +# define Z_PREFIX_SET + +/* all linked symbols and init macros */ +# define _dist_code z__dist_code +# define _length_code z__length_code +# define _tr_align z__tr_align +# define _tr_flush_bits z__tr_flush_bits +# define _tr_flush_block z__tr_flush_block +# define _tr_init z__tr_init +# define _tr_stored_block z__tr_stored_block +# define _tr_tally z__tr_tally +# define adler32 z_adler32 +# define adler32_combine z_adler32_combine +# define adler32_combine64 z_adler32_combine64 +# define adler32_z z_adler32_z +# ifndef Z_SOLO +# define compress z_compress +# define compress2 z_compress2 +# define compressBound z_compressBound +# endif +# define crc32 z_crc32 +# define crc32_combine z_crc32_combine +# define crc32_combine64 z_crc32_combine64 +# define crc32_z z_crc32_z +# define deflate z_deflate +# define deflateBound z_deflateBound +# define deflateCopy z_deflateCopy +# define deflateEnd z_deflateEnd +# define deflateGetDictionary z_deflateGetDictionary +# define deflateInit z_deflateInit +# define deflateInit2 z_deflateInit2 +# define deflateInit2_ z_deflateInit2_ +# define deflateInit_ z_deflateInit_ +# define deflateParams z_deflateParams +# define deflatePending z_deflatePending +# define deflatePrime z_deflatePrime +# define deflateReset z_deflateReset +# define deflateResetKeep z_deflateResetKeep +# define deflateSetDictionary z_deflateSetDictionary +# define deflateSetHeader z_deflateSetHeader +# define deflateTune z_deflateTune +# define deflate_copyright z_deflate_copyright +# define get_crc_table z_get_crc_table +# ifndef Z_SOLO +# define gz_error z_gz_error +# define gz_intmax z_gz_intmax +# define gz_strwinerror z_gz_strwinerror +# define gzbuffer z_gzbuffer +# define gzclearerr z_gzclearerr +# define gzclose z_gzclose +# define gzclose_r z_gzclose_r +# define gzclose_w z_gzclose_w +# define gzdirect z_gzdirect +# define gzdopen z_gzdopen +# define gzeof z_gzeof +# define gzerror 
z_gzerror +# define gzflush z_gzflush +# define gzfread z_gzfread +# define gzfwrite z_gzfwrite +# define gzgetc z_gzgetc +# define gzgetc_ z_gzgetc_ +# define gzgets z_gzgets +# define gzoffset z_gzoffset +# define gzoffset64 z_gzoffset64 +# define gzopen z_gzopen +# define gzopen64 z_gzopen64 +# ifdef _WIN32 +# define gzopen_w z_gzopen_w +# endif +# define gzprintf z_gzprintf +# define gzputc z_gzputc +# define gzputs z_gzputs +# define gzread z_gzread +# define gzrewind z_gzrewind +# define gzseek z_gzseek +# define gzseek64 z_gzseek64 +# define gzsetparams z_gzsetparams +# define gztell z_gztell +# define gztell64 z_gztell64 +# define gzungetc z_gzungetc +# define gzvprintf z_gzvprintf +# define gzwrite z_gzwrite +# endif +# define inflate z_inflate +# define inflateBack z_inflateBack +# define inflateBackEnd z_inflateBackEnd +# define inflateBackInit z_inflateBackInit +# define inflateBackInit_ z_inflateBackInit_ +# define inflateCodesUsed z_inflateCodesUsed +# define inflateCopy z_inflateCopy +# define inflateEnd z_inflateEnd +# define inflateGetDictionary z_inflateGetDictionary +# define inflateGetHeader z_inflateGetHeader +# define inflateInit z_inflateInit +# define inflateInit2 z_inflateInit2 +# define inflateInit2_ z_inflateInit2_ +# define inflateInit_ z_inflateInit_ +# define inflateMark z_inflateMark +# define inflatePrime z_inflatePrime +# define inflateReset z_inflateReset +# define inflateReset2 z_inflateReset2 +# define inflateResetKeep z_inflateResetKeep +# define inflateSetDictionary z_inflateSetDictionary +# define inflateSync z_inflateSync +# define inflateSyncPoint z_inflateSyncPoint +# define inflateUndermine z_inflateUndermine +# define inflateValidate z_inflateValidate +# define inflate_copyright z_inflate_copyright +# define inflate_fast z_inflate_fast +# define inflate_table z_inflate_table +# ifndef Z_SOLO +# define uncompress z_uncompress +# define uncompress2 z_uncompress2 +# endif +# define zError z_zError +# ifndef Z_SOLO +# define 
zcalloc z_zcalloc +# define zcfree z_zcfree +# endif +# define zlibCompileFlags z_zlibCompileFlags +# define zlibVersion z_zlibVersion + +/* all zlib typedefs in zlib.h and zconf.h */ +# define Byte z_Byte +# define Bytef z_Bytef +# define alloc_func z_alloc_func +# define charf z_charf +# define free_func z_free_func +# ifndef Z_SOLO +# define gzFile z_gzFile +# endif +# define gz_header z_gz_header +# define gz_headerp z_gz_headerp +# define in_func z_in_func +# define intf z_intf +# define out_func z_out_func +# define uInt z_uInt +# define uIntf z_uIntf +# define uLong z_uLong +# define uLongf z_uLongf +# define voidp z_voidp +# define voidpc z_voidpc +# define voidpf z_voidpf + +/* all zlib structs in zlib.h and zconf.h */ +# define gz_header_s z_gz_header_s +# define internal_state z_internal_state + +#endif + +#if defined(__MSDOS__) && !defined(MSDOS) +# define MSDOS +#endif +#if (defined(OS_2) || defined(__OS2__)) && !defined(OS2) +# define OS2 +#endif +#if defined(_WINDOWS) && !defined(WINDOWS) +# define WINDOWS +#endif +#if defined(_WIN32) || defined(_WIN32_WCE) || defined(__WIN32__) +# ifndef WIN32 +# define WIN32 +# endif +#endif +#if (defined(MSDOS) || defined(OS2) || defined(WINDOWS)) && !defined(WIN32) +# if !defined(__GNUC__) && !defined(__FLAT__) && !defined(__386__) +# ifndef SYS16BIT +# define SYS16BIT +# endif +# endif +#endif + +/* + * Compile with -DMAXSEG_64K if the alloc function cannot allocate more + * than 64k bytes at a time (needed on systems with 16-bit int). 
+ */ +#ifdef SYS16BIT +# define MAXSEG_64K +#endif +#ifdef MSDOS +# define UNALIGNED_OK +#endif + +#ifdef __STDC_VERSION__ +# ifndef STDC +# define STDC +# endif +# if __STDC_VERSION__ >= 199901L +# ifndef STDC99 +# define STDC99 +# endif +# endif +#endif +#if !defined(STDC) && (defined(__STDC__) || defined(__cplusplus)) +# define STDC +#endif +#if !defined(STDC) && (defined(__GNUC__) || defined(__BORLANDC__)) +# define STDC +#endif +#if !defined(STDC) && (defined(MSDOS) || defined(WINDOWS) || defined(WIN32)) +# define STDC +#endif +#if !defined(STDC) && (defined(OS2) || defined(__HOS_AIX__)) +# define STDC +#endif + +#if defined(__OS400__) && !defined(STDC) /* iSeries (formerly AS/400). */ +# define STDC +#endif + +#ifndef STDC +# ifndef const /* cannot use !defined(STDC) && !defined(const) on Mac */ +# define const /* note: need a more gentle solution here */ +# endif +#endif + +#if defined(ZLIB_CONST) && !defined(z_const) +# define z_const const +#else +# define z_const +#endif + +#ifdef Z_SOLO + typedef unsigned long z_size_t; +#else +# define z_longlong long long +# if defined(NO_SIZE_T) + typedef unsigned NO_SIZE_T z_size_t; +# elif defined(STDC) +# include <stddef.h> /* for size_t */ + typedef size_t z_size_t; +# else + typedef unsigned long z_size_t; +# endif +# undef z_longlong +#endif + +/* Maximum value for memLevel in deflateInit2 */ +#ifndef MAX_MEM_LEVEL +# ifdef MAXSEG_64K +# define MAX_MEM_LEVEL 8 +# else +# define MAX_MEM_LEVEL 9 +# endif +#endif + +/* Maximum value for windowBits in deflateInit2 and inflateInit2. + * WARNING: reducing MAX_WBITS makes minigzip unable to extract .gz files + * created by gzip. (Files created by minigzip can still be extracted by + * gzip.) + */ +#ifndef MAX_WBITS +# define MAX_WBITS 15 /* 32K LZ77 window */ +#endif + +/* The memory requirements for deflate are (in bytes): + (1 << (windowBits+2)) + (1 << (memLevel+9)) + that is: 128K for windowBits=15 + 128K for memLevel = 8 (default values) + plus a few kilobytes for small objects. 
For example, if you want to reduce + the default memory requirements from 256K to 128K, compile with + make CFLAGS="-O -DMAX_WBITS=14 -DMAX_MEM_LEVEL=7" + Of course this will generally degrade compression (there's no free lunch). + + The memory requirements for inflate are (in bytes) 1 << windowBits + that is, 32K for windowBits=15 (default value) plus about 7 kilobytes + for small objects. +*/ + + /* Type declarations */ + +#ifndef OF /* function prototypes */ +# ifdef STDC +# define OF(args) args +# else +# define OF(args) () +# endif +#endif + +#ifndef Z_ARG /* function prototypes for stdarg */ +# if defined(STDC) || defined(Z_HAVE_STDARG_H) +# define Z_ARG(args) args +# else +# define Z_ARG(args) () +# endif +#endif + +/* The following definitions for FAR are needed only for MSDOS mixed + * model programming (small or medium model with some far allocations). + * This was tested only with MSC; for other MSDOS compilers you may have + * to define NO_MEMCPY in zutil.h. If you don't need the mixed model, + * just define FAR to be empty. + */ +#ifdef SYS16BIT +# if defined(M_I86SM) || defined(M_I86MM) + /* MSC small or medium model */ +# define SMALL_MEDIUM +# ifdef _MSC_VER +# define FAR _far +# else +# define FAR far +# endif +# endif +# if (defined(__SMALL__) || defined(__MEDIUM__)) + /* Turbo C small or medium model */ +# define SMALL_MEDIUM +# ifdef __BORLANDC__ +# define FAR _far +# else +# define FAR far +# endif +# endif +#endif + +#if defined(WINDOWS) || defined(WIN32) + /* If building or using zlib as a DLL, define ZLIB_DLL. + * This is not mandatory, but it offers a little performance increase. + */ +# ifdef ZLIB_DLL +# if defined(WIN32) && (!defined(__BORLANDC__) || (__BORLANDC__ >= 0x500)) +# ifdef ZLIB_INTERNAL +# define ZEXTERN extern __declspec(dllexport) +# else +# define ZEXTERN extern __declspec(dllimport) +# endif +# endif +# endif /* ZLIB_DLL */ + /* If building or using zlib with the WINAPI/WINAPIV calling convention, + * define ZLIB_WINAPI. 
+ * Caution: the standard ZLIB1.DLL is NOT compiled using ZLIB_WINAPI. + */ +# ifdef ZLIB_WINAPI +# ifdef FAR +# undef FAR +# endif +# include <windows.h> + /* No need for _export, use ZLIB.DEF instead. */ + /* For complete Windows compatibility, use WINAPI, not __stdcall. */ +# define ZEXPORT WINAPI +# ifdef WIN32 +# define ZEXPORTVA WINAPIV +# else +# define ZEXPORTVA FAR CDECL +# endif +# endif +#endif + +#if defined (__BEOS__) +# ifdef ZLIB_DLL +# ifdef ZLIB_INTERNAL +# define ZEXPORT __declspec(dllexport) +# define ZEXPORTVA __declspec(dllexport) +# else +# define ZEXPORT __declspec(dllimport) +# define ZEXPORTVA __declspec(dllimport) +# endif +# endif +#endif + +#ifndef ZEXTERN +# define ZEXTERN extern +#endif +#ifndef ZEXPORT +# define ZEXPORT +#endif +#ifndef ZEXPORTVA +# define ZEXPORTVA +#endif + +#ifndef FAR +# define FAR +#endif + +#if !defined(__MACTYPES__) +typedef unsigned char Byte; /* 8 bits */ +#endif +typedef unsigned int uInt; /* 16 bits or more */ +typedef unsigned long uLong; /* 32 bits or more */ + +#ifdef SMALL_MEDIUM + /* Borland C/C++ and some old MSC versions ignore FAR inside typedef */ +# define Bytef Byte FAR +#else + typedef Byte FAR Bytef; +#endif +typedef char FAR charf; +typedef int FAR intf; +typedef uInt FAR uIntf; +typedef uLong FAR uLongf; + +#ifdef STDC + typedef void const *voidpc; + typedef void FAR *voidpf; + typedef void *voidp; +#else + typedef Byte const *voidpc; + typedef Byte FAR *voidpf; + typedef Byte *voidp; +#endif + +#if !defined(Z_U4) && !defined(Z_SOLO) && defined(STDC) +# include <limits.h> /* for UINT_MAX, ULONG_MAX, USHRT_MAX */ +# if (UINT_MAX == 0xffffffffUL) +# define Z_U4 unsigned +# elif (ULONG_MAX == 0xffffffffUL) +# define Z_U4 unsigned long +# elif (USHRT_MAX == 0xffffffffUL) +# define Z_U4 unsigned short +# endif +#endif + +#ifdef Z_U4 + typedef Z_U4 z_crc_t; +#else + typedef unsigned long z_crc_t; +#endif + +#if 1 /* was set to #if 1 by ./configure */ +# define Z_HAVE_UNISTD_H +#endif + +#if 1 /* was set to #if 1 by ./configure */ +# define Z_HAVE_STDARG_H 
+#endif + +#ifdef STDC +# ifndef Z_SOLO +# include <sys/types.h> /* for off_t */ +# endif +#endif + +#if defined(STDC) || defined(Z_HAVE_STDARG_H) +# ifndef Z_SOLO +# include <stdarg.h> /* for va_list */ +# endif +#endif + +#ifdef _WIN32 +# ifndef Z_SOLO +# include <stddef.h> /* for wchar_t */ +# endif +#endif + +/* a little trick to accommodate both "#define _LARGEFILE64_SOURCE" and + * "#define _LARGEFILE64_SOURCE 1" as requesting 64-bit operations, (even + * though the former does not conform to the LFS document), but considering + * both "#undef _LARGEFILE64_SOURCE" and "#define _LARGEFILE64_SOURCE 0" as + * equivalently requesting no 64-bit operations + */ +#if defined(_LARGEFILE64_SOURCE) && -_LARGEFILE64_SOURCE - -1 == 1 +# undef _LARGEFILE64_SOURCE +#endif + +#if defined(__WATCOMC__) && !defined(Z_HAVE_UNISTD_H) +# define Z_HAVE_UNISTD_H +#endif +#ifndef Z_SOLO +# if defined(Z_HAVE_UNISTD_H) || defined(_LARGEFILE64_SOURCE) +# include <unistd.h> /* for SEEK_*, off_t, and _LFS64_LARGEFILE */ +# ifdef VMS +# include <unixio.h> /* for off_t */ +# endif +# ifndef z_off_t +# define z_off_t off_t +# endif +# endif +#endif + +#if defined(_LFS64_LARGEFILE) && _LFS64_LARGEFILE-0 +# define Z_LFS64 +#endif + +#if defined(_LARGEFILE64_SOURCE) && defined(Z_LFS64) +# define Z_LARGE64 +#endif + +#if defined(_FILE_OFFSET_BITS) && _FILE_OFFSET_BITS-0 == 64 && defined(Z_LFS64) +# define Z_WANT64 +#endif + +#if !defined(SEEK_SET) && !defined(Z_SOLO) +# define SEEK_SET 0 /* Seek from beginning of file. */ +# define SEEK_CUR 1 /* Seek from current position. 
*/ +# define SEEK_END 2 /* Set file pointer to EOF plus "offset" */ +#endif + +#ifndef z_off_t +# define z_off_t long +#endif + +#if !defined(_WIN32) && defined(Z_LARGE64) +# define z_off64_t off64_t +#else +# if defined(_WIN32) && !defined(__GNUC__) && !defined(Z_SOLO) +# define z_off64_t __int64 +# else +# define z_off64_t z_off_t +# endif +#endif + +/* MVS linker does not support external names larger than 8 bytes */ +#if defined(__MVS__) + #pragma map(deflateInit_,"DEIN") + #pragma map(deflateInit2_,"DEIN2") + #pragma map(deflateEnd,"DEEND") + #pragma map(deflateBound,"DEBND") + #pragma map(inflateInit_,"ININ") + #pragma map(inflateInit2_,"ININ2") + #pragma map(inflateEnd,"INEND") + #pragma map(inflateSync,"INSY") + #pragma map(inflateSetDictionary,"INSEDI") + #pragma map(compressBound,"CMBND") + #pragma map(inflate_table,"INTABL") + #pragma map(inflate_fast,"INFA") + #pragma map(inflate_copyright,"INCOPY") +#endif + +#endif /* ZCONF_H */ diff --git a/deps/SZ/zlib/zlib.h b/deps/SZ/zlib/zlib.h new file mode 100644 index 0000000000000000000000000000000000000000..f09cdaf1e0543de911d8220befdb51fa8632a9e6 --- /dev/null +++ b/deps/SZ/zlib/zlib.h @@ -0,0 +1,1912 @@ +/* zlib.h -- interface of the 'zlib' general purpose compression library + version 1.2.11, January 15th, 2017 + + Copyright (C) 1995-2017 Jean-loup Gailly and Mark Adler + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. 
Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. + + Jean-loup Gailly Mark Adler + jloup@gzip.org madler@alumni.caltech.edu + + + The data format used by the zlib library is described by RFCs (Request for + Comments) 1950 to 1952 in the files http://tools.ietf.org/html/rfc1950 + (zlib format), rfc1951 (deflate format) and rfc1952 (gzip format). +*/ + +#ifndef ZLIB_H +#define ZLIB_H + +#include "zconf.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define ZLIB_VERSION "1.2.11" +#define ZLIB_VERNUM 0x12b0 +#define ZLIB_VER_MAJOR 1 +#define ZLIB_VER_MINOR 2 +#define ZLIB_VER_REVISION 11 +#define ZLIB_VER_SUBREVISION 0 + +/* + The 'zlib' compression library provides in-memory compression and + decompression functions, including integrity checks of the uncompressed data. + This version of the library supports only one compression method (deflation) + but other algorithms will be added later and will have the same stream + interface. + + Compression can be done in a single step if the buffers are large enough, + or can be done by repeated calls of the compression function. In the latter + case, the application must provide more input and/or consume the output + (providing more output space) before each call. + + The compressed data format used by default by the in-memory functions is + the zlib format, which is a zlib wrapper documented in RFC 1950, wrapped + around a deflate stream, which is itself documented in RFC 1951. + + The library also supports reading and writing files in gzip (.gz) format + with an interface similar to that of stdio using the functions that start + with "gz". The gzip format is different from the zlib format. gzip is a + gzip wrapper, documented in RFC 1952, wrapped around a deflate stream. + + This library can optionally read and write gzip and raw deflate streams in + memory as well. 
+ + The zlib format was designed to be compact and fast for use in memory + and on communications channels. The gzip format was designed for single- + file compression on file systems, has a larger header than zlib to maintain + directory information, and uses a different, slower check method than zlib. + + The library does not install any signal handler. The decoder checks + the consistency of the compressed data, so the library should never crash + even in the case of corrupted input. +*/ + +typedef voidpf (*alloc_func) OF((voidpf opaque, uInt items, uInt size)); +typedef void (*free_func) OF((voidpf opaque, voidpf address)); + +struct internal_state; + +typedef struct z_stream_s { + z_const Bytef *next_in; /* next input byte */ + uInt avail_in; /* number of bytes available at next_in */ + uLong total_in; /* total number of input bytes read so far */ + + Bytef *next_out; /* next output byte will go here */ + uInt avail_out; /* remaining free space at next_out */ + uLong total_out; /* total number of bytes output so far */ + + z_const char *msg; /* last error message, NULL if no error */ + struct internal_state FAR *state; /* not visible by applications */ + + alloc_func zalloc; /* used to allocate the internal state */ + free_func zfree; /* used to free the internal state */ + voidpf opaque; /* private data object passed to zalloc and zfree */ + + int data_type; /* best guess about the data type: binary or text + for deflate, or the decoding state for inflate */ + uLong adler; /* Adler-32 or CRC-32 value of the uncompressed data */ + uLong reserved; /* reserved for future use */ +} z_stream; + +typedef z_stream FAR *z_streamp; + +/* + gzip header information passed to and from zlib routines. See RFC 1952 + for more details on the meanings of these fields. 
+*/ +typedef struct gz_header_s { + int text; /* true if compressed data believed to be text */ + uLong time; /* modification time */ + int xflags; /* extra flags (not used when writing a gzip file) */ + int os; /* operating system */ + Bytef *extra; /* pointer to extra field or Z_NULL if none */ + uInt extra_len; /* extra field length (valid if extra != Z_NULL) */ + uInt extra_max; /* space at extra (only when reading header) */ + Bytef *name; /* pointer to zero-terminated file name or Z_NULL */ + uInt name_max; /* space at name (only when reading header) */ + Bytef *comment; /* pointer to zero-terminated comment or Z_NULL */ + uInt comm_max; /* space at comment (only when reading header) */ + int hcrc; /* true if there was or will be a header crc */ + int done; /* true when done reading gzip header (not used + when writing a gzip file) */ +} gz_header; + +typedef gz_header FAR *gz_headerp; + +/* + The application must update next_in and avail_in when avail_in has dropped + to zero. It must update next_out and avail_out when avail_out has dropped + to zero. The application must initialize zalloc, zfree and opaque before + calling the init function. All other fields are set by the compression + library and must not be updated by the application. + + The opaque value provided by the application will be passed as the first + parameter for calls of zalloc and zfree. This can be useful for custom + memory management. The compression library attaches no meaning to the + opaque value. + + zalloc must return Z_NULL if there is not enough memory for the object. + If zlib is used in a multi-threaded application, zalloc and zfree must be + thread safe. In that case, zlib is thread-safe. When zalloc and zfree are + Z_NULL on entry to the initialization function, they are set to internal + routines that use the standard library functions malloc() and free(). 
+ + On 16-bit systems, the functions zalloc and zfree must be able to allocate + exactly 65536 bytes, but will not be required to allocate more than this if + the symbol MAXSEG_64K is defined (see zconf.h). WARNING: On MSDOS, pointers + returned by zalloc for objects of exactly 65536 bytes *must* have their + offset normalized to zero. The default allocation function provided by this + library ensures this (see zutil.c). To reduce memory requirements and avoid + any allocation of 64K objects, at the expense of compression ratio, compile + the library with -DMAX_WBITS=14 (see zconf.h). + + The fields total_in and total_out can be used for statistics or progress + reports. After compression, total_in holds the total size of the + uncompressed data and may be saved for use by the decompressor (particularly + if the decompressor wants to decompress everything in a single step). +*/ + + /* constants */ + +#define Z_NO_FLUSH 0 +#define Z_PARTIAL_FLUSH 1 +#define Z_SYNC_FLUSH 2 +#define Z_FULL_FLUSH 3 +#define Z_FINISH 4 +#define Z_BLOCK 5 +#define Z_TREES 6 +/* Allowed flush values; see deflate() and inflate() below for details */ + +#define Z_OK 0 +#define Z_STREAM_END 1 +#define Z_NEED_DICT 2 +#define Z_ERRNO (-1) +#define Z_STREAM_ERROR (-2) +#define Z_DATA_ERROR (-3) +#define Z_MEM_ERROR (-4) +#define Z_BUF_ERROR (-5) +#define Z_VERSION_ERROR (-6) +/* Return codes for the compression/decompression functions. Negative values + * are errors, positive values are used for special but normal events. 
+ */ + +#define Z_NO_COMPRESSION 0 +#define Z_BEST_SPEED 1 +#define Z_BEST_COMPRESSION 9 +#define Z_DEFAULT_COMPRESSION (-1) +/* compression levels */ + +#define Z_FILTERED 1 +#define Z_HUFFMAN_ONLY 2 +#define Z_RLE 3 +#define Z_FIXED 4 +#define Z_DEFAULT_STRATEGY 0 +/* compression strategy; see deflateInit2() below for details */ + +#define Z_BINARY 0 +#define Z_TEXT 1 +#define Z_ASCII Z_TEXT /* for compatibility with 1.2.2 and earlier */ +#define Z_UNKNOWN 2 +/* Possible values of the data_type field for deflate() */ + +#define Z_DEFLATED 8 +/* The deflate compression method (the only one supported in this version) */ + +#define Z_NULL 0 /* for initializing zalloc, zfree, opaque */ + +#define zlib_version zlibVersion() +/* for compatibility with versions < 1.0.2 */ + + + /* basic functions */ + +ZEXTERN const char * ZEXPORT zlibVersion OF((void)); +/* The application can compare zlibVersion and ZLIB_VERSION for consistency. + If the first character differs, the library code actually used is not + compatible with the zlib.h header file used by the application. This check + is automatically made by deflateInit and inflateInit. + */ + +/* +ZEXTERN int ZEXPORT deflateInit OF((z_streamp strm, int level)); + + Initializes the internal stream state for compression. The fields + zalloc, zfree and opaque must be initialized before by the caller. If + zalloc and zfree are set to Z_NULL, deflateInit updates them to use default + allocation functions. + + The compression level must be Z_DEFAULT_COMPRESSION, or between 0 and 9: + 1 gives best speed, 9 gives best compression, 0 gives no compression at all + (the input data is simply copied a block at a time). Z_DEFAULT_COMPRESSION + requests a default compromise between speed and compression (currently + equivalent to level 6). 
+ + deflateInit returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_STREAM_ERROR if level is not a valid compression level, or + Z_VERSION_ERROR if the zlib library version (zlib_version) is incompatible + with the version assumed by the caller (ZLIB_VERSION). msg is set to null + if there is no error message. deflateInit does not perform any compression: + this will be done by deflate(). +*/ + + +ZEXTERN int ZEXPORT deflate OF((z_streamp strm, int flush)); +/* + deflate compresses as much data as possible, and stops when the input + buffer becomes empty or the output buffer becomes full. It may introduce + some output latency (reading input without producing any output) except when + forced to flush. + + The detailed semantics are as follows. deflate performs one or both of the + following actions: + + - Compress more input starting at next_in and update next_in and avail_in + accordingly. If not all input can be processed (because there is not + enough room in the output buffer), next_in and avail_in are updated and + processing will resume at this point for the next call of deflate(). + + - Generate more output starting at next_out and update next_out and avail_out + accordingly. This action is forced if the parameter flush is non zero. + Forcing flush frequently degrades the compression ratio, so this parameter + should be set only when necessary. Some output may be provided even if + flush is zero. + + Before the call of deflate(), the application should ensure that at least + one of the actions is possible, by providing more input and/or consuming more + output, and updating avail_in or avail_out accordingly; avail_out should + never be zero before the call. The application can consume the compressed + output when it wants, for example when the output buffer is full (avail_out + == 0), or after each call of deflate(). 
If deflate returns Z_OK and with + zero avail_out, it must be called again after making room in the output + buffer because there might be more output pending. See deflatePending(), + which can be used if desired to determine whether or not there is more output + in that case. + + Normally the parameter flush is set to Z_NO_FLUSH, which allows deflate to + decide how much data to accumulate before producing output, in order to + maximize compression. + + If the parameter flush is set to Z_SYNC_FLUSH, all pending output is + flushed to the output buffer and the output is aligned on a byte boundary, so + that the decompressor can get all input data available so far. (In + particular avail_in is zero after the call if enough output space has been + provided before the call.) Flushing may degrade compression for some + compression algorithms and so it should be used only when necessary. This + completes the current deflate block and follows it with an empty stored block + that is three bits plus filler bits to the next byte, followed by four bytes + (00 00 ff ff). + + If flush is set to Z_PARTIAL_FLUSH, all pending output is flushed to the + output buffer, but the output is not aligned to a byte boundary. All of the + input data so far will be available to the decompressor, as for Z_SYNC_FLUSH. + This completes the current deflate block and follows it with an empty fixed + codes block that is 10 bits long. This assures that enough bytes are output + in order for the decompressor to finish the block before the empty fixed + codes block. + + If flush is set to Z_BLOCK, a deflate block is completed and emitted, as + for Z_SYNC_FLUSH, but the output is not aligned on a byte boundary, and up to + seven bits of the current block are held to be written as the next byte after + the next deflate block is completed. In this case, the decompressor may not + be provided enough bits at this point in order to complete decompression of + the data provided so far to the compressor. 
It may need to wait for the next + block to be emitted. This is for advanced applications that need to control + the emission of deflate blocks. + + If flush is set to Z_FULL_FLUSH, all output is flushed as with + Z_SYNC_FLUSH, and the compression state is reset so that decompression can + restart from this point if previous compressed data has been damaged or if + random access is desired. Using Z_FULL_FLUSH too often can seriously degrade + compression. + + If deflate returns with avail_out == 0, this function must be called again + with the same value of the flush parameter and more output space (updated + avail_out), until the flush is complete (deflate returns with non-zero + avail_out). In the case of a Z_FULL_FLUSH or Z_SYNC_FLUSH, make sure that + avail_out is greater than six to avoid repeated flush markers due to + avail_out == 0 on return. + + If the parameter flush is set to Z_FINISH, pending input is processed, + pending output is flushed and deflate returns with Z_STREAM_END if there was + enough output space. If deflate returns with Z_OK or Z_BUF_ERROR, this + function must be called again with Z_FINISH and more output space (updated + avail_out) but no more input data, until it returns with Z_STREAM_END or an + error. After deflate has returned Z_STREAM_END, the only possible operations + on the stream are deflateReset or deflateEnd. + + Z_FINISH can be used in the first deflate call after deflateInit if all the + compression is to be done in a single step. In order to complete in one + call, avail_out must be at least the value returned by deflateBound (see + below). Then deflate is guaranteed to return Z_STREAM_END. If not enough + output space is provided, deflate will not return Z_STREAM_END, and it must + be called again as described above. + + deflate() sets strm->adler to the Adler-32 checksum of all input read + so far (that is, total_in bytes). 
If a gzip stream is being generated, then + strm->adler will be the CRC-32 checksum of the input read so far. (See + deflateInit2 below.) + + deflate() may update strm->data_type if it can make a good guess about + the input data type (Z_BINARY or Z_TEXT). If in doubt, the data is + considered binary. This field is only for information purposes and does not + affect the compression algorithm in any manner. + + deflate() returns Z_OK if some progress has been made (more input + processed or more output produced), Z_STREAM_END if all input has been + consumed and all output has been produced (only when flush is set to + Z_FINISH), Z_STREAM_ERROR if the stream state was inconsistent (for example + if next_in or next_out was Z_NULL or the state was inadvertently written over + by the application), or Z_BUF_ERROR if no progress is possible (for example + avail_in or avail_out was zero). Note that Z_BUF_ERROR is not fatal, and + deflate() can be called again with more input and more output space to + continue compressing. +*/ + + +ZEXTERN int ZEXPORT deflateEnd OF((z_streamp strm)); +/* + All dynamically allocated data structures for this stream are freed. + This function discards any unprocessed input and does not flush any pending + output. + + deflateEnd returns Z_OK if success, Z_STREAM_ERROR if the + stream state was inconsistent, Z_DATA_ERROR if the stream was freed + prematurely (some input or output was discarded). In the error case, msg + may be set but then points to a static string (which must not be + deallocated). +*/ + + +/* +ZEXTERN int ZEXPORT inflateInit OF((z_streamp strm)); + + Initializes the internal stream state for decompression. The fields + next_in, avail_in, zalloc, zfree and opaque must be initialized before by + the caller. In the current version of inflate, the provided input is not + read or consumed. 
The allocation of a sliding window will be deferred to + the first call of inflate (if the decompression does not complete on the + first call). If zalloc and zfree are set to Z_NULL, inflateInit updates + them to use default allocation functions. + + inflateInit returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_VERSION_ERROR if the zlib library version is incompatible with the + version assumed by the caller, or Z_STREAM_ERROR if the parameters are + invalid, such as a null pointer to the structure. msg is set to null if + there is no error message. inflateInit does not perform any decompression. + Actual decompression will be done by inflate(). So next_in, and avail_in, + next_out, and avail_out are unused and unchanged. The current + implementation of inflateInit() does not process any header information -- + that is deferred until inflate() is called. +*/ + + +ZEXTERN int ZEXPORT inflate OF((z_streamp strm, int flush)); +/* + inflate decompresses as much data as possible, and stops when the input + buffer becomes empty or the output buffer becomes full. It may introduce + some output latency (reading input without producing any output) except when + forced to flush. + + The detailed semantics are as follows. inflate performs one or both of the + following actions: + + - Decompress more input starting at next_in and update next_in and avail_in + accordingly. If not all input can be processed (because there is not + enough room in the output buffer), then next_in and avail_in are updated + accordingly, and processing will resume at this point for the next call of + inflate(). + + - Generate more output starting at next_out and update next_out and avail_out + accordingly. inflate() provides as much output as possible, until there is + no more input data or no more space in the output buffer (see below about + the flush parameter). 
+ + Before the call of inflate(), the application should ensure that at least + one of the actions is possible, by providing more input and/or consuming more + output, and updating the next_* and avail_* values accordingly. If the + caller of inflate() does not provide both available input and available + output space, it is possible that there will be no progress made. The + application can consume the uncompressed output when it wants, for example + when the output buffer is full (avail_out == 0), or after each call of + inflate(). If inflate returns Z_OK and with zero avail_out, it must be + called again after making room in the output buffer because there might be + more output pending. + + The flush parameter of inflate() can be Z_NO_FLUSH, Z_SYNC_FLUSH, Z_FINISH, + Z_BLOCK, or Z_TREES. Z_SYNC_FLUSH requests that inflate() flush as much + output as possible to the output buffer. Z_BLOCK requests that inflate() + stop if and when it gets to the next deflate block boundary. When decoding + the zlib or gzip format, this will cause inflate() to return immediately + after the header and before the first block. When doing a raw inflate, + inflate() will go ahead and process the first block, and will return when it + gets to the end of that block, or when it runs out of data. + + The Z_BLOCK option assists in appending to or combining deflate streams. + To assist in this, on return inflate() always sets strm->data_type to the + number of unused bits in the last byte taken from strm->next_in, plus 64 if + inflate() is currently decoding the last block in the deflate stream, plus + 128 if inflate() returned immediately after decoding an end-of-block code or + decoding the complete header up to just before the first byte of the deflate + stream. The end-of-block will not be indicated until all of the uncompressed + data from that block has been written to strm->next_out. 
The number of + unused bits may in general be greater than seven, except when bit 7 of + data_type is set, in which case the number of unused bits will be less than + eight. data_type is set as noted here every time inflate() returns for all + flush options, and so can be used to determine the amount of currently + consumed input in bits. + + The Z_TREES option behaves as Z_BLOCK does, but it also returns when the + end of each deflate block header is reached, before any actual data in that + block is decoded. This allows the caller to determine the length of the + deflate block header for later use in random access within a deflate block. + 256 is added to the value of strm->data_type when inflate() returns + immediately after reaching the end of the deflate block header. + + inflate() should normally be called until it returns Z_STREAM_END or an + error. However if all decompression is to be performed in a single step (a + single call of inflate), the parameter flush should be set to Z_FINISH. In + this case all pending input is processed and all pending output is flushed; + avail_out must be large enough to hold all of the uncompressed data for the + operation to complete. (The size of the uncompressed data may have been + saved by the compressor for this purpose.) The use of Z_FINISH is not + required to perform an inflation in one step. However it may be used to + inform inflate that a faster approach can be used for the single inflate() + call. Z_FINISH also informs inflate to not maintain a sliding window if the + stream completes, which reduces inflate's memory footprint. If the stream + does not complete, either because not all of the stream is provided or not + enough output space is provided, then a sliding window will be allocated and + inflate() can be called again to continue the operation as if Z_NO_FLUSH had + been used. 
+ + In this implementation, inflate() always flushes as much output as + possible to the output buffer, and always uses the faster approach on the + first call. So the effects of the flush parameter in this implementation are + on the return value of inflate() as noted below, when inflate() returns early + when Z_BLOCK or Z_TREES is used, and when inflate() avoids the allocation of + memory for a sliding window when Z_FINISH is used. + + If a preset dictionary is needed after this call (see inflateSetDictionary + below), inflate sets strm->adler to the Adler-32 checksum of the dictionary + chosen by the compressor and returns Z_NEED_DICT; otherwise it sets + strm->adler to the Adler-32 checksum of all output produced so far (that is, + total_out bytes) and returns Z_OK, Z_STREAM_END or an error code as described + below. At the end of the stream, inflate() checks that its computed Adler-32 + checksum is equal to that saved by the compressor and returns Z_STREAM_END + only if the checksum is correct. + + inflate() can decompress and check either zlib-wrapped or gzip-wrapped + deflate data. The header type is detected automatically, if requested when + initializing with inflateInit2(). Any information contained in the gzip + header is not retained unless inflateGetHeader() is used. When processing + gzip-wrapped deflate data, strm->adler is set to the CRC-32 of the output + produced so far. The CRC-32 is checked against the gzip trailer, as is the + uncompressed length, modulo 2^32.
+ + inflate() returns Z_OK if some progress has been made (more input processed + or more output produced), Z_STREAM_END if the end of the compressed data has + been reached and all uncompressed output has been produced, Z_NEED_DICT if a + preset dictionary is needed at this point, Z_DATA_ERROR if the input data was + corrupted (input stream not conforming to the zlib format or incorrect check + value, in which case strm->msg points to a string with a more specific + error), Z_STREAM_ERROR if the stream structure was inconsistent (for example + next_in or next_out was Z_NULL, or the state was inadvertently written over + by the application), Z_MEM_ERROR if there was not enough memory, Z_BUF_ERROR + if no progress was possible or if there was not enough room in the output + buffer when Z_FINISH is used. Note that Z_BUF_ERROR is not fatal, and + inflate() can be called again with more input and more output space to + continue decompressing. If Z_DATA_ERROR is returned, the application may + then call inflateSync() to look for a good compression block if a partial + recovery of the data is to be attempted. +*/ + + +ZEXTERN int ZEXPORT inflateEnd OF((z_streamp strm)); +/* + All dynamically allocated data structures for this stream are freed. + This function discards any unprocessed input and does not flush any pending + output. + + inflateEnd returns Z_OK if success, or Z_STREAM_ERROR if the stream state + was inconsistent. +*/ + + + /* Advanced functions */ + +/* + The following functions are needed only in some special applications. +*/ + +/* +ZEXTERN int ZEXPORT deflateInit2 OF((z_streamp strm, + int level, + int method, + int windowBits, + int memLevel, + int strategy)); + + This is another version of deflateInit with more compression options. The + fields next_in, zalloc, zfree and opaque must be initialized before by the + caller. + + The method parameter is the compression method. It must be Z_DEFLATED in + this version of the library. 
+ + The windowBits parameter is the base two logarithm of the window size + (the size of the history buffer). It should be in the range 8..15 for this + version of the library. Larger values of this parameter result in better + compression at the expense of memory usage. The default value is 15 if + deflateInit is used instead. + + For the current implementation of deflate(), a windowBits value of 8 (a + window size of 256 bytes) is not supported. As a result, a request for 8 + will result in 9 (a 512-byte window). In that case, providing 8 to + inflateInit2() will result in an error when the zlib header with 9 is + checked against the initialization of inflate(). The remedy is to not use 8 + with deflateInit2() with this initialization, or at least in that case use 9 + with inflateInit2(). + + windowBits can also be -8..-15 for raw deflate. In this case, -windowBits + determines the window size. deflate() will then generate raw deflate data + with no zlib header or trailer, and will not compute a check value. + + windowBits can also be greater than 15 for optional gzip encoding. Add + 16 to windowBits to write a simple gzip header and trailer around the + compressed data instead of a zlib wrapper. The gzip header will have no + file name, no extra data, no comment, no modification time (set to zero), no + header crc, and the operating system will be set to the appropriate value, + if the operating system was determined at compile time. If a gzip stream is + being written, strm->adler is a CRC-32 instead of an Adler-32. + + For raw deflate or gzip encoding, a request for a 256-byte window is + rejected as invalid, since only the zlib header provides a means of + transmitting the window size to the decompressor. + + The memLevel parameter specifies how much memory should be allocated + for the internal compression state. memLevel=1 uses minimum memory but is + slow and reduces compression ratio; memLevel=9 uses maximum memory for + optimal speed. 
The default value is 8. See zconf.h for total memory usage + as a function of windowBits and memLevel. + + The strategy parameter is used to tune the compression algorithm. Use the + value Z_DEFAULT_STRATEGY for normal data, Z_FILTERED for data produced by a + filter (or predictor), Z_HUFFMAN_ONLY to force Huffman encoding only (no + string match), or Z_RLE to limit match distances to one (run-length + encoding). Filtered data consists mostly of small values with a somewhat + random distribution. In this case, the compression algorithm is tuned to + compress them better. The effect of Z_FILTERED is to force more Huffman + coding and less string matching; it is somewhat intermediate between + Z_DEFAULT_STRATEGY and Z_HUFFMAN_ONLY. Z_RLE is designed to be almost as + fast as Z_HUFFMAN_ONLY, but give better compression for PNG image data. The + strategy parameter only affects the compression ratio but not the + correctness of the compressed output even if it is not set appropriately. + Z_FIXED prevents the use of dynamic Huffman codes, allowing for a simpler + decoder for special applications. + + deflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_STREAM_ERROR if any parameter is invalid (such as an invalid + method), or Z_VERSION_ERROR if the zlib library version (zlib_version) is + incompatible with the version assumed by the caller (ZLIB_VERSION). msg is + set to null if there is no error message. deflateInit2 does not perform any + compression: this will be done by deflate(). +*/ + +ZEXTERN int ZEXPORT deflateSetDictionary OF((z_streamp strm, + const Bytef *dictionary, + uInt dictLength)); +/* + Initializes the compression dictionary from the given byte sequence + without producing any compressed output. When using the zlib format, this + function must be called immediately after deflateInit, deflateInit2 or + deflateReset, and before any call of deflate. 
When doing raw deflate, this + function must be called either before any call of deflate, or immediately + after the completion of a deflate block, i.e. after all input has been + consumed and all output has been delivered when using any of the flush + options Z_BLOCK, Z_PARTIAL_FLUSH, Z_SYNC_FLUSH, or Z_FULL_FLUSH. The + compressor and decompressor must use exactly the same dictionary (see + inflateSetDictionary). + + The dictionary should consist of strings (byte sequences) that are likely + to be encountered later in the data to be compressed, with the most commonly + used strings preferably put towards the end of the dictionary. Using a + dictionary is most useful when the data to be compressed is short and can be + predicted with good accuracy; the data can then be compressed better than + with the default empty dictionary. + + Depending on the size of the compression data structures selected by + deflateInit or deflateInit2, a part of the dictionary may in effect be + discarded, for example if the dictionary is larger than the window size + provided in deflateInit or deflateInit2. Thus the strings most likely to be + useful should be put at the end of the dictionary, not at the front. In + addition, the current implementation of deflate will use at most the window + size minus 262 bytes of the provided dictionary. + + Upon return of this function, strm->adler is set to the Adler-32 value + of the dictionary; the decompressor may later use this value to determine + which dictionary has been used by the compressor. (The Adler-32 value + applies to the whole dictionary even if only a subset of the dictionary is + actually used by the compressor.) If a raw deflate was requested, then the + Adler-32 value is not computed and strm->adler is not set. + + deflateSetDictionary returns Z_OK if success, or Z_STREAM_ERROR if a + parameter is invalid (e.g. 
dictionary being Z_NULL) or the stream state is + inconsistent (for example if deflate has already been called for this stream + or if not at a block boundary for raw deflate). deflateSetDictionary does + not perform any compression: this will be done by deflate(). +*/ + +ZEXTERN int ZEXPORT deflateGetDictionary OF((z_streamp strm, + Bytef *dictionary, + uInt *dictLength)); +/* + Returns the sliding dictionary being maintained by deflate. dictLength is + set to the number of bytes in the dictionary, and that many bytes are copied + to dictionary. dictionary must have enough space, where 32768 bytes is + always enough. If deflateGetDictionary() is called with dictionary equal to + Z_NULL, then only the dictionary length is returned, and nothing is copied. + Similarly, if dictLength is Z_NULL, then it is not set. + + deflateGetDictionary() may return a length less than the window size, even + when more than the window size in input has been provided. It may return up + to 258 bytes less in that case, due to how zlib's implementation of deflate + manages the sliding window and lookahead for matches, where matches can be + up to 258 bytes long. If the application needs the last window-size bytes of + input, then that would need to be saved by the application outside of zlib. + + deflateGetDictionary returns Z_OK on success, or Z_STREAM_ERROR if the + stream state is inconsistent. +*/ + +ZEXTERN int ZEXPORT deflateCopy OF((z_streamp dest, + z_streamp source)); +/* + Sets the destination stream as a complete copy of the source stream. + + This function can be useful when several compression strategies will be + tried, for example when there are several ways of pre-processing the input + data with a filter. The streams that will be discarded should then be freed + by calling deflateEnd. Note that deflateCopy duplicates the internal + compression state which can be quite large, so this strategy is slow and can + consume lots of memory.
+ + deflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_STREAM_ERROR if the source stream state was inconsistent + (such as zalloc being Z_NULL). msg is left unchanged in both source and + destination. +*/ + +ZEXTERN int ZEXPORT deflateReset OF((z_streamp strm)); +/* + This function is equivalent to deflateEnd followed by deflateInit, but + does not free and reallocate the internal compression state. The stream + will leave the compression level and any other attributes that may have been + set unchanged. + + deflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent (such as zalloc or state being Z_NULL). +*/ + +ZEXTERN int ZEXPORT deflateParams OF((z_streamp strm, + int level, + int strategy)); +/* + Dynamically update the compression level and compression strategy. The + interpretation of level and strategy is as in deflateInit2(). This can be + used to switch between compression and straight copy of the input data, or + to switch to a different kind of input data requiring a different strategy. + If the compression approach (which is a function of the level) or the + strategy is changed, and if any input has been consumed in a previous + deflate() call, then the input available so far is compressed with the old + level and strategy using deflate(strm, Z_BLOCK). There are three approaches + for the compression levels 0, 1..3, and 4..9 respectively. The new level + and strategy will take effect at the next call of deflate(). + + If a deflate(strm, Z_BLOCK) is performed by deflateParams(), and it does + not have enough output space to complete, then the parameter change will not + take effect. In this case, deflateParams() can be called again with the + same parameters and more output space to try again. 
+ + In order to assure a change in the parameters on the first try, the + deflate stream should be flushed using deflate() with Z_BLOCK or other flush + request until strm.avail_out is not zero, before calling deflateParams(). + Then no more input data should be provided before the deflateParams() call. + If this is done, the old level and strategy will be applied to the data + compressed before deflateParams(), and the new level and strategy will be + applied to the data compressed after deflateParams(). + + deflateParams returns Z_OK on success, Z_STREAM_ERROR if the source stream + state was inconsistent or if a parameter was invalid, or Z_BUF_ERROR if + there was not enough output space to complete the compression of the + available input data before a change in the strategy or approach. Note that + in the case of a Z_BUF_ERROR, the parameters are not changed. A return + value of Z_BUF_ERROR is not fatal, in which case deflateParams() can be + retried with more output space. +*/ + +ZEXTERN int ZEXPORT deflateTune OF((z_streamp strm, + int good_length, + int max_lazy, + int nice_length, + int max_chain)); +/* + Fine tune deflate's internal compression parameters. This should only be + used by someone who understands the algorithm used by zlib's deflate for + searching for the best matching string, and even then only by the most + fanatic optimizer trying to squeeze out the last compressed bit for their + specific input data. Read the deflate.c source code for the meaning of the + max_lazy, good_length, nice_length, and max_chain parameters. + + deflateTune() can be called after deflateInit() or deflateInit2(), and + returns Z_OK on success, or Z_STREAM_ERROR for an invalid deflate stream. + */ + +ZEXTERN uLong ZEXPORT deflateBound OF((z_streamp strm, + uLong sourceLen)); +/* + deflateBound() returns an upper bound on the compressed size after + deflation of sourceLen bytes.
It must be called after deflateInit() or + deflateInit2(), and after deflateSetHeader(), if used. This would be used + to allocate an output buffer for deflation in a single pass, and so would be + called before deflate(). If that first deflate() call is provided the + sourceLen input bytes, an output buffer allocated to the size returned by + deflateBound(), and the flush value Z_FINISH, then deflate() is guaranteed + to return Z_STREAM_END. Note that it is possible for the compressed size to + be larger than the value returned by deflateBound() if flush options other + than Z_FINISH or Z_NO_FLUSH are used. +*/ + +ZEXTERN int ZEXPORT deflatePending OF((z_streamp strm, + unsigned *pending, + int *bits)); +/* + deflatePending() returns the number of bytes and bits of output that have + been generated, but not yet provided in the available output. The bytes not + provided would be due to the available output space having been consumed. + The number of bits of output not provided are between 0 and 7, where they + await more bits to join them in order to fill out a full byte. If pending + or bits are Z_NULL, then those values are not set. + + deflatePending returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent. + */ + +ZEXTERN int ZEXPORT deflatePrime OF((z_streamp strm, + int bits, + int value)); +/* + deflatePrime() inserts bits in the deflate output stream. The intent + is that this function is used to start off the deflate output with the bits + leftover from a previous deflate stream when appending to it. As such, this + function can only be used for raw deflate, and must be used before the first + deflate() call after a deflateInit2() or deflateReset(). bits must be less + than or equal to 16, and that many of the least significant bits of value + will be inserted in the output.
+ + deflatePrime returns Z_OK if success, Z_BUF_ERROR if there was not enough + room in the internal buffer to insert the bits, or Z_STREAM_ERROR if the + source stream state was inconsistent. +*/ + +ZEXTERN int ZEXPORT deflateSetHeader OF((z_streamp strm, + gz_headerp head)); +/* + deflateSetHeader() provides gzip header information for when a gzip + stream is requested by deflateInit2(). deflateSetHeader() may be called + after deflateInit2() or deflateReset() and before the first call of + deflate(). The text, time, os, extra field, name, and comment information + in the provided gz_header structure are written to the gzip header (xflag is + ignored -- the extra flags are set according to the compression level). The + caller must assure that, if not Z_NULL, name and comment are terminated with + a zero byte, and that if extra is not Z_NULL, that extra_len bytes are + available there. If hcrc is true, a gzip header crc is included. Note that + the current versions of the command-line version of gzip (up through version + 1.3.x) do not support header crc's, and will report that it is a "multi-part + gzip file" and give up. + + If deflateSetHeader is not used, the default gzip header has text false, + the time set to zero, and os set to 255, with no extra, name, or comment + fields. The gzip header is returned to the default state by deflateReset(). + + deflateSetHeader returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent. +*/ + +/* +ZEXTERN int ZEXPORT inflateInit2 OF((z_streamp strm, + int windowBits)); + + This is another version of inflateInit with an extra parameter. The + fields next_in, avail_in, zalloc, zfree and opaque must be initialized + before by the caller. + + The windowBits parameter is the base two logarithm of the maximum window + size (the size of the history buffer). It should be in the range 8..15 for + this version of the library. The default value is 15 if inflateInit is used + instead. 
windowBits must be greater than or equal to the windowBits value + provided to deflateInit2() while compressing, or it must be equal to 15 if + deflateInit2() was not used. If a compressed stream with a larger window + size is given as input, inflate() will return with the error code + Z_DATA_ERROR instead of trying to allocate a larger window. + + windowBits can also be zero to request that inflate use the window size in + the zlib header of the compressed stream. + + windowBits can also be -8..-15 for raw inflate. In this case, -windowBits + determines the window size. inflate() will then process raw deflate data, + not looking for a zlib or gzip header, not generating a check value, and not + looking for any check values for comparison at the end of the stream. This + is for use with other formats that use the deflate compressed data format + such as zip. Those formats provide their own check values. If a custom + format is developed using the raw deflate format for compressed data, it is + recommended that a check value such as an Adler-32 or a CRC-32 be applied to + the uncompressed data as is done in the zlib, gzip, and zip formats. For + most applications, the zlib format should be used as is. Note that the comments + above on the use in deflateInit2() apply to the magnitude of windowBits. + + windowBits can also be greater than 15 for optional gzip decoding. Add + 32 to windowBits to enable zlib and gzip decoding with automatic header + detection, or add 16 to decode only the gzip format (the zlib format will + return a Z_DATA_ERROR). If a gzip stream is being decoded, strm->adler is a + CRC-32 instead of an Adler-32. Unlike the gunzip utility and gzread() (see + below), inflate() will not automatically decode concatenated gzip streams. + inflate() will return Z_STREAM_END at the end of the gzip stream. The state + would need to be reset to continue decoding a subsequent gzip stream.
+ + inflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_VERSION_ERROR if the zlib library version is incompatible with the + version assumed by the caller, or Z_STREAM_ERROR if the parameters are + invalid, such as a null pointer to the structure. msg is set to null if + there is no error message. inflateInit2 does not perform any decompression + apart from possibly reading the zlib header if present: actual decompression + will be done by inflate(). (So next_in and avail_in may be modified, but + next_out and avail_out are unused and unchanged.) The current implementation + of inflateInit2() does not process any header information -- that is + deferred until inflate() is called. +*/ + +ZEXTERN int ZEXPORT inflateSetDictionary OF((z_streamp strm, + const Bytef *dictionary, + uInt dictLength)); +/* + Initializes the decompression dictionary from the given uncompressed byte + sequence. This function must be called immediately after a call of inflate, + if that call returned Z_NEED_DICT. The dictionary chosen by the compressor + can be determined from the Adler-32 value returned by that call of inflate. + The compressor and decompressor must use exactly the same dictionary (see + deflateSetDictionary). For raw inflate, this function can be called at any + time to set the dictionary. If the provided dictionary is smaller than the + window and there is already data in the window, then the provided dictionary + will amend what's there. The application must insure that the dictionary + that was used for compression is provided. + + inflateSetDictionary returns Z_OK if success, Z_STREAM_ERROR if a + parameter is invalid (e.g. dictionary being Z_NULL) or the stream state is + inconsistent, Z_DATA_ERROR if the given dictionary doesn't match the + expected one (incorrect Adler-32 value). inflateSetDictionary does not + perform any decompression: this will be done by subsequent calls of + inflate(). 
+*/ + +ZEXTERN int ZEXPORT inflateGetDictionary OF((z_streamp strm, + Bytef *dictionary, + uInt *dictLength)); +/* + Returns the sliding dictionary being maintained by inflate. dictLength is + set to the number of bytes in the dictionary, and that many bytes are copied + to dictionary. dictionary must have enough space, where 32768 bytes is + always enough. If inflateGetDictionary() is called with dictionary equal to + Z_NULL, then only the dictionary length is returned, and nothing is copied. + Similarly, if dictLength is Z_NULL, then it is not set. + + inflateGetDictionary returns Z_OK on success, or Z_STREAM_ERROR if the + stream state is inconsistent. +*/ + +ZEXTERN int ZEXPORT inflateSync OF((z_streamp strm)); +/* + Skips invalid compressed data until a possible full flush point (see above + for the description of deflate with Z_FULL_FLUSH) can be found, or until all + available input is skipped. No output is provided. + + inflateSync searches for a 00 00 FF FF pattern in the compressed data. + All full flush points have this pattern, but not all occurrences of this + pattern are full flush points. + + inflateSync returns Z_OK if a possible full flush point has been found, + Z_BUF_ERROR if no more input was provided, Z_DATA_ERROR if no flush point + has been found, or Z_STREAM_ERROR if the stream structure was inconsistent. + In the success case, the application may save the current value of + total_in which indicates where valid compressed data was found. In the + error case, the application may repeatedly call inflateSync, providing more + input each time, until success or end of the input data. +*/ + +ZEXTERN int ZEXPORT inflateCopy OF((z_streamp dest, + z_streamp source)); +/* + Sets the destination stream as a complete copy of the source stream. + + This function can be useful when randomly accessing a large stream.
The + first pass through the stream can periodically record the inflate state, + allowing restarting inflate at those points when randomly accessing the + stream. + + inflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_STREAM_ERROR if the source stream state was inconsistent + (such as zalloc being Z_NULL). msg is left unchanged in both source and + destination. +*/ + +ZEXTERN int ZEXPORT inflateReset OF((z_streamp strm)); +/* + This function is equivalent to inflateEnd followed by inflateInit, + but does not free and reallocate the internal decompression state. The + stream will keep attributes that may have been set by inflateInit2. + + inflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent (such as zalloc or state being Z_NULL). +*/ + +ZEXTERN int ZEXPORT inflateReset2 OF((z_streamp strm, + int windowBits)); +/* + This function is the same as inflateReset, but it also permits changing + the wrap and window size requests. The windowBits parameter is interpreted + the same as it is for inflateInit2. If the window size is changed, then the + memory allocated for the window is freed, and the window will be reallocated + by inflate() if needed. + + inflateReset2 returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent (such as zalloc or state being Z_NULL), or if + the windowBits parameter is invalid. +*/ + +ZEXTERN int ZEXPORT inflatePrime OF((z_streamp strm, + int bits, + int value)); +/* + This function inserts bits in the inflate input stream. The intent is + that this function is used to start inflating at a bit position in the + middle of a byte. The provided bits will be used before any bytes are used + from next_in. This function should only be used with raw inflate, and + should be used before the first inflate() call after inflateInit2() or + inflateReset(). 
bits must be less than or equal to 16, and that many of the + least significant bits of value will be inserted in the input. + + If bits is negative, then the input stream bit buffer is emptied. Then + inflatePrime() can be called again to put bits in the buffer. This is used + to clear out bits leftover after feeding inflate a block description prior + to feeding inflate codes. + + inflatePrime returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent. +*/ + +ZEXTERN long ZEXPORT inflateMark OF((z_streamp strm)); +/* + This function returns two values, one in the lower 16 bits of the return + value, and the other in the remaining upper bits, obtained by shifting the + return value down 16 bits. If the upper value is -1 and the lower value is + zero, then inflate() is currently decoding information outside of a block. + If the upper value is -1 and the lower value is non-zero, then inflate is in + the middle of a stored block, with the lower value equaling the number of + bytes from the input remaining to copy. If the upper value is not -1, then + it is the number of bits back from the current bit position in the input of + the code (literal or length/distance pair) currently being processed. In + that case the lower value is the number of bytes already emitted for that + code. + + A code is being processed if inflate is waiting for more input to complete + decoding of the code, or if it has completed decoding but is waiting for + more output space to write the literal or match data. + + inflateMark() is used to mark locations in the input data for random + access, which may be at bit positions, and to note those cases where the + output of a code may span boundaries of random access blocks. The current + location in the input stream can be determined from avail_in and data_type + as noted in the description for the Z_BLOCK flush parameter for inflate. 
+ + inflateMark returns the value noted above, or -65536 if the provided + source stream state was inconsistent. +*/ + +ZEXTERN int ZEXPORT inflateGetHeader OF((z_streamp strm, + gz_headerp head)); +/* + inflateGetHeader() requests that gzip header information be stored in the + provided gz_header structure. inflateGetHeader() may be called after + inflateInit2() or inflateReset(), and before the first call of inflate(). + As inflate() processes the gzip stream, head->done is zero until the header + is completed, at which time head->done is set to one. If a zlib stream is + being decoded, then head->done is set to -1 to indicate that there will be + no gzip header information forthcoming. Note that Z_BLOCK or Z_TREES can be + used to force inflate() to return immediately after header processing is + complete and before any actual data is decompressed. + + The text, time, xflags, and os fields are filled in with the gzip header + contents. hcrc is set to true if there is a header CRC. (The header CRC + was valid if done is set to one.) If extra is not Z_NULL, then extra_max + contains the maximum number of bytes to write to extra. Once done is true, + extra_len contains the actual extra field length, and extra contains the + extra field, or that field truncated if extra_max is less than extra_len. + If name is not Z_NULL, then up to name_max characters are written there, + terminated with a zero unless the length is greater than name_max. If + comment is not Z_NULL, then up to comm_max characters are written there, + terminated with a zero unless the length is greater than comm_max. When any + of extra, name, or comment are not Z_NULL and the respective field is not + present in the header, then that field is set to Z_NULL to signal its + absence. This allows the use of deflateSetHeader() with the returned + structure to duplicate the header. 
However if those fields are set to + allocated memory, then the application will need to save those pointers + elsewhere so that they can be eventually freed. + + If inflateGetHeader is not used, then the header information is simply + discarded. The header is always checked for validity, including the header + CRC if present. inflateReset() will reset the process to discard the header + information. The application would need to call inflateGetHeader() again to + retrieve the header from the next gzip stream. + + inflateGetHeader returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent. +*/ + +/* +ZEXTERN int ZEXPORT inflateBackInit OF((z_streamp strm, int windowBits, + unsigned char FAR *window)); + + Initialize the internal stream state for decompression using inflateBack() + calls. The fields zalloc, zfree and opaque in strm must be initialized + before the call. If zalloc and zfree are Z_NULL, then the default library- + derived memory allocation routines are used. windowBits is the base two + logarithm of the window size, in the range 8..15. window is a caller + supplied buffer of that size. Except for special applications where it is + assured that deflate was used with small window sizes, windowBits must be 15 + and a 32K byte window must be supplied to be able to decompress general + deflate streams. + + See inflateBack() for the usage of these routines. + + inflateBackInit will return Z_OK on success, Z_STREAM_ERROR if any of + the parameters are invalid, Z_MEM_ERROR if the internal state could not be + allocated, or Z_VERSION_ERROR if the version of the library does not match + the version of the header file. 
+*/ + +typedef unsigned (*in_func) OF((void FAR *, + z_const unsigned char FAR * FAR *)); +typedef int (*out_func) OF((void FAR *, unsigned char FAR *, unsigned)); + +ZEXTERN int ZEXPORT inflateBack OF((z_streamp strm, + in_func in, void FAR *in_desc, + out_func out, void FAR *out_desc)); +/* + inflateBack() does a raw inflate with a single call using a call-back + interface for input and output. This is potentially more efficient than + inflate() for file i/o applications, in that it avoids copying between the + output and the sliding window by simply making the window itself the output + buffer. inflate() can be faster on modern CPUs when used with large + buffers. inflateBack() trusts the application to not change the output + buffer passed by the output function, at least until inflateBack() returns. + + inflateBackInit() must be called first to allocate the internal state + and to initialize the state with the user-provided window buffer. + inflateBack() may then be used multiple times to inflate a complete, raw + deflate stream with each call. inflateBackEnd() is then called to free the + allocated state. + + A raw deflate stream is one with no zlib or gzip header or trailer. + This routine would normally be used in a utility that reads zip or gzip + files and writes out uncompressed files. The utility would decode the + header and process the trailer on its own, hence this routine expects only + the raw deflate stream to decompress. This is different from the default + behavior of inflate(), which expects a zlib header and trailer around the + deflate stream. + + inflateBack() uses two subroutines supplied by the caller that are then + called by inflateBack() for input and output. inflateBack() calls those + routines until it reads a complete deflate stream and writes out all of the + uncompressed data, or until it encounters an error. The function's + parameters and return types are defined above in the in_func and out_func + typedefs. 
inflateBack() will call in(in_desc, &buf) which should return the + number of bytes of provided input, and a pointer to that input in buf. If + there is no input available, in() must return zero -- buf is ignored in that + case -- and inflateBack() will return a buffer error. inflateBack() will + call out(out_desc, buf, len) to write the uncompressed data buf[0..len-1]. + out() should return zero on success, or non-zero on failure. If out() + returns non-zero, inflateBack() will return with an error. Neither in() nor + out() are permitted to change the contents of the window provided to + inflateBackInit(), which is also the buffer that out() uses to write from. + The length written by out() will be at most the window size. Any non-zero + amount of input may be provided by in(). + + For convenience, inflateBack() can be provided input on the first call by + setting strm->next_in and strm->avail_in. If that input is exhausted, then + in() will be called. Therefore strm->next_in must be initialized before + calling inflateBack(). If strm->next_in is Z_NULL, then in() will be called + immediately for input. If strm->next_in is not Z_NULL, then strm->avail_in + must also be initialized, and then if strm->avail_in is not zero, input will + initially be taken from strm->next_in[0 .. strm->avail_in - 1]. + + The in_desc and out_desc parameters of inflateBack() are passed as the + first parameter of in() and out() respectively when they are called. These + descriptors can be optionally used to pass any information that the caller- + supplied in() and out() functions need to do their job. + + On return, inflateBack() will set strm->next_in and strm->avail_in to + pass back any unused input that was provided by the last in() call.
The + return values of inflateBack() can be Z_STREAM_END on success, Z_BUF_ERROR + if in() or out() returned an error, Z_DATA_ERROR if there was a format error + in the deflate stream (in which case strm->msg is set to indicate the nature + of the error), or Z_STREAM_ERROR if the stream was not properly initialized. + In the case of Z_BUF_ERROR, an input or output error can be distinguished + using strm->next_in which will be Z_NULL only if in() returned an error. If + strm->next_in is not Z_NULL, then the Z_BUF_ERROR was due to out() returning + non-zero. (in() will always be called before out(), so strm->next_in is + assured to be defined if out() returns non-zero.) Note that inflateBack() + cannot return Z_OK. +*/ + +ZEXTERN int ZEXPORT inflateBackEnd OF((z_streamp strm)); +/* + All memory allocated by inflateBackInit() is freed. + + inflateBackEnd() returns Z_OK on success, or Z_STREAM_ERROR if the stream + state was inconsistent. +*/ + +ZEXTERN uLong ZEXPORT zlibCompileFlags OF((void)); +/* Return flags indicating compile-time options. 
+ + Type sizes, two bits each, 00 = 16 bits, 01 = 32, 10 = 64, 11 = other: + 1.0: size of uInt + 3.2: size of uLong + 5.4: size of voidpf (pointer) + 7.6: size of z_off_t + + Compiler, assembler, and debug options: + 8: ZLIB_DEBUG + 9: ASMV or ASMINF -- use ASM code + 10: ZLIB_WINAPI -- exported functions use the WINAPI calling convention + 11: 0 (reserved) + + One-time table building (smaller code, but not thread-safe if true): + 12: BUILDFIXED -- build static block decoding tables when needed + 13: DYNAMIC_CRC_TABLE -- build CRC calculation tables when needed + 14,15: 0 (reserved) + + Library content (indicates missing functionality): + 16: NO_GZCOMPRESS -- gz* functions cannot compress (to avoid linking + deflate code when not needed) + 17: NO_GZIP -- deflate can't write gzip streams, and inflate can't detect + and decode gzip streams (to avoid linking crc code) + 18-19: 0 (reserved) + + Operation variations (changes in library functionality): + 20: PKZIP_BUG_WORKAROUND -- slightly more permissive inflate + 21: FASTEST -- deflate algorithm with only one, lowest compression level + 22,23: 0 (reserved) + + The sprintf variant used by gzprintf (zero is best): + 24: 0 = vs*, 1 = s* -- 1 means limited to 20 arguments after the format + 25: 0 = *nprintf, 1 = *printf -- 1 means gzprintf() not secure! + 26: 0 = returns value, 1 = void -- 1 means inferred string length returned + + Remainder: + 27-31: 0 (reserved) + */ + +#ifndef Z_SOLO + + /* utility functions */ + +/* + The following utility functions are implemented on top of the basic + stream-oriented functions. To simplify the interface, some default options + are assumed (compression level and memory usage, standard memory allocation + functions). The source code of these utility functions can be modified if + you need special options. +*/ + +ZEXTERN int ZEXPORT compress OF((Bytef *dest, uLongf *destLen, + const Bytef *source, uLong sourceLen)); +/* + Compresses the source buffer into the destination buffer. 
sourceLen is + the byte length of the source buffer. Upon entry, destLen is the total size + of the destination buffer, which must be at least the value returned by + compressBound(sourceLen). Upon exit, destLen is the actual size of the + compressed data. compress() is equivalent to compress2() with a level + parameter of Z_DEFAULT_COMPRESSION. + + compress returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_BUF_ERROR if there was not enough room in the output + buffer. +*/ + +ZEXTERN int ZEXPORT compress2 OF((Bytef *dest, uLongf *destLen, + const Bytef *source, uLong sourceLen, + int level)); +/* + Compresses the source buffer into the destination buffer. The level + parameter has the same meaning as in deflateInit. sourceLen is the byte + length of the source buffer. Upon entry, destLen is the total size of the + destination buffer, which must be at least the value returned by + compressBound(sourceLen). Upon exit, destLen is the actual size of the + compressed data. + + compress2 returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_BUF_ERROR if there was not enough room in the output buffer, + Z_STREAM_ERROR if the level parameter is invalid. +*/ + +ZEXTERN uLong ZEXPORT compressBound OF((uLong sourceLen)); +/* + compressBound() returns an upper bound on the compressed size after + compress() or compress2() on sourceLen bytes. It would be used before a + compress() or compress2() call to allocate the destination buffer. +*/ + +ZEXTERN int ZEXPORT uncompress OF((Bytef *dest, uLongf *destLen, + const Bytef *source, uLong sourceLen)); +/* + Decompresses the source buffer into the destination buffer. sourceLen is + the byte length of the source buffer. Upon entry, destLen is the total size + of the destination buffer, which must be large enough to hold the entire + uncompressed data. 
(The size of the uncompressed data must have been saved + previously by the compressor and transmitted to the decompressor by some + mechanism outside the scope of this compression library.) Upon exit, destLen + is the actual size of the uncompressed data. + + uncompress returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_BUF_ERROR if there was not enough room in the output + buffer, or Z_DATA_ERROR if the input data was corrupted or incomplete. In + the case where there is not enough room, uncompress() will fill the output + buffer with the uncompressed data up to that point. +*/ + +ZEXTERN int ZEXPORT uncompress2 OF((Bytef *dest, uLongf *destLen, + const Bytef *source, uLong *sourceLen)); +/* + Same as uncompress, except that sourceLen is a pointer, where the + length of the source is *sourceLen. On return, *sourceLen is the number of + source bytes consumed. +*/ + + /* gzip file access functions */ + +/* + This library supports reading and writing files in gzip (.gz) format with + an interface similar to that of stdio, using the functions that start with + "gz". The gzip format is different from the zlib format. gzip is a gzip + wrapper, documented in RFC 1952, wrapped around a deflate stream. +*/ + +typedef struct gzFile_s *gzFile; /* semi-opaque gzip file descriptor */ + +/* +ZEXTERN gzFile ZEXPORT gzopen OF((const char *path, const char *mode)); + + Opens a gzip (.gz) file for reading or writing. The mode parameter is as + in fopen ("rb" or "wb") but can also include a compression level ("wb9") or + a strategy: 'f' for filtered data as in "wb6f", 'h' for Huffman-only + compression as in "wb1h", 'R' for run-length encoding as in "wb1R", or 'F' + for fixed code compression as in "wb9F". (See the description of + deflateInit2 for more information about the strategy parameter.) 'T' will + request transparent writing or appending with no compression and not using + the gzip format. 
+ + "a" can be used instead of "w" to request that the gzip stream that will + be written be appended to the file. "+" will result in an error, since + reading and writing to the same gzip file is not supported. The addition of + "x" when writing will create the file exclusively, which fails if the file + already exists. On systems that support it, the addition of "e" when + reading or writing will set the flag to close the file on an execve() call. + + These functions, as well as gzip, will read and decode a sequence of gzip + streams in a file. The append function of gzopen() can be used to create + such a file. (Also see gzflush() for another way to do this.) When + appending, gzopen does not test whether the file begins with a gzip stream, + nor does it look for the end of the gzip streams to begin appending. gzopen + will simply append a gzip stream to the existing file. + + gzopen can be used to read a file which is not in gzip format; in this + case gzread will directly read from the file without decompression. When + reading, this will be detected automatically by looking for the magic two- + byte gzip header. + + gzopen returns NULL if the file could not be opened, if there was + insufficient memory to allocate the gzFile state, or if an invalid mode was + specified (an 'r', 'w', or 'a' was not provided, or '+' was provided). + errno can be checked to determine if the reason gzopen failed was that the + file could not be opened. +*/ + +ZEXTERN gzFile ZEXPORT gzdopen OF((int fd, const char *mode)); +/* + gzdopen associates a gzFile with the file descriptor fd. File descriptors + are obtained from calls like open, dup, creat, pipe or fileno (if the file + has been previously opened with fopen). The mode parameter is as in gzopen. + + The next call of gzclose on the returned gzFile will also close the file + descriptor fd, just like fclose(fdopen(fd, mode)) closes the file descriptor + fd. 
If you want to keep fd open, use fd = dup(fd_keep); gz = gzdopen(fd, + mode);. The duplicated descriptor should be saved to avoid a leak, since + gzdopen does not close fd if it fails. If you are using fileno() to get the + file descriptor from a FILE *, then you will have to use dup() to avoid + double-close()ing the file descriptor. Both gzclose() and fclose() will + close the associated file descriptor, so they need to have different file + descriptors. + + gzdopen returns NULL if there was insufficient memory to allocate the + gzFile state, if an invalid mode was specified (an 'r', 'w', or 'a' was not + provided, or '+' was provided), or if fd is -1. The file descriptor is not + used until the next gz* read, write, seek, or close operation, so gzdopen + will not detect if fd is invalid (unless fd is -1). +*/ + +ZEXTERN int ZEXPORT gzbuffer OF((gzFile file, unsigned size)); +/* + Set the internal buffer size used by this library's functions. The + default buffer size is 8192 bytes. This function must be called after + gzopen() or gzdopen(), and before any other calls that read or write the + file. The buffer memory allocation is always deferred to the first read or + write. Three times that size in buffer space is allocated. A larger buffer + size of, for example, 64K or 128K bytes will noticeably increase the speed + of decompression (reading). + + The new buffer size also affects the maximum length for gzprintf(). + + gzbuffer() returns 0 on success, or -1 on failure, such as being called + too late. +*/ + +ZEXTERN int ZEXPORT gzsetparams OF((gzFile file, int level, int strategy)); +/* + Dynamically update the compression level or strategy. See the description + of deflateInit2 for the meaning of these parameters. Previously provided + data is flushed before the parameter change. 
+ + gzsetparams returns Z_OK if success, Z_STREAM_ERROR if the file was not + opened for writing, Z_ERRNO if there is an error writing the flushed data, + or Z_MEM_ERROR if there is a memory allocation error. +*/ + +ZEXTERN int ZEXPORT gzread OF((gzFile file, voidp buf, unsigned len)); +/* + Reads the given number of uncompressed bytes from the compressed file. If + the input file is not in gzip format, gzread copies the given number of + bytes into the buffer directly from the file. + + After reaching the end of a gzip stream in the input, gzread will continue + to read, looking for another gzip stream. Any number of gzip streams may be + concatenated in the input file, and will all be decompressed by gzread(). + If something other than a gzip stream is encountered after a gzip stream, + that remaining trailing garbage is ignored (and no error is returned). + + gzread can be used to read a gzip file that is being concurrently written. + Upon reaching the end of the input, gzread will return with the available + data. If the error code returned by gzerror is Z_OK or Z_BUF_ERROR, then + gzclearerr can be used to clear the end of file indicator in order to permit + gzread to be tried again. Z_OK indicates that a gzip stream was completed + on the last gzread. Z_BUF_ERROR indicates that the input file ended in the + middle of a gzip stream. Note that gzread does not return -1 in the event + of an incomplete gzip stream. This error is deferred until gzclose(), which + will return Z_BUF_ERROR if the last gzread ended in the middle of a gzip + stream. Alternatively, gzerror can be used before gzclose to detect this + case. + + gzread returns the number of uncompressed bytes actually read, less than + len for end of file, or -1 for error. If len is too large to fit in an int, + then nothing is read, -1 is returned, and the error state is set to + Z_STREAM_ERROR. 
+*/ + +ZEXTERN z_size_t ZEXPORT gzfread OF((voidp buf, z_size_t size, z_size_t nitems, + gzFile file)); +/* + Read up to nitems items of size size from file to buf, otherwise operating + as gzread() does. This duplicates the interface of stdio's fread(), with + size_t request and return types. If the library defines size_t, then + z_size_t is identical to size_t. If not, then z_size_t is an unsigned + integer type that can contain a pointer. + + gzfread() returns the number of full items read of size size, or zero if + the end of the file was reached and a full item could not be read, or if + there was an error. gzerror() must be consulted if zero is returned in + order to determine if there was an error. If the multiplication of size and + nitems overflows, i.e. the product does not fit in a z_size_t, then nothing + is read, zero is returned, and the error state is set to Z_STREAM_ERROR. + + In the event that the end of file is reached and only a partial item is + available at the end, i.e. the remaining uncompressed data length is not a + multiple of size, then the final partial item is nevertheless read into buf + and the end-of-file flag is set. The length of the partial item read is not + provided, but could be inferred from the result of gztell(). This behavior + is the same as the behavior of fread() implementations in common libraries, + but it prevents the direct use of gzfread() to read a concurrently written + file, resetting and retrying on end-of-file, when size is not 1. +*/ + +ZEXTERN int ZEXPORT gzwrite OF((gzFile file, + voidpc buf, unsigned len)); +/* + Writes the given number of uncompressed bytes into the compressed file. + gzwrite returns the number of uncompressed bytes written or 0 in case of + error.
+*/ + +ZEXTERN z_size_t ZEXPORT gzfwrite OF((voidpc buf, z_size_t size, + z_size_t nitems, gzFile file)); +/* + gzfwrite() writes nitems items of size size from buf to file, duplicating + the interface of stdio's fwrite(), with size_t request and return types. If + the library defines size_t, then z_size_t is identical to size_t. If not, + then z_size_t is an unsigned integer type that can contain a pointer. + + gzfwrite() returns the number of full items written of size size, or zero + if there was an error. If the multiplication of size and nitems overflows, + i.e. the product does not fit in a z_size_t, then nothing is written, zero + is returned, and the error state is set to Z_STREAM_ERROR. +*/ + +ZEXTERN int ZEXPORTVA gzprintf Z_ARG((gzFile file, const char *format, ...)); +/* + Converts, formats, and writes the arguments to the compressed file under + control of the format string, as in fprintf. gzprintf returns the number of + uncompressed bytes actually written, or a negative zlib error code in case + of error. The number of uncompressed bytes written is limited to 8191, or + one less than the buffer size given to gzbuffer(). The caller should assure + that this limit is not exceeded. If it is exceeded, then gzprintf() will + return an error (0) with nothing written. In this case, there may also be a + buffer overflow with unpredictable consequences, which is possible only if + zlib was compiled with the insecure functions sprintf() or vsprintf() + because the secure snprintf() or vsnprintf() functions were not available. + This can be determined using zlibCompileFlags(). +*/ + +ZEXTERN int ZEXPORT gzputs OF((gzFile file, const char *s)); +/* + Writes the given null-terminated string to the compressed file, excluding + the terminating null character. + + gzputs returns the number of characters written, or -1 in case of error. 
+*/ + +ZEXTERN char * ZEXPORT gzgets OF((gzFile file, char *buf, int len)); +/* + Reads bytes from the compressed file until len-1 characters are read, or a + newline character is read and transferred to buf, or an end-of-file + condition is encountered. If any characters are read or if len == 1, the + string is terminated with a null character. If no characters are read due + to an end-of-file or len < 1, then the buffer is left untouched. + + gzgets returns buf which is a null-terminated string, or it returns NULL + for end-of-file or in case of error. If there was an error, the contents at + buf are indeterminate. +*/ + +ZEXTERN int ZEXPORT gzputc OF((gzFile file, int c)); +/* + Writes c, converted to an unsigned char, into the compressed file. gzputc + returns the value that was written, or -1 in case of error. +*/ + +ZEXTERN int ZEXPORT gzgetc OF((gzFile file)); +/* + Reads one byte from the compressed file. gzgetc returns this byte or -1 + in case of end of file or error. This is implemented as a macro for speed. + As such, it does not do all of the checking the other functions do. I.e. + it does not check to see if file is NULL, nor whether the structure file + points to has been clobbered or not. +*/ + +ZEXTERN int ZEXPORT gzungetc OF((int c, gzFile file)); +/* + Push one character back onto the stream to be read as the first character + on the next read. At least one character of push-back is allowed. + gzungetc() returns the character pushed, or -1 on failure. gzungetc() will + fail if c is -1, and may fail if a character has been pushed but not read + yet. If gzungetc is used immediately after gzopen or gzdopen, at least the + output buffer size of pushed characters is allowed. (See gzbuffer above.) + The pushed character will be discarded if the stream is repositioned with + gzseek() or gzrewind(). +*/ + +ZEXTERN int ZEXPORT gzflush OF((gzFile file, int flush)); +/* + Flushes all pending output into the compressed file. 
The parameter flush + is as in the deflate() function. The return value is the zlib error number + (see function gzerror below). gzflush is only permitted when writing. + + If the flush parameter is Z_FINISH, the remaining data is written and the + gzip stream is completed in the output. If gzwrite() is called again, a new + gzip stream will be started in the output. gzread() is able to read such + concatenated gzip streams. + + gzflush should be called only when strictly necessary because it will + degrade compression if called too often. +*/ + +/* +ZEXTERN z_off_t ZEXPORT gzseek OF((gzFile file, + z_off_t offset, int whence)); + + Sets the starting position for the next gzread or gzwrite on the given + compressed file. The offset represents a number of bytes in the + uncompressed data stream. The whence parameter is defined as in lseek(2); + the value SEEK_END is not supported. + + If the file is opened for reading, this function is emulated but can be + extremely slow. If the file is opened for writing, only forward seeks are + supported; gzseek then compresses a sequence of zeroes up to the new + starting position. + + gzseek returns the resulting offset location as measured in bytes from + the beginning of the uncompressed stream, or -1 in case of error, in + particular if the file is opened for writing and the new starting position + would be before the current position. +*/ + +ZEXTERN int ZEXPORT gzrewind OF((gzFile file)); +/* + Rewinds the given file. This function is supported only for reading. + + gzrewind(file) is equivalent to (int)gzseek(file, 0L, SEEK_SET) +*/ + +/* +ZEXTERN z_off_t ZEXPORT gztell OF((gzFile file)); + + Returns the starting position for the next gzread or gzwrite on the given + compressed file. This position represents a number of bytes in the + uncompressed data stream, and is zero when starting, even if appending or + reading a gzip stream from the middle of a file using gzdopen(). 
+ + gztell(file) is equivalent to gzseek(file, 0L, SEEK_CUR) +*/ + +/* +ZEXTERN z_off_t ZEXPORT gzoffset OF((gzFile file)); + + Returns the current offset in the file being read or written. This offset + includes the count of bytes that precede the gzip stream, for example when + appending or when using gzdopen() for reading. When reading, the offset + does not include as yet unused buffered input. This information can be used + for a progress indicator. On error, gzoffset() returns -1. +*/ + +ZEXTERN int ZEXPORT gzeof OF((gzFile file)); +/* + Returns true (1) if the end-of-file indicator has been set while reading, + false (0) otherwise. Note that the end-of-file indicator is set only if the + read tried to go past the end of the input, but came up short. Therefore, + just like feof(), gzeof() may return false even if there is no more data to + read, in the event that the last read request was for the exact number of + bytes remaining in the input file. This will happen if the input file size + is an exact multiple of the buffer size. + + If gzeof() returns true, then the read functions will return no more data, + unless the end-of-file indicator is reset by gzclearerr() and the input file + has grown since the previous end of file was detected. +*/ + +ZEXTERN int ZEXPORT gzdirect OF((gzFile file)); +/* + Returns true (1) if file is being copied directly while reading, or false + (0) if file is a gzip stream being decompressed. + + If the input file is empty, gzdirect() will return true, since the input + does not contain a gzip stream. + + If gzdirect() is used immediately after gzopen() or gzdopen() it will + cause buffers to be allocated to allow reading the file to determine if it + is a gzip file. Therefore if gzbuffer() is used, it should be called before + gzdirect(). + + When writing, gzdirect() returns true (1) if transparent writing was + requested ("wT" for the gzopen() mode), or false (0) otherwise. (Note: + gzdirect() is not needed when writing. 
Transparent writing must be + explicitly requested, so the application already knows the answer. When + linking statically, using gzdirect() will include all of the zlib code for + gzip file reading and decompression, which may not be desired.) +*/ + +ZEXTERN int ZEXPORT gzclose OF((gzFile file)); +/* + Flushes all pending output if necessary, closes the compressed file and + deallocates the (de)compression state. Note that once file is closed, you + cannot call gzerror with file, since its structures have been deallocated. + gzclose must not be called more than once on the same file, just as free + must not be called more than once on the same allocation. + + gzclose will return Z_STREAM_ERROR if file is not valid, Z_ERRNO on a + file operation error, Z_MEM_ERROR if out of memory, Z_BUF_ERROR if the + last read ended in the middle of a gzip stream, or Z_OK on success. +*/ + +ZEXTERN int ZEXPORT gzclose_r OF((gzFile file)); +ZEXTERN int ZEXPORT gzclose_w OF((gzFile file)); +/* + Same as gzclose(), but gzclose_r() is only for use when reading, and + gzclose_w() is only for use when writing or appending. The advantage to + using these instead of gzclose() is that they avoid linking in zlib + compression or decompression code that is not used when only reading or only + writing respectively. If gzclose() is used, then both compression and + decompression code will be included in the application when linking to a static + zlib library. +*/ + +ZEXTERN const char * ZEXPORT gzerror OF((gzFile file, int *errnum)); +/* + Returns the error message for the last error which occurred on the given + compressed file. errnum is set to zlib error number. If an error occurred + in the file system and not in the compression library, errnum is set to + Z_ERRNO and the application may consult errno to get the exact error code. + + The application must not modify the returned string. Future calls to + this function may invalidate the previously returned string.
If file is + closed, then the string previously returned by gzerror will no longer be + available. + + gzerror() should be used to distinguish errors from end-of-file for those + functions above that do not distinguish those cases in their return values. +*/ + +ZEXTERN void ZEXPORT gzclearerr OF((gzFile file)); +/* + Clears the error and end-of-file flags for file. This is analogous to the + clearerr() function in stdio. This is useful for continuing to read a gzip + file that is being written concurrently. +*/ + +#endif /* !Z_SOLO */ + + /* checksum functions */ + +/* + These functions are not related to compression but are exported + anyway because they might be useful in applications using the compression + library. +*/ + +ZEXTERN uLong ZEXPORT adler32 OF((uLong adler, const Bytef *buf, uInt len)); +/* + Update a running Adler-32 checksum with the bytes buf[0..len-1] and + return the updated checksum. If buf is Z_NULL, this function returns the + required initial value for the checksum. + + An Adler-32 checksum is almost as reliable as a CRC-32 but can be computed + much faster. + + Usage example: + + uLong adler = adler32(0L, Z_NULL, 0); + + while (read_buffer(buffer, length) != EOF) { + adler = adler32(adler, buffer, length); + } + if (adler != original_adler) error(); +*/ + +ZEXTERN uLong ZEXPORT adler32_z OF((uLong adler, const Bytef *buf, + z_size_t len)); +/* + Same as adler32(), but with a size_t length. +*/ + +/* +ZEXTERN uLong ZEXPORT adler32_combine OF((uLong adler1, uLong adler2, + z_off_t len2)); + + Combine two Adler-32 checksums into one. For two sequences of bytes, seq1 + and seq2 with lengths len1 and len2, Adler-32 checksums were calculated for + each, adler1 and adler2. adler32_combine() returns the Adler-32 checksum of + seq1 and seq2 concatenated, requiring only adler1, adler2, and len2. Note + that the z_off_t type (like off_t) is a signed integer. If len2 is + negative, the result has no meaning or utility. 
+*/ + +ZEXTERN uLong ZEXPORT crc32 OF((uLong crc, const Bytef *buf, uInt len)); +/* + Update a running CRC-32 with the bytes buf[0..len-1] and return the + updated CRC-32. If buf is Z_NULL, this function returns the required + initial value for the crc. Pre- and post-conditioning (one's complement) is + performed within this function so it shouldn't be done by the application. + + Usage example: + + uLong crc = crc32(0L, Z_NULL, 0); + + while (read_buffer(buffer, length) != EOF) { + crc = crc32(crc, buffer, length); + } + if (crc != original_crc) error(); +*/ + +ZEXTERN uLong ZEXPORT crc32_z OF((uLong adler, const Bytef *buf, + z_size_t len)); +/* + Same as crc32(), but with a size_t length. +*/ + +/* +ZEXTERN uLong ZEXPORT crc32_combine OF((uLong crc1, uLong crc2, z_off_t len2)); + + Combine two CRC-32 check values into one. For two sequences of bytes, + seq1 and seq2 with lengths len1 and len2, CRC-32 check values were + calculated for each, crc1 and crc2. crc32_combine() returns the CRC-32 + check value of seq1 and seq2 concatenated, requiring only crc1, crc2, and + len2. 
+*/ + + + /* various hacks, don't look :) */ + +/* deflateInit and inflateInit are macros to allow checking the zlib version + * and the compiler's view of z_stream: + */ +ZEXTERN int ZEXPORT deflateInit_ OF((z_streamp strm, int level, + const char *version, int stream_size)); +ZEXTERN int ZEXPORT inflateInit_ OF((z_streamp strm, + const char *version, int stream_size)); +ZEXTERN int ZEXPORT deflateInit2_ OF((z_streamp strm, int level, int method, + int windowBits, int memLevel, + int strategy, const char *version, + int stream_size)); +ZEXTERN int ZEXPORT inflateInit2_ OF((z_streamp strm, int windowBits, + const char *version, int stream_size)); +ZEXTERN int ZEXPORT inflateBackInit_ OF((z_streamp strm, int windowBits, + unsigned char FAR *window, + const char *version, + int stream_size)); +#ifdef Z_PREFIX_SET +# define z_deflateInit(strm, level) \ + deflateInit_((strm), (level), ZLIB_VERSION, (int)sizeof(z_stream)) +# define z_inflateInit(strm) \ + inflateInit_((strm), ZLIB_VERSION, (int)sizeof(z_stream)) +# define z_deflateInit2(strm, level, method, windowBits, memLevel, strategy) \ + deflateInit2_((strm),(level),(method),(windowBits),(memLevel),\ + (strategy), ZLIB_VERSION, (int)sizeof(z_stream)) +# define z_inflateInit2(strm, windowBits) \ + inflateInit2_((strm), (windowBits), ZLIB_VERSION, \ + (int)sizeof(z_stream)) +# define z_inflateBackInit(strm, windowBits, window) \ + inflateBackInit_((strm), (windowBits), (window), \ + ZLIB_VERSION, (int)sizeof(z_stream)) +#else +# define deflateInit(strm, level) \ + deflateInit_((strm), (level), ZLIB_VERSION, (int)sizeof(z_stream)) +# define inflateInit(strm) \ + inflateInit_((strm), ZLIB_VERSION, (int)sizeof(z_stream)) +# define deflateInit2(strm, level, method, windowBits, memLevel, strategy) \ + deflateInit2_((strm),(level),(method),(windowBits),(memLevel),\ + (strategy), ZLIB_VERSION, (int)sizeof(z_stream)) +# define inflateInit2(strm, windowBits) \ + inflateInit2_((strm), (windowBits), ZLIB_VERSION, \ + 
(int)sizeof(z_stream)) +# define inflateBackInit(strm, windowBits, window) \ + inflateBackInit_((strm), (windowBits), (window), \ + ZLIB_VERSION, (int)sizeof(z_stream)) +#endif + +#ifndef Z_SOLO + +/* gzgetc() macro and its supporting function and exposed data structure. Note + * that the real internal state is much larger than the exposed structure. + * This abbreviated structure exposes just enough for the gzgetc() macro. The + * user should not mess with these exposed elements, since their names or + * behavior could change in the future, perhaps even capriciously. They can + * only be used by the gzgetc() macro. You have been warned. + */ +struct gzFile_s { + unsigned have; + unsigned char *next; + z_off64_t pos; +}; +ZEXTERN int ZEXPORT gzgetc_ OF((gzFile file)); /* backward compatibility */ +#ifdef Z_PREFIX_SET +# undef z_gzgetc +# define z_gzgetc(g) \ + ((g)->have ? ((g)->have--, (g)->pos++, *((g)->next)++) : (gzgetc)(g)) +#else +# define gzgetc(g) \ + ((g)->have ? ((g)->have--, (g)->pos++, *((g)->next)++) : (gzgetc)(g)) +#endif + +/* provide 64-bit offset functions if _LARGEFILE64_SOURCE defined, and/or + * change the regular functions to 64 bits if _FILE_OFFSET_BITS is 64 (if + * both are true, the application gets the *64 functions, and the regular + * functions are changed to 64 bits) -- in case these are set on systems + * without large file support, _LFS64_LARGEFILE must also be true + */ +#ifdef Z_LARGE64 + ZEXTERN gzFile ZEXPORT gzopen64 OF((const char *, const char *)); + ZEXTERN z_off64_t ZEXPORT gzseek64 OF((gzFile, z_off64_t, int)); + ZEXTERN z_off64_t ZEXPORT gztell64 OF((gzFile)); + ZEXTERN z_off64_t ZEXPORT gzoffset64 OF((gzFile)); + ZEXTERN uLong ZEXPORT adler32_combine64 OF((uLong, uLong, z_off64_t)); + ZEXTERN uLong ZEXPORT crc32_combine64 OF((uLong, uLong, z_off64_t)); +#endif + +#if !defined(ZLIB_INTERNAL) && defined(Z_WANT64) +# ifdef Z_PREFIX_SET +# define z_gzopen z_gzopen64 +# define z_gzseek z_gzseek64 +# define z_gztell z_gztell64 
+# define z_gzoffset z_gzoffset64 +# define z_adler32_combine z_adler32_combine64 +# define z_crc32_combine z_crc32_combine64 +# else +# define gzopen gzopen64 +# define gzseek gzseek64 +# define gztell gztell64 +# define gzoffset gzoffset64 +# define adler32_combine adler32_combine64 +# define crc32_combine crc32_combine64 +# endif +# ifndef Z_LARGE64 + ZEXTERN gzFile ZEXPORT gzopen64 OF((const char *, const char *)); + ZEXTERN z_off_t ZEXPORT gzseek64 OF((gzFile, z_off_t, int)); + ZEXTERN z_off_t ZEXPORT gztell64 OF((gzFile)); + ZEXTERN z_off_t ZEXPORT gzoffset64 OF((gzFile)); + ZEXTERN uLong ZEXPORT adler32_combine64 OF((uLong, uLong, z_off_t)); + ZEXTERN uLong ZEXPORT crc32_combine64 OF((uLong, uLong, z_off_t)); +# endif +#else + ZEXTERN gzFile ZEXPORT gzopen OF((const char *, const char *)); + ZEXTERN z_off_t ZEXPORT gzseek OF((gzFile, z_off_t, int)); + ZEXTERN z_off_t ZEXPORT gztell OF((gzFile)); + ZEXTERN z_off_t ZEXPORT gzoffset OF((gzFile)); + ZEXTERN uLong ZEXPORT adler32_combine OF((uLong, uLong, z_off_t)); + ZEXTERN uLong ZEXPORT crc32_combine OF((uLong, uLong, z_off_t)); +#endif + +#else /* Z_SOLO */ + + ZEXTERN uLong ZEXPORT adler32_combine OF((uLong, uLong, z_off_t)); + ZEXTERN uLong ZEXPORT crc32_combine OF((uLong, uLong, z_off_t)); + +#endif /* !Z_SOLO */ + +/* undocumented functions */ +ZEXTERN const char * ZEXPORT zError OF((int)); +ZEXTERN int ZEXPORT inflateSyncPoint OF((z_streamp)); +ZEXTERN const z_crc_t FAR * ZEXPORT get_crc_table OF((void)); +ZEXTERN int ZEXPORT inflateUndermine OF((z_streamp, int)); +ZEXTERN int ZEXPORT inflateValidate OF((z_streamp, int)); +ZEXTERN unsigned long ZEXPORT inflateCodesUsed OF ((z_streamp)); +ZEXTERN int ZEXPORT inflateResetKeep OF((z_streamp)); +ZEXTERN int ZEXPORT deflateResetKeep OF((z_streamp)); +#if (defined(_WIN32) || defined(__CYGWIN__)) && !defined(Z_SOLO) +ZEXTERN gzFile ZEXPORT gzopen_w OF((const wchar_t *path, + const char *mode)); +#endif +#if defined(STDC) || defined(Z_HAVE_STDARG_H) +# ifndef 
Z_SOLO +ZEXTERN int ZEXPORTVA gzvprintf Z_ARG((gzFile file, + const char *format, + va_list va)); +# endif +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* ZLIB_H */ diff --git a/deps/SZ/zlib/zutil.c b/deps/SZ/zlib/zutil.c new file mode 100644 index 0000000000000000000000000000000000000000..a76c6b0c7e557f8c29cfcf58a5ef9ef79c5e4e8a --- /dev/null +++ b/deps/SZ/zlib/zutil.c @@ -0,0 +1,325 @@ +/* zutil.c -- target dependent utility functions for the compression library + * Copyright (C) 1995-2017 Jean-loup Gailly + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* @(#) $Id$ */ + +#include "zutil.h" +#ifndef Z_SOLO +# include "gzguts.h" +#endif + +z_const char * const z_errmsg[10] = { + (z_const char *)"need dictionary", /* Z_NEED_DICT 2 */ + (z_const char *)"stream end", /* Z_STREAM_END 1 */ + (z_const char *)"", /* Z_OK 0 */ + (z_const char *)"file error", /* Z_ERRNO (-1) */ + (z_const char *)"stream error", /* Z_STREAM_ERROR (-2) */ + (z_const char *)"data error", /* Z_DATA_ERROR (-3) */ + (z_const char *)"insufficient memory", /* Z_MEM_ERROR (-4) */ + (z_const char *)"buffer error", /* Z_BUF_ERROR (-5) */ + (z_const char *)"incompatible version",/* Z_VERSION_ERROR (-6) */ + (z_const char *)"" +}; + + +const char * ZEXPORT zlibVersion() +{ + return ZLIB_VERSION; +} + +uLong ZEXPORT zlibCompileFlags() +{ + uLong flags; + + flags = 0; + switch ((int)(sizeof(uInt))) { + case 2: break; + case 4: flags += 1; break; + case 8: flags += 2; break; + default: flags += 3; + } + switch ((int)(sizeof(uLong))) { + case 2: break; + case 4: flags += 1 << 2; break; + case 8: flags += 2 << 2; break; + default: flags += 3 << 2; + } + switch ((int)(sizeof(voidpf))) { + case 2: break; + case 4: flags += 1 << 4; break; + case 8: flags += 2 << 4; break; + default: flags += 3 << 4; + } + switch ((int)(sizeof(z_off_t))) { + case 2: break; + case 4: flags += 1 << 6; break; + case 8: flags += 2 << 6; break; + default: flags += 3 << 6; + } +#ifdef 
ZLIB_DEBUG + flags += 1 << 8; +#endif +#if defined(ASMV) || defined(ASMINF) + flags += 1 << 9; +#endif +#ifdef ZLIB_WINAPI + flags += 1 << 10; +#endif +#ifdef BUILDFIXED + flags += 1 << 12; +#endif +#ifdef DYNAMIC_CRC_TABLE + flags += 1 << 13; +#endif +#ifdef NO_GZCOMPRESS + flags += 1L << 16; +#endif +#ifdef NO_GZIP + flags += 1L << 17; +#endif +#ifdef PKZIP_BUG_WORKAROUND + flags += 1L << 20; +#endif +#ifdef FASTEST + flags += 1L << 21; +#endif +#if defined(STDC) || defined(Z_HAVE_STDARG_H) +# ifdef NO_vsnprintf + flags += 1L << 25; +# ifdef HAS_vsprintf_void + flags += 1L << 26; +# endif +# else +# ifdef HAS_vsnprintf_void + flags += 1L << 26; +# endif +# endif +#else + flags += 1L << 24; +# ifdef NO_snprintf + flags += 1L << 25; +# ifdef HAS_sprintf_void + flags += 1L << 26; +# endif +# else +# ifdef HAS_snprintf_void + flags += 1L << 26; +# endif +# endif +#endif + return flags; +} + +#ifdef ZLIB_DEBUG +#include +# ifndef verbose +# define verbose 0 +# endif +int ZLIB_INTERNAL z_verbose = verbose; + +void ZLIB_INTERNAL z_error (m) + char *m; +{ + fprintf(stderr, "%s\n", m); + exit(1); +} +#endif + +/* exported to allow conversion of error code to string for compress() and + * uncompress() + */ +const char * ZEXPORT zError(err) + int err; +{ + return ERR_MSG(err); +} + +#if defined(_WIN32_WCE) + /* The Microsoft C Run-Time Library for Windows CE doesn't have + * errno. We define it as a global variable to simplify porting. + * Its value is always 0 and should not be used. + */ + int errno = 0; +#endif + +#ifndef HAVE_MEMCPY + +void ZLIB_INTERNAL zmemcpy(dest, source, len) + Bytef* dest; + const Bytef* source; + uInt len; +{ + if (len == 0) return; + do { + *dest++ = *source++; /* ??? 
to be unrolled */ + } while (--len != 0); +} + +int ZLIB_INTERNAL zmemcmp(s1, s2, len) + const Bytef* s1; + const Bytef* s2; + uInt len; +{ + uInt j; + + for (j = 0; j < len; j++) { + if (s1[j] != s2[j]) return 2*(s1[j] > s2[j])-1; + } + return 0; +} + +void ZLIB_INTERNAL zmemzero(dest, len) + Bytef* dest; + uInt len; +{ + if (len == 0) return; + do { + *dest++ = 0; /* ??? to be unrolled */ + } while (--len != 0); +} +#endif + +#ifndef Z_SOLO + +#ifdef SYS16BIT + +#ifdef __TURBOC__ +/* Turbo C in 16-bit mode */ + +# define MY_ZCALLOC + +/* Turbo C malloc() does not allow dynamic allocation of 64K bytes + * and farmalloc(64K) returns a pointer with an offset of 8, so we + * must fix the pointer. Warning: the pointer must be put back to its + * original form in order to free it, use zcfree(). + */ + +#define MAX_PTR 10 +/* 10*64K = 640K */ + +local int next_ptr = 0; + +typedef struct ptr_table_s { + voidpf org_ptr; + voidpf new_ptr; +} ptr_table; + +local ptr_table table[MAX_PTR]; +/* This table is used to remember the original form of pointers + * to large buffers (64K). Such pointers are normalized with a zero offset. + * Since MSDOS is not a preemptive multitasking OS, this table is not + * protected from concurrent access. This hack doesn't work anyway on + * a protected system like OS/2. Use Microsoft C instead. + */ + +voidpf ZLIB_INTERNAL zcalloc (voidpf opaque, unsigned items, unsigned size) +{ + voidpf buf; + ulg bsize = (ulg)items*size; + + (void)opaque; + + /* If we allocate less than 65520 bytes, we assume that farmalloc + * will return a usable pointer which doesn't have to be normalized. 
+ */ + if (bsize < 65520L) { + buf = farmalloc(bsize); + if (*(ush*)&buf != 0) return buf; + } else { + buf = farmalloc(bsize + 16L); + } + if (buf == NULL || next_ptr >= MAX_PTR) return NULL; + table[next_ptr].org_ptr = buf; + + /* Normalize the pointer to seg:0 */ + *((ush*)&buf+1) += ((ush)((uch*)buf-0) + 15) >> 4; + *(ush*)&buf = 0; + table[next_ptr++].new_ptr = buf; + return buf; +} + +void ZLIB_INTERNAL zcfree (voidpf opaque, voidpf ptr) +{ + int n; + + (void)opaque; + + if (*(ush*)&ptr != 0) { /* object < 64K */ + farfree(ptr); + return; + } + /* Find the original pointer */ + for (n = 0; n < next_ptr; n++) { + if (ptr != table[n].new_ptr) continue; + + farfree(table[n].org_ptr); + while (++n < next_ptr) { + table[n-1] = table[n]; + } + next_ptr--; + return; + } + Assert(0, "zcfree: ptr not found"); +} + +#endif /* __TURBOC__ */ + + +#ifdef M_I86 +/* Microsoft C in 16-bit mode */ + +# define MY_ZCALLOC + +#if (!defined(_MSC_VER) || (_MSC_VER <= 600)) +# define _halloc halloc +# define _hfree hfree +#endif + +voidpf ZLIB_INTERNAL zcalloc (voidpf opaque, uInt items, uInt size) +{ + (void)opaque; + return _halloc((long)items, size); +} + +void ZLIB_INTERNAL zcfree (voidpf opaque, voidpf ptr) +{ + (void)opaque; + _hfree(ptr); +} + +#endif /* M_I86 */ + +#endif /* SYS16BIT */ + + +#ifndef MY_ZCALLOC /* Any system without a special alloc function */ + +#ifndef STDC +extern voidp malloc OF((uInt size)); +extern voidp calloc OF((uInt items, uInt size)); +extern void free OF((voidpf ptr)); +#endif + +voidpf ZLIB_INTERNAL zcalloc (opaque, items, size) + voidpf opaque; + unsigned items; + unsigned size; +{ + (void)opaque; + return sizeof(uInt) > 2 ? 
(voidpf)malloc(items * size) : + (voidpf)calloc(items, size); +} + +void ZLIB_INTERNAL zcfree (opaque, ptr) + voidpf opaque; + voidpf ptr; +{ + (void)opaque; + free(ptr); +} + +#endif /* MY_ZCALLOC */ + +#endif /* !Z_SOLO */ diff --git a/deps/SZ/zlib/zutil.h b/deps/SZ/zlib/zutil.h new file mode 100644 index 0000000000000000000000000000000000000000..b079ea6a80f5abd23a6b2451d6eaee50ceda969b --- /dev/null +++ b/deps/SZ/zlib/zutil.h @@ -0,0 +1,271 @@ +/* zutil.h -- internal interface and configuration of the compression library + * Copyright (C) 1995-2016 Jean-loup Gailly, Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. + */ + +/* @(#) $Id$ */ + +#ifndef ZUTIL_H +#define ZUTIL_H + +#ifdef HAVE_HIDDEN +# define ZLIB_INTERNAL __attribute__((visibility ("hidden"))) +#else +# define ZLIB_INTERNAL +#endif + +#include "zlib.h" + +#if defined(STDC) && !defined(Z_SOLO) +# if !(defined(_WIN32_WCE) && defined(_MSC_VER)) +# include +# endif +# include +# include +#endif + +#ifdef Z_SOLO + typedef long ptrdiff_t; /* guess -- will be caught if guess is wrong */ +#endif + +#ifndef local +# define local static +#endif +/* since "static" is used to mean two completely different things in C, we + define "local" for the non-static meaning of "static", for readability + (compile with -Dlocal if your debugger can't find static symbols) */ + +typedef unsigned char uch; +typedef uch FAR uchf; +typedef unsigned short ush; +typedef ush FAR ushf; +typedef unsigned long ulg; + +extern z_const char * const z_errmsg[10]; /* indexed by 2-zlib_error */ +/* (size given to avoid silly warnings with Visual C++) */ + +#define ERR_MSG(err) z_errmsg[Z_NEED_DICT-(err)] + +#define ERR_RETURN(strm,err) \ + return (strm->msg = ERR_MSG(err), (err)) +/* To be used only when the 
state is known to be valid */ + + /* common constants */ + +#ifndef DEF_WBITS +# define DEF_WBITS MAX_WBITS +#endif +/* default windowBits for decompression. MAX_WBITS is for compression only */ + +#if MAX_MEM_LEVEL >= 8 +# define DEF_MEM_LEVEL 8 +#else +# define DEF_MEM_LEVEL MAX_MEM_LEVEL +#endif +/* default memLevel */ + +#define STORED_BLOCK 0 +#define STATIC_TREES 1 +#define DYN_TREES 2 +/* The three kinds of block type */ + +#define MIN_MATCH 3 +#define MAX_MATCH 258 +/* The minimum and maximum match lengths */ + +#define PRESET_DICT 0x20 /* preset dictionary flag in zlib header */ + + /* target dependencies */ + +#if defined(MSDOS) || (defined(WINDOWS) && !defined(WIN32)) +# define OS_CODE 0x00 +# ifndef Z_SOLO +# if defined(__TURBOC__) || defined(__BORLANDC__) +# if (__STDC__ == 1) && (defined(__LARGE__) || defined(__COMPACT__)) + /* Allow compilation with ANSI keywords only enabled */ + void _Cdecl farfree( void *block ); + void *_Cdecl farmalloc( unsigned long nbytes ); +# else +# include +# endif +# else /* MSC or DJGPP */ +# include +# endif +# endif +#endif + +#ifdef AMIGA +# define OS_CODE 1 +#endif + +#if defined(VAXC) || defined(VMS) +# define OS_CODE 2 +# define F_OPEN(name, mode) \ + fopen((name), (mode), "mbc=60", "ctx=stm", "rfm=fix", "mrs=512") +#endif + +#ifdef __370__ +# if __TARGET_LIB__ < 0x20000000 +# define OS_CODE 4 +# elif __TARGET_LIB__ < 0x40000000 +# define OS_CODE 11 +# else +# define OS_CODE 8 +# endif +#endif + +#if defined(ATARI) || defined(atarist) +# define OS_CODE 5 +#endif + +#ifdef OS2 +# define OS_CODE 6 +# if defined(M_I86) && !defined(Z_SOLO) +# include +# endif +#endif + +#if defined(MACOS) || defined(TARGET_OS_MAC) +# define OS_CODE 7 +# ifndef Z_SOLO +# if defined(__MWERKS__) && __dest_os != __be_os && __dest_os != __win32_os +# include /* for fdopen */ +# else +# ifndef fdopen +# define fdopen(fd,mode) NULL /* No fdopen() */ +# endif +# endif +# endif +#endif + +#ifdef __acorn +# define OS_CODE 13 +#endif + +#if 
defined(WIN32) && !defined(__CYGWIN__) +# define OS_CODE 10 +#endif + +#ifdef _BEOS_ +# define OS_CODE 16 +#endif + +#ifdef __TOS_OS400__ +# define OS_CODE 18 +#endif + +#ifdef __APPLE__ +# define OS_CODE 19 +#endif + +#if defined(_BEOS_) || defined(RISCOS) +# define fdopen(fd,mode) NULL /* No fdopen() */ +#endif + +#if (defined(_MSC_VER) && (_MSC_VER > 600)) && !defined __INTERIX +# if defined(_WIN32_WCE) +# define fdopen(fd,mode) NULL /* No fdopen() */ +# ifndef _PTRDIFF_T_DEFINED + typedef int ptrdiff_t; +# define _PTRDIFF_T_DEFINED +# endif +# else +# define fdopen(fd,type) _fdopen(fd,type) +# endif +#endif + +#if defined(__BORLANDC__) && !defined(MSDOS) + #pragma warn -8004 + #pragma warn -8008 + #pragma warn -8066 +#endif + +/* provide prototypes for these when building zlib without LFS */ +#if !defined(_WIN32) && \ + (!defined(_LARGEFILE64_SOURCE) || _LFS64_LARGEFILE-0 == 0) + ZEXTERN uLong ZEXPORT adler32_combine64 OF((uLong, uLong, z_off_t)); + ZEXTERN uLong ZEXPORT crc32_combine64 OF((uLong, uLong, z_off_t)); +#endif + + /* common defaults */ + +#ifndef OS_CODE +# define OS_CODE 3 /* assume Unix */ +#endif + +#ifndef F_OPEN +# define F_OPEN(name, mode) fopen((name), (mode)) +#endif + + /* functions */ + +#if defined(pyr) || defined(Z_SOLO) +# define NO_MEMCPY +#endif +#if defined(SMALL_MEDIUM) && !defined(_MSC_VER) && !defined(__SC__) + /* Use our own functions for small and medium model with MSC <= 5.0. + * You may have to use the same strategy for Borland C (untested). + * The __SC__ check is for Symantec. 
+ */ +# define NO_MEMCPY +#endif +#if defined(STDC) && !defined(HAVE_MEMCPY) && !defined(NO_MEMCPY) +# define HAVE_MEMCPY +#endif +#ifdef HAVE_MEMCPY +# ifdef SMALL_MEDIUM /* MSDOS small or medium model */ +# define zmemcpy _fmemcpy +# define zmemcmp _fmemcmp +# define zmemzero(dest, len) _fmemset(dest, 0, len) +# else +# define zmemcpy memcpy +# define zmemcmp memcmp +# define zmemzero(dest, len) memset(dest, 0, len) +# endif +#else + void ZLIB_INTERNAL zmemcpy OF((Bytef* dest, const Bytef* source, uInt len)); + int ZLIB_INTERNAL zmemcmp OF((const Bytef* s1, const Bytef* s2, uInt len)); + void ZLIB_INTERNAL zmemzero OF((Bytef* dest, uInt len)); +#endif + +/* Diagnostic functions */ +#ifdef ZLIB_DEBUG +# include + extern int ZLIB_INTERNAL z_verbose; + extern void ZLIB_INTERNAL z_error OF((char *m)); +# define Assert(cond,msg) {if(!(cond)) z_error(msg);} +# define Trace(x) {if (z_verbose>=0) fprintf x ;} +# define Tracev(x) {if (z_verbose>0) fprintf x ;} +# define Tracevv(x) {if (z_verbose>1) fprintf x ;} +# define Tracec(c,x) {if (z_verbose>0 && (c)) fprintf x ;} +# define Tracecv(c,x) {if (z_verbose>1 && (c)) fprintf x ;} +#else +# define Assert(cond,msg) +# define Trace(x) +# define Tracev(x) +# define Tracevv(x) +# define Tracec(c,x) +# define Tracecv(c,x) +#endif + +#ifndef Z_SOLO + voidpf ZLIB_INTERNAL zcalloc OF((voidpf opaque, unsigned items, + unsigned size)); + void ZLIB_INTERNAL zcfree OF((voidpf opaque, voidpf ptr)); +#endif + +#define ZALLOC(strm, items, size) \ + (*((strm)->zalloc))((strm)->opaque, (items), (size)) +#define ZFREE(strm, addr) (*((strm)->zfree))((strm)->opaque, (voidpf)(addr)) +#define TRY_FREE(s, p) {if (p) ZFREE(s, p);} + +/* Reverse the bytes in a 32-bit value */ +#define ZSWAP32(q) ((((q) >> 24) & 0xff) + (((q) >> 8) & 0xff00) + \ + (((q) & 0xff00) << 8) + (((q) & 0xff) << 24)) + +#endif /* ZUTIL_H */ diff --git a/deps/SZ/zstd/CMakeLists.txt b/deps/SZ/zstd/CMakeLists.txt new file mode 100644 index 
0000000000000000000000000000000000000000..59665ce0ac1ce52978c56a2ae8865f4e6c5eaff6
--- /dev/null
+++ b/deps/SZ/zstd/CMakeLists.txt
@@ -0,0 +1,52 @@
+# Builds the Zstandard sources bundled in this directory as the `zstd` target.
+# No library type is given, so STATIC vs. SHARED follows BUILD_SHARED_LIBS.
+add_library(zstd
+  ./common/entropy_common.c
+  ./common/pool.c
+  ./common/threading.c
+  ./common/debug.c
+  ./common/xxhash.c
+  ./common/fse_decompress.c
+  ./common/zstd_common.c
+  ./common/error_private.c
+  ./compress/zstd_ldm.c
+  ./compress/zstd_lazy.c
+  ./compress/huf_compress.c
+  ./compress/zstd_opt.c
+  ./compress/zstd_double_fast.c
+  ./compress/zstd_compress.c
+  ./compress/zstd_fast.c
+  ./compress/fse_compress.c
+  ./compress/zstdmt_compress.c
+  ./compress/hist.c
+  ./decompress/zstd_decompress.c
+  ./decompress/huf_decompress.c
+  ./deprecated/zbuff_common.c
+  ./deprecated/zbuff_compress.c
+  ./deprecated/zbuff_decompress.c
+  ./legacy/zstd_v05.c
+  ./legacy/zstd_v04.c
+  ./legacy/zstd_v06.c
+  ./legacy/zstd_v07.c
+  ./legacy/zstd_v03.c
+  ./legacy/zstd_v02.c
+  ./legacy/zstd_v01.c
+  ./dictBuilder/cover.c
+  ./dictBuilder/divsufsort.c
+  ./dictBuilder/zdict.c
+  )
+
+# PUBLIC: this directory (where zstd.h lives) is exported to targets that link
+# zstd from the build tree. PRIVATE: sub-directories needed only to compile zstd.
+target_include_directories(zstd
+  PUBLIC
+    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
+  PRIVATE
+    ${CMAKE_CURRENT_SOURCE_DIR}/common
+    ${CMAKE_CURRENT_SOURCE_DIR}/compress
+    ${CMAKE_CURRENT_SOURCE_DIR}/decompress
+    ${CMAKE_CURRENT_SOURCE_DIR}/deprecated
+    ${CMAKE_CURRENT_SOURCE_DIR}/dictBuilder
+    ${CMAKE_CURRENT_SOURCE_DIR}/dll
+    ${CMAKE_CURRENT_SOURCE_DIR}/legacy
+  )
diff --git a/deps/SZ/zstd/LICENSE b/deps/SZ/zstd/LICENSE
new file mode 100644
index 0000000000000000000000000000000000000000..a793a802892567f17d464a831e2e531dc8833f55
--- /dev/null
+++ b/deps/SZ/zstd/LICENSE
@@ -0,0 +1,30 @@
+BSD License
+
+For Zstandard software
+
+Copyright (c) 2016-present, Facebook, Inc. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+ + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + * Neither the name Facebook nor the names of its contributors may be used to + endorse or promote products derived from this software without specific + prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/deps/SZ/zstd/Makefile.am b/deps/SZ/zstd/Makefile.am new file mode 100644 index 0000000000000000000000000000000000000000..64713cbd2d0711f495c00abcf2fdd6c671f638fd --- /dev/null +++ b/deps/SZ/zstd/Makefile.am @@ -0,0 +1,69 @@ +AUTOMAKE_OPTIONS=foreign +include_HEADERS=./compress/zstdmt_compress.h \ + ./compress/zstd_opt.h \ + ./compress/zstd_ldm.h \ + ./compress/zstd_compress_internal.h \ + ./compress/hist.h \ + ./compress/zstd_fast.h \ + ./compress/zstd_double_fast.h \ + ./compress/zstd_lazy.h \ + ./common/fse.h \ + ./common/bitstream.h \ + ./common/mem.h \ + ./common/zstd_errors.h \ + ./common/compiler.h \ + ./common/debug.h \ + ./common/huf.h \ + ./common/zstd_internal.h \ + ./common/xxhash.h \ + ./common/cpu.h \ + ./common/pool.h \ + ./common/threading.h \ + ./common/error_private.h \ + ./deprecated/zbuff.h \ + ./dictBuilder/zdict.h \ + ./dictBuilder/divsufsort.h \ + ./legacy/zstd_v07.h \ + ./legacy/zstd_v02.h \ + ./legacy/zstd_v04.h \ + ./legacy/zstd_legacy.h \ + ./legacy/zstd_v06.h \ + ./legacy/zstd_v05.h \ + ./legacy/zstd_v01.h \ + ./legacy/zstd_v03.h \ + ./zstd.h +lib_LTLIBRARIES=libzstd.la +libzstd_la_CFLAGS=-I./ -I./compress -I./common -I./deprecated -I./dictBuilder -I./legacy +libzstd_la_SOURCES=./decompress/zstd_decompress.c \ + ./decompress/huf_decompress.c \ + ./compress/zstd_lazy.c \ + ./compress/zstdmt_compress.c \ + ./compress/zstd_double_fast.c \ + ./compress/zstd_fast.c \ + ./compress/hist.c \ + ./compress/fse_compress.c \ + ./compress/zstd_opt.c \ + ./compress/zstd_compress.c \ + ./compress/huf_compress.c \ + ./compress/zstd_ldm.c \ + ./common/xxhash.c \ + ./common/fse_decompress.c \ + ./common/pool.c \ + ./common/zstd_common.c \ + ./common/error_private.c \ + ./common/debug.c \ + ./common/threading.c \ + ./common/entropy_common.c \ + ./deprecated/zbuff_compress.c \ + ./deprecated/zbuff_decompress.c \ + ./deprecated/zbuff_common.c \ + ./dictBuilder/zdict.c \ + ./dictBuilder/divsufsort.c \ + ./dictBuilder/cover.c \ + 
./legacy/zstd_v03.c \ + ./legacy/zstd_v07.c \ + ./legacy/zstd_v06.c \ + ./legacy/zstd_v01.c \ + ./legacy/zstd_v02.c \ + ./legacy/zstd_v04.c \ + ./legacy/zstd_v05.c diff --git a/deps/SZ/zstd/Makefile.in b/deps/SZ/zstd/Makefile.in new file mode 100644 index 0000000000000000000000000000000000000000..f51b6e105ee5c5e565e3b10113153057f39d08bd --- /dev/null +++ b/deps/SZ/zstd/Makefile.in @@ -0,0 +1,1284 @@ +# Makefile.in generated by automake 1.16.2 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2020 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +subdir = zstd +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = 
$(srcdir)/Makefile.am $(include_HEADERS) \ + $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! 
-r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(libdir)" "$(DESTDIR)$(includedir)" +LTLIBRARIES = $(lib_LTLIBRARIES) +libzstd_la_LIBADD = +am__dirstamp = $(am__leading_dot)dirstamp +am_libzstd_la_OBJECTS = ./decompress/libzstd_la-zstd_decompress.lo \ + ./decompress/libzstd_la-huf_decompress.lo \ + ./compress/libzstd_la-zstd_lazy.lo \ + ./compress/libzstd_la-zstdmt_compress.lo \ + ./compress/libzstd_la-zstd_double_fast.lo \ + ./compress/libzstd_la-zstd_fast.lo \ + ./compress/libzstd_la-hist.lo \ + ./compress/libzstd_la-fse_compress.lo \ + ./compress/libzstd_la-zstd_opt.lo \ + ./compress/libzstd_la-zstd_compress.lo \ + ./compress/libzstd_la-huf_compress.lo \ + ./compress/libzstd_la-zstd_ldm.lo \ + ./common/libzstd_la-xxhash.lo \ + ./common/libzstd_la-fse_decompress.lo \ + ./common/libzstd_la-pool.lo ./common/libzstd_la-zstd_common.lo \ + ./common/libzstd_la-error_private.lo \ + ./common/libzstd_la-debug.lo ./common/libzstd_la-threading.lo \ + ./common/libzstd_la-entropy_common.lo \ + ./deprecated/libzstd_la-zbuff_compress.lo \ + ./deprecated/libzstd_la-zbuff_decompress.lo \ + ./deprecated/libzstd_la-zbuff_common.lo \ + ./dictBuilder/libzstd_la-zdict.lo \ + ./dictBuilder/libzstd_la-divsufsort.lo \ + ./dictBuilder/libzstd_la-cover.lo \ + ./legacy/libzstd_la-zstd_v03.lo \ + ./legacy/libzstd_la-zstd_v07.lo \ + ./legacy/libzstd_la-zstd_v06.lo \ + ./legacy/libzstd_la-zstd_v01.lo \ + ./legacy/libzstd_la-zstd_v02.lo \ + ./legacy/libzstd_la-zstd_v04.lo \ + ./legacy/libzstd_la-zstd_v05.lo +libzstd_la_OBJECTS = $(am_libzstd_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +libzstd_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(libzstd_la_CFLAGS) \ + $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) 
+am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__maybe_remake_depfiles = depfiles +am__depfiles_remade = ./common/$(DEPDIR)/libzstd_la-debug.Plo \ + ./common/$(DEPDIR)/libzstd_la-entropy_common.Plo \ + ./common/$(DEPDIR)/libzstd_la-error_private.Plo \ + ./common/$(DEPDIR)/libzstd_la-fse_decompress.Plo \ + ./common/$(DEPDIR)/libzstd_la-pool.Plo \ + ./common/$(DEPDIR)/libzstd_la-threading.Plo \ + ./common/$(DEPDIR)/libzstd_la-xxhash.Plo \ + ./common/$(DEPDIR)/libzstd_la-zstd_common.Plo \ + ./compress/$(DEPDIR)/libzstd_la-fse_compress.Plo \ + ./compress/$(DEPDIR)/libzstd_la-hist.Plo \ + ./compress/$(DEPDIR)/libzstd_la-huf_compress.Plo \ + ./compress/$(DEPDIR)/libzstd_la-zstd_compress.Plo \ + ./compress/$(DEPDIR)/libzstd_la-zstd_double_fast.Plo \ + ./compress/$(DEPDIR)/libzstd_la-zstd_fast.Plo \ + ./compress/$(DEPDIR)/libzstd_la-zstd_lazy.Plo \ + ./compress/$(DEPDIR)/libzstd_la-zstd_ldm.Plo \ + ./compress/$(DEPDIR)/libzstd_la-zstd_opt.Plo \ + ./compress/$(DEPDIR)/libzstd_la-zstdmt_compress.Plo \ + ./decompress/$(DEPDIR)/libzstd_la-huf_decompress.Plo \ + ./decompress/$(DEPDIR)/libzstd_la-zstd_decompress.Plo \ + ./deprecated/$(DEPDIR)/libzstd_la-zbuff_common.Plo \ + ./deprecated/$(DEPDIR)/libzstd_la-zbuff_compress.Plo \ + ./deprecated/$(DEPDIR)/libzstd_la-zbuff_decompress.Plo \ + ./dictBuilder/$(DEPDIR)/libzstd_la-cover.Plo \ + ./dictBuilder/$(DEPDIR)/libzstd_la-divsufsort.Plo \ + ./dictBuilder/$(DEPDIR)/libzstd_la-zdict.Plo \ + ./legacy/$(DEPDIR)/libzstd_la-zstd_v01.Plo \ + ./legacy/$(DEPDIR)/libzstd_la-zstd_v02.Plo \ + ./legacy/$(DEPDIR)/libzstd_la-zstd_v03.Plo \ + ./legacy/$(DEPDIR)/libzstd_la-zstd_v04.Plo \ + ./legacy/$(DEPDIR)/libzstd_la-zstd_v05.Plo \ + 
./legacy/$(DEPDIR)/libzstd_la-zstd_v06.Plo \ + ./legacy/$(DEPDIR)/libzstd_la-zstd_v07.Plo +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(libzstd_la_SOURCES) +DIST_SOURCES = $(libzstd_la_SOURCES) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +HEADERS = $(include_HEADERS) +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. 
+am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/depcomp +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +FC = @FC@ +FCFLAGS = @FCFLAGS@ +FGREP = @FGREP@ +GREP = @GREP@ +GSL_CFLAGS = @GSL_CFLAGS@ +GSL_CONFIG = @GSL_CONFIG@ +GSL_HDR = @GSL_HDR@ +GSL_LIB = @GSL_LIB@ +GSL_LIBS = @GSL_LIBS@ +GSL_STATIC = @GSL_STATIC@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +NM = @NM@ +NMEDIT = @NMEDIT@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENMP_FLAGS = @OPENMP_FLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PASTRI_FLAGS = @PASTRI_FLAGS@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +RANDOMACCESS_FLAGS = @RANDOMACCESS_FLAGS@ 
+RANLIB = @RANLIB@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +STRIP = @STRIP@ +TIMECMPR_FLAGS = @TIMECMPR_FLAGS@ +VERSION = @VERSION@ +WRITESTATS_FLAGS = @WRITESTATS_FLAGS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_FC = @ac_ct_FC@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +AUTOMAKE_OPTIONS = foreign +include_HEADERS = ./compress/zstdmt_compress.h \ + ./compress/zstd_opt.h \ + ./compress/zstd_ldm.h \ + ./compress/zstd_compress_internal.h \ + ./compress/hist.h \ + ./compress/zstd_fast.h \ + ./compress/zstd_double_fast.h \ + ./compress/zstd_lazy.h \ + ./common/fse.h \ + ./common/bitstream.h \ + ./common/mem.h \ + ./common/zstd_errors.h \ + ./common/compiler.h \ + ./common/debug.h \ + ./common/huf.h \ + 
./common/zstd_internal.h \ + ./common/xxhash.h \ + ./common/cpu.h \ + ./common/pool.h \ + ./common/threading.h \ + ./common/error_private.h \ + ./deprecated/zbuff.h \ + ./dictBuilder/zdict.h \ + ./dictBuilder/divsufsort.h \ + ./legacy/zstd_v07.h \ + ./legacy/zstd_v02.h \ + ./legacy/zstd_v04.h \ + ./legacy/zstd_legacy.h \ + ./legacy/zstd_v06.h \ + ./legacy/zstd_v05.h \ + ./legacy/zstd_v01.h \ + ./legacy/zstd_v03.h \ + ./zstd.h + +lib_LTLIBRARIES = libzstd.la +libzstd_la_CFLAGS = -I./ -I./compress -I./common -I./deprecated -I./dictBuilder -I./legacy +libzstd_la_SOURCES = ./decompress/zstd_decompress.c \ + ./decompress/huf_decompress.c \ + ./compress/zstd_lazy.c \ + ./compress/zstdmt_compress.c \ + ./compress/zstd_double_fast.c \ + ./compress/zstd_fast.c \ + ./compress/hist.c \ + ./compress/fse_compress.c \ + ./compress/zstd_opt.c \ + ./compress/zstd_compress.c \ + ./compress/huf_compress.c \ + ./compress/zstd_ldm.c \ + ./common/xxhash.c \ + ./common/fse_decompress.c \ + ./common/pool.c \ + ./common/zstd_common.c \ + ./common/error_private.c \ + ./common/debug.c \ + ./common/threading.c \ + ./common/entropy_common.c \ + ./deprecated/zbuff_compress.c \ + ./deprecated/zbuff_decompress.c \ + ./deprecated/zbuff_common.c \ + ./dictBuilder/zdict.c \ + ./dictBuilder/divsufsort.c \ + ./dictBuilder/cover.c \ + ./legacy/zstd_v03.c \ + ./legacy/zstd_v07.c \ + ./legacy/zstd_v06.c \ + ./legacy/zstd_v01.c \ + ./legacy/zstd_v02.c \ + ./legacy/zstd_v04.c \ + ./legacy/zstd_v05.c + +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign zstd/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign zstd/Makefile 
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +install-libLTLIBRARIES: $(lib_LTLIBRARIES) + @$(NORMAL_INSTALL) + @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \ + list2=; for p in $$list; do \ + if test -f $$p; then \ + list2="$$list2 $$p"; \ + else :; fi; \ + done; \ + test -z "$$list2" || { \ + echo " $(MKDIR_P) '$(DESTDIR)$(libdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(libdir)" || exit 1; \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(libdir)'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(libdir)"; \ + } + +uninstall-libLTLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(libdir)/$$f'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(libdir)/$$f"; \ + done + +clean-libLTLIBRARIES: + -test -z "$(lib_LTLIBRARIES)" || rm -f $(lib_LTLIBRARIES) + @list='$(lib_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; 
s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } +decompress/$(am__dirstamp): + @$(MKDIR_P) ./decompress + @: > decompress/$(am__dirstamp) +decompress/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) ./decompress/$(DEPDIR) + @: > decompress/$(DEPDIR)/$(am__dirstamp) +./decompress/libzstd_la-zstd_decompress.lo: \ + decompress/$(am__dirstamp) \ + decompress/$(DEPDIR)/$(am__dirstamp) +./decompress/libzstd_la-huf_decompress.lo: decompress/$(am__dirstamp) \ + decompress/$(DEPDIR)/$(am__dirstamp) +compress/$(am__dirstamp): + @$(MKDIR_P) ./compress + @: > compress/$(am__dirstamp) +compress/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) ./compress/$(DEPDIR) + @: > compress/$(DEPDIR)/$(am__dirstamp) +./compress/libzstd_la-zstd_lazy.lo: compress/$(am__dirstamp) \ + compress/$(DEPDIR)/$(am__dirstamp) +./compress/libzstd_la-zstdmt_compress.lo: compress/$(am__dirstamp) \ + compress/$(DEPDIR)/$(am__dirstamp) +./compress/libzstd_la-zstd_double_fast.lo: compress/$(am__dirstamp) \ + compress/$(DEPDIR)/$(am__dirstamp) +./compress/libzstd_la-zstd_fast.lo: compress/$(am__dirstamp) \ + compress/$(DEPDIR)/$(am__dirstamp) +./compress/libzstd_la-hist.lo: compress/$(am__dirstamp) \ + compress/$(DEPDIR)/$(am__dirstamp) +./compress/libzstd_la-fse_compress.lo: compress/$(am__dirstamp) \ + compress/$(DEPDIR)/$(am__dirstamp) +./compress/libzstd_la-zstd_opt.lo: compress/$(am__dirstamp) \ + compress/$(DEPDIR)/$(am__dirstamp) +./compress/libzstd_la-zstd_compress.lo: compress/$(am__dirstamp) \ + compress/$(DEPDIR)/$(am__dirstamp) +./compress/libzstd_la-huf_compress.lo: compress/$(am__dirstamp) \ + compress/$(DEPDIR)/$(am__dirstamp) +./compress/libzstd_la-zstd_ldm.lo: compress/$(am__dirstamp) \ + compress/$(DEPDIR)/$(am__dirstamp) +common/$(am__dirstamp): + @$(MKDIR_P) ./common + @: > common/$(am__dirstamp) +common/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) ./common/$(DEPDIR) + @: > common/$(DEPDIR)/$(am__dirstamp) 
+./common/libzstd_la-xxhash.lo: common/$(am__dirstamp) \ + common/$(DEPDIR)/$(am__dirstamp) +./common/libzstd_la-fse_decompress.lo: common/$(am__dirstamp) \ + common/$(DEPDIR)/$(am__dirstamp) +./common/libzstd_la-pool.lo: common/$(am__dirstamp) \ + common/$(DEPDIR)/$(am__dirstamp) +./common/libzstd_la-zstd_common.lo: common/$(am__dirstamp) \ + common/$(DEPDIR)/$(am__dirstamp) +./common/libzstd_la-error_private.lo: common/$(am__dirstamp) \ + common/$(DEPDIR)/$(am__dirstamp) +./common/libzstd_la-debug.lo: common/$(am__dirstamp) \ + common/$(DEPDIR)/$(am__dirstamp) +./common/libzstd_la-threading.lo: common/$(am__dirstamp) \ + common/$(DEPDIR)/$(am__dirstamp) +./common/libzstd_la-entropy_common.lo: common/$(am__dirstamp) \ + common/$(DEPDIR)/$(am__dirstamp) +deprecated/$(am__dirstamp): + @$(MKDIR_P) ./deprecated + @: > deprecated/$(am__dirstamp) +deprecated/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) ./deprecated/$(DEPDIR) + @: > deprecated/$(DEPDIR)/$(am__dirstamp) +./deprecated/libzstd_la-zbuff_compress.lo: deprecated/$(am__dirstamp) \ + deprecated/$(DEPDIR)/$(am__dirstamp) +./deprecated/libzstd_la-zbuff_decompress.lo: \ + deprecated/$(am__dirstamp) \ + deprecated/$(DEPDIR)/$(am__dirstamp) +./deprecated/libzstd_la-zbuff_common.lo: deprecated/$(am__dirstamp) \ + deprecated/$(DEPDIR)/$(am__dirstamp) +dictBuilder/$(am__dirstamp): + @$(MKDIR_P) ./dictBuilder + @: > dictBuilder/$(am__dirstamp) +dictBuilder/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) ./dictBuilder/$(DEPDIR) + @: > dictBuilder/$(DEPDIR)/$(am__dirstamp) +./dictBuilder/libzstd_la-zdict.lo: dictBuilder/$(am__dirstamp) \ + dictBuilder/$(DEPDIR)/$(am__dirstamp) +./dictBuilder/libzstd_la-divsufsort.lo: dictBuilder/$(am__dirstamp) \ + dictBuilder/$(DEPDIR)/$(am__dirstamp) +./dictBuilder/libzstd_la-cover.lo: dictBuilder/$(am__dirstamp) \ + dictBuilder/$(DEPDIR)/$(am__dirstamp) +legacy/$(am__dirstamp): + @$(MKDIR_P) ./legacy + @: > legacy/$(am__dirstamp) +legacy/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) ./legacy/$(DEPDIR) 
+ @: > legacy/$(DEPDIR)/$(am__dirstamp) +./legacy/libzstd_la-zstd_v03.lo: legacy/$(am__dirstamp) \ + legacy/$(DEPDIR)/$(am__dirstamp) +./legacy/libzstd_la-zstd_v07.lo: legacy/$(am__dirstamp) \ + legacy/$(DEPDIR)/$(am__dirstamp) +./legacy/libzstd_la-zstd_v06.lo: legacy/$(am__dirstamp) \ + legacy/$(DEPDIR)/$(am__dirstamp) +./legacy/libzstd_la-zstd_v01.lo: legacy/$(am__dirstamp) \ + legacy/$(DEPDIR)/$(am__dirstamp) +./legacy/libzstd_la-zstd_v02.lo: legacy/$(am__dirstamp) \ + legacy/$(DEPDIR)/$(am__dirstamp) +./legacy/libzstd_la-zstd_v04.lo: legacy/$(am__dirstamp) \ + legacy/$(DEPDIR)/$(am__dirstamp) +./legacy/libzstd_la-zstd_v05.lo: legacy/$(am__dirstamp) \ + legacy/$(DEPDIR)/$(am__dirstamp) + +libzstd.la: $(libzstd_la_OBJECTS) $(libzstd_la_DEPENDENCIES) $(EXTRA_libzstd_la_DEPENDENCIES) + $(AM_V_CCLD)$(libzstd_la_LINK) -rpath $(libdir) $(libzstd_la_OBJECTS) $(libzstd_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + -rm -f ./common/*.$(OBJEXT) + -rm -f ./common/*.lo + -rm -f ./compress/*.$(OBJEXT) + -rm -f ./compress/*.lo + -rm -f ./decompress/*.$(OBJEXT) + -rm -f ./decompress/*.lo + -rm -f ./deprecated/*.$(OBJEXT) + -rm -f ./deprecated/*.lo + -rm -f ./dictBuilder/*.$(OBJEXT) + -rm -f ./dictBuilder/*.lo + -rm -f ./legacy/*.$(OBJEXT) + -rm -f ./legacy/*.lo + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./common/$(DEPDIR)/libzstd_la-debug.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./common/$(DEPDIR)/libzstd_la-entropy_common.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./common/$(DEPDIR)/libzstd_la-error_private.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./common/$(DEPDIR)/libzstd_la-fse_decompress.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./common/$(DEPDIR)/libzstd_la-pool.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./common/$(DEPDIR)/libzstd_la-threading.Plo@am__quote@ # 
am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./common/$(DEPDIR)/libzstd_la-xxhash.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./common/$(DEPDIR)/libzstd_la-zstd_common.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./compress/$(DEPDIR)/libzstd_la-fse_compress.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./compress/$(DEPDIR)/libzstd_la-hist.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./compress/$(DEPDIR)/libzstd_la-huf_compress.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./compress/$(DEPDIR)/libzstd_la-zstd_compress.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./compress/$(DEPDIR)/libzstd_la-zstd_double_fast.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./compress/$(DEPDIR)/libzstd_la-zstd_fast.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./compress/$(DEPDIR)/libzstd_la-zstd_lazy.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./compress/$(DEPDIR)/libzstd_la-zstd_ldm.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./compress/$(DEPDIR)/libzstd_la-zstd_opt.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./compress/$(DEPDIR)/libzstd_la-zstdmt_compress.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./decompress/$(DEPDIR)/libzstd_la-huf_decompress.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./decompress/$(DEPDIR)/libzstd_la-zstd_decompress.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./deprecated/$(DEPDIR)/libzstd_la-zbuff_common.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./deprecated/$(DEPDIR)/libzstd_la-zbuff_compress.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ 
@am__quote@./deprecated/$(DEPDIR)/libzstd_la-zbuff_decompress.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./dictBuilder/$(DEPDIR)/libzstd_la-cover.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./dictBuilder/$(DEPDIR)/libzstd_la-divsufsort.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./dictBuilder/$(DEPDIR)/libzstd_la-zdict.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./legacy/$(DEPDIR)/libzstd_la-zstd_v01.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./legacy/$(DEPDIR)/libzstd_la-zstd_v02.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./legacy/$(DEPDIR)/libzstd_la-zstd_v03.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./legacy/$(DEPDIR)/libzstd_la-zstd_v04.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./legacy/$(DEPDIR)/libzstd_la-zstd_v05.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./legacy/$(DEPDIR)/libzstd_la-zstd_v06.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./legacy/$(DEPDIR)/libzstd_la-zstd_v07.Plo@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCC_TRUE@ 
$(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +./decompress/libzstd_la-zstd_decompress.lo: ./decompress/zstd_decompress.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzstd_la_CFLAGS) $(CFLAGS) -MT ./decompress/libzstd_la-zstd_decompress.lo -MD -MP -MF ./decompress/$(DEPDIR)/libzstd_la-zstd_decompress.Tpo -c -o ./decompress/libzstd_la-zstd_decompress.lo `test -f './decompress/zstd_decompress.c' || echo '$(srcdir)/'`./decompress/zstd_decompress.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) ./decompress/$(DEPDIR)/libzstd_la-zstd_decompress.Tpo ./decompress/$(DEPDIR)/libzstd_la-zstd_decompress.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='./decompress/zstd_decompress.c' object='./decompress/libzstd_la-zstd_decompress.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) 
--mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzstd_la_CFLAGS) $(CFLAGS) -c -o ./decompress/libzstd_la-zstd_decompress.lo `test -f './decompress/zstd_decompress.c' || echo '$(srcdir)/'`./decompress/zstd_decompress.c + +./decompress/libzstd_la-huf_decompress.lo: ./decompress/huf_decompress.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzstd_la_CFLAGS) $(CFLAGS) -MT ./decompress/libzstd_la-huf_decompress.lo -MD -MP -MF ./decompress/$(DEPDIR)/libzstd_la-huf_decompress.Tpo -c -o ./decompress/libzstd_la-huf_decompress.lo `test -f './decompress/huf_decompress.c' || echo '$(srcdir)/'`./decompress/huf_decompress.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) ./decompress/$(DEPDIR)/libzstd_la-huf_decompress.Tpo ./decompress/$(DEPDIR)/libzstd_la-huf_decompress.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='./decompress/huf_decompress.c' object='./decompress/libzstd_la-huf_decompress.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzstd_la_CFLAGS) $(CFLAGS) -c -o ./decompress/libzstd_la-huf_decompress.lo `test -f './decompress/huf_decompress.c' || echo '$(srcdir)/'`./decompress/huf_decompress.c + +./compress/libzstd_la-zstd_lazy.lo: ./compress/zstd_lazy.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzstd_la_CFLAGS) $(CFLAGS) -MT ./compress/libzstd_la-zstd_lazy.lo -MD -MP -MF ./compress/$(DEPDIR)/libzstd_la-zstd_lazy.Tpo -c -o ./compress/libzstd_la-zstd_lazy.lo 
`test -f './compress/zstd_lazy.c' || echo '$(srcdir)/'`./compress/zstd_lazy.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) ./compress/$(DEPDIR)/libzstd_la-zstd_lazy.Tpo ./compress/$(DEPDIR)/libzstd_la-zstd_lazy.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='./compress/zstd_lazy.c' object='./compress/libzstd_la-zstd_lazy.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzstd_la_CFLAGS) $(CFLAGS) -c -o ./compress/libzstd_la-zstd_lazy.lo `test -f './compress/zstd_lazy.c' || echo '$(srcdir)/'`./compress/zstd_lazy.c + +./compress/libzstd_la-zstdmt_compress.lo: ./compress/zstdmt_compress.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzstd_la_CFLAGS) $(CFLAGS) -MT ./compress/libzstd_la-zstdmt_compress.lo -MD -MP -MF ./compress/$(DEPDIR)/libzstd_la-zstdmt_compress.Tpo -c -o ./compress/libzstd_la-zstdmt_compress.lo `test -f './compress/zstdmt_compress.c' || echo '$(srcdir)/'`./compress/zstdmt_compress.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) ./compress/$(DEPDIR)/libzstd_la-zstdmt_compress.Tpo ./compress/$(DEPDIR)/libzstd_la-zstdmt_compress.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='./compress/zstdmt_compress.c' object='./compress/libzstd_la-zstdmt_compress.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzstd_la_CFLAGS) $(CFLAGS) -c -o 
./compress/libzstd_la-zstdmt_compress.lo `test -f './compress/zstdmt_compress.c' || echo '$(srcdir)/'`./compress/zstdmt_compress.c + +./compress/libzstd_la-zstd_double_fast.lo: ./compress/zstd_double_fast.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzstd_la_CFLAGS) $(CFLAGS) -MT ./compress/libzstd_la-zstd_double_fast.lo -MD -MP -MF ./compress/$(DEPDIR)/libzstd_la-zstd_double_fast.Tpo -c -o ./compress/libzstd_la-zstd_double_fast.lo `test -f './compress/zstd_double_fast.c' || echo '$(srcdir)/'`./compress/zstd_double_fast.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) ./compress/$(DEPDIR)/libzstd_la-zstd_double_fast.Tpo ./compress/$(DEPDIR)/libzstd_la-zstd_double_fast.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='./compress/zstd_double_fast.c' object='./compress/libzstd_la-zstd_double_fast.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzstd_la_CFLAGS) $(CFLAGS) -c -o ./compress/libzstd_la-zstd_double_fast.lo `test -f './compress/zstd_double_fast.c' || echo '$(srcdir)/'`./compress/zstd_double_fast.c + +./compress/libzstd_la-zstd_fast.lo: ./compress/zstd_fast.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzstd_la_CFLAGS) $(CFLAGS) -MT ./compress/libzstd_la-zstd_fast.lo -MD -MP -MF ./compress/$(DEPDIR)/libzstd_la-zstd_fast.Tpo -c -o ./compress/libzstd_la-zstd_fast.lo `test -f './compress/zstd_fast.c' || echo '$(srcdir)/'`./compress/zstd_fast.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) 
./compress/$(DEPDIR)/libzstd_la-zstd_fast.Tpo ./compress/$(DEPDIR)/libzstd_la-zstd_fast.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='./compress/zstd_fast.c' object='./compress/libzstd_la-zstd_fast.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzstd_la_CFLAGS) $(CFLAGS) -c -o ./compress/libzstd_la-zstd_fast.lo `test -f './compress/zstd_fast.c' || echo '$(srcdir)/'`./compress/zstd_fast.c + +./compress/libzstd_la-hist.lo: ./compress/hist.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzstd_la_CFLAGS) $(CFLAGS) -MT ./compress/libzstd_la-hist.lo -MD -MP -MF ./compress/$(DEPDIR)/libzstd_la-hist.Tpo -c -o ./compress/libzstd_la-hist.lo `test -f './compress/hist.c' || echo '$(srcdir)/'`./compress/hist.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) ./compress/$(DEPDIR)/libzstd_la-hist.Tpo ./compress/$(DEPDIR)/libzstd_la-hist.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='./compress/hist.c' object='./compress/libzstd_la-hist.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzstd_la_CFLAGS) $(CFLAGS) -c -o ./compress/libzstd_la-hist.lo `test -f './compress/hist.c' || echo '$(srcdir)/'`./compress/hist.c + +./compress/libzstd_la-fse_compress.lo: ./compress/fse_compress.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) 
--mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzstd_la_CFLAGS) $(CFLAGS) -MT ./compress/libzstd_la-fse_compress.lo -MD -MP -MF ./compress/$(DEPDIR)/libzstd_la-fse_compress.Tpo -c -o ./compress/libzstd_la-fse_compress.lo `test -f './compress/fse_compress.c' || echo '$(srcdir)/'`./compress/fse_compress.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) ./compress/$(DEPDIR)/libzstd_la-fse_compress.Tpo ./compress/$(DEPDIR)/libzstd_la-fse_compress.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='./compress/fse_compress.c' object='./compress/libzstd_la-fse_compress.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzstd_la_CFLAGS) $(CFLAGS) -c -o ./compress/libzstd_la-fse_compress.lo `test -f './compress/fse_compress.c' || echo '$(srcdir)/'`./compress/fse_compress.c + +./compress/libzstd_la-zstd_opt.lo: ./compress/zstd_opt.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzstd_la_CFLAGS) $(CFLAGS) -MT ./compress/libzstd_la-zstd_opt.lo -MD -MP -MF ./compress/$(DEPDIR)/libzstd_la-zstd_opt.Tpo -c -o ./compress/libzstd_la-zstd_opt.lo `test -f './compress/zstd_opt.c' || echo '$(srcdir)/'`./compress/zstd_opt.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) ./compress/$(DEPDIR)/libzstd_la-zstd_opt.Tpo ./compress/$(DEPDIR)/libzstd_la-zstd_opt.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='./compress/zstd_opt.c' object='./compress/libzstd_la-zstd_opt.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ 
$(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzstd_la_CFLAGS) $(CFLAGS) -c -o ./compress/libzstd_la-zstd_opt.lo `test -f './compress/zstd_opt.c' || echo '$(srcdir)/'`./compress/zstd_opt.c + +./compress/libzstd_la-zstd_compress.lo: ./compress/zstd_compress.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzstd_la_CFLAGS) $(CFLAGS) -MT ./compress/libzstd_la-zstd_compress.lo -MD -MP -MF ./compress/$(DEPDIR)/libzstd_la-zstd_compress.Tpo -c -o ./compress/libzstd_la-zstd_compress.lo `test -f './compress/zstd_compress.c' || echo '$(srcdir)/'`./compress/zstd_compress.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) ./compress/$(DEPDIR)/libzstd_la-zstd_compress.Tpo ./compress/$(DEPDIR)/libzstd_la-zstd_compress.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='./compress/zstd_compress.c' object='./compress/libzstd_la-zstd_compress.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzstd_la_CFLAGS) $(CFLAGS) -c -o ./compress/libzstd_la-zstd_compress.lo `test -f './compress/zstd_compress.c' || echo '$(srcdir)/'`./compress/zstd_compress.c + +./compress/libzstd_la-huf_compress.lo: ./compress/huf_compress.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzstd_la_CFLAGS) $(CFLAGS) -MT ./compress/libzstd_la-huf_compress.lo -MD -MP -MF ./compress/$(DEPDIR)/libzstd_la-huf_compress.Tpo -c -o 
./compress/libzstd_la-huf_compress.lo `test -f './compress/huf_compress.c' || echo '$(srcdir)/'`./compress/huf_compress.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) ./compress/$(DEPDIR)/libzstd_la-huf_compress.Tpo ./compress/$(DEPDIR)/libzstd_la-huf_compress.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='./compress/huf_compress.c' object='./compress/libzstd_la-huf_compress.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzstd_la_CFLAGS) $(CFLAGS) -c -o ./compress/libzstd_la-huf_compress.lo `test -f './compress/huf_compress.c' || echo '$(srcdir)/'`./compress/huf_compress.c + +./compress/libzstd_la-zstd_ldm.lo: ./compress/zstd_ldm.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzstd_la_CFLAGS) $(CFLAGS) -MT ./compress/libzstd_la-zstd_ldm.lo -MD -MP -MF ./compress/$(DEPDIR)/libzstd_la-zstd_ldm.Tpo -c -o ./compress/libzstd_la-zstd_ldm.lo `test -f './compress/zstd_ldm.c' || echo '$(srcdir)/'`./compress/zstd_ldm.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) ./compress/$(DEPDIR)/libzstd_la-zstd_ldm.Tpo ./compress/$(DEPDIR)/libzstd_la-zstd_ldm.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='./compress/zstd_ldm.c' object='./compress/libzstd_la-zstd_ldm.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzstd_la_CFLAGS) $(CFLAGS) -c -o ./compress/libzstd_la-zstd_ldm.lo 
`test -f './compress/zstd_ldm.c' || echo '$(srcdir)/'`./compress/zstd_ldm.c + +./common/libzstd_la-xxhash.lo: ./common/xxhash.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzstd_la_CFLAGS) $(CFLAGS) -MT ./common/libzstd_la-xxhash.lo -MD -MP -MF ./common/$(DEPDIR)/libzstd_la-xxhash.Tpo -c -o ./common/libzstd_la-xxhash.lo `test -f './common/xxhash.c' || echo '$(srcdir)/'`./common/xxhash.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) ./common/$(DEPDIR)/libzstd_la-xxhash.Tpo ./common/$(DEPDIR)/libzstd_la-xxhash.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='./common/xxhash.c' object='./common/libzstd_la-xxhash.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzstd_la_CFLAGS) $(CFLAGS) -c -o ./common/libzstd_la-xxhash.lo `test -f './common/xxhash.c' || echo '$(srcdir)/'`./common/xxhash.c + +./common/libzstd_la-fse_decompress.lo: ./common/fse_decompress.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzstd_la_CFLAGS) $(CFLAGS) -MT ./common/libzstd_la-fse_decompress.lo -MD -MP -MF ./common/$(DEPDIR)/libzstd_la-fse_decompress.Tpo -c -o ./common/libzstd_la-fse_decompress.lo `test -f './common/fse_decompress.c' || echo '$(srcdir)/'`./common/fse_decompress.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) ./common/$(DEPDIR)/libzstd_la-fse_decompress.Tpo ./common/$(DEPDIR)/libzstd_la-fse_decompress.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='./common/fse_decompress.c' 
object='./common/libzstd_la-fse_decompress.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzstd_la_CFLAGS) $(CFLAGS) -c -o ./common/libzstd_la-fse_decompress.lo `test -f './common/fse_decompress.c' || echo '$(srcdir)/'`./common/fse_decompress.c + +./common/libzstd_la-pool.lo: ./common/pool.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzstd_la_CFLAGS) $(CFLAGS) -MT ./common/libzstd_la-pool.lo -MD -MP -MF ./common/$(DEPDIR)/libzstd_la-pool.Tpo -c -o ./common/libzstd_la-pool.lo `test -f './common/pool.c' || echo '$(srcdir)/'`./common/pool.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) ./common/$(DEPDIR)/libzstd_la-pool.Tpo ./common/$(DEPDIR)/libzstd_la-pool.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='./common/pool.c' object='./common/libzstd_la-pool.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzstd_la_CFLAGS) $(CFLAGS) -c -o ./common/libzstd_la-pool.lo `test -f './common/pool.c' || echo '$(srcdir)/'`./common/pool.c + +./common/libzstd_la-zstd_common.lo: ./common/zstd_common.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzstd_la_CFLAGS) $(CFLAGS) -MT ./common/libzstd_la-zstd_common.lo -MD -MP -MF 
./common/$(DEPDIR)/libzstd_la-zstd_common.Tpo -c -o ./common/libzstd_la-zstd_common.lo `test -f './common/zstd_common.c' || echo '$(srcdir)/'`./common/zstd_common.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) ./common/$(DEPDIR)/libzstd_la-zstd_common.Tpo ./common/$(DEPDIR)/libzstd_la-zstd_common.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='./common/zstd_common.c' object='./common/libzstd_la-zstd_common.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzstd_la_CFLAGS) $(CFLAGS) -c -o ./common/libzstd_la-zstd_common.lo `test -f './common/zstd_common.c' || echo '$(srcdir)/'`./common/zstd_common.c + +./common/libzstd_la-error_private.lo: ./common/error_private.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzstd_la_CFLAGS) $(CFLAGS) -MT ./common/libzstd_la-error_private.lo -MD -MP -MF ./common/$(DEPDIR)/libzstd_la-error_private.Tpo -c -o ./common/libzstd_la-error_private.lo `test -f './common/error_private.c' || echo '$(srcdir)/'`./common/error_private.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) ./common/$(DEPDIR)/libzstd_la-error_private.Tpo ./common/$(DEPDIR)/libzstd_la-error_private.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='./common/error_private.c' object='./common/libzstd_la-error_private.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) 
$(libzstd_la_CFLAGS) $(CFLAGS) -c -o ./common/libzstd_la-error_private.lo `test -f './common/error_private.c' || echo '$(srcdir)/'`./common/error_private.c + +./common/libzstd_la-debug.lo: ./common/debug.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzstd_la_CFLAGS) $(CFLAGS) -MT ./common/libzstd_la-debug.lo -MD -MP -MF ./common/$(DEPDIR)/libzstd_la-debug.Tpo -c -o ./common/libzstd_la-debug.lo `test -f './common/debug.c' || echo '$(srcdir)/'`./common/debug.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) ./common/$(DEPDIR)/libzstd_la-debug.Tpo ./common/$(DEPDIR)/libzstd_la-debug.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='./common/debug.c' object='./common/libzstd_la-debug.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzstd_la_CFLAGS) $(CFLAGS) -c -o ./common/libzstd_la-debug.lo `test -f './common/debug.c' || echo '$(srcdir)/'`./common/debug.c + +./common/libzstd_la-threading.lo: ./common/threading.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzstd_la_CFLAGS) $(CFLAGS) -MT ./common/libzstd_la-threading.lo -MD -MP -MF ./common/$(DEPDIR)/libzstd_la-threading.Tpo -c -o ./common/libzstd_la-threading.lo `test -f './common/threading.c' || echo '$(srcdir)/'`./common/threading.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) ./common/$(DEPDIR)/libzstd_la-threading.Tpo ./common/$(DEPDIR)/libzstd_la-threading.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='./common/threading.c' 
object='./common/libzstd_la-threading.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzstd_la_CFLAGS) $(CFLAGS) -c -o ./common/libzstd_la-threading.lo `test -f './common/threading.c' || echo '$(srcdir)/'`./common/threading.c + +./common/libzstd_la-entropy_common.lo: ./common/entropy_common.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzstd_la_CFLAGS) $(CFLAGS) -MT ./common/libzstd_la-entropy_common.lo -MD -MP -MF ./common/$(DEPDIR)/libzstd_la-entropy_common.Tpo -c -o ./common/libzstd_la-entropy_common.lo `test -f './common/entropy_common.c' || echo '$(srcdir)/'`./common/entropy_common.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) ./common/$(DEPDIR)/libzstd_la-entropy_common.Tpo ./common/$(DEPDIR)/libzstd_la-entropy_common.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='./common/entropy_common.c' object='./common/libzstd_la-entropy_common.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzstd_la_CFLAGS) $(CFLAGS) -c -o ./common/libzstd_la-entropy_common.lo `test -f './common/entropy_common.c' || echo '$(srcdir)/'`./common/entropy_common.c + +./deprecated/libzstd_la-zbuff_compress.lo: ./deprecated/zbuff_compress.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) 
$(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzstd_la_CFLAGS) $(CFLAGS) -MT ./deprecated/libzstd_la-zbuff_compress.lo -MD -MP -MF ./deprecated/$(DEPDIR)/libzstd_la-zbuff_compress.Tpo -c -o ./deprecated/libzstd_la-zbuff_compress.lo `test -f './deprecated/zbuff_compress.c' || echo '$(srcdir)/'`./deprecated/zbuff_compress.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) ./deprecated/$(DEPDIR)/libzstd_la-zbuff_compress.Tpo ./deprecated/$(DEPDIR)/libzstd_la-zbuff_compress.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='./deprecated/zbuff_compress.c' object='./deprecated/libzstd_la-zbuff_compress.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzstd_la_CFLAGS) $(CFLAGS) -c -o ./deprecated/libzstd_la-zbuff_compress.lo `test -f './deprecated/zbuff_compress.c' || echo '$(srcdir)/'`./deprecated/zbuff_compress.c + +./deprecated/libzstd_la-zbuff_decompress.lo: ./deprecated/zbuff_decompress.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzstd_la_CFLAGS) $(CFLAGS) -MT ./deprecated/libzstd_la-zbuff_decompress.lo -MD -MP -MF ./deprecated/$(DEPDIR)/libzstd_la-zbuff_decompress.Tpo -c -o ./deprecated/libzstd_la-zbuff_decompress.lo `test -f './deprecated/zbuff_decompress.c' || echo '$(srcdir)/'`./deprecated/zbuff_decompress.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) ./deprecated/$(DEPDIR)/libzstd_la-zbuff_decompress.Tpo ./deprecated/$(DEPDIR)/libzstd_la-zbuff_decompress.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='./deprecated/zbuff_decompress.c' object='./deprecated/libzstd_la-zbuff_decompress.lo' libtool=yes @AMDEPBACKSLASH@ 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzstd_la_CFLAGS) $(CFLAGS) -c -o ./deprecated/libzstd_la-zbuff_decompress.lo `test -f './deprecated/zbuff_decompress.c' || echo '$(srcdir)/'`./deprecated/zbuff_decompress.c + +./deprecated/libzstd_la-zbuff_common.lo: ./deprecated/zbuff_common.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzstd_la_CFLAGS) $(CFLAGS) -MT ./deprecated/libzstd_la-zbuff_common.lo -MD -MP -MF ./deprecated/$(DEPDIR)/libzstd_la-zbuff_common.Tpo -c -o ./deprecated/libzstd_la-zbuff_common.lo `test -f './deprecated/zbuff_common.c' || echo '$(srcdir)/'`./deprecated/zbuff_common.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) ./deprecated/$(DEPDIR)/libzstd_la-zbuff_common.Tpo ./deprecated/$(DEPDIR)/libzstd_la-zbuff_common.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='./deprecated/zbuff_common.c' object='./deprecated/libzstd_la-zbuff_common.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzstd_la_CFLAGS) $(CFLAGS) -c -o ./deprecated/libzstd_la-zbuff_common.lo `test -f './deprecated/zbuff_common.c' || echo '$(srcdir)/'`./deprecated/zbuff_common.c + +./dictBuilder/libzstd_la-zdict.lo: ./dictBuilder/zdict.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) 
$(CPPFLAGS) $(libzstd_la_CFLAGS) $(CFLAGS) -MT ./dictBuilder/libzstd_la-zdict.lo -MD -MP -MF ./dictBuilder/$(DEPDIR)/libzstd_la-zdict.Tpo -c -o ./dictBuilder/libzstd_la-zdict.lo `test -f './dictBuilder/zdict.c' || echo '$(srcdir)/'`./dictBuilder/zdict.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) ./dictBuilder/$(DEPDIR)/libzstd_la-zdict.Tpo ./dictBuilder/$(DEPDIR)/libzstd_la-zdict.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='./dictBuilder/zdict.c' object='./dictBuilder/libzstd_la-zdict.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzstd_la_CFLAGS) $(CFLAGS) -c -o ./dictBuilder/libzstd_la-zdict.lo `test -f './dictBuilder/zdict.c' || echo '$(srcdir)/'`./dictBuilder/zdict.c + +./dictBuilder/libzstd_la-divsufsort.lo: ./dictBuilder/divsufsort.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzstd_la_CFLAGS) $(CFLAGS) -MT ./dictBuilder/libzstd_la-divsufsort.lo -MD -MP -MF ./dictBuilder/$(DEPDIR)/libzstd_la-divsufsort.Tpo -c -o ./dictBuilder/libzstd_la-divsufsort.lo `test -f './dictBuilder/divsufsort.c' || echo '$(srcdir)/'`./dictBuilder/divsufsort.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) ./dictBuilder/$(DEPDIR)/libzstd_la-divsufsort.Tpo ./dictBuilder/$(DEPDIR)/libzstd_la-divsufsort.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='./dictBuilder/divsufsort.c' object='./dictBuilder/libzstd_la-divsufsort.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) 
--mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzstd_la_CFLAGS) $(CFLAGS) -c -o ./dictBuilder/libzstd_la-divsufsort.lo `test -f './dictBuilder/divsufsort.c' || echo '$(srcdir)/'`./dictBuilder/divsufsort.c + +./dictBuilder/libzstd_la-cover.lo: ./dictBuilder/cover.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzstd_la_CFLAGS) $(CFLAGS) -MT ./dictBuilder/libzstd_la-cover.lo -MD -MP -MF ./dictBuilder/$(DEPDIR)/libzstd_la-cover.Tpo -c -o ./dictBuilder/libzstd_la-cover.lo `test -f './dictBuilder/cover.c' || echo '$(srcdir)/'`./dictBuilder/cover.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) ./dictBuilder/$(DEPDIR)/libzstd_la-cover.Tpo ./dictBuilder/$(DEPDIR)/libzstd_la-cover.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='./dictBuilder/cover.c' object='./dictBuilder/libzstd_la-cover.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzstd_la_CFLAGS) $(CFLAGS) -c -o ./dictBuilder/libzstd_la-cover.lo `test -f './dictBuilder/cover.c' || echo '$(srcdir)/'`./dictBuilder/cover.c + +./legacy/libzstd_la-zstd_v03.lo: ./legacy/zstd_v03.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzstd_la_CFLAGS) $(CFLAGS) -MT ./legacy/libzstd_la-zstd_v03.lo -MD -MP -MF ./legacy/$(DEPDIR)/libzstd_la-zstd_v03.Tpo -c -o ./legacy/libzstd_la-zstd_v03.lo `test -f './legacy/zstd_v03.c' || echo '$(srcdir)/'`./legacy/zstd_v03.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) 
./legacy/$(DEPDIR)/libzstd_la-zstd_v03.Tpo ./legacy/$(DEPDIR)/libzstd_la-zstd_v03.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='./legacy/zstd_v03.c' object='./legacy/libzstd_la-zstd_v03.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzstd_la_CFLAGS) $(CFLAGS) -c -o ./legacy/libzstd_la-zstd_v03.lo `test -f './legacy/zstd_v03.c' || echo '$(srcdir)/'`./legacy/zstd_v03.c + +./legacy/libzstd_la-zstd_v07.lo: ./legacy/zstd_v07.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzstd_la_CFLAGS) $(CFLAGS) -MT ./legacy/libzstd_la-zstd_v07.lo -MD -MP -MF ./legacy/$(DEPDIR)/libzstd_la-zstd_v07.Tpo -c -o ./legacy/libzstd_la-zstd_v07.lo `test -f './legacy/zstd_v07.c' || echo '$(srcdir)/'`./legacy/zstd_v07.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) ./legacy/$(DEPDIR)/libzstd_la-zstd_v07.Tpo ./legacy/$(DEPDIR)/libzstd_la-zstd_v07.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='./legacy/zstd_v07.c' object='./legacy/libzstd_la-zstd_v07.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzstd_la_CFLAGS) $(CFLAGS) -c -o ./legacy/libzstd_la-zstd_v07.lo `test -f './legacy/zstd_v07.c' || echo '$(srcdir)/'`./legacy/zstd_v07.c + +./legacy/libzstd_la-zstd_v06.lo: ./legacy/zstd_v06.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) 
--mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzstd_la_CFLAGS) $(CFLAGS) -MT ./legacy/libzstd_la-zstd_v06.lo -MD -MP -MF ./legacy/$(DEPDIR)/libzstd_la-zstd_v06.Tpo -c -o ./legacy/libzstd_la-zstd_v06.lo `test -f './legacy/zstd_v06.c' || echo '$(srcdir)/'`./legacy/zstd_v06.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) ./legacy/$(DEPDIR)/libzstd_la-zstd_v06.Tpo ./legacy/$(DEPDIR)/libzstd_la-zstd_v06.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='./legacy/zstd_v06.c' object='./legacy/libzstd_la-zstd_v06.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzstd_la_CFLAGS) $(CFLAGS) -c -o ./legacy/libzstd_la-zstd_v06.lo `test -f './legacy/zstd_v06.c' || echo '$(srcdir)/'`./legacy/zstd_v06.c + +./legacy/libzstd_la-zstd_v01.lo: ./legacy/zstd_v01.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzstd_la_CFLAGS) $(CFLAGS) -MT ./legacy/libzstd_la-zstd_v01.lo -MD -MP -MF ./legacy/$(DEPDIR)/libzstd_la-zstd_v01.Tpo -c -o ./legacy/libzstd_la-zstd_v01.lo `test -f './legacy/zstd_v01.c' || echo '$(srcdir)/'`./legacy/zstd_v01.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) ./legacy/$(DEPDIR)/libzstd_la-zstd_v01.Tpo ./legacy/$(DEPDIR)/libzstd_la-zstd_v01.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='./legacy/zstd_v01.c' object='./legacy/libzstd_la-zstd_v01.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) 
$(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzstd_la_CFLAGS) $(CFLAGS) -c -o ./legacy/libzstd_la-zstd_v01.lo `test -f './legacy/zstd_v01.c' || echo '$(srcdir)/'`./legacy/zstd_v01.c + +./legacy/libzstd_la-zstd_v02.lo: ./legacy/zstd_v02.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzstd_la_CFLAGS) $(CFLAGS) -MT ./legacy/libzstd_la-zstd_v02.lo -MD -MP -MF ./legacy/$(DEPDIR)/libzstd_la-zstd_v02.Tpo -c -o ./legacy/libzstd_la-zstd_v02.lo `test -f './legacy/zstd_v02.c' || echo '$(srcdir)/'`./legacy/zstd_v02.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) ./legacy/$(DEPDIR)/libzstd_la-zstd_v02.Tpo ./legacy/$(DEPDIR)/libzstd_la-zstd_v02.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='./legacy/zstd_v02.c' object='./legacy/libzstd_la-zstd_v02.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzstd_la_CFLAGS) $(CFLAGS) -c -o ./legacy/libzstd_la-zstd_v02.lo `test -f './legacy/zstd_v02.c' || echo '$(srcdir)/'`./legacy/zstd_v02.c + +./legacy/libzstd_la-zstd_v04.lo: ./legacy/zstd_v04.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzstd_la_CFLAGS) $(CFLAGS) -MT ./legacy/libzstd_la-zstd_v04.lo -MD -MP -MF ./legacy/$(DEPDIR)/libzstd_la-zstd_v04.Tpo -c -o ./legacy/libzstd_la-zstd_v04.lo `test -f './legacy/zstd_v04.c' || echo '$(srcdir)/'`./legacy/zstd_v04.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) ./legacy/$(DEPDIR)/libzstd_la-zstd_v04.Tpo ./legacy/$(DEPDIR)/libzstd_la-zstd_v04.Plo 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='./legacy/zstd_v04.c' object='./legacy/libzstd_la-zstd_v04.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzstd_la_CFLAGS) $(CFLAGS) -c -o ./legacy/libzstd_la-zstd_v04.lo `test -f './legacy/zstd_v04.c' || echo '$(srcdir)/'`./legacy/zstd_v04.c + +./legacy/libzstd_la-zstd_v05.lo: ./legacy/zstd_v05.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzstd_la_CFLAGS) $(CFLAGS) -MT ./legacy/libzstd_la-zstd_v05.lo -MD -MP -MF ./legacy/$(DEPDIR)/libzstd_la-zstd_v05.Tpo -c -o ./legacy/libzstd_la-zstd_v05.lo `test -f './legacy/zstd_v05.c' || echo '$(srcdir)/'`./legacy/zstd_v05.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) ./legacy/$(DEPDIR)/libzstd_la-zstd_v05.Tpo ./legacy/$(DEPDIR)/libzstd_la-zstd_v05.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='./legacy/zstd_v05.c' object='./legacy/libzstd_la-zstd_v05.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libzstd_la_CFLAGS) $(CFLAGS) -c -o ./legacy/libzstd_la-zstd_v05.lo `test -f './legacy/zstd_v05.c' || echo '$(srcdir)/'`./legacy/zstd_v05.c + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + -rm -rf ./common/.libs ./common/_libs + -rm -rf ./compress/.libs ./compress/_libs + -rm -rf ./decompress/.libs ./decompress/_libs + -rm -rf ./deprecated/.libs 
./deprecated/_libs + -rm -rf ./dictBuilder/.libs ./dictBuilder/_libs + -rm -rf ./legacy/.libs ./legacy/_libs +install-includeHEADERS: $(include_HEADERS) + @$(NORMAL_INSTALL) + @list='$(include_HEADERS)'; test -n "$(includedir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(includedir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(includedir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(includedir)'"; \ + $(INSTALL_HEADER) $$files "$(DESTDIR)$(includedir)" || exit $$?; \ + done + +uninstall-includeHEADERS: + @$(NORMAL_UNINSTALL) + @list='$(include_HEADERS)'; test -n "$(includedir)" || list=; \ + files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ + dir='$(DESTDIR)$(includedir)'; $(am__uninstall_files_from_dir) + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) 
sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(LTLIBRARIES) $(HEADERS) +installdirs: + for dir in "$(DESTDIR)$(libdir)" "$(DESTDIR)$(includedir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + -rm -f common/$(DEPDIR)/$(am__dirstamp) + -rm -f common/$(am__dirstamp) + -rm -f compress/$(DEPDIR)/$(am__dirstamp) + -rm -f compress/$(am__dirstamp) + -rm -f decompress/$(DEPDIR)/$(am__dirstamp) + -rm -f decompress/$(am__dirstamp) + -rm -f deprecated/$(DEPDIR)/$(am__dirstamp) + -rm -f deprecated/$(am__dirstamp) + -rm -f dictBuilder/$(DEPDIR)/$(am__dirstamp) + -rm -f dictBuilder/$(am__dirstamp) + -rm -f legacy/$(DEPDIR)/$(am__dirstamp) + -rm -f legacy/$(am__dirstamp) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-am + +clean-am: clean-generic clean-libLTLIBRARIES clean-libtool \ + mostlyclean-am + +distclean: distclean-am + -rm -f ./common/$(DEPDIR)/libzstd_la-debug.Plo + -rm -f ./common/$(DEPDIR)/libzstd_la-entropy_common.Plo + -rm -f ./common/$(DEPDIR)/libzstd_la-error_private.Plo + -rm -f ./common/$(DEPDIR)/libzstd_la-fse_decompress.Plo + -rm -f ./common/$(DEPDIR)/libzstd_la-pool.Plo + -rm -f ./common/$(DEPDIR)/libzstd_la-threading.Plo + -rm -f ./common/$(DEPDIR)/libzstd_la-xxhash.Plo + -rm -f ./common/$(DEPDIR)/libzstd_la-zstd_common.Plo + -rm -f ./compress/$(DEPDIR)/libzstd_la-fse_compress.Plo + -rm -f ./compress/$(DEPDIR)/libzstd_la-hist.Plo + -rm -f ./compress/$(DEPDIR)/libzstd_la-huf_compress.Plo + -rm -f ./compress/$(DEPDIR)/libzstd_la-zstd_compress.Plo + -rm -f ./compress/$(DEPDIR)/libzstd_la-zstd_double_fast.Plo + -rm -f ./compress/$(DEPDIR)/libzstd_la-zstd_fast.Plo + -rm -f ./compress/$(DEPDIR)/libzstd_la-zstd_lazy.Plo + -rm -f ./compress/$(DEPDIR)/libzstd_la-zstd_ldm.Plo + -rm -f ./compress/$(DEPDIR)/libzstd_la-zstd_opt.Plo + -rm -f ./compress/$(DEPDIR)/libzstd_la-zstdmt_compress.Plo + -rm -f ./decompress/$(DEPDIR)/libzstd_la-huf_decompress.Plo + -rm -f ./decompress/$(DEPDIR)/libzstd_la-zstd_decompress.Plo + -rm -f ./deprecated/$(DEPDIR)/libzstd_la-zbuff_common.Plo + -rm -f ./deprecated/$(DEPDIR)/libzstd_la-zbuff_compress.Plo + -rm -f ./deprecated/$(DEPDIR)/libzstd_la-zbuff_decompress.Plo + -rm -f ./dictBuilder/$(DEPDIR)/libzstd_la-cover.Plo + -rm -f ./dictBuilder/$(DEPDIR)/libzstd_la-divsufsort.Plo + -rm -f ./dictBuilder/$(DEPDIR)/libzstd_la-zdict.Plo + -rm -f ./legacy/$(DEPDIR)/libzstd_la-zstd_v01.Plo + -rm -f ./legacy/$(DEPDIR)/libzstd_la-zstd_v02.Plo + -rm -f ./legacy/$(DEPDIR)/libzstd_la-zstd_v03.Plo + -rm -f ./legacy/$(DEPDIR)/libzstd_la-zstd_v04.Plo + -rm -f ./legacy/$(DEPDIR)/libzstd_la-zstd_v05.Plo + -rm -f ./legacy/$(DEPDIR)/libzstd_la-zstd_v06.Plo + -rm -f ./legacy/$(DEPDIR)/libzstd_la-zstd_v07.Plo + -rm -f Makefile +distclean-am: 
clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: install-includeHEADERS + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: install-libLTLIBRARIES + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f ./common/$(DEPDIR)/libzstd_la-debug.Plo + -rm -f ./common/$(DEPDIR)/libzstd_la-entropy_common.Plo + -rm -f ./common/$(DEPDIR)/libzstd_la-error_private.Plo + -rm -f ./common/$(DEPDIR)/libzstd_la-fse_decompress.Plo + -rm -f ./common/$(DEPDIR)/libzstd_la-pool.Plo + -rm -f ./common/$(DEPDIR)/libzstd_la-threading.Plo + -rm -f ./common/$(DEPDIR)/libzstd_la-xxhash.Plo + -rm -f ./common/$(DEPDIR)/libzstd_la-zstd_common.Plo + -rm -f ./compress/$(DEPDIR)/libzstd_la-fse_compress.Plo + -rm -f ./compress/$(DEPDIR)/libzstd_la-hist.Plo + -rm -f ./compress/$(DEPDIR)/libzstd_la-huf_compress.Plo + -rm -f ./compress/$(DEPDIR)/libzstd_la-zstd_compress.Plo + -rm -f ./compress/$(DEPDIR)/libzstd_la-zstd_double_fast.Plo + -rm -f ./compress/$(DEPDIR)/libzstd_la-zstd_fast.Plo + -rm -f ./compress/$(DEPDIR)/libzstd_la-zstd_lazy.Plo + -rm -f ./compress/$(DEPDIR)/libzstd_la-zstd_ldm.Plo + -rm -f ./compress/$(DEPDIR)/libzstd_la-zstd_opt.Plo + -rm -f ./compress/$(DEPDIR)/libzstd_la-zstdmt_compress.Plo + -rm -f ./decompress/$(DEPDIR)/libzstd_la-huf_decompress.Plo + -rm -f ./decompress/$(DEPDIR)/libzstd_la-zstd_decompress.Plo + -rm -f ./deprecated/$(DEPDIR)/libzstd_la-zbuff_common.Plo + -rm -f ./deprecated/$(DEPDIR)/libzstd_la-zbuff_compress.Plo + -rm -f ./deprecated/$(DEPDIR)/libzstd_la-zbuff_decompress.Plo + -rm -f ./dictBuilder/$(DEPDIR)/libzstd_la-cover.Plo + -rm -f ./dictBuilder/$(DEPDIR)/libzstd_la-divsufsort.Plo + -rm -f 
./dictBuilder/$(DEPDIR)/libzstd_la-zdict.Plo + -rm -f ./legacy/$(DEPDIR)/libzstd_la-zstd_v01.Plo + -rm -f ./legacy/$(DEPDIR)/libzstd_la-zstd_v02.Plo + -rm -f ./legacy/$(DEPDIR)/libzstd_la-zstd_v03.Plo + -rm -f ./legacy/$(DEPDIR)/libzstd_la-zstd_v04.Plo + -rm -f ./legacy/$(DEPDIR)/libzstd_la-zstd_v05.Plo + -rm -f ./legacy/$(DEPDIR)/libzstd_la-zstd_v06.Plo + -rm -f ./legacy/$(DEPDIR)/libzstd_la-zstd_v07.Plo + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-includeHEADERS uninstall-libLTLIBRARIES + +.MAKE: install-am install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am am--depfiles check check-am clean \ + clean-generic clean-libLTLIBRARIES clean-libtool cscopelist-am \ + ctags ctags-am distclean distclean-compile distclean-generic \ + distclean-libtool distclean-tags distdir dvi dvi-am html \ + html-am info info-am install install-am install-data \ + install-data-am install-dvi install-dvi-am install-exec \ + install-exec-am install-html install-html-am \ + install-includeHEADERS install-info install-info-am \ + install-libLTLIBRARIES install-man install-pdf install-pdf-am \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags tags-am uninstall uninstall-am uninstall-includeHEADERS \ + uninstall-libLTLIBRARIES + +.PRECIOUS: Makefile + + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. 
+.NOEXPORT: diff --git a/deps/SZ/zstd/README.md b/deps/SZ/zstd/README.md new file mode 100644 index 0000000000000000000000000000000000000000..75debe872f93fb19aca0f4367f88d76fbf7ccaf8 --- /dev/null +++ b/deps/SZ/zstd/README.md @@ -0,0 +1,119 @@ +Zstandard library files +================================ + +The __lib__ directory is split into several sub-directories, +in order to make it easier to select or exclude features. + + +#### Building + +`Makefile` script is provided, supporting all standard [Makefile conventions](https://www.gnu.org/prep/standards/html_node/Makefile-Conventions.html#Makefile-Conventions), +including command variables, staged install, directory variables and standard targets. +- `make` : generates both static and dynamic libraries +- `make install` : installs libraries in default system directories + +`libzstd` default scope includes compression, decompression, dictionary building, +and decoding support for legacy formats >= v0.4.0. + + +#### API + +Zstandard's stable API is exposed within [lib/zstd.h](zstd.h). + + +#### Advanced API + +Optional advanced features are exposed via : + +- `lib/common/zstd_errors.h` : translates `size_t` function results + into a `ZSTD_ErrorCode`, for accurate error handling. +- `ZSTD_STATIC_LINKING_ONLY` : if this macro is defined _before_ including `zstd.h`, + it unlocks access to advanced experimental API, + exposed in the second part of `zstd.h`. + These APIs are not "stable", their definition may change in the future. + As a consequence, it shall ___never be used with dynamic library___ ! + Only static linking is allowed. + + +#### Modular build + +It's possible to compile only a limited set of features. + +- Directory `lib/common` is always required, for all variants. +- Compression source code lies in `lib/compress` +- Decompression source code lies in `lib/decompress` +- It's possible to include only `compress` or only `decompress`, they don't depend on each other.
+- `lib/dictBuilder` : makes it possible to generate dictionaries from a set of samples. + The API is exposed in `lib/dictBuilder/zdict.h`. + This module depends on both `lib/common` and `lib/compress` . +- `lib/legacy` : source code to decompress legacy zstd formats, starting from `v0.1.0`. + This module depends on `lib/common` and `lib/decompress`. + To enable this feature, it's required to define `ZSTD_LEGACY_SUPPORT` during compilation. + Typically, with `gcc`, add argument `-DZSTD_LEGACY_SUPPORT=1`. + Using a higher number limits the versions supported. + For example, `ZSTD_LEGACY_SUPPORT=2` means : "support legacy formats >= v0.2.0". + `ZSTD_LEGACY_SUPPORT=3` means : "support legacy formats >= v0.3.0", and so on. + Starting v0.8.0, all versions of `zstd` produce frames compliant with specification. + As a consequence, `ZSTD_LEGACY_SUPPORT=8` (or more) doesn't trigger legacy support. + Also, `ZSTD_LEGACY_SUPPORT=0` means "do __not__ support legacy formats". + Once enabled, this capability is transparently triggered within decompression functions. + It's also possible to invoke the legacy API directly, as exposed in `lib/legacy/zstd_legacy.h`. + Each version also provides an additional dedicated set of advanced API. + For example, advanced API for version `v0.4` is exposed in `lib/legacy/zstd_v04.h` . + Note : `lib/legacy` only supports _decoding_ legacy formats. +- Similarly, you can define `ZSTD_LIB_COMPRESSION`, `ZSTD_LIB_DECOMPRESSION`, `ZSTD_LIB_DICTBUILDER`, + and `ZSTD_LIB_DEPRECATED` as 0 to forgo compilation of the corresponding features. This will + also disable compilation of all dependencies (e.g. `ZSTD_LIB_COMPRESSION=0` will also disable + dictBuilder). + + +#### Multithreading support + +Multithreading is disabled by default when building with `make`.
+Enabling multithreading requires 2 conditions : +- set macro `ZSTD_MULTITHREAD` +- on POSIX systems : compile with pthread (`-pthread` compilation flag for `gcc`) + +Both conditions are automatically triggered by invoking `make lib-mt` target. +Note that, when linking a POSIX program with a multithreaded version of `libzstd`, +it's necessary to trigger `-pthread` flag during link stage. + +Multithreading capabilities are exposed +via [advanced API `ZSTD_compress_generic()` defined in `lib/zstd.h`](https://github.com/facebook/zstd/blob/dev/lib/zstd.h#L919). +This API is still considered experimental, +but is expected to become "stable" at some point in the future. + + +#### Windows : using MinGW+MSYS to create DLL + +DLL can be created using MinGW+MSYS with the `make libzstd` command. +This command creates `dll\libzstd.dll` and the import library `dll\libzstd.lib`. +The import library is only required with Visual C++. +The header file `zstd.h` and the dynamic library `dll\libzstd.dll` are required to +compile a project using gcc/MinGW. +The dynamic library has to be added to linking options. +It means that if a project that uses ZSTD consists of a single `test-dll.c` +file it should be linked with `dll\libzstd.dll`. For example: +``` + gcc $(CFLAGS) -Iinclude/ test-dll.c -o test-dll dll\libzstd.dll +``` +The compiled executable will require ZSTD DLL which is available at `dll\libzstd.dll`. + + +#### Deprecated API + +Obsolete API on their way out are stored in directory `lib/deprecated`. +At this stage, it contains older streaming prototypes, in `lib/deprecated/zbuff.h`. +These prototypes will be removed in some future version. +Consider migrating code towards supported streaming API exposed in `zstd.h`. + + +#### Miscellaneous + +The other files are not source code. 
There are : + + - `LICENSE` : contains the BSD license text + - `Makefile` : `make` script to build and install zstd library (static and dynamic) + - `BUCK` : support for `buck` build system (https://buckbuild.com/) + - `libzstd.pc.in` : for `pkg-config` (used in `make install`) + - `README.md` : this file diff --git a/deps/SZ/zstd/common/bitstream.h b/deps/SZ/zstd/common/bitstream.h new file mode 100644 index 0000000000000000000000000000000000000000..2f91460c5eb0a0e319c8f0b91e4f8e041ff59da5 --- /dev/null +++ b/deps/SZ/zstd/common/bitstream.h @@ -0,0 +1,458 @@ +/* ****************************************************************** + bitstream + Part of FSE library + Copyright (C) 2013-present, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - Source repository : https://github.com/Cyan4973/FiniteStateEntropy +****************************************************************** */ +#ifndef BITSTREAM_H_MODULE +#define BITSTREAM_H_MODULE + +#if defined (__cplusplus) +extern "C" { +#endif + +/* +* This API consists of small unitary functions, which must be inlined for best performance. +* Since link-time-optimization is not available for all compilers, +* these functions are defined into a .h to be included. +*/ + +/*-**************************************** +* Dependencies +******************************************/ +#include "mem.h" /* unaligned access routines */ +#include "debug.h" /* assert(), DEBUGLOG(), RAWLOG() */ +#include "error_private.h" /* error codes and messages */ + + +/*========================================= +* Target specific +=========================================*/ +#if defined(__BMI__) && defined(__GNUC__) +# include /* support for bextr (experimental) */ +#endif + +#define STREAM_ACCUMULATOR_MIN_32 25 +#define STREAM_ACCUMULATOR_MIN_64 57 +#define STREAM_ACCUMULATOR_MIN ((U32)(MEM_32bits() ? STREAM_ACCUMULATOR_MIN_32 : STREAM_ACCUMULATOR_MIN_64)) + + +/*-****************************************** +* bitStream encoding API (write forward) +********************************************/ +/* bitStream can mix input from multiple sources. 
+ * A critical property of these streams is that they encode and decode in **reverse** direction. + * So the first bit sequence you add will be the last to be read, like a LIFO stack. + */ +typedef struct { + size_t bitContainer; /* local register : bits added but not yet flushed to memory */ + unsigned bitPos; /* nb of bits currently stored in bitContainer */ + char* startPtr; /* start of destination buffer */ + char* ptr; /* current write position within destination buffer */ + char* endPtr; /* startPtr + dstCapacity - sizeof(bitContainer) */ +} BIT_CStream_t; + +MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* dstBuffer, size_t dstCapacity); +MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits); +MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC); +MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC); + +/* Start with initCStream, providing the size of buffer to write into. +* bitStream will never write outside of this buffer. +* `dstCapacity` must be > sizeof(bitC->bitContainer), otherwise @return will be an error code. +* +* bits are first added to a local register. +* Local register is size_t, hence 64-bits on 64-bits systems, or 32-bits on 32-bits systems. +* Writing data into memory is an explicit operation, performed by the flushBits function. +* Hence keep track of how many bits are potentially stored into local register to avoid register overflow. +* After a flushBits, a maximum of 7 bits might still be stored into local register. +* +* Avoid storing elements of more than 24 bits if you want compatibility with 32-bits bitstream readers. +* +* Last operation is to close the bitStream. +* The function returns the final size of CStream in bytes.
+* If data couldn't fit into `dstBuffer`, it will return a 0 ( == not storable) +*/ + + +/*-******************************************** +* bitStream decoding API (read backward) +**********************************************/ +typedef struct { + size_t bitContainer; /* local register : chunk of the bitStream last loaded from memory */ + unsigned bitsConsumed; /* nb of bits of bitContainer already read */ + const char* ptr; /* address bitContainer was last loaded from */ + const char* start; /* first byte of source buffer */ + const char* limitPtr; /* start + sizeof(bitContainer) */ +} BIT_DStream_t; + +typedef enum { BIT_DStream_unfinished = 0, + BIT_DStream_endOfBuffer = 1, + BIT_DStream_completed = 2, + BIT_DStream_overflow = 3 } BIT_DStream_status; /* result of BIT_reloadDStream() */ + /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */ + +MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize); +MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits); +MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD); +MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD); + + +/* Start by invoking BIT_initDStream(). +* A chunk of the bitStream is then stored into a local register. +* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t). +* You can then retrieve bitFields stored into the local register, **in reverse order**. +* Local register is explicitly reloaded from memory by the BIT_reloadDStream() method. +* A reload guarantees a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished. +* Otherwise, it can be less than that, so proceed accordingly. +* Checking if DStream has reached its end can be performed with BIT_endOfDStream().
+*/ + + +/*-**************************************** +* unsafe API +******************************************/ +MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits); +/* faster, but works only if value is "clean", meaning all high bits above nbBits are 0 */ + +MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC); +/* unsafe version; does not check buffer overflow */ + +MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits); +/* faster, but works only if nbBits >= 1 */ + + + +/*-************************************************************** +* Internal functions +****************************************************************/ +MEM_STATIC unsigned BIT_highbit32 (U32 val) +{ + assert(val != 0); + { +# if defined(_MSC_VER) /* Visual */ + unsigned long r=0; + _BitScanReverse ( &r, val ); + return (unsigned) r; +# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */ + return 31 - __builtin_clz (val); +# else /* Software version */ + static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, + 11, 14, 16, 18, 22, 25, 3, 30, + 8, 12, 20, 28, 15, 17, 24, 7, + 19, 27, 23, 6, 26, 5, 4, 31 }; + U32 v = val; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + return DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27]; +# endif + } +} + +/*===== Local Constants =====*/ +static const unsigned BIT_mask[] = { + 0, 1, 3, 7, 0xF, 0x1F, + 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF, + 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF, + 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF, + 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF, 0x7FFFFFF, 0xFFFFFFF, 0x1FFFFFFF, + 0x3FFFFFFF, 0x7FFFFFFF}; /* up to 31 bits */ +#define BIT_MASK_SIZE (sizeof(BIT_mask) / sizeof(BIT_mask[0])) + +/*-************************************************************** +* bitStream encoding +****************************************************************/ +/*! 
BIT_initCStream() : + * `dstCapacity` must be > sizeof(size_t) + * @return : 0 if success, + * otherwise an error code (can be tested using ERR_isError()) */ +MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, + void* startPtr, size_t dstCapacity) +{ + bitC->bitContainer = 0; + bitC->bitPos = 0; + bitC->startPtr = (char*)startPtr; + bitC->ptr = bitC->startPtr; + bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer); + if (dstCapacity <= sizeof(bitC->bitContainer)) return ERROR(dstSize_tooSmall); + return 0; +} + +/*! BIT_addBits() : + * can add up to 31 bits into `bitC`. + * Note : does not check for register overflow ! */ +MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, + size_t value, unsigned nbBits) +{ + MEM_STATIC_ASSERT(BIT_MASK_SIZE == 32); + assert(nbBits < BIT_MASK_SIZE); + assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8); + bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos; + bitC->bitPos += nbBits; +} + +/*! BIT_addBitsFast() : + * works only if `value` is _clean_, + * meaning all high bits above nbBits are 0 */ +MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, + size_t value, unsigned nbBits) +{ + assert((value>>nbBits) == 0); + assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8); + bitC->bitContainer |= value << bitC->bitPos; + bitC->bitPos += nbBits; +} + +/*! BIT_flushBitsFast() : + * assumption : bitContainer has not overflowed + * unsafe version; does not check buffer overflow */ +MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC) +{ + size_t const nbBytes = bitC->bitPos >> 3; + assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8); + MEM_writeLEST(bitC->ptr, bitC->bitContainer); + bitC->ptr += nbBytes; + assert(bitC->ptr <= bitC->endPtr); + bitC->bitPos &= 7; + bitC->bitContainer >>= nbBytes*8; +} + +/*! BIT_flushBits() : + * assumption : bitContainer has not overflowed + * safe version; check for buffer overflow, and prevents it. + * note : does not signal buffer overflow. 
+ * overflow will be revealed later on using BIT_closeCStream() */ +MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC) +{ + size_t const nbBytes = bitC->bitPos >> 3; + assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8); + MEM_writeLEST(bitC->ptr, bitC->bitContainer); + bitC->ptr += nbBytes; + if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr; + bitC->bitPos &= 7; + bitC->bitContainer >>= nbBytes*8; +} + +/*! BIT_closeCStream() : + * @return : size of CStream, in bytes, + * or 0 if it could not fit into dstBuffer */ +MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC) +{ + BIT_addBitsFast(bitC, 1, 1); /* endMark */ + BIT_flushBits(bitC); + if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */ + return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0); +} + + +/*-******************************************************** +* bitStream decoding +**********************************************************/ +/*! BIT_initDStream() : + * Initialize a BIT_DStream_t. + * `bitD` : a pointer to an already allocated BIT_DStream_t structure. + * `srcSize` must be the *exact* size of the bitStream, in bytes. + * @return : size of stream (== srcSize), or an errorCode if a problem is detected + */ +MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize) +{ + if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); } + + bitD->start = (const char*)srcBuffer; + bitD->limitPtr = bitD->start + sizeof(bitD->bitContainer); + + if (srcSize >= sizeof(bitD->bitContainer)) { /* normal case */ + bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer); + bitD->bitContainer = MEM_readLEST(bitD->ptr); + { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1]; + bitD->bitsConsumed = lastByte ? 
8 - BIT_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */ + if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ } + } else { + bitD->ptr = bitD->start; + bitD->bitContainer = *(const BYTE*)(bitD->start); + switch(srcSize) + { + case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16); + /* fall-through */ + + case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24); + /* fall-through */ + + case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32); + /* fall-through */ + + case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24; + /* fall-through */ + + case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16; + /* fall-through */ + + case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) << 8; + /* fall-through */ + + default: break; + } + { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1]; + bitD->bitsConsumed = lastByte ? 
8 - BIT_highbit32(lastByte) : 0; + if (lastByte == 0) return ERROR(corruption_detected); /* endMark not present */ + } + bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8; + } + + return srcSize; +} + +MEM_STATIC size_t BIT_getUpperBits(size_t bitContainer, U32 const start) +{ + return bitContainer >> start; +} + +MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits) +{ +#if defined(__BMI__) && defined(__GNUC__) && __GNUC__*1000+__GNUC_MINOR__ >= 4008 /* experimental */ +# if defined(__x86_64__) + if (sizeof(bitContainer)==8) + return _bextr_u64(bitContainer, start, nbBits); + else +# endif + return _bextr_u32(bitContainer, start, nbBits); +#else + assert(nbBits < BIT_MASK_SIZE); + return (bitContainer >> start) & BIT_mask[nbBits]; +#endif +} + +MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits) +{ + assert(nbBits < BIT_MASK_SIZE); + return bitContainer & BIT_mask[nbBits]; +} + +/*! BIT_lookBits() : + * Provides next n bits from local register. + * local register is not modified. + * On 32-bits, maxNbBits==24. + * On 64-bits, maxNbBits==56. + * @return : value extracted */ +MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits) +{ +#if defined(__BMI__) && defined(__GNUC__) /* experimental; fails if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8 */ + return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits); +#else + U32 const regMask = sizeof(bitD->bitContainer)*8 - 1; + return ((bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> 1) >> ((regMask-nbBits) & regMask); +#endif +} + +/*! 
BIT_lookBitsFast() : + * unsafe version; only works if nbBits >= 1 */ +MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits) +{ + U32 const regMask = sizeof(bitD->bitContainer)*8 - 1; + assert(nbBits >= 1); + return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask); +} + +MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits) +{ + bitD->bitsConsumed += nbBits; +} + +/*! BIT_readBits() : + * Read (consume) next n bits from local register and update. + * Take care not to read more bits than the local register still contains. + * @return : extracted value. */ +MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, U32 nbBits) +{ + size_t const value = BIT_lookBits(bitD, nbBits); + BIT_skipBits(bitD, nbBits); + return value; +} + +/*! BIT_readBitsFast() : + * unsafe version; only works if nbBits >= 1 */ +MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits) +{ + size_t const value = BIT_lookBitsFast(bitD, nbBits); + assert(nbBits >= 1); + BIT_skipBits(bitD, nbBits); + return value; +} + +/*! BIT_reloadDStream() : + * Refill `bitD` from buffer previously set in BIT_initDStream() . + * This function is safe, it guarantees it will not read beyond src buffer. + * @return : status of `BIT_DStream_t` internal register.
+ * when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */ +MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD) +{ + if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* overflow detected, like end of stream */ + return BIT_DStream_overflow; + + if (bitD->ptr >= bitD->limitPtr) { + bitD->ptr -= bitD->bitsConsumed >> 3; + bitD->bitsConsumed &= 7; + bitD->bitContainer = MEM_readLEST(bitD->ptr); + return BIT_DStream_unfinished; + } + if (bitD->ptr == bitD->start) { + if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer; + return BIT_DStream_completed; + } + /* start < ptr < limitPtr */ + { U32 nbBytes = bitD->bitsConsumed >> 3; + BIT_DStream_status result = BIT_DStream_unfinished; + if (bitD->ptr - nbBytes < bitD->start) { + nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */ + result = BIT_DStream_endOfBuffer; + } + bitD->ptr -= nbBytes; + bitD->bitsConsumed -= nbBytes*8; + bitD->bitContainer = MEM_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD->bitContainer), otherwise bitD->ptr == bitD->start */ + return result; + } +} + +/*! BIT_endOfDStream() : + * @return : 1 if DStream has _exactly_ reached its end (all bits consumed). + */ +MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream) +{ + return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8)); +} + +#if defined (__cplusplus) +} +#endif + +#endif /* BITSTREAM_H_MODULE */ diff --git a/deps/SZ/zstd/common/compiler.h b/deps/SZ/zstd/common/compiler.h new file mode 100644 index 0000000000000000000000000000000000000000..366ed2b4b4fe00595a945bad50a2f88dae9e4682 --- /dev/null +++ b/deps/SZ/zstd/common/compiler.h @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. 
+ * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_COMPILER_H +#define ZSTD_COMPILER_H + +/*-******************************************************* +* Compiler specifics +*********************************************************/ +/* force inlining */ +#if defined (__GNUC__) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ +# define INLINE_KEYWORD inline +#else +# define INLINE_KEYWORD +#endif + +#if defined(__GNUC__) +# define FORCE_INLINE_ATTR __attribute__((always_inline)) +#elif defined(_MSC_VER) +# define FORCE_INLINE_ATTR __forceinline +#else +# define FORCE_INLINE_ATTR +#endif + +/** + * FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant + * parameters. They must be inlined for the compiler to eliminate the constant + * branches. + */ +#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR +/** + * HINT_INLINE is used to help the compiler generate better code. It is *not* + * used for "templates", so it can be tweaked based on the compiler's + * performance. + * + * gcc-4.8 and gcc-4.9 have been shown to benefit from leaving off the + * always_inline attribute. + * + * clang up to 5.0.0 (trunk) benefits tremendously from the always_inline + * attribute.
+ */ +#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 4 && __GNUC_MINOR__ >= 8 && __GNUC__ < 5 +# define HINT_INLINE static INLINE_KEYWORD +#else +# define HINT_INLINE static INLINE_KEYWORD FORCE_INLINE_ATTR +#endif + +/* force no inlining */ +#ifdef _MSC_VER +# define FORCE_NOINLINE static __declspec(noinline) +#else +# ifdef __GNUC__ +# define FORCE_NOINLINE static __attribute__((__noinline__)) +# else +# define FORCE_NOINLINE static +# endif +#endif + +/* target attribute */ +#ifndef __has_attribute + #define __has_attribute(x) 0 /* Compatibility with non-clang compilers. */ +#endif +#if defined(__GNUC__) +# define TARGET_ATTRIBUTE(target) __attribute__((__target__(target))) +#else +# define TARGET_ATTRIBUTE(target) +#endif + +/* Enable runtime BMI2 dispatch based on the CPU. + * Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default. + */ +#ifndef DYNAMIC_BMI2 + #if ((defined(__clang__) && __has_attribute(__target__)) \ + || (defined(__GNUC__) \ + && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \ + && (defined(__x86_64__) || defined(_M_X86)) \ + && !defined(__BMI2__) + # define DYNAMIC_BMI2 1 + #else + # define DYNAMIC_BMI2 0 + #endif +#endif + +/* prefetch */ +#if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */ +# include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */ +# define PREFETCH(ptr) _mm_prefetch((const char*)ptr, _MM_HINT_T0) +#elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) ) +# define PREFETCH(ptr) __builtin_prefetch(ptr, 0, 0) +#else +# define PREFETCH(ptr) /* disabled */ +#endif + +/* disable warnings */ +#ifdef _MSC_VER /* Visual Studio */ +# include <intrin.h> /* For Visual 2005 */ +# pragma warning(disable : 4100) /* disable: C4100: unreferenced formal parameter */ +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +# pragma warning(disable :
4204) /* disable: C4204: non-constant aggregate initializer */ +# pragma warning(disable : 4214) /* disable: C4214: non-int bitfields */ +# pragma warning(disable : 4324) /* disable: C4324: padded structure */ +#endif + +#endif /* ZSTD_COMPILER_H */ diff --git a/deps/SZ/zstd/common/cpu.h b/deps/SZ/zstd/common/cpu.h new file mode 100644 index 0000000000000000000000000000000000000000..88e0ebf44f86207afe6ddd688f9cb2dea714f14e --- /dev/null +++ b/deps/SZ/zstd/common/cpu.h @@ -0,0 +1,215 @@ +/* + * Copyright (c) 2018-present, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_COMMON_CPU_H +#define ZSTD_COMMON_CPU_H + +/** + * Implementation taken from folly/CpuId.h + * https://github.com/facebook/folly/blob/master/folly/CpuId.h + */ + +#include <string.h> + +#include "mem.h" + +#ifdef _MSC_VER +#include <intrin.h> +#endif + +typedef struct { + U32 f1c; + U32 f1d; + U32 f7b; + U32 f7c; +} ZSTD_cpuid_t; + +MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void) { + U32 f1c = 0; + U32 f1d = 0; + U32 f7b = 0; + U32 f7c = 0; +#ifdef _MSC_VER + int reg[4]; + __cpuid((int*)reg, 0); + { + int const n = reg[0]; + if (n >= 1) { + __cpuid((int*)reg, 1); + f1c = (U32)reg[2]; + f1d = (U32)reg[3]; + } + if (n >= 7) { + __cpuidex((int*)reg, 7, 0); + f7b = (U32)reg[1]; + f7c = (U32)reg[2]; + } + } +#elif defined(__i386__) && defined(__PIC__) && !defined(__clang__) && defined(__GNUC__) + /* The following block like the normal cpuid branch below, but gcc + * reserves ebx for use of its pic register so we must specially + * handle the save and restore to avoid clobbering the register + */ + U32 n; + __asm__( + "pushl %%ebx\n\t" + "cpuid\n\t" + "popl %%ebx\n\t" + : "=a"(n) + : "a"(0) + : "ecx", "edx"); + if (n >= 1) {
+ U32 f1a; + __asm__( + "pushl %%ebx\n\t" + "cpuid\n\t" + "popl %%ebx\n\t" + : "=a"(f1a), "=c"(f1c), "=d"(f1d) + : "a"(1)); + } + if (n >= 7) { + __asm__( + "pushl %%ebx\n\t" + "cpuid\n\t" + "movl %%ebx, %%eax\n\r" + "popl %%ebx" + : "=a"(f7b), "=c"(f7c) + : "a"(7), "c"(0) + : "edx"); + } +#elif defined(__x86_64__) || defined(_M_X64) || defined(__i386__) + U32 n; + __asm__("cpuid" : "=a"(n) : "a"(0) : "ebx", "ecx", "edx"); + if (n >= 1) { + U32 f1a; + __asm__("cpuid" : "=a"(f1a), "=c"(f1c), "=d"(f1d) : "a"(1) : "ebx"); + } + if (n >= 7) { + U32 f7a; + __asm__("cpuid" + : "=a"(f7a), "=b"(f7b), "=c"(f7c) + : "a"(7), "c"(0) + : "edx"); + } +#endif + { + ZSTD_cpuid_t cpuid; + cpuid.f1c = f1c; + cpuid.f1d = f1d; + cpuid.f7b = f7b; + cpuid.f7c = f7c; + return cpuid; + } +} + +#define X(name, r, bit) \ + MEM_STATIC int ZSTD_cpuid_##name(ZSTD_cpuid_t const cpuid) { \ + return ((cpuid.r) & (1U << bit)) != 0; \ + } + +/* cpuid(1): Processor Info and Feature Bits. */ +#define C(name, bit) X(name, f1c, bit) + C(sse3, 0) + C(pclmuldq, 1) + C(dtes64, 2) + C(monitor, 3) + C(dscpl, 4) + C(vmx, 5) + C(smx, 6) + C(eist, 7) + C(tm2, 8) + C(ssse3, 9) + C(cnxtid, 10) + C(fma, 12) + C(cx16, 13) + C(xtpr, 14) + C(pdcm, 15) + C(pcid, 17) + C(dca, 18) + C(sse41, 19) + C(sse42, 20) + C(x2apic, 21) + C(movbe, 22) + C(popcnt, 23) + C(tscdeadline, 24) + C(aes, 25) + C(xsave, 26) + C(osxsave, 27) + C(avx, 28) + C(f16c, 29) + C(rdrand, 30) +#undef C +#define D(name, bit) X(name, f1d, bit) + D(fpu, 0) + D(vme, 1) + D(de, 2) + D(pse, 3) + D(tsc, 4) + D(msr, 5) + D(pae, 6) + D(mce, 7) + D(cx8, 8) + D(apic, 9) + D(sep, 11) + D(mtrr, 12) + D(pge, 13) + D(mca, 14) + D(cmov, 15) + D(pat, 16) + D(pse36, 17) + D(psn, 18) + D(clfsh, 19) + D(ds, 21) + D(acpi, 22) + D(mmx, 23) + D(fxsr, 24) + D(sse, 25) + D(sse2, 26) + D(ss, 27) + D(htt, 28) + D(tm, 29) + D(pbe, 31) +#undef D + +/* cpuid(7): Extended Features. 
*/ +#define B(name, bit) X(name, f7b, bit) + B(bmi1, 3) + B(hle, 4) + B(avx2, 5) + B(smep, 7) + B(bmi2, 8) + B(erms, 9) + B(invpcid, 10) + B(rtm, 11) + B(mpx, 14) + B(avx512f, 16) + B(avx512dq, 17) + B(rdseed, 18) + B(adx, 19) + B(smap, 20) + B(avx512ifma, 21) + B(pcommit, 22) + B(clflushopt, 23) + B(clwb, 24) + B(avx512pf, 26) + B(avx512er, 27) + B(avx512cd, 28) + B(sha, 29) + B(avx512bw, 30) + B(avx512vl, 31) +#undef B +#define C(name, bit) X(name, f7c, bit) + C(prefetchwt1, 0) + C(avx512vbmi, 1) +#undef C + +#undef X + +#endif /* ZSTD_COMMON_CPU_H */ diff --git a/deps/SZ/zstd/common/debug.c b/deps/SZ/zstd/common/debug.c new file mode 100644 index 0000000000000000000000000000000000000000..3ebdd1cb15a6288e871f361f5d3fbbd8835d1d90 --- /dev/null +++ b/deps/SZ/zstd/common/debug.c @@ -0,0 +1,44 @@ +/* ****************************************************************** + debug + Part of FSE library + Copyright (C) 2013-present, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - Source repository : https://github.com/Cyan4973/FiniteStateEntropy +****************************************************************** */ + + +/* + * This module only hosts one global variable + * which can be used to dynamically influence the verbosity of traces, + * such as DEBUGLOG and RAWLOG + */ + +#include "debug.h" + +int g_debuglevel = DEBUGLEVEL; diff --git a/deps/SZ/zstd/common/debug.h b/deps/SZ/zstd/common/debug.h new file mode 100644 index 0000000000000000000000000000000000000000..0c04ad2cc98c4b949035141c56a6385ed9797012 --- /dev/null +++ b/deps/SZ/zstd/common/debug.h @@ -0,0 +1,123 @@ +/* ****************************************************************** + debug + Part of FSE library + Copyright (C) 2013-present, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - Source repository : https://github.com/Cyan4973/FiniteStateEntropy +****************************************************************** */ + + +/* + * The purpose of this header is to enable debug functions. + * They regroup assert(), DEBUGLOG() and RAWLOG() for run-time, + * and DEBUG_STATIC_ASSERT() for compile-time. + * + * By default, DEBUGLEVEL==0, which means run-time debug is disabled. + * + * Level 1 enables assert() only. + * Starting level 2, traces can be generated and pushed to stderr. + * The higher the level, the more verbose the traces. + * + * It's possible to dynamically adjust level using variable g_debuglevel, + * which is only declared if DEBUGLEVEL>=2, + * and is a global variable, not multi-thread protected (use with care) + */ + +#ifndef DEBUG_H_12987983217 +#define DEBUG_H_12987983217 + +#if defined (__cplusplus) +extern "C" { +#endif + + +/* static assert is triggered at compile time, leaving no runtime artefact, + * but can only work with compile-time constants. + * This variant can only be used inside a function. */ +#define DEBUG_STATIC_ASSERT(c) (void)sizeof(char[(c) ?
1 : -1]) + + +/* DEBUGLEVEL is expected to be defined externally, + * typically through compiler command line. + * Value must be a number. */ +#ifndef DEBUGLEVEL +# define DEBUGLEVEL 0 +#endif + +/* recommended values for DEBUGLEVEL : + * 0 : no debug, all run-time functions disabled + * 1 : no display, enables assert() only + * 2 : reserved, for currently active debug path + * 3 : events once per object lifetime (CCtx, CDict, etc.) + * 4 : events once per frame + * 5 : events once per block + * 6 : events once per sequence (verbose) + * 7+: events at every position (*very* verbose) + * + * It's generally inconvenient to output traces > 5. + * In which case, it's possible to selectively enable higher verbosity levels + * by modifying g_debuglevel. + */ + +#if (DEBUGLEVEL>=1) +# include <assert.h> +#else +# ifndef assert /* assert may be already defined, due to prior #include <assert.h> */ +# define assert(condition) ((void)0) /* disable assert (default) */ +# endif +#endif + +#if (DEBUGLEVEL>=2) +# include <stdio.h> +extern int g_debuglevel; /* here, this variable is only declared, + it actually lives in debug.c, + and is shared by the whole process. + It's typically used to enable very verbose levels + on selective conditions (such as position in src) */ + +# define RAWLOG(l, ...) { \ + if (l<=g_debuglevel) { \ + fprintf(stderr, __VA_ARGS__); \ + } } +# define DEBUGLOG(l, ...) { \ + if (l<=g_debuglevel) { \ + fprintf(stderr, __FILE__ ": " __VA_ARGS__); \ + fprintf(stderr, " \n"); \ + } } +#else +# define RAWLOG(l, ...) {} /* disabled */ +# define DEBUGLOG(l, ...)
{} /* disabled */ +#endif + + +#if defined (__cplusplus) +} +#endif + +#endif /* DEBUG_H_12987983217 */ diff --git a/deps/SZ/zstd/common/entropy_common.c b/deps/SZ/zstd/common/entropy_common.c new file mode 100644 index 0000000000000000000000000000000000000000..b12944e1de93ad10a0349fd9075bb0258128697c --- /dev/null +++ b/deps/SZ/zstd/common/entropy_common.c @@ -0,0 +1,236 @@ +/* + Common functions of New Generation Entropy library + Copyright (C) 2016, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + You can contact the author at : + - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy + - Public forum : https://groups.google.com/forum/#!forum/lz4c +*************************************************************************** */ + +/* ************************************* +* Dependencies +***************************************/ +#include "mem.h" +#include "error_private.h" /* ERR_*, ERROR */ +#define FSE_STATIC_LINKING_ONLY /* FSE_MIN_TABLELOG */ +#include "fse.h" +#define HUF_STATIC_LINKING_ONLY /* HUF_TABLELOG_ABSOLUTEMAX */ +#include "huf.h" + + +/*=== Version ===*/ +unsigned FSE_versionNumber(void) { return FSE_VERSION_NUMBER; } + + +/*=== Error Management ===*/ +unsigned FSE_isError(size_t code) { return ERR_isError(code); } +const char* FSE_getErrorName(size_t code) { return ERR_getErrorName(code); } + +unsigned HUF_isError(size_t code) { return ERR_isError(code); } +const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); } + + +/*-************************************************************** +* FSE NCount encoding-decoding +****************************************************************/ +size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, + const void* headerBuffer, size_t hbSize) +{ + const BYTE* const istart = (const BYTE*) headerBuffer; + const BYTE* const iend = istart + hbSize; + const BYTE* ip = istart; + int nbBits; + int remaining; + int threshold; + U32 bitStream; + int bitCount; + unsigned charnum = 0; + int previous0 = 0; + + if (hbSize < 4) { + /* This function only works when hbSize >= 4 */ + char buffer[4]; + memset(buffer, 0, sizeof(buffer)); + memcpy(buffer, headerBuffer, hbSize); + { size_t const countSize = FSE_readNCount(normalizedCounter, maxSVPtr, tableLogPtr, + buffer, sizeof(buffer)); + if (FSE_isError(countSize)) return countSize; + if (countSize > hbSize) return ERROR(corruption_detected); + return countSize; + } } + assert(hbSize >= 4); + + 
/* init */ + memset(normalizedCounter, 0, (*maxSVPtr+1) * sizeof(normalizedCounter[0])); /* all symbols not present in NCount have a frequency of 0 */ + bitStream = MEM_readLE32(ip); + nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */ + if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge); + bitStream >>= 4; + bitCount = 4; + *tableLogPtr = nbBits; + remaining = (1<<nbBits)+1; + threshold = 1<<nbBits; + nbBits++; + + while ((remaining>1) & (charnum<=*maxSVPtr)) { + if (previous0) { + unsigned n0 = charnum; + while ((bitStream & 0xFFFF) == 0xFFFF) { + n0 += 24; + if (ip < iend-5) { + ip += 2; + bitStream = MEM_readLE32(ip) >> bitCount; + } else { + bitStream >>= 16; + bitCount += 16; + } } + while ((bitStream & 3) == 3) { + n0 += 3; + bitStream >>= 2; + bitCount += 2; + } + n0 += bitStream & 3; + bitCount += 2; + if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall); + while (charnum < n0) normalizedCounter[charnum++] = 0; + if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) { + assert((bitCount >> 3) <= 3); /* For first condition to work */ + ip += bitCount>>3; + bitCount &= 7; + bitStream = MEM_readLE32(ip) >> bitCount; + } else { + bitStream >>= 2; + } } + { int const max = (2*threshold-1) - remaining; + int count; + + if ((bitStream & (threshold-1)) < (U32)max) { + count = bitStream & (threshold-1); + bitCount += nbBits-1; + } else { + count = bitStream & (2*threshold-1); + if (count >= threshold) count -= max; + bitCount += nbBits; + } + + count--; /* extra accuracy */ + remaining -= count < 0 ?
-count : count; /* -1 means +1 */ + normalizedCounter[charnum++] = (short)count; + previous0 = !count; + while (remaining < threshold) { + nbBits--; + threshold >>= 1; + } + + if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) { + ip += bitCount>>3; + bitCount &= 7; + } else { + bitCount -= (int)(8 * (iend - 4 - ip)); + ip = iend - 4; + } + bitStream = MEM_readLE32(ip) >> (bitCount & 31); + } } /* while ((remaining>1) & (charnum<=*maxSVPtr)) */ + if (remaining != 1) return ERROR(corruption_detected); + if (bitCount > 32) return ERROR(corruption_detected); + *maxSVPtr = charnum-1; + + ip += (bitCount+7)>>3; + return ip-istart; +} + + +/*! HUF_readStats() : + Read compact Huffman tree, saved by HUF_writeCTable(). + `huffWeight` is destination buffer. + `rankStats` is assumed to be a table of at least HUF_TABLELOG_MAX U32. + @return : size read from `src` , or an error Code . + Note : Needed by HUF_readCTable() and HUF_readDTableX?() . +*/ +size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, + U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize) +{ + U32 weightTotal; + const BYTE* ip = (const BYTE*) src; + size_t iSize; + size_t oSize; + + if (!srcSize) return ERROR(srcSize_wrong); + iSize = ip[0]; + /* memset(huffWeight, 0, hwSize); *//* is not necessary, even though some analyzer complain ... 
*/ + + if (iSize >= 128) { /* special header */ + oSize = iSize - 127; + iSize = ((oSize+1)/2); + if (iSize+1 > srcSize) return ERROR(srcSize_wrong); + if (oSize >= hwSize) return ERROR(corruption_detected); + ip += 1; + { U32 n; + for (n=0; n<oSize; n+=2) { + huffWeight[n] = ip[n/2] >> 4; + huffWeight[n+1] = ip[n/2] & 15; + } } } + else { /* header compressed with FSE (normal case) */ + FSE_DTable fseWorkspace[FSE_DTABLE_SIZE_U32(6)]; /* 6 is max possible tableLog for HUF header (maybe even 5, to be tested) */ + if (iSize+1 > srcSize) return ERROR(srcSize_wrong); + oSize = FSE_decompress_wksp(huffWeight, hwSize-1, ip+1, iSize, fseWorkspace, 6); /* max (hwSize-1) values decoded, as last one is implied */ + if (FSE_isError(oSize)) return oSize; + } + + /* collect weight stats */ + memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32)); + weightTotal = 0; + { U32 n; for (n=0; n<oSize; n++) { + if (huffWeight[n] >= HUF_TABLELOG_MAX) return ERROR(corruption_detected); + rankStats[huffWeight[n]]++; + weightTotal += (1 << huffWeight[n]) >> 1; + } } + if (weightTotal == 0) return ERROR(corruption_detected); + + /* get last non-null symbol weight (implied, total must be 2^n) */ + { U32 const tableLog = BIT_highbit32(weightTotal) + 1; + if (tableLog > HUF_TABLELOG_MAX) return ERROR(corruption_detected); + *tableLogPtr = tableLog; + /* determine last weight */ + { U32 const total = 1 << tableLog; + U32 const rest = total - weightTotal; + U32 const verif = 1 << BIT_highbit32(rest); + U32 const lastWeight = BIT_highbit32(rest) + 1; + if (verif != rest) return ERROR(corruption_detected); /* last value must be a clean power of 2 */ + huffWeight[oSize] = (BYTE)lastWeight; + rankStats[lastWeight]++; + } } + + /* check tree construction validity */ + if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected); /* by construction : at least 2 elts of rank 1, must be even */ + + /* results */ + *nbSymbolsPtr = (U32)(oSize+1); + return iSize+1; +} diff --git a/deps/SZ/zstd/common/error_private.c b/deps/SZ/zstd/common/error_private.c new
file mode 100644 index 0000000000000000000000000000000000000000..d004ee636c67971408ad81d457ac80d445c6097b --- /dev/null +++ b/deps/SZ/zstd/common/error_private.c @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/* The purpose of this file is to have a single list of error strings embedded in binary */ + +#include "error_private.h" + +const char* ERR_getErrorString(ERR_enum code) +{ + static const char* const notErrorCode = "Unspecified error code"; + switch( code ) + { + case PREFIX(no_error): return "No error detected"; + case PREFIX(GENERIC): return "Error (generic)"; + case PREFIX(prefix_unknown): return "Unknown frame descriptor"; + case PREFIX(version_unsupported): return "Version not supported"; + case PREFIX(frameParameter_unsupported): return "Unsupported frame parameter"; + case PREFIX(frameParameter_windowTooLarge): return "Frame requires too much memory for decoding"; + case PREFIX(corruption_detected): return "Corrupted block detected"; + case PREFIX(checksum_wrong): return "Restored data doesn't match checksum"; + case PREFIX(parameter_unsupported): return "Unsupported parameter"; + case PREFIX(parameter_outOfBound): return "Parameter is out of bound"; + case PREFIX(init_missing): return "Context should be init first"; + case PREFIX(memory_allocation): return "Allocation error : not enough memory"; + case PREFIX(workSpace_tooSmall): return "workSpace buffer is not large enough"; + case PREFIX(stage_wrong): return "Operation not authorized at current processing stage"; + case PREFIX(tableLog_tooLarge): return "tableLog requires too much memory : unsupported"; + case 
PREFIX(maxSymbolValue_tooLarge): return "Unsupported max Symbol Value : too large"; + case PREFIX(maxSymbolValue_tooSmall): return "Specified maxSymbolValue is too small"; + case PREFIX(dictionary_corrupted): return "Dictionary is corrupted"; + case PREFIX(dictionary_wrong): return "Dictionary mismatch"; + case PREFIX(dictionaryCreation_failed): return "Cannot create Dictionary from provided samples"; + case PREFIX(dstSize_tooSmall): return "Destination buffer is too small"; + case PREFIX(srcSize_wrong): return "Src size is incorrect"; + /* following error codes are not stable and may be removed or changed in a future version */ + case PREFIX(frameIndex_tooLarge): return "Frame index is too large"; + case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking"; + case PREFIX(maxCode): + default: return notErrorCode; + } +} diff --git a/deps/SZ/zstd/common/error_private.h b/deps/SZ/zstd/common/error_private.h new file mode 100644 index 0000000000000000000000000000000000000000..0d2fa7e34b01459d3f5afa858ae133cfdd9db144 --- /dev/null +++ b/deps/SZ/zstd/common/error_private.h @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +/* Note : this module is expected to remain private, do not expose it */ + +#ifndef ERROR_H_MODULE +#define ERROR_H_MODULE + +#if defined (__cplusplus) +extern "C" { +#endif + + +/* **************************************** +* Dependencies +******************************************/ +#include <stddef.h> /* size_t */ +#include "zstd_errors.h" /* enum list */ + + +/* **************************************** +* Compiler-specific +******************************************/ +#if defined(__GNUC__) +# define ERR_STATIC static __attribute__((unused)) +#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +# define ERR_STATIC static inline +#elif defined(_MSC_VER) +# define ERR_STATIC static __inline +#else +# define ERR_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */ +#endif + + +/*-**************************************** +* Customization (error_public.h) +******************************************/ +typedef ZSTD_ErrorCode ERR_enum; +#define PREFIX(name) ZSTD_error_##name + + +/*-**************************************** +* Error codes handling +******************************************/ +#undef ERROR /* reported already defined on VS 2015 (Rich Geldreich) */ +#define ERROR(name) ZSTD_ERROR(name) +#define ZSTD_ERROR(name) ((size_t)-PREFIX(name)) + +ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); } + +ERR_STATIC ERR_enum ERR_getErrorCode(size_t code) { if (!ERR_isError(code)) return (ERR_enum)0; return (ERR_enum) (0-code); } + + +/*-**************************************** +* Error Strings +******************************************/ + +const char* ERR_getErrorString(ERR_enum code); /* error_private.c */ + +ERR_STATIC const char* ERR_getErrorName(size_t code) +{ + return ERR_getErrorString(ERR_getErrorCode(code)); +} + +#if defined (__cplusplus) +} +#endif + +#endif /* ERROR_H_MODULE */ diff --git a/deps/SZ/zstd/common/fse.h
b/deps/SZ/zstd/common/fse.h new file mode 100644 index 0000000000000000000000000000000000000000..a5a6b6d4db70062053511bcc3a278a1b3d9b7287 --- /dev/null +++ b/deps/SZ/zstd/common/fse.h @@ -0,0 +1,708 @@ +/* ****************************************************************** + FSE : Finite State Entropy codec + Public Prototypes declaration + Copyright (C) 2013-2016, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + You can contact the author at : + - Source repository : https://github.com/Cyan4973/FiniteStateEntropy +****************************************************************** */ + +#if defined (__cplusplus) +extern "C" { +#endif + +#ifndef FSE_H +#define FSE_H + + +/*-***************************************** +* Dependencies +******************************************/ +#include <stddef.h> /* size_t, ptrdiff_t */ + + +/*-***************************************** +* FSE_PUBLIC_API : control library symbols visibility +******************************************/ +#if defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) && defined(__GNUC__) && (__GNUC__ >= 4) +# define FSE_PUBLIC_API __attribute__ ((visibility ("default"))) +#elif defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) /* Visual expected */ +# define FSE_PUBLIC_API __declspec(dllexport) +#elif defined(FSE_DLL_IMPORT) && (FSE_DLL_IMPORT==1) +# define FSE_PUBLIC_API __declspec(dllimport) /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/ +#else +# define FSE_PUBLIC_API +#endif + +/*------ Version ------*/ +#define FSE_VERSION_MAJOR 0 +#define FSE_VERSION_MINOR 9 +#define FSE_VERSION_RELEASE 0 + +#define FSE_LIB_VERSION FSE_VERSION_MAJOR.FSE_VERSION_MINOR.FSE_VERSION_RELEASE +#define FSE_QUOTE(str) #str +#define FSE_EXPAND_AND_QUOTE(str) FSE_QUOTE(str) +#define FSE_VERSION_STRING FSE_EXPAND_AND_QUOTE(FSE_LIB_VERSION) + +#define FSE_VERSION_NUMBER (FSE_VERSION_MAJOR *100*100 + FSE_VERSION_MINOR *100 + FSE_VERSION_RELEASE) +FSE_PUBLIC_API unsigned FSE_versionNumber(void); /**< library version number; to be used when checking dll version */ + + +/*-**************************************** +* FSE simple functions +******************************************/ +/*! FSE_compress() : + Compress content of buffer 'src', of size 'srcSize', into destination buffer 'dst'. + 'dst' buffer must be already allocated.
Compression runs faster if dstCapacity >= FSE_compressBound(srcSize). + @return : size of compressed data (<= dstCapacity). + Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!! + if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression instead. + if FSE_isError(return), compression failed (more details using FSE_getErrorName()) +*/ +FSE_PUBLIC_API size_t FSE_compress(void* dst, size_t dstCapacity, + const void* src, size_t srcSize); + +/*! FSE_decompress(): + Decompress FSE data from buffer 'cSrc', of size 'cSrcSize', + into already allocated destination buffer 'dst', of size 'dstCapacity'. + @return : size of regenerated data (<= dstCapacity), + or an error code, which can be tested using FSE_isError() . + + ** Important ** : FSE_decompress() does not decompress non-compressible nor RLE data !!! + Why ? : making this distinction requires a header. + Header management is intentionally delegated to the user layer, which can better manage special cases. +*/ +FSE_PUBLIC_API size_t FSE_decompress(void* dst, size_t dstCapacity, + const void* cSrc, size_t cSrcSize); + + +/*-***************************************** +* Tool functions +******************************************/ +FSE_PUBLIC_API size_t FSE_compressBound(size_t size); /* maximum compressed size */ + +/* Error Management */ +FSE_PUBLIC_API unsigned FSE_isError(size_t code); /* tells if a return value is an error code */ +FSE_PUBLIC_API const char* FSE_getErrorName(size_t code); /* provides error code string (useful for debugging) */ + + +/*-***************************************** +* FSE advanced functions +******************************************/ +/*!
FSE_compress2() : + Same as FSE_compress(), but allows the selection of 'maxSymbolValue' and 'tableLog' + Both parameters can be defined as '0' to mean : use default value + @return : size of compressed data + Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!! + if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression. + if FSE_isError(return), it's an error code. +*/ +FSE_PUBLIC_API size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog); + + +/*-***************************************** +* FSE detailed API +******************************************/ +/*! +FSE_compress() does the following: +1. count symbol occurrence from source[] into table count[] (see hist.h) +2. normalize counters so that sum(count[]) == Power_of_2 (2^tableLog) +3. save normalized counters to memory buffer using writeNCount() +4. build encoding table 'CTable' from normalized counters +5. encode the data stream using encoding table 'CTable' + +FSE_decompress() does the following: +1. read normalized counters with readNCount() +2. build decoding table 'DTable' from normalized counters +3. decode the data stream using decoding table 'DTable' + +The following API allows targeting specific sub-functions for advanced tasks. +For example, it's possible to compress several blocks using the same 'CTable', +or to save and provide normalized distribution using external method. +*/ + +/* *** COMPRESSION *** */ + +/*! FSE_optimalTableLog(): + dynamically downsize 'tableLog' when conditions are met. + It saves CPU time, by using smaller tables, while preserving or even improving compression ratio. + @return : recommended tableLog (necessarily <= 'maxTableLog') */ +FSE_PUBLIC_API unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue); + +/*! 
FSE_normalizeCount(): + normalize counts so that sum(count[]) == Power_of_2 (2^tableLog) + 'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1). + @return : tableLog, + or an errorCode, which can be tested using FSE_isError() */ +FSE_PUBLIC_API size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog, + const unsigned* count, size_t srcSize, unsigned maxSymbolValue); + +/*! FSE_NCountWriteBound(): + Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'. + Typically useful for allocation purpose. */ +FSE_PUBLIC_API size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog); + +/*! FSE_writeNCount(): + Compactly save 'normalizedCounter' into 'buffer'. + @return : size of the compressed table, + or an errorCode, which can be tested using FSE_isError(). */ +FSE_PUBLIC_API size_t FSE_writeNCount (void* buffer, size_t bufferSize, + const short* normalizedCounter, + unsigned maxSymbolValue, unsigned tableLog); + +/*! Constructor and Destructor of FSE_CTable. + Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */ +typedef unsigned FSE_CTable; /* don't allocate that. It's only meant to be more restrictive than void* */ +FSE_PUBLIC_API FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog); +FSE_PUBLIC_API void FSE_freeCTable (FSE_CTable* ct); + +/*! FSE_buildCTable(): + Builds `ct`, which must be already allocated, using FSE_createCTable(). + @return : 0, or an errorCode, which can be tested using FSE_isError() */ +FSE_PUBLIC_API size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog); + +/*! FSE_compress_usingCTable(): + Compress `src` using `ct` into `dst` which must be already allocated. 
+ @return : size of compressed data (<= `dstCapacity`), + or 0 if compressed data could not fit into `dst`, + or an errorCode, which can be tested using FSE_isError() */ +FSE_PUBLIC_API size_t FSE_compress_usingCTable (void* dst, size_t dstCapacity, const void* src, size_t srcSize, const FSE_CTable* ct); + +/*! +Tutorial : +---------- +The first step is to count all symbols. FSE_count() does this job very fast. +Result will be saved into 'count', a table of unsigned int, which must be already allocated, and have 'maxSymbolValuePtr[0]+1' cells. +'src' is a table of bytes of size 'srcSize'. All values within 'src' MUST be <= maxSymbolValuePtr[0] +maxSymbolValuePtr[0] will be updated, with its real value (necessarily <= original value) +FSE_count() will return the number of occurrence of the most frequent symbol. +This can be used to know if there is a single symbol within 'src', and to quickly evaluate its compressibility. +If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()). + +The next step is to normalize the frequencies. +FSE_normalizeCount() will ensure that sum of frequencies is == 2 ^'tableLog'. +It also guarantees a minimum of 1 to any Symbol with frequency >= 1. +You can use 'tableLog'==0 to mean "use default tableLog value". +If you are unsure of which tableLog value to use, you can ask FSE_optimalTableLog(), +which will provide the optimal valid tableLog given sourceSize, maxSymbolValue, and a user-defined maximum (0 means "default"). + +The result of FSE_normalizeCount() will be saved into a table, +called 'normalizedCounter', which is a table of signed short. +'normalizedCounter' must be already allocated, and have at least 'maxSymbolValue+1' cells. +The return value is tableLog if everything proceeded as expected. +It is 0 if there is a single symbol within distribution. +If there is an error (ex: invalid tableLog value), the function will return an ErrorCode (which can be tested using FSE_isError()). 
+ +'normalizedCounter' can be saved in a compact manner to a memory area using FSE_writeNCount(). +'buffer' must be already allocated. +For guaranteed success, buffer size must be at least FSE_NCountWriteBound(). +The result of the function is the number of bytes written into 'buffer'. +If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError(); ex : buffer size too small). + +'normalizedCounter' can then be used to create the compression table 'CTable'. +The space required by 'CTable' must be already allocated, using FSE_createCTable(). +You can then use FSE_buildCTable() to fill 'CTable'. +If there is an error, both functions will return an ErrorCode (which can be tested using FSE_isError()). + +'CTable' can then be used to compress 'src', with FSE_compress_usingCTable(). +Similar to FSE_count(), the convention is that 'src' is assumed to be a table of char of size 'srcSize' +The function returns the size of compressed data (without header), necessarily <= `dstCapacity`. +If it returns '0', compressed data could not fit into 'dst'. +If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()). +*/ + + +/* *** DECOMPRESSION *** */ + +/*! FSE_readNCount(): + Read compactly saved 'normalizedCounter' from 'rBuffer'. + @return : size read from 'rBuffer', + or an errorCode, which can be tested using FSE_isError(). + maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */ +FSE_PUBLIC_API size_t FSE_readNCount (short* normalizedCounter, + unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, + const void* rBuffer, size_t rBuffSize); + +/*! Constructor and Destructor of FSE_DTable. + Note that its size depends on 'tableLog' */ +typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */ +FSE_PUBLIC_API FSE_DTable* FSE_createDTable(unsigned tableLog); +FSE_PUBLIC_API void FSE_freeDTable(FSE_DTable* dt); + +/*! 
FSE_buildDTable(): + Builds 'dt', which must be already allocated, using FSE_createDTable(). + return : 0, or an errorCode, which can be tested using FSE_isError() */ +FSE_PUBLIC_API size_t FSE_buildDTable (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog); + +/*! FSE_decompress_usingDTable(): + Decompress compressed source `cSrc` of size `cSrcSize` using `dt` + into `dst` which must be already allocated. + @return : size of regenerated data (necessarily <= `dstCapacity`), + or an errorCode, which can be tested using FSE_isError() */ +FSE_PUBLIC_API size_t FSE_decompress_usingDTable(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, const FSE_DTable* dt); + +/*! +Tutorial : +---------- +(Note : these functions only decompress FSE-compressed blocks. + If block is uncompressed, use memcpy() instead + If block is a single repeated byte, use memset() instead ) + +The first step is to obtain the normalized frequencies of symbols. +This can be performed by FSE_readNCount() if it was saved using FSE_writeNCount(). +'normalizedCounter' must be already allocated, and have at least 'maxSymbolValuePtr[0]+1' cells of signed short. +In practice, that means it's necessary to know 'maxSymbolValue' beforehand, +or size the table to handle worst case situations (typically 256). +FSE_readNCount() will provide 'tableLog' and 'maxSymbolValue'. +The result of FSE_readNCount() is the number of bytes read from 'rBuffer'. +Note that 'rBufferSize' must be at least 4 bytes, even if useful information is less than that. +If there is an error, the function will return an error code, which can be tested using FSE_isError(). + +The next step is to build the decompression tables 'FSE_DTable' from 'normalizedCounter'. +This is performed by the function FSE_buildDTable(). +The space required by 'FSE_DTable' must be already allocated using FSE_createDTable(). 
+If there is an error, the function will return an error code, which can be tested using FSE_isError(). + +`FSE_DTable` can then be used to decompress `cSrc`, with FSE_decompress_usingDTable(). +`cSrcSize` must be strictly correct, otherwise decompression will fail. +FSE_decompress_usingDTable() result will tell how many bytes were regenerated (<=`dstCapacity`). +If there is an error, the function will return an error code, which can be tested using FSE_isError(). (ex: dst buffer too small) +*/ + +#endif /* FSE_H */ + +#if defined(FSE_STATIC_LINKING_ONLY) && !defined(FSE_H_FSE_STATIC_LINKING_ONLY) +#define FSE_H_FSE_STATIC_LINKING_ONLY + +/* *** Dependency *** */ +#include "bitstream.h" + + +/* ***************************************** +* Static allocation +*******************************************/ +/* FSE buffer bounds */ +#define FSE_NCOUNTBOUND 512 +#define FSE_BLOCKBOUND(size) (size + (size>>7)) +#define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size)) /* Macro version, useful for static allocation */ + +/* It is possible to statically allocate FSE CTable/DTable as a table of FSE_CTable/FSE_DTable using below macros */ +#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1<<(maxTableLog-1)) + ((maxSymbolValue+1)*2)) +#define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1< 12) ? (1 << (maxTableLog - 2)) : 1024) ) +size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); + +size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits); +/**< build a fake FSE_CTable, designed for a flat distribution, where each symbol uses nbBits */ + +size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue); +/**< build a fake FSE_CTable, designed to compress always the same symbolValue */ + +/* FSE_buildCTable_wksp() : + * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`). 
+ * `wkspSize` must be >= `(1<= BIT_DStream_completed + +When it's done, verify decompression is fully completed, by checking both DStream and the relevant states. +Checking if DStream has reached its end is performed by : + BIT_endOfDStream(&DStream); +Check also the states. There might be some symbols left there, if some high probability ones (>50%) are possible. + FSE_endOfDState(&DState); +*/ + + +/* ***************************************** +* FSE unsafe API +*******************************************/ +static unsigned char FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD); +/* faster, but works only if nbBits is always >= 1 (otherwise, result will be corrupted) */ + + +/* ***************************************** +* Implementation of inlined functions +*******************************************/ +typedef struct { + int deltaFindState; + U32 deltaNbBits; +} FSE_symbolCompressionTransform; /* total 8 bytes */ + +MEM_STATIC void FSE_initCState(FSE_CState_t* statePtr, const FSE_CTable* ct) +{ + const void* ptr = ct; + const U16* u16ptr = (const U16*) ptr; + const U32 tableLog = MEM_read16(ptr); + statePtr->value = (ptrdiff_t)1<stateTable = u16ptr+2; + statePtr->symbolTT = ((const U32*)ct + 1 + (tableLog ? (1<<(tableLog-1)) : 1)); + statePtr->stateLog = tableLog; +} + + +/*! 
FSE_initCState2() : +* Same as FSE_initCState(), but the first symbol to include (which will be the last to be read) +* uses the smallest state value possible, saving the cost of this symbol */ +MEM_STATIC void FSE_initCState2(FSE_CState_t* statePtr, const FSE_CTable* ct, U32 symbol) +{ + FSE_initCState(statePtr, ct); + { const FSE_symbolCompressionTransform symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol]; + const U16* stateTable = (const U16*)(statePtr->stateTable); + U32 nbBitsOut = (U32)((symbolTT.deltaNbBits + (1<<15)) >> 16); + statePtr->value = (nbBitsOut << 16) - symbolTT.deltaNbBits; + statePtr->value = stateTable[(statePtr->value >> nbBitsOut) + symbolTT.deltaFindState]; + } +} + +MEM_STATIC void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* statePtr, U32 symbol) +{ + FSE_symbolCompressionTransform const symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol]; + const U16* const stateTable = (const U16*)(statePtr->stateTable); + U32 const nbBitsOut = (U32)((statePtr->value + symbolTT.deltaNbBits) >> 16); + BIT_addBits(bitC, statePtr->value, nbBitsOut); + statePtr->value = stateTable[ (statePtr->value >> nbBitsOut) + symbolTT.deltaFindState]; +} + +MEM_STATIC void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePtr) +{ + BIT_addBits(bitC, statePtr->value, statePtr->stateLog); + BIT_flushBits(bitC); +} + + +/* FSE_getMaxNbBits() : + * Approximate maximum cost of a symbol, in bits. 
+ * Fractional get rounded up (i.e : a symbol with a normalized frequency of 3 gives the same result as a frequency of 2) + * note 1 : assume symbolValue is valid (<= maxSymbolValue) + * note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */ +MEM_STATIC U32 FSE_getMaxNbBits(const void* symbolTTPtr, U32 symbolValue) +{ + const FSE_symbolCompressionTransform* symbolTT = (const FSE_symbolCompressionTransform*) symbolTTPtr; + return (symbolTT[symbolValue].deltaNbBits + ((1<<16)-1)) >> 16; +} + +/* FSE_bitCost() : + * Approximate symbol cost, as fractional value, using fixed-point format (accuracyLog fractional bits) + * note 1 : assume symbolValue is valid (<= maxSymbolValue) + * note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */ +MEM_STATIC U32 FSE_bitCost(const void* symbolTTPtr, U32 tableLog, U32 symbolValue, U32 accuracyLog) +{ + const FSE_symbolCompressionTransform* symbolTT = (const FSE_symbolCompressionTransform*) symbolTTPtr; + U32 const minNbBits = symbolTT[symbolValue].deltaNbBits >> 16; + U32 const threshold = (minNbBits+1) << 16; + assert(tableLog < 16); + assert(accuracyLog < 31-tableLog); /* ensure enough room for renormalization double shift */ + { U32 const tableSize = 1 << tableLog; + U32 const deltaFromThreshold = threshold - (symbolTT[symbolValue].deltaNbBits + tableSize); + U32 const normalizedDeltaFromThreshold = (deltaFromThreshold << accuracyLog) >> tableLog; /* linear interpolation (very approximate) */ + U32 const bitMultiplier = 1 << accuracyLog; + assert(symbolTT[symbolValue].deltaNbBits + tableSize <= threshold); + assert(normalizedDeltaFromThreshold <= bitMultiplier); + return (minNbBits+1)*bitMultiplier - normalizedDeltaFromThreshold; + } +} + + +/* ====== Decompression ====== */ + +typedef struct { + U16 tableLog; + U16 fastMode; +} FSE_DTableHeader; /* sizeof U32 */ + +typedef struct +{ + unsigned short newState; + unsigned char symbol; + unsigned char nbBits; +} FSE_decode_t; /* size == U32 
*/ + +MEM_STATIC void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt) +{ + const void* ptr = dt; + const FSE_DTableHeader* const DTableH = (const FSE_DTableHeader*)ptr; + DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog); + BIT_reloadDStream(bitD); + DStatePtr->table = dt + 1; +} + +MEM_STATIC BYTE FSE_peekSymbol(const FSE_DState_t* DStatePtr) +{ + FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + return DInfo.symbol; +} + +MEM_STATIC void FSE_updateState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD) +{ + FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + U32 const nbBits = DInfo.nbBits; + size_t const lowBits = BIT_readBits(bitD, nbBits); + DStatePtr->state = DInfo.newState + lowBits; +} + +MEM_STATIC BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD) +{ + FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + U32 const nbBits = DInfo.nbBits; + BYTE const symbol = DInfo.symbol; + size_t const lowBits = BIT_readBits(bitD, nbBits); + + DStatePtr->state = DInfo.newState + lowBits; + return symbol; +} + +/*! 
FSE_decodeSymbolFast() : + unsafe, only works if no symbol has a probability > 50% */ +MEM_STATIC BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD) +{ + FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + U32 const nbBits = DInfo.nbBits; + BYTE const symbol = DInfo.symbol; + size_t const lowBits = BIT_readBitsFast(bitD, nbBits); + + DStatePtr->state = DInfo.newState + lowBits; + return symbol; +} + +MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr) +{ + return DStatePtr->state == 0; +} + + + +#ifndef FSE_COMMONDEFS_ONLY + +/* ************************************************************** +* Tuning parameters +****************************************************************/ +/*!MEMORY_USAGE : +* Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.) +* Increasing memory usage improves compression ratio +* Reduced memory usage can improve speed, due to cache effect +* Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */ +#ifndef FSE_MAX_MEMORY_USAGE +# define FSE_MAX_MEMORY_USAGE 14 +#endif +#ifndef FSE_DEFAULT_MEMORY_USAGE +# define FSE_DEFAULT_MEMORY_USAGE 13 +#endif + +/*!FSE_MAX_SYMBOL_VALUE : +* Maximum symbol value authorized. 
+* Required for proper stack allocation */ +#ifndef FSE_MAX_SYMBOL_VALUE +# define FSE_MAX_SYMBOL_VALUE 255 +#endif + +/* ************************************************************** +* template functions type & suffix +****************************************************************/ +#define FSE_FUNCTION_TYPE BYTE +#define FSE_FUNCTION_EXTENSION +#define FSE_DECODE_TYPE FSE_decode_t + + +#endif /* !FSE_COMMONDEFS_ONLY */ + + +/* *************************************************************** +* Constants +*****************************************************************/ +#define FSE_MAX_TABLELOG (FSE_MAX_MEMORY_USAGE-2) +#define FSE_MAX_TABLESIZE (1U< FSE_TABLELOG_ABSOLUTE_MAX +# error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported" +#endif + +#define FSE_TABLESTEP(tableSize) ((tableSize>>1) + (tableSize>>3) + 3) + + +#endif /* FSE_STATIC_LINKING_ONLY */ + + +#if defined (__cplusplus) +} +#endif diff --git a/deps/SZ/zstd/common/fse_decompress.c b/deps/SZ/zstd/common/fse_decompress.c new file mode 100644 index 0000000000000000000000000000000000000000..72bbead5beea3d5e73e5b4eaaa689ee8b98533d1 --- /dev/null +++ b/deps/SZ/zstd/common/fse_decompress.c @@ -0,0 +1,309 @@ +/* ****************************************************************** + FSE : Finite State Entropy decoder + Copyright (C) 2013-2015, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy + - Public forum : https://groups.google.com/forum/#!forum/lz4c +****************************************************************** */ + + +/* ************************************************************** +* Includes +****************************************************************/ +#include /* malloc, free, qsort */ +#include /* memcpy, memset */ +#include "bitstream.h" +#include "compiler.h" +#define FSE_STATIC_LINKING_ONLY +#include "fse.h" +#include "error_private.h" + + +/* ************************************************************** +* Error Management +****************************************************************/ +#define FSE_isError ERR_isError +#define FSE_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */ + +/* check and forward error code */ +#define CHECK_F(f) { size_t const e = f; if (FSE_isError(e)) return e; } + + +/* ************************************************************** +* Templates +****************************************************************/ +/* + designed to be included + for type-specific functions (template emulation 
in C) + Objective is to write these functions only once, for improved maintenance +*/ + +/* safety checks */ +#ifndef FSE_FUNCTION_EXTENSION +# error "FSE_FUNCTION_EXTENSION must be defined" +#endif +#ifndef FSE_FUNCTION_TYPE +# error "FSE_FUNCTION_TYPE must be defined" +#endif + +/* Function names */ +#define FSE_CAT(X,Y) X##Y +#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y) +#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y) + + +/* Function templates */ +FSE_DTable* FSE_createDTable (unsigned tableLog) +{ + if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX; + return (FSE_DTable*)malloc( FSE_DTABLE_SIZE_U32(tableLog) * sizeof (U32) ); +} + +void FSE_freeDTable (FSE_DTable* dt) +{ + free(dt); +} + +size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) +{ + void* const tdPtr = dt+1; /* because *dt is unsigned, 32-bits aligned on 32-bits */ + FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (tdPtr); + U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1]; + + U32 const maxSV1 = maxSymbolValue + 1; + U32 const tableSize = 1 << tableLog; + U32 highThreshold = tableSize-1; + + /* Sanity Checks */ + if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge); + if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); + + /* Init, lay down lowprob symbols */ + { FSE_DTableHeader DTableH; + DTableH.tableLog = (U16)tableLog; + DTableH.fastMode = 1; + { S16 const largeLimit= (S16)(1 << (tableLog-1)); + U32 s; + for (s=0; s= largeLimit) DTableH.fastMode=0; + symbolNext[s] = normalizedCounter[s]; + } } } + memcpy(dt, &DTableH, sizeof(DTableH)); + } + + /* Spread symbols */ + { U32 const tableMask = tableSize-1; + U32 const step = FSE_TABLESTEP(tableSize); + U32 s, position = 0; + for (s=0; s highThreshold) position = (position + step) & tableMask; /* lowprob area */ + } } + if (position!=0) return ERROR(GENERIC); /* position must reach all cells once, otherwise normalizedCounter 
is incorrect */ + } + + /* Build Decoding table */ + { U32 u; + for (u=0; utableLog = 0; + DTableH->fastMode = 0; + + cell->newState = 0; + cell->symbol = symbolValue; + cell->nbBits = 0; + + return 0; +} + + +size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits) +{ + void* ptr = dt; + FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr; + void* dPtr = dt + 1; + FSE_decode_t* const dinfo = (FSE_decode_t*)dPtr; + const unsigned tableSize = 1 << nbBits; + const unsigned tableMask = tableSize - 1; + const unsigned maxSV1 = tableMask+1; + unsigned s; + + /* Sanity checks */ + if (nbBits < 1) return ERROR(GENERIC); /* min size */ + + /* Build Decoding Table */ + DTableH->tableLog = (U16)nbBits; + DTableH->fastMode = 1; + for (s=0; s sizeof(bitD.bitContainer)*8) /* This test must be static */ + BIT_reloadDStream(&bitD); + + op[1] = FSE_GETSYMBOL(&state2); + + if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */ + { if (BIT_reloadDStream(&bitD) > BIT_DStream_unfinished) { op+=2; break; } } + + op[2] = FSE_GETSYMBOL(&state1); + + if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */ + BIT_reloadDStream(&bitD); + + op[3] = FSE_GETSYMBOL(&state2); + } + + /* tail */ + /* note : BIT_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly BIT_DStream_completed */ + while (1) { + if (op>(omax-2)) return ERROR(dstSize_tooSmall); + *op++ = FSE_GETSYMBOL(&state1); + if (BIT_reloadDStream(&bitD)==BIT_DStream_overflow) { + *op++ = FSE_GETSYMBOL(&state2); + break; + } + + if (op>(omax-2)) return ERROR(dstSize_tooSmall); + *op++ = FSE_GETSYMBOL(&state2); + if (BIT_reloadDStream(&bitD)==BIT_DStream_overflow) { + *op++ = FSE_GETSYMBOL(&state1); + break; + } } + + return op-ostart; +} + + +size_t FSE_decompress_usingDTable(void* dst, size_t originalSize, + const void* cSrc, size_t cSrcSize, + const FSE_DTable* dt) +{ + const void* ptr = dt; + const FSE_DTableHeader* DTableH = (const 
FSE_DTableHeader*)ptr; + const U32 fastMode = DTableH->fastMode; + + /* select fast mode (static) */ + if (fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1); + return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0); +} + + +size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, FSE_DTable* workSpace, unsigned maxLog) +{ + const BYTE* const istart = (const BYTE*)cSrc; + const BYTE* ip = istart; + short counting[FSE_MAX_SYMBOL_VALUE+1]; + unsigned tableLog; + unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE; + + /* normal FSE decoding mode */ + size_t const NCountLength = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize); + if (FSE_isError(NCountLength)) return NCountLength; + //if (NCountLength >= cSrcSize) return ERROR(srcSize_wrong); /* too small input size; supposed to be already checked in NCountLength, only remaining case : NCountLength==cSrcSize */ + if (tableLog > maxLog) return ERROR(tableLog_tooLarge); + ip += NCountLength; + cSrcSize -= NCountLength; + + CHECK_F( FSE_buildDTable (workSpace, counting, maxSymbolValue, tableLog) ); + + return FSE_decompress_usingDTable (dst, dstCapacity, ip, cSrcSize, workSpace); /* always return, even if it is an error code */ +} + + +typedef FSE_DTable DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)]; + +size_t FSE_decompress(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize) +{ + DTable_max_t dt; /* Static analyzer seems unable to understand this table will be properly initialized later */ + return FSE_decompress_wksp(dst, dstCapacity, cSrc, cSrcSize, dt, FSE_MAX_TABLELOG); +} + + + +#endif /* FSE_COMMONDEFS_ONLY */ diff --git a/deps/SZ/zstd/common/huf.h b/deps/SZ/zstd/common/huf.h new file mode 100644 index 0000000000000000000000000000000000000000..de94641110641e094d741517e0676b291f7194e0 --- /dev/null +++ b/deps/SZ/zstd/common/huf.h @@ -0,0 +1,334 @@ +/* 
****************************************************************** + huff0 huffman codec, + part of Finite State Entropy library + Copyright (C) 2013-present, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + You can contact the author at : + - Source repository : https://github.com/Cyan4973/FiniteStateEntropy +****************************************************************** */ + +#if defined (__cplusplus) +extern "C" { +#endif + +#ifndef HUF_H_298734234 +#define HUF_H_298734234 + +/* *** Dependencies *** */ +#include /* size_t */ + + +/* *** library symbols visibility *** */ +/* Note : when linking with -fvisibility=hidden on gcc, or by default on Visual, + * HUF symbols remain "private" (internal symbols for library only). + * Set macro FSE_DLL_EXPORT to 1 if you want HUF symbols visible on DLL interface */ +#if defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) && defined(__GNUC__) && (__GNUC__ >= 4) +# define HUF_PUBLIC_API __attribute__ ((visibility ("default"))) +#elif defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) /* Visual expected */ +# define HUF_PUBLIC_API __declspec(dllexport) +#elif defined(FSE_DLL_IMPORT) && (FSE_DLL_IMPORT==1) +# define HUF_PUBLIC_API __declspec(dllimport) /* not required, just to generate faster code (saves a function pointer load from IAT and an indirect jump) */ +#else +# define HUF_PUBLIC_API +#endif + + +/* ========================== */ +/* *** simple functions *** */ +/* ========================== */ + +/** HUF_compress() : + * Compress content from buffer 'src', of size 'srcSize', into buffer 'dst'. + * 'dst' buffer must be already allocated. + * Compression runs faster if `dstCapacity` >= HUF_compressBound(srcSize). + * `srcSize` must be <= `HUF_BLOCKSIZE_MAX` == 128 KB. + * @return : size of compressed data (<= `dstCapacity`). + * Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!! 
+ * if HUF_isError(return), compression failed (more details using HUF_getErrorName()) + */ +HUF_PUBLIC_API size_t HUF_compress(void* dst, size_t dstCapacity, + const void* src, size_t srcSize); + +/** HUF_decompress() : + * Decompress HUF data from buffer 'cSrc', of size 'cSrcSize', + * into already allocated buffer 'dst', of minimum size 'dstSize'. + * `originalSize` : **must** be the ***exact*** size of original (uncompressed) data. + * Note : in contrast with FSE, HUF_decompress can regenerate + * RLE (cSrcSize==1) and uncompressed (cSrcSize==dstSize) data, + * because it knows size to regenerate (originalSize). + * @return : size of regenerated data (== originalSize), + * or an error code, which can be tested using HUF_isError() + */ +HUF_PUBLIC_API size_t HUF_decompress(void* dst, size_t originalSize, + const void* cSrc, size_t cSrcSize); + + +/* *** Tool functions *** */ +#define HUF_BLOCKSIZE_MAX (128 * 1024) /**< maximum input size for a single block compressed with HUF_compress */ +HUF_PUBLIC_API size_t HUF_compressBound(size_t size); /**< maximum compressed size (worst case) */ + +/* Error Management */ +HUF_PUBLIC_API unsigned HUF_isError(size_t code); /**< tells if a return value is an error code */ +HUF_PUBLIC_API const char* HUF_getErrorName(size_t code); /**< provides error code string (useful for debugging) */ + + +/* *** Advanced function *** */ + +/** HUF_compress2() : + * Same as HUF_compress(), but offers control over `maxSymbolValue` and `tableLog`. + * `maxSymbolValue` must be <= HUF_SYMBOLVALUE_MAX . + * `tableLog` must be `<= HUF_TABLELOG_MAX` . */ +HUF_PUBLIC_API size_t HUF_compress2 (void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned tableLog); + +/** HUF_compress4X_wksp() : + * Same as HUF_compress2(), but uses externally allocated `workSpace`. 
+ * `workspace` must have minimum alignment of 4, and be at least as large as HUF_WORKSPACE_SIZE */ +#define HUF_WORKSPACE_SIZE (6 << 10) +#define HUF_WORKSPACE_SIZE_U32 (HUF_WORKSPACE_SIZE / sizeof(U32)) +HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned tableLog, + void* workSpace, size_t wkspSize); + +#endif /* HUF_H_298734234 */ + +/* ****************************************************************** + * WARNING !! + * The following section contains advanced and experimental definitions + * which shall never be used in the context of a dynamic library, + * because they are not guaranteed to remain stable in the future. + * Only consider them in association with static linking. + * *****************************************************************/ +#if defined(HUF_STATIC_LINKING_ONLY) && !defined(HUF_H_HUF_STATIC_LINKING_ONLY) +#define HUF_H_HUF_STATIC_LINKING_ONLY + +/* *** Dependencies *** */ +#include "mem.h" /* U32 */ + + +/* *** Constants *** */ +#define HUF_TABLELOG_MAX 12 /* max runtime value of tableLog (due to static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */ +#define HUF_TABLELOG_DEFAULT 11 /* default tableLog value when none specified */ +#define HUF_SYMBOLVALUE_MAX 255 + +#define HUF_TABLELOG_ABSOLUTEMAX 15 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */ +#if (HUF_TABLELOG_MAX > HUF_TABLELOG_ABSOLUTEMAX) +# error "HUF_TABLELOG_MAX is too large !" 
+#endif + + +/* **************************************** +* Static allocation +******************************************/ +/* HUF buffer bounds */ +#define HUF_CTABLEBOUND 129 +#define HUF_BLOCKBOUND(size) (size + (size>>8) + 8) /* only true when incompressible is pre-filtered with fast heuristic */ +#define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size)) /* Macro version, useful for static allocation */ + +/* static allocation of HUF's Compression Table */ +#define HUF_CTABLE_SIZE_U32(maxSymbolValue) ((maxSymbolValue)+1) /* Use tables of U32, for proper alignment */ +#define HUF_CTABLE_SIZE(maxSymbolValue) (HUF_CTABLE_SIZE_U32(maxSymbolValue) * sizeof(U32)) +#define HUF_CREATE_STATIC_CTABLE(name, maxSymbolValue) \ + U32 name##hb[HUF_CTABLE_SIZE_U32(maxSymbolValue)]; \ + void* name##hv = &(name##hb); \ + HUF_CElt* name = (HUF_CElt*)(name##hv) /* no final ; */ + +/* static allocation of HUF's DTable */ +typedef U32 HUF_DTable; +#define HUF_DTABLE_SIZE(maxTableLog) (1 + (1<<(maxTableLog))) +#define HUF_CREATE_STATIC_DTABLEX1(DTable, maxTableLog) \ + HUF_DTable DTable[HUF_DTABLE_SIZE((maxTableLog)-1)] = { ((U32)((maxTableLog)-1) * 0x01000001) } +#define HUF_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) \ + HUF_DTable DTable[HUF_DTABLE_SIZE(maxTableLog)] = { ((U32)(maxTableLog) * 0x01000001) } + + +/* **************************************** +* Advanced decompression functions +******************************************/ +size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ +size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ + +size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< decodes RLE and uncompressed */ +size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< considers RLE and uncompressed 
as errors */ +size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< considers RLE and uncompressed as errors */ +size_t HUF_decompress4X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ +size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */ +size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ +size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */ + + +/* **************************************** + * HUF detailed API + * ****************************************/ + +/*! HUF_compress() does the following: + * 1. count symbol occurrence from source[] into table count[] using FSE_count() (exposed within "fse.h") + * 2. (optional) refine tableLog using HUF_optimalTableLog() + * 3. build Huffman table from count using HUF_buildCTable() + * 4. save Huffman table to memory buffer using HUF_writeCTable() + * 5. encode the data stream using HUF_compress4X_usingCTable() + * + * The following API allows targeting specific sub-functions for advanced tasks. + * For example, it's possible to compress several blocks using the same 'CTable', + * or to save and regenerate 'CTable' using external methods. + */ +unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue); +typedef struct HUF_CElt_s HUF_CElt; /* incomplete type */ +size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits); /* @return : maxNbBits; CTable and count can overlap. 
In which case, CTable will overwrite count content */ +size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog); +size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable); + +typedef enum { + HUF_repeat_none, /**< Cannot use the previous table */ + HUF_repeat_check, /**< Can use the previous table but it must be checked. Note : The previous table must have been constructed by HUF_compress{1, 4}X_repeat */ + HUF_repeat_valid /**< Can use the previous table and it is assumed to be valid */ + } HUF_repeat; +/** HUF_compress4X_repeat() : + * Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none. + * If it uses hufTable it does not modify hufTable or repeat. + * If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used. + * If preferRepeat then the old table will always be used if valid. */ +size_t HUF_compress4X_repeat(void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned tableLog, + void* workSpace, size_t wkspSize, /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */ + HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2); + +/** HUF_buildCTable_wksp() : + * Same as HUF_buildCTable(), but using externally allocated scratch buffer. + * `workSpace` must be aligned on 4-bytes boundaries, and its size must be >= HUF_CTABLE_WORKSPACE_SIZE. + */ +#define HUF_CTABLE_WORKSPACE_SIZE_U32 (2*HUF_SYMBOLVALUE_MAX +1 +1) +#define HUF_CTABLE_WORKSPACE_SIZE (HUF_CTABLE_WORKSPACE_SIZE_U32 * sizeof(unsigned)) +size_t HUF_buildCTable_wksp (HUF_CElt* tree, + const U32* count, U32 maxSymbolValue, U32 maxNbBits, + void* workSpace, size_t wkspSize); + +/*! HUF_readStats() : + * Read compact Huffman tree, saved by HUF_writeCTable(). + * `huffWeight` is destination buffer. 
+ * @return : size read from `src` , or an error Code . + * Note : Needed by HUF_readCTable() and HUF_readDTableXn() . */ +size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, + U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize); + +/** HUF_readCTable() : + * Loading a CTable saved with HUF_writeCTable() */ +size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize); + +/** HUF_getNbBits() : + * Read nbBits from CTable symbolTable, for symbol `symbolValue` presumed <= HUF_SYMBOLVALUE_MAX + * Note 1 : is not inlined, as HUF_CElt definition is private + * Note 2 : const void* used, so that it can provide a statically allocated table as argument (which uses type U32) */ +U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue); + +/* + * HUF_decompress() does the following: + * 1. select the decompression algorithm (X1, X2) based on pre-computed heuristics + * 2. build Huffman table from save, using HUF_readDTableX?() + * 3. decode 1 or 4 segments in parallel using HUF_decompress?X?_usingDTable() + */ + +/** HUF_selectDecoder() : + * Tells which decoder is likely to decode faster, + * based on a set of pre-computed metrics. + * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 . + * Assumption : 0 < dstSize <= 128 KB */ +U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize); + +/** + * The minimum workspace size for the `workSpace` used in + * HUF_readDTableX1_wksp() and HUF_readDTableX2_wksp(). + * + * The space used depends on HUF_TABLELOG_MAX, ranging from ~1500 bytes when + * HUF_TABLE_LOG_MAX=12 to ~1850 bytes when HUF_TABLE_LOG_MAX=15. + * Buffer overflow errors may potentially occur if code modifications result in + * a required workspace size greater than that specified in the following + * macro. 
+ */ +#define HUF_DECOMPRESS_WORKSPACE_SIZE (2 << 10) +#define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32)) + +size_t HUF_readDTableX1 (HUF_DTable* DTable, const void* src, size_t srcSize); +size_t HUF_readDTableX1_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize); +size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize); +size_t HUF_readDTableX2_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize); + +size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); +size_t HUF_decompress4X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); +size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); + + +/* ====================== */ +/* single stream variants */ +/* ====================== */ + +size_t HUF_compress1X (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog); +size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); /**< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */ +size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable); +/** HUF_compress1X_repeat() : + * Same as HUF_compress1X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none. + * If it uses hufTable it does not modify hufTable or repeat. + * If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used. + * If preferRepeat then the old table will always be used if valid. 
*/ +size_t HUF_compress1X_repeat(void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned tableLog, + void* workSpace, size_t wkspSize, /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */ + HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2); + +size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */ +size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */ + +size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); +size_t HUF_decompress1X_DCtx_wksp (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); +size_t HUF_decompress1X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ +size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */ +size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ +size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */ + +size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); /**< automatic selection of sing or double symbol decoder, based on DTable */ +size_t HUF_decompress1X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); +size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); + +/* BMI2 variants. 
+ * If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0. + */ +size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2); +size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2); +size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2); +size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2); + +#endif /* HUF_STATIC_LINKING_ONLY */ + +#if defined (__cplusplus) +} +#endif diff --git a/deps/SZ/zstd/common/mem.h b/deps/SZ/zstd/common/mem.h new file mode 100644 index 0000000000000000000000000000000000000000..47d2300177c0a1e618f95890b51077f2ab0a5c9c --- /dev/null +++ b/deps/SZ/zstd/common/mem.h @@ -0,0 +1,362 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +#ifndef MEM_H_MODULE +#define MEM_H_MODULE + +#if defined (__cplusplus) +extern "C" { +#endif + +/*-**************************************** +* Dependencies +******************************************/ +#include /* size_t, ptrdiff_t */ +#include /* memcpy */ + + +/*-**************************************** +* Compiler specifics +******************************************/ +#if defined(_MSC_VER) /* Visual Studio */ +# include /* _byteswap_ulong */ +# include /* _byteswap_* */ +#endif +#if defined(__GNUC__) +# define MEM_STATIC static __inline __attribute__((unused)) +#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +# define MEM_STATIC static inline +#elif defined(_MSC_VER) +# define MEM_STATIC static __inline +#else +# define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */ +#endif + +/* code only tested on 32 and 64 bits systems */ +#define MEM_STATIC_ASSERT(c) { enum { MEM_static_assert = 1/(int)(!!(c)) }; } +MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); } + + +/*-************************************************************** +* Basic Types +*****************************************************************/ +#if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) +# include + typedef uint8_t BYTE; + typedef uint16_t U16; + typedef int16_t S16; + typedef uint32_t U32; + typedef int32_t S32; + typedef uint64_t U64; + typedef int64_t S64; +#else + typedef unsigned char BYTE; + typedef unsigned short U16; + typedef signed short S16; + typedef unsigned int U32; + typedef signed int S32; + typedef unsigned long long U64; + typedef signed long long S64; +#endif + + +/*-************************************************************** +* Memory I/O +*****************************************************************/ +/* 
MEM_FORCE_MEMORY_ACCESS : + * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. + * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. + * The below switch allow to select different access method for improved performance. + * Method 0 (default) : use `memcpy()`. Safe and portable. + * Method 1 : `__packed` statement. It depends on compiler extension (i.e., not portable). + * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. + * Method 2 : direct access. This method is portable but violate C standard. + * It can generate buggy code on targets depending on alignment. + * In some circumstances, it's the only known way to get the most performance (i.e. GCC + ARMv6) + * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details. + * Prefer these methods in priority order (0 > 1 > 2) + */ +#ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ +# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) +# define MEM_FORCE_MEMORY_ACCESS 2 +# elif defined(__INTEL_COMPILER) || defined(__GNUC__) +# define MEM_FORCE_MEMORY_ACCESS 1 +# endif +#endif + +MEM_STATIC unsigned MEM_32bits(void) { return sizeof(size_t)==4; } +MEM_STATIC unsigned MEM_64bits(void) { return sizeof(size_t)==8; } + +MEM_STATIC unsigned MEM_isLittleEndian(void) +{ + const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */ + return one.c[0]; +} + +#if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2) + +/* violates C standard, by lying on structure alignment. 
+Only use if no other choice to achieve best performance on target platform */ +MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; } +MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; } +MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; } +MEM_STATIC size_t MEM_readST(const void* memPtr) { return *(const size_t*) memPtr; } + +MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; } +MEM_STATIC void MEM_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; } +MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(U64*)memPtr = value; } + +#elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1) + +/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ +/* currently only defined for gcc and icc */ +#if defined(_MSC_VER) || (defined(__INTEL_COMPILER) && defined(WIN32)) + __pragma( pack(push, 1) ) + typedef struct { U16 v; } unalign16; + typedef struct { U32 v; } unalign32; + typedef struct { U64 v; } unalign64; + typedef struct { size_t v; } unalignArch; + __pragma( pack(pop) ) +#else + typedef struct { U16 v; } __attribute__((packed)) unalign16; + typedef struct { U32 v; } __attribute__((packed)) unalign32; + typedef struct { U64 v; } __attribute__((packed)) unalign64; + typedef struct { size_t v; } __attribute__((packed)) unalignArch; +#endif + +MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign16*)ptr)->v; } +MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign32*)ptr)->v; } +MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign64*)ptr)->v; } +MEM_STATIC size_t MEM_readST(const void* ptr) { return ((const unalignArch*)ptr)->v; } + +MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign16*)memPtr)->v = value; } +MEM_STATIC void MEM_write32(void* memPtr, U32 value) { ((unalign32*)memPtr)->v = value; } +MEM_STATIC void MEM_write64(void* 
memPtr, U64 value) { ((unalign64*)memPtr)->v = value; } + +#else + +/* default method, safe and standard. + can sometimes prove slower */ + +MEM_STATIC U16 MEM_read16(const void* memPtr) +{ + U16 val; memcpy(&val, memPtr, sizeof(val)); return val; +} + +MEM_STATIC U32 MEM_read32(const void* memPtr) +{ + U32 val; memcpy(&val, memPtr, sizeof(val)); return val; +} + +MEM_STATIC U64 MEM_read64(const void* memPtr) +{ + U64 val; memcpy(&val, memPtr, sizeof(val)); return val; +} + +MEM_STATIC size_t MEM_readST(const void* memPtr) +{ + size_t val; memcpy(&val, memPtr, sizeof(val)); return val; +} + +MEM_STATIC void MEM_write16(void* memPtr, U16 value) +{ + memcpy(memPtr, &value, sizeof(value)); +} + +MEM_STATIC void MEM_write32(void* memPtr, U32 value) +{ + memcpy(memPtr, &value, sizeof(value)); +} + +MEM_STATIC void MEM_write64(void* memPtr, U64 value) +{ + memcpy(memPtr, &value, sizeof(value)); +} + +#endif /* MEM_FORCE_MEMORY_ACCESS */ + +MEM_STATIC U32 MEM_swap32(U32 in) +{ +#if defined(_MSC_VER) /* Visual Studio */ + return _byteswap_ulong(in); +#elif defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403) + return __builtin_bswap32(in); +#else + return ((in << 24) & 0xff000000 ) | + ((in << 8) & 0x00ff0000 ) | + ((in >> 8) & 0x0000ff00 ) | + ((in >> 24) & 0x000000ff ); +#endif +} + +MEM_STATIC U64 MEM_swap64(U64 in) +{ +#if defined(_MSC_VER) /* Visual Studio */ + return _byteswap_uint64(in); +#elif defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403) + return __builtin_bswap64(in); +#else + return ((in << 56) & 0xff00000000000000ULL) | + ((in << 40) & 0x00ff000000000000ULL) | + ((in << 24) & 0x0000ff0000000000ULL) | + ((in << 8) & 0x000000ff00000000ULL) | + ((in >> 8) & 0x00000000ff000000ULL) | + ((in >> 24) & 0x0000000000ff0000ULL) | + ((in >> 40) & 0x000000000000ff00ULL) | + ((in >> 56) & 0x00000000000000ffULL); +#endif +} + +MEM_STATIC size_t MEM_swapST(size_t in) +{ + if (MEM_32bits()) + return (size_t)MEM_swap32((U32)in); + else + return 
(size_t)MEM_swap64((U64)in); +} + +/*=== Little endian r/w ===*/ + +MEM_STATIC U16 MEM_readLE16(const void* memPtr) +{ + if (MEM_isLittleEndian()) + return MEM_read16(memPtr); + else { + const BYTE* p = (const BYTE*)memPtr; + return (U16)(p[0] + (p[1]<<8)); + } +} + +MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val) +{ + if (MEM_isLittleEndian()) { + MEM_write16(memPtr, val); + } else { + BYTE* p = (BYTE*)memPtr; + p[0] = (BYTE)val; + p[1] = (BYTE)(val>>8); + } +} + +MEM_STATIC U32 MEM_readLE24(const void* memPtr) +{ + return MEM_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16); +} + +MEM_STATIC void MEM_writeLE24(void* memPtr, U32 val) +{ + MEM_writeLE16(memPtr, (U16)val); + ((BYTE*)memPtr)[2] = (BYTE)(val>>16); +} + +MEM_STATIC U32 MEM_readLE32(const void* memPtr) +{ + if (MEM_isLittleEndian()) + return MEM_read32(memPtr); + else + return MEM_swap32(MEM_read32(memPtr)); +} + +MEM_STATIC void MEM_writeLE32(void* memPtr, U32 val32) +{ + if (MEM_isLittleEndian()) + MEM_write32(memPtr, val32); + else + MEM_write32(memPtr, MEM_swap32(val32)); +} + +MEM_STATIC U64 MEM_readLE64(const void* memPtr) +{ + if (MEM_isLittleEndian()) + return MEM_read64(memPtr); + else + return MEM_swap64(MEM_read64(memPtr)); +} + +MEM_STATIC void MEM_writeLE64(void* memPtr, U64 val64) +{ + if (MEM_isLittleEndian()) + MEM_write64(memPtr, val64); + else + MEM_write64(memPtr, MEM_swap64(val64)); +} + +MEM_STATIC size_t MEM_readLEST(const void* memPtr) +{ + if (MEM_32bits()) + return (size_t)MEM_readLE32(memPtr); + else + return (size_t)MEM_readLE64(memPtr); +} + +MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val) +{ + if (MEM_32bits()) + MEM_writeLE32(memPtr, (U32)val); + else + MEM_writeLE64(memPtr, (U64)val); +} + +/*=== Big endian r/w ===*/ + +MEM_STATIC U32 MEM_readBE32(const void* memPtr) +{ + if (MEM_isLittleEndian()) + return MEM_swap32(MEM_read32(memPtr)); + else + return MEM_read32(memPtr); +} + +MEM_STATIC void MEM_writeBE32(void* memPtr, U32 val32) +{ + if 
(MEM_isLittleEndian()) + MEM_write32(memPtr, MEM_swap32(val32)); + else + MEM_write32(memPtr, val32); +} + +MEM_STATIC U64 MEM_readBE64(const void* memPtr) +{ + if (MEM_isLittleEndian()) + return MEM_swap64(MEM_read64(memPtr)); + else + return MEM_read64(memPtr); +} + +MEM_STATIC void MEM_writeBE64(void* memPtr, U64 val64) +{ + if (MEM_isLittleEndian()) + MEM_write64(memPtr, MEM_swap64(val64)); + else + MEM_write64(memPtr, val64); +} + +MEM_STATIC size_t MEM_readBEST(const void* memPtr) +{ + if (MEM_32bits()) + return (size_t)MEM_readBE32(memPtr); + else + return (size_t)MEM_readBE64(memPtr); +} + +MEM_STATIC void MEM_writeBEST(void* memPtr, size_t val) +{ + if (MEM_32bits()) + MEM_writeBE32(memPtr, (U32)val); + else + MEM_writeBE64(memPtr, (U64)val); +} + + +#if defined (__cplusplus) +} +#endif + +#endif /* MEM_H_MODULE */ diff --git a/deps/SZ/zstd/common/pool.c b/deps/SZ/zstd/common/pool.c new file mode 100644 index 0000000000000000000000000000000000000000..281b3824ac4dae59a993be1cc673fdf838466b55 --- /dev/null +++ b/deps/SZ/zstd/common/pool.c @@ -0,0 +1,340 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + + +/* ====== Dependencies ======= */ +#include /* size_t */ +#include "debug.h" /* assert */ +#include "zstd_internal.h" /* ZSTD_malloc, ZSTD_free */ +#include "pool.h" + +/* ====== Compiler specifics ====== */ +#if defined(_MSC_VER) +# pragma warning(disable : 4204) /* disable: C4204: non-constant aggregate initializer */ +#endif + + +#ifdef ZSTD_MULTITHREAD + +#include "threading.h" /* pthread adaptation */ + +/* A job is a function and an opaque argument */ +typedef struct POOL_job_s { + POOL_function function; + void *opaque; +} POOL_job; + +struct POOL_ctx_s { + ZSTD_customMem customMem; + /* Keep track of the threads */ + ZSTD_pthread_t* threads; + size_t threadCapacity; + size_t threadLimit; + + /* The queue is a circular buffer */ + POOL_job *queue; + size_t queueHead; + size_t queueTail; + size_t queueSize; + + /* The number of threads working on jobs */ + size_t numThreadsBusy; + /* Indicates if the queue is empty */ + int queueEmpty; + + /* The mutex protects the queue */ + ZSTD_pthread_mutex_t queueMutex; + /* Condition variable for pushers to wait on when the queue is full */ + ZSTD_pthread_cond_t queuePushCond; + /* Condition variables for poppers to wait on when the queue is empty */ + ZSTD_pthread_cond_t queuePopCond; + /* Indicates if the queue is shutting down */ + int shutdown; +}; + +/* POOL_thread() : + * Work thread for the thread pool. + * Waits for jobs and executes them. + * @returns : NULL on failure else non-null. 
+ */ +static void* POOL_thread(void* opaque) { + POOL_ctx* const ctx = (POOL_ctx*)opaque; + if (!ctx) { return NULL; } + for (;;) { + /* Lock the mutex and wait for a non-empty queue or until shutdown */ + ZSTD_pthread_mutex_lock(&ctx->queueMutex); + + while ( ctx->queueEmpty + || (ctx->numThreadsBusy >= ctx->threadLimit) ) { + if (ctx->shutdown) { + /* even if !queueEmpty, (possible if numThreadsBusy >= threadLimit), + * a few threads will be shutdown while !queueEmpty, + * but enough threads will remain active to finish the queue */ + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + return opaque; + } + ZSTD_pthread_cond_wait(&ctx->queuePopCond, &ctx->queueMutex); + } + /* Pop a job off the queue */ + { POOL_job const job = ctx->queue[ctx->queueHead]; + ctx->queueHead = (ctx->queueHead + 1) % ctx->queueSize; + ctx->numThreadsBusy++; + ctx->queueEmpty = ctx->queueHead == ctx->queueTail; + /* Unlock the mutex, signal a pusher, and run the job */ + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + ZSTD_pthread_cond_signal(&ctx->queuePushCond); + + job.function(job.opaque); + + /* If the intended queue size was 0, signal after finishing job */ + ZSTD_pthread_mutex_lock(&ctx->queueMutex); + ctx->numThreadsBusy--; + if (ctx->queueSize == 1) { + ZSTD_pthread_cond_signal(&ctx->queuePushCond); + } + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + } + } /* for (;;) */ + assert(0); /* Unreachable */ +} + +POOL_ctx* POOL_create(size_t numThreads, size_t queueSize) { + return POOL_create_advanced(numThreads, queueSize, ZSTD_defaultCMem); +} + +POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, + ZSTD_customMem customMem) { + POOL_ctx* ctx; + /* Check parameters */ + if (!numThreads) { return NULL; } + /* Allocate the context and zero initialize */ + ctx = (POOL_ctx*)ZSTD_calloc(sizeof(POOL_ctx), customMem); + if (!ctx) { return NULL; } + /* Initialize the job queue. + * It needs one extra space since one space is wasted to differentiate + * empty and full queues. 
+ */ + ctx->queueSize = queueSize + 1; + ctx->queue = (POOL_job*)ZSTD_malloc(ctx->queueSize * sizeof(POOL_job), customMem); + ctx->queueHead = 0; + ctx->queueTail = 0; + ctx->numThreadsBusy = 0; + ctx->queueEmpty = 1; + (void)ZSTD_pthread_mutex_init(&ctx->queueMutex, NULL); + (void)ZSTD_pthread_cond_init(&ctx->queuePushCond, NULL); + (void)ZSTD_pthread_cond_init(&ctx->queuePopCond, NULL); + ctx->shutdown = 0; + /* Allocate space for the thread handles */ + ctx->threads = (ZSTD_pthread_t*)ZSTD_malloc(numThreads * sizeof(ZSTD_pthread_t), customMem); + ctx->threadCapacity = 0; + ctx->customMem = customMem; + /* Check for errors */ + if (!ctx->threads || !ctx->queue) { POOL_free(ctx); return NULL; } + /* Initialize the threads */ + { size_t i; + for (i = 0; i < numThreads; ++i) { + if (ZSTD_pthread_create(&ctx->threads[i], NULL, &POOL_thread, ctx)) { + ctx->threadCapacity = i; + POOL_free(ctx); + return NULL; + } } + ctx->threadCapacity = numThreads; + ctx->threadLimit = numThreads; + } + return ctx; +} + +/*! POOL_join() : + Shutdown the queue, wake any sleeping threads, and join all of the threads. 
+*/ +static void POOL_join(POOL_ctx* ctx) { + /* Shut down the queue */ + ZSTD_pthread_mutex_lock(&ctx->queueMutex); + ctx->shutdown = 1; + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + /* Wake up sleeping threads */ + ZSTD_pthread_cond_broadcast(&ctx->queuePushCond); + ZSTD_pthread_cond_broadcast(&ctx->queuePopCond); + /* Join all of the threads */ + { size_t i; + for (i = 0; i < ctx->threadCapacity; ++i) { + ZSTD_pthread_join(ctx->threads[i], NULL); /* note : could fail */ + } } +} + +void POOL_free(POOL_ctx *ctx) { + if (!ctx) { return; } + POOL_join(ctx); + ZSTD_pthread_mutex_destroy(&ctx->queueMutex); + ZSTD_pthread_cond_destroy(&ctx->queuePushCond); + ZSTD_pthread_cond_destroy(&ctx->queuePopCond); + ZSTD_free(ctx->queue, ctx->customMem); + ZSTD_free(ctx->threads, ctx->customMem); + ZSTD_free(ctx, ctx->customMem); +} + + + +size_t POOL_sizeof(POOL_ctx *ctx) { + if (ctx==NULL) return 0; /* supports sizeof NULL */ + return sizeof(*ctx) + + ctx->queueSize * sizeof(POOL_job) + + ctx->threadCapacity * sizeof(ZSTD_pthread_t); +} + + +/* @return : 0 on success, 1 on error */ +static int POOL_resize_internal(POOL_ctx* ctx, size_t numThreads) +{ + if (numThreads <= ctx->threadCapacity) { + if (!numThreads) return 1; + ctx->threadLimit = numThreads; + return 0; + } + /* numThreads > threadCapacity */ + { ZSTD_pthread_t* const threadPool = (ZSTD_pthread_t*)ZSTD_malloc(numThreads * sizeof(ZSTD_pthread_t), ctx->customMem); + if (!threadPool) return 1; + /* replace existing thread pool */ + memcpy(threadPool, ctx->threads, ctx->threadCapacity * sizeof(*threadPool)); + ZSTD_free(ctx->threads, ctx->customMem); + ctx->threads = threadPool; + /* Initialize additional threads */ + { size_t threadId; + for (threadId = ctx->threadCapacity; threadId < numThreads; ++threadId) { + if (ZSTD_pthread_create(&threadPool[threadId], NULL, &POOL_thread, ctx)) { + ctx->threadCapacity = threadId; + return 1; + } } + } } + /* successfully expanded */ + ctx->threadCapacity = numThreads; + 
ctx->threadLimit = numThreads; + return 0; +} + +/* @return : 0 on success, 1 on error */ +int POOL_resize(POOL_ctx* ctx, size_t numThreads) +{ + int result; + if (ctx==NULL) return 1; + ZSTD_pthread_mutex_lock(&ctx->queueMutex); + result = POOL_resize_internal(ctx, numThreads); + ZSTD_pthread_cond_broadcast(&ctx->queuePopCond); + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + return result; +} + +/** + * Returns 1 if the queue is full and 0 otherwise. + * + * When queueSize is 1 (pool was created with an intended queueSize of 0), + * then a queue is empty if there is a thread free _and_ no job is waiting. + */ +static int isQueueFull(POOL_ctx const* ctx) { + if (ctx->queueSize > 1) { + return ctx->queueHead == ((ctx->queueTail + 1) % ctx->queueSize); + } else { + return (ctx->numThreadsBusy == ctx->threadLimit) || + !ctx->queueEmpty; + } +} + + +static void POOL_add_internal(POOL_ctx* ctx, POOL_function function, void *opaque) +{ + POOL_job const job = {function, opaque}; + assert(ctx != NULL); + if (ctx->shutdown) return; + + ctx->queueEmpty = 0; + ctx->queue[ctx->queueTail] = job; + ctx->queueTail = (ctx->queueTail + 1) % ctx->queueSize; + ZSTD_pthread_cond_signal(&ctx->queuePopCond); +} + +void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque) +{ + assert(ctx != NULL); + ZSTD_pthread_mutex_lock(&ctx->queueMutex); + /* Wait until there is space in the queue for the new job */ + while (isQueueFull(ctx) && (!ctx->shutdown)) { + ZSTD_pthread_cond_wait(&ctx->queuePushCond, &ctx->queueMutex); + } + POOL_add_internal(ctx, function, opaque); + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); +} + + +int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque) +{ + assert(ctx != NULL); + ZSTD_pthread_mutex_lock(&ctx->queueMutex); + if (isQueueFull(ctx)) { + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + return 0; + } + POOL_add_internal(ctx, function, opaque); + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + return 1; +} + + +#else /* ZSTD_MULTITHREAD 
not defined */ + +/* ========================== */ +/* No multi-threading support */ +/* ========================== */ + + +/* We don't need any data, but if it is empty, malloc() might return NULL. */ +struct POOL_ctx_s { + int dummy; +}; +static POOL_ctx g_ctx; + +POOL_ctx* POOL_create(size_t numThreads, size_t queueSize) { + return POOL_create_advanced(numThreads, queueSize, ZSTD_defaultCMem); +} + +POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, ZSTD_customMem customMem) { + (void)numThreads; + (void)queueSize; + (void)customMem; + return &g_ctx; +} + +void POOL_free(POOL_ctx* ctx) { + assert(!ctx || ctx == &g_ctx); + (void)ctx; +} + +int POOL_resize(POOL_ctx* ctx, size_t numThreads) { + (void)ctx; (void)numThreads; + return 0; +} + +void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque) { + (void)ctx; + function(opaque); +} + +int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque) { + (void)ctx; + function(opaque); + return 1; +} + +size_t POOL_sizeof(POOL_ctx* ctx) { + if (ctx==NULL) return 0; /* supports sizeof NULL */ + assert(ctx == &g_ctx); + return sizeof(*ctx); +} + +#endif /* ZSTD_MULTITHREAD */ diff --git a/deps/SZ/zstd/common/pool.h b/deps/SZ/zstd/common/pool.h new file mode 100644 index 0000000000000000000000000000000000000000..458d37f13c3e805d0705871d6f762c6bf4a5263d --- /dev/null +++ b/deps/SZ/zstd/common/pool.h @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +#ifndef POOL_H +#define POOL_H + +#if defined (__cplusplus) +extern "C" { +#endif + + +#include <stddef.h> /* size_t */ +#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_customMem */ +#include "zstd.h" + +typedef struct POOL_ctx_s POOL_ctx; + +/*! POOL_create() : + * Create a thread pool with at most `numThreads` threads. + * `numThreads` must be at least 1. + * The maximum number of queued jobs before blocking is `queueSize`. + * @return : POOL_ctx pointer on success, else NULL. +*/ +POOL_ctx* POOL_create(size_t numThreads, size_t queueSize); + +POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, + ZSTD_customMem customMem); + +/*! POOL_free() : + * Free a thread pool returned by POOL_create(). + */ +void POOL_free(POOL_ctx* ctx); + +/*! POOL_resize() : + * Expands or shrinks pool's number of threads. + * This is more efficient than releasing + creating a new context, + * since it tries to preserve and re-use existing threads. + * `numThreads` must be at least 1. + * @return : 0 when resize was successful, + * !0 (typically 1) if there is an error. + * note : only numThreads can be resized, queueSize remains unchanged. + */ +int POOL_resize(POOL_ctx* ctx, size_t numThreads); + +/*! POOL_sizeof() : + * @return threadpool memory usage + * note : compatible with NULL (returns 0 in this case) + */ +size_t POOL_sizeof(POOL_ctx* ctx); + +/*! POOL_function : + * The function type that can be added to a thread pool. + */ +typedef void (*POOL_function)(void*); + +/*! POOL_add() : + * Add the job `function(opaque)` to the thread pool. `ctx` must be valid. + * Possibly blocks until there is room in the queue. + * Note : The function may be executed asynchronously, + * therefore, `opaque` must live until function has been completed. + */ +void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque); + + +/*! POOL_tryAdd() : + * Add the job `function(opaque)` to thread pool _if_ a worker is available. + * Returns immediately even if not (does not block). 
+ * @return : 1 if successful, 0 if not. + */ +int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque); + + +#if defined (__cplusplus) +} +#endif + +#endif diff --git a/deps/SZ/zstd/common/threading.c b/deps/SZ/zstd/common/threading.c new file mode 100644 index 0000000000000000000000000000000000000000..8be8c8da948a86fe9e7e0af9b6110fea277c1f2a --- /dev/null +++ b/deps/SZ/zstd/common/threading.c @@ -0,0 +1,75 @@ +/** + * Copyright (c) 2016 Tino Reichardt + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * + * You can contact the author at: + * - zstdmt source repository: https://github.com/mcmilk/zstdmt + */ + +/** + * This file will hold wrapper for systems, which do not support pthreads + */ + +/* create fake symbol to avoid empty translation unit warning */ +int g_ZSTD_threading_useles_symbol; + +#if defined(ZSTD_MULTITHREAD) && defined(_WIN32) + +/** + * Windows minimalist Pthread Wrapper, based on : + * http://www.cse.wustl.edu/~schmidt/win32-cv-1.html + */ + + +/* === Dependencies === */ +#include <process.h> +#include <errno.h> +#include "threading.h" + + +/* === Implementation === */ + +static unsigned __stdcall worker(void *arg) +{ + ZSTD_pthread_t* const thread = (ZSTD_pthread_t*) arg; + thread->arg = thread->start_routine(thread->arg); + return 0; +} + +int ZSTD_pthread_create(ZSTD_pthread_t* thread, const void* unused, + void* (*start_routine) (void*), void* arg) +{ + (void)unused; + thread->arg = arg; + thread->start_routine = start_routine; + thread->handle = (HANDLE) _beginthreadex(NULL, 0, worker, thread, 0, NULL); + + if (!thread->handle) + return errno; + else + return 0; +} + +int ZSTD_pthread_join(ZSTD_pthread_t thread, void **value_ptr) +{ + DWORD result; + + if (!thread.handle) return 0; + + result = WaitForSingleObject(thread.handle, INFINITE); + 
switch (result) { + case WAIT_OBJECT_0: + if (value_ptr) *value_ptr = thread.arg; + return 0; + case WAIT_ABANDONED: + return EINVAL; + default: + return GetLastError(); + } +} + +#endif /* ZSTD_MULTITHREAD */ diff --git a/deps/SZ/zstd/common/threading.h b/deps/SZ/zstd/common/threading.h new file mode 100644 index 0000000000000000000000000000000000000000..d806c89d01c9cc907f9c0aad946064c9fd82b1b4 --- /dev/null +++ b/deps/SZ/zstd/common/threading.h @@ -0,0 +1,123 @@ +/** + * Copyright (c) 2016 Tino Reichardt + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * + * You can contact the author at: + * - zstdmt source repository: https://github.com/mcmilk/zstdmt + */ + +#ifndef THREADING_H_938743 +#define THREADING_H_938743 + +#if defined (__cplusplus) +extern "C" { +#endif + +#if defined(ZSTD_MULTITHREAD) && defined(_WIN32) + +/** + * Windows minimalist Pthread Wrapper, based on : + * http://www.cse.wustl.edu/~schmidt/win32-cv-1.html + */ +#ifdef WINVER +# undef WINVER +#endif +#define WINVER 0x0600 + +#ifdef _WIN32_WINNT +# undef _WIN32_WINNT +#endif +#define _WIN32_WINNT 0x0600 + +#ifndef WIN32_LEAN_AND_MEAN +# define WIN32_LEAN_AND_MEAN +#endif + +#undef ERROR /* reported already defined on VS 2015 (Rich Geldreich) */ +#include <windows.h> +#undef ERROR +#define ERROR(name) ZSTD_ERROR(name) + + +/* mutex */ +#define ZSTD_pthread_mutex_t CRITICAL_SECTION +#define ZSTD_pthread_mutex_init(a, b) ((void)(b), InitializeCriticalSection((a)), 0) +#define ZSTD_pthread_mutex_destroy(a) DeleteCriticalSection((a)) +#define ZSTD_pthread_mutex_lock(a) EnterCriticalSection((a)) +#define ZSTD_pthread_mutex_unlock(a) LeaveCriticalSection((a)) + +/* condition variable */ +#define ZSTD_pthread_cond_t CONDITION_VARIABLE +#define ZSTD_pthread_cond_init(a, b) ((void)(b), 
InitializeConditionVariable((a)), 0) +#define ZSTD_pthread_cond_destroy(a) ((void)(a)) +#define ZSTD_pthread_cond_wait(a, b) SleepConditionVariableCS((a), (b), INFINITE) +#define ZSTD_pthread_cond_signal(a) WakeConditionVariable((a)) +#define ZSTD_pthread_cond_broadcast(a) WakeAllConditionVariable((a)) + +/* ZSTD_pthread_create() and ZSTD_pthread_join() */ +typedef struct { + HANDLE handle; + void* (*start_routine)(void*); + void* arg; +} ZSTD_pthread_t; + +int ZSTD_pthread_create(ZSTD_pthread_t* thread, const void* unused, + void* (*start_routine) (void*), void* arg); + +int ZSTD_pthread_join(ZSTD_pthread_t thread, void** value_ptr); + +/** + * add here more wrappers as required + */ + + +#elif defined(ZSTD_MULTITHREAD) /* posix assumed ; need a better detection method */ +/* === POSIX Systems === */ +# include <pthread.h> + +#define ZSTD_pthread_mutex_t pthread_mutex_t +#define ZSTD_pthread_mutex_init(a, b) pthread_mutex_init((a), (b)) +#define ZSTD_pthread_mutex_destroy(a) pthread_mutex_destroy((a)) +#define ZSTD_pthread_mutex_lock(a) pthread_mutex_lock((a)) +#define ZSTD_pthread_mutex_unlock(a) pthread_mutex_unlock((a)) + +#define ZSTD_pthread_cond_t pthread_cond_t +#define ZSTD_pthread_cond_init(a, b) pthread_cond_init((a), (b)) +#define ZSTD_pthread_cond_destroy(a) pthread_cond_destroy((a)) +#define ZSTD_pthread_cond_wait(a, b) pthread_cond_wait((a), (b)) +#define ZSTD_pthread_cond_signal(a) pthread_cond_signal((a)) +#define ZSTD_pthread_cond_broadcast(a) pthread_cond_broadcast((a)) + +#define ZSTD_pthread_t pthread_t +#define ZSTD_pthread_create(a, b, c, d) pthread_create((a), (b), (c), (d)) +#define ZSTD_pthread_join(a, b) pthread_join((a),(b)) + +#else /* ZSTD_MULTITHREAD not defined */ +/* No multithreading support */ + +typedef int ZSTD_pthread_mutex_t; +#define ZSTD_pthread_mutex_init(a, b) ((void)(a), (void)(b), 0) +#define ZSTD_pthread_mutex_destroy(a) ((void)(a)) +#define ZSTD_pthread_mutex_lock(a) ((void)(a)) +#define ZSTD_pthread_mutex_unlock(a) ((void)(a)) + 
+typedef int ZSTD_pthread_cond_t; +#define ZSTD_pthread_cond_init(a, b) ((void)(a), (void)(b), 0) +#define ZSTD_pthread_cond_destroy(a) ((void)(a)) +#define ZSTD_pthread_cond_wait(a, b) ((void)(a), (void)(b)) +#define ZSTD_pthread_cond_signal(a) ((void)(a)) +#define ZSTD_pthread_cond_broadcast(a) ((void)(a)) + +/* do not use ZSTD_pthread_t */ + +#endif /* ZSTD_MULTITHREAD */ + +#if defined (__cplusplus) +} +#endif + +#endif /* THREADING_H_938743 */ diff --git a/deps/SZ/zstd/common/xxhash.c b/deps/SZ/zstd/common/xxhash.c new file mode 100644 index 0000000000000000000000000000000000000000..9d9c0e963cbf5f09bc9f91bdd33ae5f75287c7cc --- /dev/null +++ b/deps/SZ/zstd/common/xxhash.c @@ -0,0 +1,875 @@ +/* +* xxHash - Fast Hash algorithm +* Copyright (C) 2012-2016, Yann Collet +* +* BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are +* met: +* +* * Redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer. +* * Redistributions in binary form must reproduce the above +* copyright notice, this list of conditions and the following disclaimer +* in the documentation and/or other materials provided with the +* distribution. +* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +* A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +* +* You can contact the author at : +* - xxHash homepage: http://www.xxhash.com +* - xxHash source repository : https://github.com/Cyan4973/xxHash +*/ + + +/* ************************************* +* Tuning parameters +***************************************/ +/*!XXH_FORCE_MEMORY_ACCESS : + * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. + * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. + * The below switch allow to select different access method for improved performance. + * Method 0 (default) : use `memcpy()`. Safe and portable. + * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). + * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. + * Method 2 : direct access. This method doesn't depend on compiler but violate C standard. + * It can generate buggy code on targets which do not support unaligned memory accesses. + * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) + * See http://stackoverflow.com/a/32095106/646947 for details. 
+ * Prefer these methods in priority order (0 > 1 > 2) + */ +#ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ +# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) +# define XXH_FORCE_MEMORY_ACCESS 2 +# elif (defined(__INTEL_COMPILER) && !defined(WIN32)) || \ + (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) )) +# define XXH_FORCE_MEMORY_ACCESS 1 +# endif +#endif + +/*!XXH_ACCEPT_NULL_INPUT_POINTER : + * If the input pointer is a null pointer, xxHash default behavior is to trigger a memory access error, since it is a bad pointer. + * When this option is enabled, xxHash output for null input pointers will be the same as a null-length input. + * By default, this option is disabled. To enable it, uncomment below define : + */ +/* #define XXH_ACCEPT_NULL_INPUT_POINTER 1 */ + +/*!XXH_FORCE_NATIVE_FORMAT : + * By default, xxHash library provides endian-independant Hash values, based on little-endian convention. + * Results are therefore identical for little-endian and big-endian CPU. + * This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format. + * Should endian-independance be of no importance for your application, you may set the #define below to 1, + * to improve speed for Big-endian CPU. + * This option has no impact on Little_Endian CPU. + */ +#ifndef XXH_FORCE_NATIVE_FORMAT /* can be defined externally */ +# define XXH_FORCE_NATIVE_FORMAT 0 +#endif + +/*!XXH_FORCE_ALIGN_CHECK : + * This is a minor performance trick, only useful with lots of very small keys. + * It means : check for aligned/unaligned input. 
+ * The check costs one initial branch per hash; set to 0 when the input data + * is guaranteed to be aligned. + */ +#ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */ +# if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64) +# define XXH_FORCE_ALIGN_CHECK 0 +# else +# define XXH_FORCE_ALIGN_CHECK 1 +# endif +#endif + + +/* ************************************* +* Includes & Memory related functions +***************************************/ +/* Modify the local functions below should you wish to use some other memory routines */ +/* for malloc(), free() */ +#include <stdlib.h> +static void* XXH_malloc(size_t s) { return malloc(s); } +static void XXH_free (void* p) { free(p); } +/* for memcpy() */ +#include <string.h> +static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); } + +#ifndef XXH_STATIC_LINKING_ONLY +# define XXH_STATIC_LINKING_ONLY +#endif +#include "xxhash.h" + + +/* ************************************* +* Compiler Specific Options +***************************************/ +#if defined (__GNUC__) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ +# define INLINE_KEYWORD inline +#else +# define INLINE_KEYWORD +#endif + +#if defined(__GNUC__) +# define FORCE_INLINE_ATTR __attribute__((always_inline)) +#elif defined(_MSC_VER) +# define FORCE_INLINE_ATTR __forceinline +#else +# define FORCE_INLINE_ATTR +#endif + +#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR + + +#ifdef _MSC_VER +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +#endif + + +/* ************************************* +* Basic Types +***************************************/ +#ifndef MEM_MODULE +# define MEM_MODULE +# if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) +# include <stdint.h> + typedef uint8_t BYTE; + typedef uint16_t U16; + typedef uint32_t U32; + typedef 
int32_t S32; + typedef uint64_t U64; +# else + typedef unsigned char BYTE; + typedef unsigned short U16; + typedef unsigned int U32; + typedef signed int S32; + typedef unsigned long long U64; /* if your compiler doesn't support unsigned long long, replace by another 64-bit type here. Note that xxhash.h will also need to be updated. */ +# endif +#endif + + +#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2)) + +/* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */ +static U32 XXH_read32(const void* memPtr) { return *(const U32*) memPtr; } +static U64 XXH_read64(const void* memPtr) { return *(const U64*) memPtr; } + +#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1)) + +/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ +/* currently only defined for gcc and icc */ +typedef union { U32 u32; U64 u64; } __attribute__((packed)) unalign; + +static U32 XXH_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } +static U64 XXH_read64(const void* ptr) { return ((const unalign*)ptr)->u64; } + +#else + +/* portable and safe solution. Generally efficient. 
+ * see : http://stackoverflow.com/a/32095106/646947 + */ + +static U32 XXH_read32(const void* memPtr) +{ + U32 val; + memcpy(&val, memPtr, sizeof(val)); + return val; +} + +static U64 XXH_read64(const void* memPtr) +{ + U64 val; + memcpy(&val, memPtr, sizeof(val)); + return val; +} + +#endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */ + + +/* **************************************** +* Compiler-specific Functions and Macros +******************************************/ +#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) + +/* Note : although _rotl exists for minGW (GCC under windows), performance seems poor */ +#if defined(_MSC_VER) +# define XXH_rotl32(x,r) _rotl(x,r) +# define XXH_rotl64(x,r) _rotl64(x,r) +#else +# define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r))) +# define XXH_rotl64(x,r) ((x << r) | (x >> (64 - r))) +#endif + +#if defined(_MSC_VER) /* Visual Studio */ +# define XXH_swap32 _byteswap_ulong +# define XXH_swap64 _byteswap_uint64 +#elif GCC_VERSION >= 403 +# define XXH_swap32 __builtin_bswap32 +# define XXH_swap64 __builtin_bswap64 +#else +static U32 XXH_swap32 (U32 x) +{ + return ((x << 24) & 0xff000000 ) | + ((x << 8) & 0x00ff0000 ) | + ((x >> 8) & 0x0000ff00 ) | + ((x >> 24) & 0x000000ff ); +} +static U64 XXH_swap64 (U64 x) +{ + return ((x << 56) & 0xff00000000000000ULL) | + ((x << 40) & 0x00ff000000000000ULL) | + ((x << 24) & 0x0000ff0000000000ULL) | + ((x << 8) & 0x000000ff00000000ULL) | + ((x >> 8) & 0x00000000ff000000ULL) | + ((x >> 24) & 0x0000000000ff0000ULL) | + ((x >> 40) & 0x000000000000ff00ULL) | + ((x >> 56) & 0x00000000000000ffULL); +} +#endif + + +/* ************************************* +* Architecture Macros +***************************************/ +typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess; + +/* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example on the compiler command line */ +#ifndef XXH_CPU_LITTLE_ENDIAN + static const int g_one = 1; +# define XXH_CPU_LITTLE_ENDIAN (*(const char*)(&g_one)) 
+#endif + + +/* *************************** +* Memory reads +*****************************/ +typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment; + +FORCE_INLINE_TEMPLATE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_alignment align) +{ + if (align==XXH_unaligned) + return endian==XXH_littleEndian ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr)); + else + return endian==XXH_littleEndian ? *(const U32*)ptr : XXH_swap32(*(const U32*)ptr); +} + +FORCE_INLINE_TEMPLATE U32 XXH_readLE32(const void* ptr, XXH_endianess endian) +{ + return XXH_readLE32_align(ptr, endian, XXH_unaligned); +} + +static U32 XXH_readBE32(const void* ptr) +{ + return XXH_CPU_LITTLE_ENDIAN ? XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr); +} + +FORCE_INLINE_TEMPLATE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align) +{ + if (align==XXH_unaligned) + return endian==XXH_littleEndian ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr)); + else + return endian==XXH_littleEndian ? *(const U64*)ptr : XXH_swap64(*(const U64*)ptr); +} + +FORCE_INLINE_TEMPLATE U64 XXH_readLE64(const void* ptr, XXH_endianess endian) +{ + return XXH_readLE64_align(ptr, endian, XXH_unaligned); +} + +static U64 XXH_readBE64(const void* ptr) +{ + return XXH_CPU_LITTLE_ENDIAN ? 
XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr); +} + + +/* ************************************* +* Macros +***************************************/ +#define XXH_STATIC_ASSERT(c) { enum { XXH_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */ + + +/* ************************************* +* Constants +***************************************/ +static const U32 PRIME32_1 = 2654435761U; +static const U32 PRIME32_2 = 2246822519U; +static const U32 PRIME32_3 = 3266489917U; +static const U32 PRIME32_4 = 668265263U; +static const U32 PRIME32_5 = 374761393U; + +static const U64 PRIME64_1 = 11400714785074694791ULL; +static const U64 PRIME64_2 = 14029467366897019727ULL; +static const U64 PRIME64_3 = 1609587929392839161ULL; +static const U64 PRIME64_4 = 9650029242287828579ULL; +static const U64 PRIME64_5 = 2870177450012600261ULL; + +XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; } + + +/* ************************** +* Utils +****************************/ +XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* restrict dstState, const XXH32_state_t* restrict srcState) +{ + memcpy(dstState, srcState, sizeof(*dstState)); +} + +XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* restrict dstState, const XXH64_state_t* restrict srcState) +{ + memcpy(dstState, srcState, sizeof(*dstState)); +} + + +/* *************************** +* Simple Hash Functions +*****************************/ + +static U32 XXH32_round(U32 seed, U32 input) +{ + seed += input * PRIME32_2; + seed = XXH_rotl32(seed, 13); + seed *= PRIME32_1; + return seed; +} + +FORCE_INLINE_TEMPLATE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, XXH_endianess endian, XXH_alignment align) +{ + const BYTE* p = (const BYTE*)input; + const BYTE* bEnd = p + len; + U32 h32; +#define XXH_get32bits(p) XXH_readLE32_align(p, endian, align) + +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER + if (p==NULL) { + len=0; + bEnd=p=(const BYTE*)(size_t)16; + } +#endif + + if (len>=16) 
{ + const BYTE* const limit = bEnd - 16; + U32 v1 = seed + PRIME32_1 + PRIME32_2; + U32 v2 = seed + PRIME32_2; + U32 v3 = seed + 0; + U32 v4 = seed - PRIME32_1; + + do { + v1 = XXH32_round(v1, XXH_get32bits(p)); p+=4; + v2 = XXH32_round(v2, XXH_get32bits(p)); p+=4; + v3 = XXH32_round(v3, XXH_get32bits(p)); p+=4; + v4 = XXH32_round(v4, XXH_get32bits(p)); p+=4; + } while (p<=limit); + + h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18); + } else { + h32 = seed + PRIME32_5; + } + + h32 += (U32) len; + + while (p+4<=bEnd) { + h32 += XXH_get32bits(p) * PRIME32_3; + h32 = XXH_rotl32(h32, 17) * PRIME32_4 ; + p+=4; + } + + while (p<bEnd) { + h32 += (*p) * PRIME32_5; + h32 = XXH_rotl32(h32, 11) * PRIME32_1 ; + p++; + } + + h32 ^= h32 >> 15; + h32 *= PRIME32_2; + h32 ^= h32 >> 13; + h32 *= PRIME32_3; + h32 ^= h32 >> 16; + + return h32; +} + + +XXH_PUBLIC_API unsigned int XXH32 (const void* input, size_t len, unsigned int seed) +{ +#if 0 + /* Simple version, good for code maintenance, but unfortunately slow for small inputs */ + XXH32_CREATESTATE_STATIC(state); + XXH32_reset(state, seed); + XXH32_update(state, input, len); + return XXH32_digest(state); +#else + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if (XXH_FORCE_ALIGN_CHECK) { + if ((((size_t)input) & 3) == 0) { /* Input is 4-bytes aligned, leverage the speed benefit */ + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned); + else + return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned); + } } + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned); + else + return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned); +#endif +} + + +static U64 XXH64_round(U64 acc, U64 input) +{ + acc += input * PRIME64_2; + acc = XXH_rotl64(acc, 31); + acc *= PRIME64_1; + return acc; +} + +static U64 XXH64_mergeRound(U64 acc, U64 val) +{ + val = 
XXH64_round(0, val); + acc ^= val; + acc = acc * PRIME64_1 + PRIME64_4; + return acc; +} + +FORCE_INLINE_TEMPLATE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, XXH_endianess endian, XXH_alignment align) +{ + const BYTE* p = (const BYTE*)input; + const BYTE* const bEnd = p + len; + U64 h64; +#define XXH_get64bits(p) XXH_readLE64_align(p, endian, align) + +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER + if (p==NULL) { + len=0; + bEnd=p=(const BYTE*)(size_t)32; + } +#endif + + if (len>=32) { + const BYTE* const limit = bEnd - 32; + U64 v1 = seed + PRIME64_1 + PRIME64_2; + U64 v2 = seed + PRIME64_2; + U64 v3 = seed + 0; + U64 v4 = seed - PRIME64_1; + + do { + v1 = XXH64_round(v1, XXH_get64bits(p)); p+=8; + v2 = XXH64_round(v2, XXH_get64bits(p)); p+=8; + v3 = XXH64_round(v3, XXH_get64bits(p)); p+=8; + v4 = XXH64_round(v4, XXH_get64bits(p)); p+=8; + } while (p<=limit); + + h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); + h64 = XXH64_mergeRound(h64, v1); + h64 = XXH64_mergeRound(h64, v2); + h64 = XXH64_mergeRound(h64, v3); + h64 = XXH64_mergeRound(h64, v4); + + } else { + h64 = seed + PRIME64_5; + } + + h64 += (U64) len; + + while (p+8<=bEnd) { + U64 const k1 = XXH64_round(0, XXH_get64bits(p)); + h64 ^= k1; + h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; + p+=8; + } + + if (p+4<=bEnd) { + h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1; + h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; + p+=4; + } + + while (p<bEnd) { + h64 ^= (*p) * PRIME64_5; + h64 = XXH_rotl64(h64, 11) * PRIME64_1; + p++; + } + + h64 ^= h64 >> 33; + h64 *= PRIME64_2; + h64 ^= h64 >> 29; + h64 *= PRIME64_3; + h64 ^= h64 >> 32; + + return h64; +} + + +XXH_PUBLIC_API unsigned long long XXH64 (const void* input, size_t len, unsigned long long seed) +{ +#if 0 + /* Simple version, good for code maintenance, but unfortunately slow for small inputs */ + XXH64_CREATESTATE_STATIC(state); + XXH64_reset(state, seed); + XXH64_update(state, input, len); + return XXH64_digest(state); +#else + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if 
(XXH_FORCE_ALIGN_CHECK) { + if ((((size_t)input) & 7)==0) { /* Input is aligned, let's leverage the speed advantage */ + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned); + else + return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned); + } } + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned); + else + return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned); +#endif +} + + +/* ************************************************** +* Advanced Hash Functions +****************************************************/ + +XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void) +{ + return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t)); +} +XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr) +{ + XXH_free(statePtr); + return XXH_OK; +} + +XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void) +{ + return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t)); +} +XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr) +{ + XXH_free(statePtr); + return XXH_OK; +} + + +/*** Hash feed ***/ + +XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, unsigned int seed) +{ + XXH32_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */ + memset(&state, 0, sizeof(state)-4); /* do not write into reserved, for future removal */ + state.v1 = seed + PRIME32_1 + PRIME32_2; + state.v2 = seed + PRIME32_2; + state.v3 = seed + 0; + state.v4 = seed - PRIME32_1; + memcpy(statePtr, &state, sizeof(state)); + return XXH_OK; +} + + +XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, unsigned long long seed) +{ + XXH64_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */ + memset(&state, 0, sizeof(state)-8); /* do not write into reserved, for 
future removal */ + state.v1 = seed + PRIME64_1 + PRIME64_2; + state.v2 = seed + PRIME64_2; + state.v3 = seed + 0; + state.v4 = seed - PRIME64_1; + memcpy(statePtr, &state, sizeof(state)); + return XXH_OK; +} + + +FORCE_INLINE_TEMPLATE XXH_errorcode XXH32_update_endian (XXH32_state_t* state, const void* input, size_t len, XXH_endianess endian) +{ + const BYTE* p = (const BYTE*)input; + const BYTE* const bEnd = p + len; + +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER + if (input==NULL) return XXH_ERROR; +#endif + + state->total_len_32 += (unsigned)len; + state->large_len |= (len>=16) | (state->total_len_32>=16); + + if (state->memsize + len < 16) { /* fill in tmp buffer */ + XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, len); + state->memsize += (unsigned)len; + return XXH_OK; + } + + if (state->memsize) { /* some data left from previous update */ + XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, 16-state->memsize); + { const U32* p32 = state->mem32; + state->v1 = XXH32_round(state->v1, XXH_readLE32(p32, endian)); p32++; + state->v2 = XXH32_round(state->v2, XXH_readLE32(p32, endian)); p32++; + state->v3 = XXH32_round(state->v3, XXH_readLE32(p32, endian)); p32++; + state->v4 = XXH32_round(state->v4, XXH_readLE32(p32, endian)); p32++; + } + p += 16-state->memsize; + state->memsize = 0; + } + + if (p <= bEnd-16) { + const BYTE* const limit = bEnd - 16; + U32 v1 = state->v1; + U32 v2 = state->v2; + U32 v3 = state->v3; + U32 v4 = state->v4; + + do { + v1 = XXH32_round(v1, XXH_readLE32(p, endian)); p+=4; + v2 = XXH32_round(v2, XXH_readLE32(p, endian)); p+=4; + v3 = XXH32_round(v3, XXH_readLE32(p, endian)); p+=4; + v4 = XXH32_round(v4, XXH_readLE32(p, endian)); p+=4; + } while (p<=limit); + + state->v1 = v1; + state->v2 = v2; + state->v3 = v3; + state->v4 = v4; + } + + if (p < bEnd) { + XXH_memcpy(state->mem32, p, (size_t)(bEnd-p)); + state->memsize = (unsigned)(bEnd-p); + } + + return XXH_OK; +} + +XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* 
state_in, const void* input, size_t len) +{ + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_update_endian(state_in, input, len, XXH_littleEndian); + else + return XXH32_update_endian(state_in, input, len, XXH_bigEndian); +} + + + +FORCE_INLINE_TEMPLATE U32 XXH32_digest_endian (const XXH32_state_t* state, XXH_endianess endian) +{ + const BYTE * p = (const BYTE*)state->mem32; + const BYTE* const bEnd = (const BYTE*)(state->mem32) + state->memsize; + U32 h32; + + if (state->large_len) { + h32 = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18); + } else { + h32 = state->v3 /* == seed */ + PRIME32_5; + } + + h32 += state->total_len_32; + + while (p+4<=bEnd) { + h32 += XXH_readLE32(p, endian) * PRIME32_3; + h32 = XXH_rotl32(h32, 17) * PRIME32_4; + p+=4; + } + + while (p<bEnd) { /* tail: bytes left over after the 4-byte steps */ + h32 += (*p) * PRIME32_5; + h32 = XXH_rotl32(h32, 11) * PRIME32_1; + p++; + } + + h32 ^= h32 >> 15; + h32 *= PRIME32_2; + h32 ^= h32 >> 13; + h32 *= PRIME32_3; + h32 ^= h32 >> 16; + + return h32; +} + + +XXH_PUBLIC_API unsigned int XXH32_digest (const XXH32_state_t* state_in) +{ + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_digest_endian(state_in, XXH_littleEndian); + else + return XXH32_digest_endian(state_in, XXH_bigEndian); +} + + + +/* **** XXH64 **** */ + +FORCE_INLINE_TEMPLATE XXH_errorcode XXH64_update_endian (XXH64_state_t* state, const void* input, size_t len, XXH_endianess endian) +{ + const BYTE* p = (const BYTE*)input; + const BYTE* const bEnd = p + len; + +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER + if (input==NULL) return XXH_ERROR; +#endif + + state->total_len += len; + + if (state->memsize + len < 32) { /* fill in tmp buffer */ + XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len); + state->memsize += (U32)len; + return XXH_OK; + } + + if (state->memsize) { /* tmp buffer is full */ + 
XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, 32-state->memsize); + state->v1 = XXH64_round(state->v1, XXH_readLE64(state->mem64+0, endian)); + state->v2 = XXH64_round(state->v2, XXH_readLE64(state->mem64+1, endian)); + state->v3 = XXH64_round(state->v3, XXH_readLE64(state->mem64+2, endian)); + state->v4 = XXH64_round(state->v4, XXH_readLE64(state->mem64+3, endian)); + p += 32-state->memsize; + state->memsize = 0; + } + + if (p+32 <= bEnd) { + const BYTE* const limit = bEnd - 32; + U64 v1 = state->v1; + U64 v2 = state->v2; + U64 v3 = state->v3; + U64 v4 = state->v4; + + do { + v1 = XXH64_round(v1, XXH_readLE64(p, endian)); p+=8; + v2 = XXH64_round(v2, XXH_readLE64(p, endian)); p+=8; + v3 = XXH64_round(v3, XXH_readLE64(p, endian)); p+=8; + v4 = XXH64_round(v4, XXH_readLE64(p, endian)); p+=8; + } while (p<=limit); + + state->v1 = v1; + state->v2 = v2; + state->v3 = v3; + state->v4 = v4; + } + + if (p < bEnd) { + XXH_memcpy(state->mem64, p, (size_t)(bEnd-p)); + state->memsize = (unsigned)(bEnd-p); + } + + return XXH_OK; +} + +XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void* input, size_t len) +{ + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_update_endian(state_in, input, len, XXH_littleEndian); + else + return XXH64_update_endian(state_in, input, len, XXH_bigEndian); +} + + + +FORCE_INLINE_TEMPLATE U64 XXH64_digest_endian (const XXH64_state_t* state, XXH_endianess endian) +{ + const BYTE * p = (const BYTE*)state->mem64; + const BYTE* const bEnd = (const BYTE*)state->mem64 + state->memsize; + U64 h64; + + if (state->total_len >= 32) { + U64 const v1 = state->v1; + U64 const v2 = state->v2; + U64 const v3 = state->v3; + U64 const v4 = state->v4; + + h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); + h64 = XXH64_mergeRound(h64, v1); + h64 = XXH64_mergeRound(h64, v2); + h64 = 
XXH64_mergeRound(h64, v3); + h64 = XXH64_mergeRound(h64, v4); + } else { + h64 = state->v3 + PRIME64_5; + } + + h64 += (U64) state->total_len; + + while (p+8<=bEnd) { + U64 const k1 = XXH64_round(0, XXH_readLE64(p, endian)); + h64 ^= k1; + h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; + p+=8; + } + + if (p+4<=bEnd) { + h64 ^= (U64)(XXH_readLE32(p, endian)) * PRIME64_1; + h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; + p+=4; + } + + while (p<bEnd) { /* tail: bytes left over after the 8- and 4-byte steps */ + h64 ^= (*p) * PRIME64_5; + h64 = XXH_rotl64(h64, 11) * PRIME64_1; + p++; + } + + h64 ^= h64 >> 33; + h64 *= PRIME64_2; + h64 ^= h64 >> 29; + h64 *= PRIME64_3; + h64 ^= h64 >> 32; + + return h64; +} + + +XXH_PUBLIC_API unsigned long long XXH64_digest (const XXH64_state_t* state_in) +{ + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_digest_endian(state_in, XXH_littleEndian); + else + return XXH64_digest_endian(state_in, XXH_bigEndian); +} + + +/* ************************** +* Canonical representation +****************************/ + +/*! Default XXH result types are basic unsigned 32 and 64 bits. +* The canonical representation follows human-readable write convention, aka big-endian (large digits first). +* These functions allow transformation of hash result into and from its canonical format. +* This way, hash values can be written into a file or buffer, and remain comparable across different systems and programs. 
+*/ + +XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash) +{ + XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t)); + if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash); + memcpy(dst, &hash, sizeof(*dst)); +} + +XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash) +{ + XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t)); + if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash); + memcpy(dst, &hash, sizeof(*dst)); +} + +XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src) +{ + return XXH_readBE32(src); +} + +XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src) +{ + return XXH_readBE64(src); +} diff --git a/deps/SZ/zstd/common/xxhash.h b/deps/SZ/zstd/common/xxhash.h new file mode 100644 index 0000000000000000000000000000000000000000..9bad1f59f63a93308341a1348efa9fbcaddbaa7c --- /dev/null +++ b/deps/SZ/zstd/common/xxhash.h @@ -0,0 +1,305 @@ +/* + xxHash - Extremely Fast Hash algorithm + Header File + Copyright (C) 2012-2016, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - xxHash source repository : https://github.com/Cyan4973/xxHash +*/ + +/* Notice extracted from xxHash homepage : + +xxHash is an extremely fast Hash algorithm, running at RAM speed limits. +It also successfully passes all tests from the SMHasher suite. + +Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz) + +Name Speed Q.Score Author +xxHash 5.4 GB/s 10 +CrapWow 3.2 GB/s 2 Andrew +MumurHash 3a 2.7 GB/s 10 Austin Appleby +SpookyHash 2.0 GB/s 10 Bob Jenkins +SBox 1.4 GB/s 9 Bret Mulvey +Lookup3 1.2 GB/s 9 Bob Jenkins +SuperFastHash 1.2 GB/s 1 Paul Hsieh +CityHash64 1.05 GB/s 10 Pike & Alakuijala +FNV 0.55 GB/s 5 Fowler, Noll, Vo +CRC32 0.43 GB/s 9 +MD5-32 0.33 GB/s 10 Ronald L. Rivest +SHA1-32 0.28 GB/s 10 + +Q.Score is a measure of quality of the hash function. +It depends on successfully passing SMHasher test set. +10 is a perfect score. + +A 64-bits version, named XXH64, is available since r35. +It offers much better speed, but for 64-bits applications only. 
+Name Speed on 64 bits Speed on 32 bits +XXH64 13.8 GB/s 1.9 GB/s +XXH32 6.8 GB/s 6.0 GB/s +*/ + +#if defined (__cplusplus) +extern "C" { +#endif + +#ifndef XXHASH_H_5627135585666179 +#define XXHASH_H_5627135585666179 1 + + +/* **************************** +* Definitions +******************************/ +#include <stddef.h> /* size_t */ +typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode; + + +/* **************************** +* API modifier +******************************/ +/** XXH_PRIVATE_API +* This is useful if you want to include xxhash functions in `static` mode +* in order to inline them, and remove their symbol from the public list. +* Methodology : +* #define XXH_PRIVATE_API +* #include "xxhash.h" +* `xxhash.c` is automatically included. +* It's not useful to compile and link it as a separate module anymore. +*/ +#ifdef XXH_PRIVATE_API +# ifndef XXH_STATIC_LINKING_ONLY +# define XXH_STATIC_LINKING_ONLY +# endif +# if defined(__GNUC__) +# define XXH_PUBLIC_API static __inline __attribute__((unused)) +# elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +# define XXH_PUBLIC_API static inline +# elif defined(_MSC_VER) +# define XXH_PUBLIC_API static __inline +# else +# define XXH_PUBLIC_API static /* this version may generate warnings for unused static functions; disable the relevant warning */ +# endif +#else +# define XXH_PUBLIC_API /* do nothing */ +#endif /* XXH_PRIVATE_API */ + +/*!XXH_NAMESPACE, aka Namespace Emulation : + +If you want to include _and expose_ xxHash functions from within your own library, +but also want to avoid symbol collisions with another library which also includes xxHash, + +you can use XXH_NAMESPACE, to automatically prefix any public symbol from xxhash library +with the value of XXH_NAMESPACE (so avoid to keep it NULL and avoid numeric values). 
+ +Note that no change is required within the calling program as long as it includes `xxhash.h` : +regular symbol name will be automatically translated by this header. +*/ +#ifdef XXH_NAMESPACE +# define XXH_CAT(A,B) A##B +# define XXH_NAME2(A,B) XXH_CAT(A,B) +# define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32) +# define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64) +# define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber) +# define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState) +# define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState) +# define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState) +# define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState) +# define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset) +# define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset) +# define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update) +# define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update) +# define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest) +# define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest) +# define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState) +# define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState) +# define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash) +# define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash) +# define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical) +# define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical) +#endif + + +/* ************************************* +* Version +***************************************/ +#define XXH_VERSION_MAJOR 0 +#define XXH_VERSION_MINOR 6 +#define XXH_VERSION_RELEASE 2 +#define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE) +XXH_PUBLIC_API unsigned XXH_versionNumber (void); + + +/* **************************** +* Simple Hash Functions +******************************/ 
+typedef unsigned int XXH32_hash_t; +typedef unsigned long long XXH64_hash_t; + +XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, unsigned int seed); +XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, unsigned long long seed); + +/*! +XXH32() : + Calculate the 32-bits hash of sequence "length" bytes stored at memory address "input". + The memory between input & input+length must be valid (allocated and read-accessible). + "seed" can be used to alter the result predictably. + Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s +XXH64() : + Calculate the 64-bits hash of sequence of length "len" stored at memory address "input". + "seed" can be used to alter the result predictably. + This function runs 2x faster on 64-bits systems, but slower on 32-bits systems (see benchmark). +*/ + + +/* **************************** +* Streaming Hash Functions +******************************/ +typedef struct XXH32_state_s XXH32_state_t; /* incomplete type */ +typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */ + +/*! 
State allocation, compatible with dynamic libraries */ + +XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void); +XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr); + +XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void); +XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr); + + +/* hash streaming */ + +XXH_PUBLIC_API XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, unsigned int seed); +XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length); +XXH_PUBLIC_API XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr); + +XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, unsigned long long seed); +XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length); +XXH_PUBLIC_API XXH64_hash_t XXH64_digest (const XXH64_state_t* statePtr); + +/* +These functions generate the xxHash of an input provided in multiple segments. +Note that, for small input, they are slower than single-call functions, due to state management. +For small input, prefer `XXH32()` and `XXH64()` . + +XXH state must first be allocated, using XXH*_createState() . + +Start a new hash by initializing state with a seed, using XXH*_reset(). + +Then, feed the hash state by calling XXH*_update() as many times as necessary. +Obviously, input must be allocated and read accessible. +The function returns an error code, with 0 meaning OK, and any other value meaning there is an error. + +Finally, a hash value can be produced anytime, by using XXH*_digest(). +This function returns the nn-bits hash as an int or long long. + +It's still possible to continue inserting input into the hash state after a digest, +and generate some new hashes later on, by calling again XXH*_digest(). + +When done, free XXH state space if it was allocated dynamically. 
+*/ + + +/* ************************** +* Utils +****************************/ +#if !(defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) /* ! C99 */ +# define restrict /* disable restrict */ +#endif + +XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* restrict dst_state, const XXH32_state_t* restrict src_state); +XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* restrict dst_state, const XXH64_state_t* restrict src_state); + + +/* ************************** +* Canonical representation +****************************/ +/* Default result type for XXH functions are primitive unsigned 32 and 64 bits. +* The canonical representation uses human-readable write convention, aka big-endian (large digits first). +* These functions allow transformation of hash result into and from its canonical format. +* This way, hash values can be written into a file / memory, and remain comparable on different systems and programs. +*/ +typedef struct { unsigned char digest[4]; } XXH32_canonical_t; +typedef struct { unsigned char digest[8]; } XXH64_canonical_t; + +XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash); +XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash); + +XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src); +XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src); + +#endif /* XXHASH_H_5627135585666179 */ + + + +/* ================================================================================================ + This section contains definitions which are not guaranteed to remain stable. + They may change in future versions, becoming incompatible with a different version of the library. + They shall only be used with static linking. + Never use these definitions in association with dynamic linking ! 
+=================================================================================================== */ +#if defined(XXH_STATIC_LINKING_ONLY) && !defined(XXH_STATIC_H_3543687687345) +#define XXH_STATIC_H_3543687687345 + +/* These definitions are only meant to allow allocation of XXH state + statically, on stack, or in a struct for example. + Do not use members directly. */ + + struct XXH32_state_s { + unsigned total_len_32; + unsigned large_len; + unsigned v1; + unsigned v2; + unsigned v3; + unsigned v4; + unsigned mem32[4]; /* buffer defined as U32 for alignment */ + unsigned memsize; + unsigned reserved; /* never read nor write, will be removed in a future version */ + }; /* typedef'd to XXH32_state_t */ + + struct XXH64_state_s { + unsigned long long total_len; + unsigned long long v1; + unsigned long long v2; + unsigned long long v3; + unsigned long long v4; + unsigned long long mem64[4]; /* buffer defined as U64 for alignment */ + unsigned memsize; + unsigned reserved[2]; /* never read nor write, will be removed in a future version */ + }; /* typedef'd to XXH64_state_t */ + + +# ifdef XXH_PRIVATE_API +# include "xxhash.c" /* include xxhash functions as `static`, for inlining */ +# endif + +#endif /* XXH_STATIC_LINKING_ONLY && XXH_STATIC_H_3543687687345 */ + + +#if defined (__cplusplus) +} +#endif diff --git a/deps/SZ/zstd/common/zstd_common.c b/deps/SZ/zstd/common/zstd_common.c new file mode 100644 index 0000000000000000000000000000000000000000..6f05d240e43cfdbec4009a916a7c6710c2418b30 --- /dev/null +++ b/deps/SZ/zstd/common/zstd_common.c @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + + + +/*-************************************* +* Dependencies +***************************************/ +#include /* malloc, calloc, free */ +#include /* memset */ +#include "error_private.h" +#include "zstd_internal.h" + + +/*-**************************************** +* Version +******************************************/ +unsigned ZSTD_versionNumber(void) { return ZSTD_VERSION_NUMBER; } + +const char* ZSTD_versionString(void) { return ZSTD_VERSION_STRING; } + + +/*-**************************************** +* ZSTD Error Management +******************************************/ +/*! ZSTD_isError() : + * tells if a return value is an error code */ +unsigned ZSTD_isError(size_t code) { return ERR_isError(code); } + +/*! ZSTD_getErrorName() : + * provides error code string from function result (useful for debugging) */ +const char* ZSTD_getErrorName(size_t code) { return ERR_getErrorName(code); } + +/*! ZSTD_getError() : + * convert a `size_t` function result into a proper ZSTD_errorCode enum */ +ZSTD_ErrorCode ZSTD_getErrorCode(size_t code) { return ERR_getErrorCode(code); } + +/*! 
ZSTD_getErrorString() : + * provides error code string from enum */ +const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorString(code); } + + + +/*=************************************************************** +* Custom allocator +****************************************************************/ +void* ZSTD_malloc(size_t size, ZSTD_customMem customMem) +{ + if (customMem.customAlloc) + return customMem.customAlloc(customMem.opaque, size); + return malloc(size); +} + +void* ZSTD_calloc(size_t size, ZSTD_customMem customMem) +{ + if (customMem.customAlloc) { + /* calloc implemented as malloc+memset; + * not as efficient as calloc, but next best guess for custom malloc */ + void* const ptr = customMem.customAlloc(customMem.opaque, size); + memset(ptr, 0, size); + return ptr; + } + return calloc(1, size); +} + +void ZSTD_free(void* ptr, ZSTD_customMem customMem) +{ + if (ptr!=NULL) { + if (customMem.customFree) + customMem.customFree(customMem.opaque, ptr); + else + free(ptr); + } +} diff --git a/deps/SZ/zstd/common/zstd_errors.h b/deps/SZ/zstd/common/zstd_errors.h new file mode 100644 index 0000000000000000000000000000000000000000..57533f28696b38626162f8a22d3e609485eda8ba --- /dev/null +++ b/deps/SZ/zstd/common/zstd_errors.h @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +#ifndef ZSTD_ERRORS_H_398273423 +#define ZSTD_ERRORS_H_398273423 + +#if defined (__cplusplus) +extern "C" { +#endif + +/*===== dependency =====*/ +#include /* size_t */ + + +/* ===== ZSTDERRORLIB_API : control library symbols visibility ===== */ +#ifndef ZSTDERRORLIB_VISIBILITY +# if defined(__GNUC__) && (__GNUC__ >= 4) +# define ZSTDERRORLIB_VISIBILITY __attribute__ ((visibility ("default"))) +# else +# define ZSTDERRORLIB_VISIBILITY +# endif +#endif +#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1) +# define ZSTDERRORLIB_API __declspec(dllexport) ZSTDERRORLIB_VISIBILITY +#elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1) +# define ZSTDERRORLIB_API __declspec(dllimport) ZSTDERRORLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/ +#else +# define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBILITY +#endif + +/*-********************************************* + * Error codes list + *-********************************************* + * Error codes _values_ are pinned down since v1.3.1 only. + * Therefore, don't rely on values if you may link to any version < v1.3.1. + * + * Only values < 100 are considered stable. + * + * note 1 : this API shall be used with static linking only. + * dynamic linking is not yet officially supported. + * note 2 : Prefer relying on the enum than on its value whenever possible + * This is the only supported way to use the error list < v1.3.1 + * note 3 : ZSTD_isError() is always correct, whatever the library version. 
+ **********************************************/ +typedef enum { + ZSTD_error_no_error = 0, + ZSTD_error_GENERIC = 1, + ZSTD_error_prefix_unknown = 10, + ZSTD_error_version_unsupported = 12, + ZSTD_error_frameParameter_unsupported = 14, + ZSTD_error_frameParameter_windowTooLarge = 16, + ZSTD_error_corruption_detected = 20, + ZSTD_error_checksum_wrong = 22, + ZSTD_error_dictionary_corrupted = 30, + ZSTD_error_dictionary_wrong = 32, + ZSTD_error_dictionaryCreation_failed = 34, + ZSTD_error_parameter_unsupported = 40, + ZSTD_error_parameter_outOfBound = 42, + ZSTD_error_tableLog_tooLarge = 44, + ZSTD_error_maxSymbolValue_tooLarge = 46, + ZSTD_error_maxSymbolValue_tooSmall = 48, + ZSTD_error_stage_wrong = 60, + ZSTD_error_init_missing = 62, + ZSTD_error_memory_allocation = 64, + ZSTD_error_workSpace_tooSmall= 66, + ZSTD_error_dstSize_tooSmall = 70, + ZSTD_error_srcSize_wrong = 72, + /* following error codes are __NOT STABLE__, they can be removed or changed in future versions */ + ZSTD_error_frameIndex_tooLarge = 100, + ZSTD_error_seekableIO = 102, + ZSTD_error_maxCode = 120 /* never EVER use this value directly, it can change in future versions! Use ZSTD_isError() instead */ +} ZSTD_ErrorCode; + +/*! 
ZSTD_getErrorCode() : + convert a `size_t` function result into a `ZSTD_ErrorCode` enum type, + which can be used to compare with enum list published above */ +ZSTDERRORLIB_API ZSTD_ErrorCode ZSTD_getErrorCode(size_t functionResult); +ZSTDERRORLIB_API const char* ZSTD_getErrorString(ZSTD_ErrorCode code); /**< Same as ZSTD_getErrorName, but using a `ZSTD_ErrorCode` enum argument */ + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_ERRORS_H_398273423 */ diff --git a/deps/SZ/zstd/common/zstd_internal.h b/deps/SZ/zstd/common/zstd_internal.h new file mode 100644 index 0000000000000000000000000000000000000000..b4c1af53f9b1fbc4a487574a6f10f3f47a2e298e --- /dev/null +++ b/deps/SZ/zstd/common/zstd_internal.h @@ -0,0 +1,256 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_CCOMMON_H_MODULE +#define ZSTD_CCOMMON_H_MODULE + +/* this module contains definitions which must be identical + * across compression, decompression and dictBuilder. 
+ * It also contains a few functions useful to at least 2 of them + * and which benefit from being inlined */ + +/*-************************************* +* Dependencies +***************************************/ +#include "compiler.h" +#include "mem.h" +#include "debug.h" /* assert, DEBUGLOG, RAWLOG, g_debuglevel */ +#include "error_private.h" +#define ZSTD_STATIC_LINKING_ONLY +#include "zstd.h" +#define FSE_STATIC_LINKING_ONLY +#include "fse.h" +#define HUF_STATIC_LINKING_ONLY +#include "huf.h" +#ifndef XXH_STATIC_LINKING_ONLY +# define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */ +#endif +#include "xxhash.h" /* XXH_reset, update, digest */ + + +#if defined (__cplusplus) +extern "C" { +#endif + +/* ---- static assert (debug) --- */ +#define ZSTD_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) + + +/*-************************************* +* shared macros +***************************************/ +#undef MIN +#undef MAX +#define MIN(a,b) ((a)<(b) ? (a) : (b)) +#define MAX(a,b) ((a)>(b) ? (a) : (b)) +#define CHECK_F(f) { size_t const errcod = f; if (ERR_isError(errcod)) return errcod; } /* check and Forward error code */ +#define CHECK_E(f, e) { size_t const errcod = f; if (ERR_isError(errcod)) return ERROR(e); } /* check and send Error code */ + + +/*-************************************* +* Common constants +***************************************/ +#define ZSTD_OPT_NUM (1<<12) + +#define ZSTD_REP_NUM 3 /* number of repcodes */ +#define ZSTD_REP_MOVE (ZSTD_REP_NUM-1) +static const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 }; + +#define KB *(1 <<10) +#define MB *(1 <<20) +#define GB *(1U<<30) + +#define BIT7 128 +#define BIT6 64 +#define BIT5 32 +#define BIT4 16 +#define BIT1 2 +#define BIT0 1 + +#define ZSTD_WINDOWLOG_ABSOLUTEMIN 10 +#define ZSTD_WINDOWLOG_DEFAULTMAX 27 /* Default maximum allowed window log */ +static const size_t ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 }; +static const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 }; + +#define ZSTD_FRAMEIDSIZE 4 +static 
const size_t ZSTD_frameIdSize = ZSTD_FRAMEIDSIZE; /* magic number size */ + +#define ZSTD_BLOCKHEADERSIZE 3 /* C standard doesn't allow `static const` variable to be init using another `static const` variable */ +static const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE; +typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e; + +#define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */ +#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */) /* for a non-null block */ + +#define HufLog 12 +typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingType_e; + +#define LONGNBSEQ 0x7F00 + +#define MINMATCH 3 + +#define Litbits 8 +#define MaxLit ((1<= 3) /* GCC Intrinsic */ + return 31 - __builtin_clz(val); +# else /* Software version */ + static const U32 DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; + U32 v = val; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + return DeBruijnClz[(v * 0x07C4ACDDU) >> 27]; +# endif + } +} + + +/* ZSTD_invalidateRepCodes() : + * ensures next compression will not use repcodes from previous block. + * Note : only works with regular variant; + * do not use with extDict variant ! */ +void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx); /* zstdmt, adaptive_compression (shouldn't get this definition from here) */ + + +typedef struct { + blockType_e blockType; + U32 lastBlock; + U32 origSize; +} blockProperties_t; + +/*! 
ZSTD_getcBlockSize() : + * Provides the size of compressed block from block header `src` */ +/* Used by: decompress, fullbench (does not get its definition from here) */ +size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, + blockProperties_t* bpPtr); + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_CCOMMON_H_MODULE */ diff --git a/deps/SZ/zstd/compress/fse_compress.c b/deps/SZ/zstd/compress/fse_compress.c new file mode 100644 index 0000000000000000000000000000000000000000..07b3ab89bd7c02e573377d2952cd8fbd9ed79227 --- /dev/null +++ b/deps/SZ/zstd/compress/fse_compress.c @@ -0,0 +1,714 @@ +/* ****************************************************************** + FSE : Finite State Entropy encoder + Copyright (C) 2013-present, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy + - Public forum : https://groups.google.com/forum/#!forum/lz4c +****************************************************************** */ + +/* ************************************************************** +* Includes +****************************************************************/ +#include /* malloc, free, qsort */ +#include /* memcpy, memset */ +#include "compiler.h" +#include "mem.h" /* U32, U16, etc. 
*/ +#include "debug.h" /* assert, DEBUGLOG */ +#include "hist.h" /* HIST_count_wksp */ +#include "bitstream.h" +#define FSE_STATIC_LINKING_ONLY +#include "fse.h" +#include "error_private.h" + + +/* ************************************************************** +* Error Management +****************************************************************/ +#define FSE_isError ERR_isError + + +/* ************************************************************** +* Templates +****************************************************************/ +/* + designed to be included + for type-specific functions (template emulation in C) + Objective is to write these functions only once, for improved maintenance +*/ + +/* safety checks */ +#ifndef FSE_FUNCTION_EXTENSION +# error "FSE_FUNCTION_EXTENSION must be defined" +#endif +#ifndef FSE_FUNCTION_TYPE +# error "FSE_FUNCTION_TYPE must be defined" +#endif + +/* Function names */ +#define FSE_CAT(X,Y) X##Y +#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y) +#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y) + + +/* Function templates */ + +/* FSE_buildCTable_wksp() : + * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`). 
+ * wkspSize should be sized to handle worst case situation, which is `1<>1 : 1) ; + FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT); + U32 const step = FSE_TABLESTEP(tableSize); + U32 cumul[FSE_MAX_SYMBOL_VALUE+2]; + + FSE_FUNCTION_TYPE* const tableSymbol = (FSE_FUNCTION_TYPE*)workSpace; + U32 highThreshold = tableSize-1; + + /* CTable header */ + if (((size_t)1 << tableLog) * sizeof(FSE_FUNCTION_TYPE) > wkspSize) return ERROR(tableLog_tooLarge); + tableU16[-2] = (U16) tableLog; + tableU16[-1] = (U16) maxSymbolValue; + assert(tableLog < 16); /* required for the threshold strategy to work */ + + /* For explanations on how to distribute symbol values over the table : + * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */ + + /* symbol start positions */ + { U32 u; + cumul[0] = 0; + for (u=1; u<=maxSymbolValue+1; u++) { + if (normalizedCounter[u-1]==-1) { /* Low proba symbol */ + cumul[u] = cumul[u-1] + 1; + tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u-1); + } else { + cumul[u] = cumul[u-1] + normalizedCounter[u-1]; + } } + cumul[maxSymbolValue+1] = tableSize+1; + } + + /* Spread symbols */ + { U32 position = 0; + U32 symbol; + for (symbol=0; symbol<=maxSymbolValue; symbol++) { + int nbOccurences; + for (nbOccurences=0; nbOccurences highThreshold) position = (position + step) & tableMask; /* Low proba area */ + } } + + if (position!=0) return ERROR(GENERIC); /* Must have gone through all positions */ + } + + /* Build table */ + { U32 u; for (u=0; u> 3) + 3; + return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? 
use default */ +} + +static size_t FSE_writeNCount_generic (void* header, size_t headerBufferSize, + const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, + unsigned writeIsSafe) +{ + BYTE* const ostart = (BYTE*) header; + BYTE* out = ostart; + BYTE* const oend = ostart + headerBufferSize; + int nbBits; + const int tableSize = 1 << tableLog; + int remaining; + int threshold; + U32 bitStream; + int bitCount; + unsigned charnum = 0; + int previous0 = 0; + + bitStream = 0; + bitCount = 0; + /* Table Size */ + bitStream += (tableLog-FSE_MIN_TABLELOG) << bitCount; + bitCount += 4; + + /* Init */ + remaining = tableSize+1; /* +1 for extra accuracy */ + threshold = tableSize; + nbBits = tableLog+1; + + while (remaining>1) { /* stops at 1 */ + if (previous0) { + unsigned start = charnum; + while (!normalizedCounter[charnum]) charnum++; + while (charnum >= start+24) { + start+=24; + bitStream += 0xFFFFU << bitCount; + if ((!writeIsSafe) && (out > oend-2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */ + out[0] = (BYTE) bitStream; + out[1] = (BYTE)(bitStream>>8); + out+=2; + bitStream>>=16; + } + while (charnum >= start+3) { + start+=3; + bitStream += 3 << bitCount; + bitCount += 2; + } + bitStream += (charnum-start) << bitCount; + bitCount += 2; + if (bitCount>16) { + if ((!writeIsSafe) && (out > oend - 2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */ + out[0] = (BYTE)bitStream; + out[1] = (BYTE)(bitStream>>8); + out += 2; + bitStream >>= 16; + bitCount -= 16; + } } + { int count = normalizedCounter[charnum++]; + int const max = (2*threshold-1)-remaining; + remaining -= count < 0 ? -count : count; + count++; /* +1 for extra accuracy */ + if (count>=threshold) count += max; /* [0..max[ [max..threshold[ (...) 
[threshold+max 2*threshold[ */ + bitStream += count << bitCount; + bitCount += nbBits; + bitCount -= (count>=1; } + } + if (bitCount>16) { + if ((!writeIsSafe) && (out > oend - 2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */ + out[0] = (BYTE)bitStream; + out[1] = (BYTE)(bitStream>>8); + out += 2; + bitStream >>= 16; + bitCount -= 16; + } } + + /* flush remaining bitStream */ + if ((!writeIsSafe) && (out > oend - 2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */ + out[0] = (BYTE)bitStream; + out[1] = (BYTE)(bitStream>>8); + out+= (bitCount+7) /8; + + if (charnum > maxSymbolValue + 1) return ERROR(GENERIC); + + return (out-ostart); +} + + +size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) +{ + if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Unsupported */ + if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC); /* Unsupported */ + + if (bufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog)) + return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 0); + + return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1); +} + + +/*-************************************************************** +* FSE Compression Code +****************************************************************/ +/*! FSE_sizeof_CTable() : + FSE_CTable is a variable size structure which contains : + `U16 tableLog;` + `U16 maxSymbolValue;` + `U16 nextStateNumber[1 << tableLog];` // This size is variable + `FSE_symbolCompressionTransform symbolTT[maxSymbolValue+1];` // This size is variable +Allocation is manual (C standard does not support variable-size structures). 
+*/ +size_t FSE_sizeof_CTable (unsigned maxSymbolValue, unsigned tableLog) +{ + if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); + return FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32); +} + +FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog) +{ + size_t size; + if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX; + size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32); + return (FSE_CTable*)malloc(size); +} + +void FSE_freeCTable (FSE_CTable* ct) { free(ct); } + +/* provides the minimum logSize to safely represent a distribution */ +static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue) +{ + U32 minBitsSrc = BIT_highbit32((U32)(srcSize - 1)) + 1; + U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2; + U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols; + assert(srcSize > 1); /* Not supported, RLE should be used instead */ + return minBits; +} + +unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus) +{ + U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus; + U32 tableLog = maxTableLog; + U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue); + assert(srcSize > 1); /* Not supported, RLE should be used instead */ + if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG; + if (maxBitsSrc < tableLog) tableLog = maxBitsSrc; /* Accuracy can be reduced */ + if (minBits > tableLog) tableLog = minBits; /* Need a minimum to safely represent all symbol values */ + if (tableLog < FSE_MIN_TABLELOG) tableLog = FSE_MIN_TABLELOG; + if (tableLog > FSE_MAX_TABLELOG) tableLog = FSE_MAX_TABLELOG; + return tableLog; +} + +unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue) +{ + return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 2); +} + + +/* Secondary normalization method. + To be used when primary method fails. 
*/ + +static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t total, U32 maxSymbolValue) +{ + short const NOT_YET_ASSIGNED = -2; + U32 s; + U32 distributed = 0; + U32 ToDistribute; + + /* Init */ + U32 const lowThreshold = (U32)(total >> tableLog); + U32 lowOne = (U32)((total * 3) >> (tableLog + 1)); + + for (s=0; s<=maxSymbolValue; s++) { + if (count[s] == 0) { + norm[s]=0; + continue; + } + if (count[s] <= lowThreshold) { + norm[s] = -1; + distributed++; + total -= count[s]; + continue; + } + if (count[s] <= lowOne) { + norm[s] = 1; + distributed++; + total -= count[s]; + continue; + } + + norm[s]=NOT_YET_ASSIGNED; + } + ToDistribute = (1 << tableLog) - distributed; + + if ((total / ToDistribute) > lowOne) { + /* risk of rounding to zero */ + lowOne = (U32)((total * 3) / (ToDistribute * 2)); + for (s=0; s<=maxSymbolValue; s++) { + if ((norm[s] == NOT_YET_ASSIGNED) && (count[s] <= lowOne)) { + norm[s] = 1; + distributed++; + total -= count[s]; + continue; + } } + ToDistribute = (1 << tableLog) - distributed; + } + + if (distributed == maxSymbolValue+1) { + /* all values are pretty poor; + probably incompressible data (should have already been detected); + find max, then give all remaining points to max */ + U32 maxV = 0, maxC = 0; + for (s=0; s<=maxSymbolValue; s++) + if (count[s] > maxC) { maxV=s; maxC=count[s]; } + norm[maxV] += (short)ToDistribute; + return 0; + } + + if (total == 0) { + /* all of the symbols were low enough for the lowOne or lowThreshold */ + for (s=0; ToDistribute > 0; s = (s+1)%(maxSymbolValue+1)) + if (norm[s] > 0) { ToDistribute--; norm[s]++; } + return 0; + } + + { U64 const vStepLog = 62 - tableLog; + U64 const mid = (1ULL << (vStepLog-1)) - 1; + U64 const rStep = ((((U64)1<> vStepLog); + U32 const sEnd = (U32)(end >> vStepLog); + U32 const weight = sEnd - sStart; + if (weight < 1) + return ERROR(GENERIC); + norm[s] = (short)weight; + tmpTotal = end; + } } } + + return 0; +} + + +size_t FSE_normalizeCount 
(short* normalizedCounter, unsigned tableLog, + const unsigned* count, size_t total, + unsigned maxSymbolValue) +{ + /* Sanity checks */ + if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG; + if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC); /* Unsupported size */ + if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Unsupported size */ + if (tableLog < FSE_minTableLog(total, maxSymbolValue)) return ERROR(GENERIC); /* Too small tableLog, compression potentially impossible */ + + { static U32 const rtbTable[] = { 0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 }; + U64 const scale = 62 - tableLog; + U64 const step = ((U64)1<<62) / total; /* <== here, one division ! */ + U64 const vStep = 1ULL<<(scale-20); + int stillToDistribute = 1<> tableLog); + + for (s=0; s<=maxSymbolValue; s++) { + if (count[s] == total) return 0; /* rle special case */ + if (count[s] == 0) { normalizedCounter[s]=0; continue; } + if (count[s] <= lowThreshold) { + normalizedCounter[s] = -1; + stillToDistribute--; + } else { + short proba = (short)((count[s]*step) >> scale); + if (proba<8) { + U64 restToBeat = vStep * rtbTable[proba]; + proba += (count[s]*step) - ((U64)proba< restToBeat; + } + if (proba > largestP) { largestP=proba; largest=s; } + normalizedCounter[s] = proba; + stillToDistribute -= proba; + } } + if (-stillToDistribute >= (normalizedCounter[largest] >> 1)) { + /* corner case, need another normalization method */ + size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue); + if (FSE_isError(errorCode)) return errorCode; + } + else normalizedCounter[largest] += (short)stillToDistribute; + } + +#if 0 + { /* Print Table (debug) */ + U32 s; + U32 nTotal = 0; + for (s=0; s<=maxSymbolValue; s++) + RAWLOG(2, "%3i: %4i \n", s, normalizedCounter[s]); + for (s=0; s<=maxSymbolValue; s++) + nTotal += abs(normalizedCounter[s]); + if (nTotal != (1U<>1); /* assumption : tableLog >= 1 */ + FSE_symbolCompressionTransform* 
const symbolTT = (FSE_symbolCompressionTransform*) (FSCT); + unsigned s; + + /* Sanity checks */ + if (nbBits < 1) return ERROR(GENERIC); /* min size */ + + /* header */ + tableU16[-2] = (U16) nbBits; + tableU16[-1] = (U16) maxSymbolValue; + + /* Build table */ + for (s=0; s FSE_MAX_TABLELOG*4+7 ) && (srcSize & 2)) { /* test bit 2 */ + FSE_encodeSymbol(&bitC, &CState2, *--ip); + FSE_encodeSymbol(&bitC, &CState1, *--ip); + FSE_FLUSHBITS(&bitC); + } + + /* 2 or 4 encoding per loop */ + while ( ip>istart ) { + + FSE_encodeSymbol(&bitC, &CState2, *--ip); + + if (sizeof(bitC.bitContainer)*8 < FSE_MAX_TABLELOG*2+7 ) /* this test must be static */ + FSE_FLUSHBITS(&bitC); + + FSE_encodeSymbol(&bitC, &CState1, *--ip); + + if (sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) { /* this test must be static */ + FSE_encodeSymbol(&bitC, &CState2, *--ip); + FSE_encodeSymbol(&bitC, &CState1, *--ip); + } + + FSE_FLUSHBITS(&bitC); + } + + FSE_flushCState(&bitC, &CState2); + FSE_flushCState(&bitC, &CState1); + return BIT_closeCStream(&bitC); +} + +size_t FSE_compress_usingCTable (void* dst, size_t dstSize, + const void* src, size_t srcSize, + const FSE_CTable* ct) +{ + unsigned const fast = (dstSize >= FSE_BLOCKBOUND(srcSize)); + + if (fast) + return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 1); + else + return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 0); +} + + +size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); } + +#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e +#define CHECK_F(f) { CHECK_V_F(_var_err__, f); } + +/* FSE_compress_wksp() : + * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`). 
+ * `wkspSize` size must be `(1< not compressible */ + if (maxCount < (srcSize >> 7)) return 0; /* Heuristic : not compressible enough */ + } + + tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue); + CHECK_F( FSE_normalizeCount(norm, tableLog, count, srcSize, maxSymbolValue) ); + + /* Write table description header */ + { CHECK_V_F(nc_err, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) ); + op += nc_err; + } + + /* Compress */ + CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, scratchBufferSize) ); + { CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, src, srcSize, CTable) ); + if (cSize == 0) return 0; /* not enough space for compressed data */ + op += cSize; + } + + /* check compressibility */ + if ( (size_t)(op-ostart) >= srcSize-1 ) return 0; + + return op-ostart; +} + +typedef struct { + FSE_CTable CTable_max[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)]; + BYTE scratchBuffer[1 << FSE_MAX_TABLELOG]; +} fseWkspMax_t; + +size_t FSE_compress2 (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog) +{ + fseWkspMax_t scratchBuffer; + DEBUG_STATIC_ASSERT(sizeof(scratchBuffer) >= FSE_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)); /* compilation failures here means scratchBuffer is not large enough */ + if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); + return FSE_compress_wksp(dst, dstCapacity, src, srcSize, maxSymbolValue, tableLog, &scratchBuffer, sizeof(scratchBuffer)); +} + +size_t FSE_compress (void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + return FSE_compress2(dst, dstCapacity, src, srcSize, FSE_MAX_SYMBOL_VALUE, FSE_DEFAULT_TABLELOG); +} + + +#endif /* FSE_COMMONDEFS_ONLY */ diff --git a/deps/SZ/zstd/compress/hist.c b/deps/SZ/zstd/compress/hist.c new file mode 100644 index 0000000000000000000000000000000000000000..16524756b8dc987f92883c1c1c560efa513e41f6 --- /dev/null +++ 
b/deps/SZ/zstd/compress/hist.c @@ -0,0 +1,195 @@ +/* ****************************************************************** + hist : Histogram functions + part of Finite State Entropy project + Copyright (C) 2013-present, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy + - Public forum : https://groups.google.com/forum/#!forum/lz4c +****************************************************************** */ + +/* --- dependencies --- */ +#include "mem.h" /* U32, BYTE, etc. 
*/ +#include "debug.h" /* assert, DEBUGLOG */ +#include "error_private.h" /* ERROR */ +#include "hist.h" + + +/* --- Error management --- */ +unsigned HIST_isError(size_t code) { return ERR_isError(code); } + +/*-************************************************************** + * Histogram functions + ****************************************************************/ +unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, + const void* src, size_t srcSize) +{ + const BYTE* ip = (const BYTE*)src; + const BYTE* const end = ip + srcSize; + unsigned maxSymbolValue = *maxSymbolValuePtr; + unsigned largestCount=0; + + memset(count, 0, (maxSymbolValue+1) * sizeof(*count)); + if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; } + + while (ip largestCount) largestCount = count[s]; + } + + return largestCount; +} + + +/* HIST_count_parallel_wksp() : + * store histogram into 4 intermediate tables, recombined at the end. + * this design makes better use of OoO cpus, + * and is noticeably faster when some values are heavily repeated. + * But it needs some additional workspace for intermediate tables. + * `workSpace` size must be a table of size >= HIST_WKSP_SIZE_U32. + * @return : largest histogram frequency, + * or an error code (notably when histogram would be larger than *maxSymbolValuePtr). 
*/ +static size_t HIST_count_parallel_wksp( + unsigned* count, unsigned* maxSymbolValuePtr, + const void* source, size_t sourceSize, + unsigned checkMax, + unsigned* const workSpace) +{ + const BYTE* ip = (const BYTE*)source; + const BYTE* const iend = ip+sourceSize; + unsigned maxSymbolValue = *maxSymbolValuePtr; + unsigned max=0; + U32* const Counting1 = workSpace; + U32* const Counting2 = Counting1 + 256; + U32* const Counting3 = Counting2 + 256; + U32* const Counting4 = Counting3 + 256; + + memset(workSpace, 0, 4*256*sizeof(unsigned)); + + /* safety checks */ + if (!sourceSize) { + memset(count, 0, maxSymbolValue + 1); + *maxSymbolValuePtr = 0; + return 0; + } + if (!maxSymbolValue) maxSymbolValue = 255; /* 0 == default */ + + /* by stripes of 16 bytes */ + { U32 cached = MEM_read32(ip); ip += 4; + while (ip < iend-15) { + U32 c = cached; cached = MEM_read32(ip); ip += 4; + Counting1[(BYTE) c ]++; + Counting2[(BYTE)(c>>8) ]++; + Counting3[(BYTE)(c>>16)]++; + Counting4[ c>>24 ]++; + c = cached; cached = MEM_read32(ip); ip += 4; + Counting1[(BYTE) c ]++; + Counting2[(BYTE)(c>>8) ]++; + Counting3[(BYTE)(c>>16)]++; + Counting4[ c>>24 ]++; + c = cached; cached = MEM_read32(ip); ip += 4; + Counting1[(BYTE) c ]++; + Counting2[(BYTE)(c>>8) ]++; + Counting3[(BYTE)(c>>16)]++; + Counting4[ c>>24 ]++; + c = cached; cached = MEM_read32(ip); ip += 4; + Counting1[(BYTE) c ]++; + Counting2[(BYTE)(c>>8) ]++; + Counting3[(BYTE)(c>>16)]++; + Counting4[ c>>24 ]++; + } + ip-=4; + } + + /* finish last symbols */ + while (ipmaxSymbolValue; s--) { + Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s]; + if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall); + } } + + { U32 s; + if (maxSymbolValue > 255) maxSymbolValue = 255; + for (s=0; s<=maxSymbolValue; s++) { + count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s]; + if (count[s] > max) max = count[s]; + } } + + while (!count[maxSymbolValue]) maxSymbolValue--; + *maxSymbolValuePtr = maxSymbolValue; + return 
(size_t)max; +} + +/* HIST_countFast_wksp() : + * Same as HIST_countFast(), but using an externally provided scratch buffer. + * `workSpace` size must be table of >= HIST_WKSP_SIZE_U32 unsigned */ +size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr, + const void* source, size_t sourceSize, + unsigned* workSpace) +{ + if (sourceSize < 1500) /* heuristic threshold */ + return HIST_count_simple(count, maxSymbolValuePtr, source, sourceSize); + return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 0, workSpace); +} + +/* fast variant (unsafe : won't check if src contains values beyond count[] limit) */ +size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr, + const void* source, size_t sourceSize) +{ + unsigned tmpCounters[HIST_WKSP_SIZE_U32]; + return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters); +} + +/* HIST_count_wksp() : + * Same as HIST_count(), but using an externally provided scratch buffer. + * `workSpace` size must be table of >= HIST_WKSP_SIZE_U32 unsigned */ +size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr, + const void* source, size_t sourceSize, unsigned* workSpace) +{ + if (*maxSymbolValuePtr < 255) + return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 1, workSpace); + *maxSymbolValuePtr = 255; + return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace); +} + +size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr, + const void* src, size_t srcSize) +{ + unsigned tmpCounters[HIST_WKSP_SIZE_U32]; + return HIST_count_wksp(count, maxSymbolValuePtr, src, srcSize, tmpCounters); +} diff --git a/deps/SZ/zstd/compress/hist.h b/deps/SZ/zstd/compress/hist.h new file mode 100644 index 0000000000000000000000000000000000000000..788470da7f732c53f6bfe1a361b4224d73a780fa --- /dev/null +++ b/deps/SZ/zstd/compress/hist.h @@ -0,0 +1,92 @@ +/* 
****************************************************************** + hist : Histogram functions + part of Finite State Entropy project + Copyright (C) 2013-present, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy + - Public forum : https://groups.google.com/forum/#!forum/lz4c +****************************************************************** */ + +/* --- dependencies --- */ +#include /* size_t */ + + +/* --- simple histogram functions --- */ + +/*! HIST_count(): + * Provides the precise count of each byte within a table 'count'. 
+ * 'count' is a table of unsigned int, of minimum size (*maxSymbolValuePtr+1). + * Updates *maxSymbolValuePtr with actual largest symbol value detected. + * @return : count of the most frequent symbol (which isn't identified). + * or an error code, which can be tested using HIST_isError(). + * note : if return == srcSize, there is only one symbol. + */ +size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr, + const void* src, size_t srcSize); + +unsigned HIST_isError(size_t code); /*< tells if a return value is an error code */ + + +/* --- advanced histogram functions --- */ + +#define HIST_WKSP_SIZE_U32 1024 +/** HIST_count_wksp() : + * Same as HIST_count(), but using an externally provided scratch buffer. + * Benefit is this function will use very little stack space. + * `workSpace` must be a table of unsigned of size >= HIST_WKSP_SIZE_U32 + */ +size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr, + const void* src, size_t srcSize, + unsigned* workSpace); + +/** HIST_countFast() : + * same as HIST_count(), but blindly trusts that all byte values within src are <= *maxSymbolValuePtr. + * This function is unsafe, and will segfault if any value within `src` is `> *maxSymbolValuePtr` + */ +size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr, + const void* src, size_t srcSize); + +/** HIST_countFast_wksp() : + * Same as HIST_countFast(), but using an externally provided scratch buffer. + * `workSpace` must be a table of unsigned of size >= HIST_WKSP_SIZE_U32 + */ +size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr, + const void* src, size_t srcSize, + unsigned* workSpace); + +/*! HIST_count_simple() : + * Same as HIST_countFast(), this function is unsafe, + * and will segfault if any value within `src` is `> *maxSymbolValuePtr`. + * It is also a bit slower for large inputs. + * However, it does not need any additional memory (not even on stack). + * @return : count of the most frequent symbol. 
+ * Note this function doesn't produce any error (i.e. it must succeed). + */ +unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, + const void* src, size_t srcSize); diff --git a/deps/SZ/zstd/compress/huf_compress.c b/deps/SZ/zstd/compress/huf_compress.c new file mode 100644 index 0000000000000000000000000000000000000000..9cdaa5d796f2e55e9cc8ed6d0556d0c8cab9adfd --- /dev/null +++ b/deps/SZ/zstd/compress/huf_compress.c @@ -0,0 +1,796 @@ +/* ****************************************************************** + Huffman encoder, part of New Generation Entropy library + Copyright (C) 2013-2016, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + You can contact the author at : + - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy + - Public forum : https://groups.google.com/forum/#!forum/lz4c +****************************************************************** */ + +/* ************************************************************** +* Compiler specifics +****************************************************************/ +#ifdef _MSC_VER /* Visual Studio */ +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +#endif + + +/* ************************************************************** +* Includes +****************************************************************/ +#include /* memcpy, memset */ +#include /* printf (debug) */ +#include "compiler.h" +#include "bitstream.h" +#include "hist.h" +#define FSE_STATIC_LINKING_ONLY /* FSE_optimalTableLog_internal */ +#include "fse.h" /* header compression */ +#define HUF_STATIC_LINKING_ONLY +#include "huf.h" +#include "error_private.h" + + +/* ************************************************************** +* Error Management +****************************************************************/ +#define HUF_isError ERR_isError +#define HUF_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */ +#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e +#define CHECK_F(f) { CHECK_V_F(_var_err__, f); } + + +/* ************************************************************** +* Utils +****************************************************************/ +unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue) +{ + return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1); +} + + +/* ******************************************************* +* HUF : Huffman block compression +*********************************************************/ +/* HUF_compressWeights() : + * Same as FSE_compress(), but dedicated to huff0's weights 
compression. + * The use case needs much less stack memory. + * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX. + */ +#define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6 +size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weightTable, size_t wtSize) +{ + BYTE* const ostart = (BYTE*) dst; + BYTE* op = ostart; + BYTE* const oend = ostart + dstSize; + + U32 maxSymbolValue = HUF_TABLELOG_MAX; + U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER; + + FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)]; + BYTE scratchBuffer[1< not compressible */ + } + + tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue); + CHECK_F( FSE_normalizeCount(norm, tableLog, count, wtSize, maxSymbolValue) ); + + /* Write table description header */ + { CHECK_V_F(hSize, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) ); + op += hSize; + } + + /* Compress */ + CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, sizeof(scratchBuffer)) ); + { CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, weightTable, wtSize, CTable) ); + if (cSize == 0) return 0; /* not enough space for compressed data */ + op += cSize; + } + + return op-ostart; +} + + +struct HUF_CElt_s { + U16 val; + BYTE nbBits; +}; /* typedef'd to HUF_CElt within "huf.h" */ + +/*! HUF_writeCTable() : + `CTable` : Huffman tree to save, using huf representation. 
+ @return : size of saved CTable */ +size_t HUF_writeCTable (void* dst, size_t maxDstSize, + const HUF_CElt* CTable, U32 maxSymbolValue, U32 huffLog) +{ + BYTE bitsToWeight[HUF_TABLELOG_MAX + 1]; /* precomputed conversion table */ + BYTE huffWeight[HUF_SYMBOLVALUE_MAX]; + BYTE* op = (BYTE*)dst; + U32 n; + + /* check conditions */ + if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge); + + /* convert to weight */ + bitsToWeight[0] = 0; + for (n=1; n1) & (hSize < maxSymbolValue/2)) { /* FSE compressed */ + op[0] = (BYTE)hSize; + return hSize+1; + } } + + /* write raw values as 4-bits (max : 15) */ + if (maxSymbolValue > (256-128)) return ERROR(GENERIC); /* should not happen : likely means source cannot be compressed */ + if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall); /* not enough space within dst buffer */ + op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue-1)); + huffWeight[maxSymbolValue] = 0; /* to be sure it doesn't cause msan issue in final combination */ + for (n=0; n HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); + if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall); + + /* Prepare base value per rank */ + { U32 n, nextRankStart = 0; + for (n=1; n<=tableLog; n++) { + U32 current = nextRankStart; + nextRankStart += (rankVal[n] << (n-1)); + rankVal[n] = current; + } } + + /* fill nbBits */ + { U32 n; for (n=0; nn=tableLog+1 */ + U16 valPerRank[HUF_TABLELOG_MAX+2] = {0}; + { U32 n; for (n=0; n0; n--) { /* start at n=tablelog <-> w=1 */ + valPerRank[n] = min; /* get starting value within each rank */ + min += nbPerRank[n]; + min >>= 1; + } } + /* assign value within rank, symbol order */ + { U32 n; for (n=0; n maxNbBits */ + + /* there are several too large elements (at least >= 2) */ + { int totalCost = 0; + const U32 baseCost = 1 << (largestBits - maxNbBits); + U32 n = lastNonNull; + + while (huffNode[n].nbBits > maxNbBits) { + totalCost += baseCost - (1 << (largestBits - 
huffNode[n].nbBits)); + huffNode[n].nbBits = (BYTE)maxNbBits; + n --; + } /* n stops at huffNode[n].nbBits <= maxNbBits */ + while (huffNode[n].nbBits == maxNbBits) n--; /* n end at index of smallest symbol using < maxNbBits */ + + /* renorm totalCost */ + totalCost >>= (largestBits - maxNbBits); /* note : totalCost is necessarily a multiple of baseCost */ + + /* repay normalized cost */ + { U32 const noSymbol = 0xF0F0F0F0; + U32 rankLast[HUF_TABLELOG_MAX+2]; + int pos; + + /* Get pos of last (smallest) symbol per rank */ + memset(rankLast, 0xF0, sizeof(rankLast)); + { U32 currentNbBits = maxNbBits; + for (pos=n ; pos >= 0; pos--) { + if (huffNode[pos].nbBits >= currentNbBits) continue; + currentNbBits = huffNode[pos].nbBits; /* < maxNbBits */ + rankLast[maxNbBits-currentNbBits] = pos; + } } + + while (totalCost > 0) { + U32 nBitsToDecrease = BIT_highbit32(totalCost) + 1; + for ( ; nBitsToDecrease > 1; nBitsToDecrease--) { + U32 highPos = rankLast[nBitsToDecrease]; + U32 lowPos = rankLast[nBitsToDecrease-1]; + if (highPos == noSymbol) continue; + if (lowPos == noSymbol) break; + { U32 const highTotal = huffNode[highPos].count; + U32 const lowTotal = 2 * huffNode[lowPos].count; + if (highTotal <= lowTotal) break; + } } + /* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) 
*/ + /* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */ + while ((nBitsToDecrease<=HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol)) + nBitsToDecrease ++; + totalCost -= 1 << (nBitsToDecrease-1); + if (rankLast[nBitsToDecrease-1] == noSymbol) + rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease]; /* this rank is no longer empty */ + huffNode[rankLast[nBitsToDecrease]].nbBits ++; + if (rankLast[nBitsToDecrease] == 0) /* special case, reached largest symbol */ + rankLast[nBitsToDecrease] = noSymbol; + else { + rankLast[nBitsToDecrease]--; + if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease) + rankLast[nBitsToDecrease] = noSymbol; /* this rank is now empty */ + } } /* while (totalCost > 0) */ + + while (totalCost < 0) { /* Sometimes, cost correction overshoot */ + if (rankLast[1] == noSymbol) { /* special case : no rank 1 symbol (using maxNbBits-1); let's create one from largest rank 0 (using maxNbBits) */ + while (huffNode[n].nbBits == maxNbBits) n--; + huffNode[n+1].nbBits--; + rankLast[1] = n+1; + totalCost++; + continue; + } + huffNode[ rankLast[1] + 1 ].nbBits--; + rankLast[1]++; + totalCost ++; + } } } /* there are several too large elements (at least >= 2) */ + + return maxNbBits; +} + + +typedef struct { + U32 base; + U32 current; +} rankPos; + +static void HUF_sort(nodeElt* huffNode, const U32* count, U32 maxSymbolValue) +{ + rankPos rank[32]; + U32 n; + + memset(rank, 0, sizeof(rank)); + for (n=0; n<=maxSymbolValue; n++) { + U32 r = BIT_highbit32(count[n] + 1); + rank[r].base ++; + } + for (n=30; n>0; n--) rank[n-1].base += rank[n].base; + for (n=0; n<32; n++) rank[n].current = rank[n].base; + for (n=0; n<=maxSymbolValue; n++) { + U32 const c = count[n]; + U32 const r = BIT_highbit32(c+1) + 1; + U32 pos = rank[r].current++; + while ((pos > rank[r].base) && (c > huffNode[pos-1].count)) { + huffNode[pos] = huffNode[pos-1]; + pos--; + } + huffNode[pos].count = c; + huffNode[pos].byte = 
(BYTE)n; + } +} + + +/** HUF_buildCTable_wksp() : + * Same as HUF_buildCTable(), but using externally allocated scratch buffer. + * `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as a table of HUF_CTABLE_WORKSPACE_SIZE_U32 unsigned. + */ +#define STARTNODE (HUF_SYMBOLVALUE_MAX+1) +typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32]; +size_t HUF_buildCTable_wksp (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize) +{ + nodeElt* const huffNode0 = (nodeElt*)workSpace; + nodeElt* const huffNode = huffNode0+1; + U32 n, nonNullRank; + int lowS, lowN; + U16 nodeNb = STARTNODE; + U32 nodeRoot; + + /* safety checks */ + if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */ + if (wkspSize < sizeof(huffNodeTable)) return ERROR(workSpace_tooSmall); + if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT; + if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge); + memset(huffNode0, 0, sizeof(huffNodeTable)); + + /* sort, decreasing order */ + HUF_sort(huffNode, count, maxSymbolValue); + + /* init for parents */ + nonNullRank = maxSymbolValue; + while(huffNode[nonNullRank].count == 0) nonNullRank--; + lowS = nonNullRank; nodeRoot = nodeNb + lowS - 1; lowN = nodeNb; + huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count; + huffNode[lowS].parent = huffNode[lowS-1].parent = nodeNb; + nodeNb++; lowS-=2; + for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30); + huffNode0[0].count = (U32)(1U<<31); /* fake entry, strong barrier */ + + /* create parents */ + while (nodeNb <= nodeRoot) { + U32 n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++; + U32 n2 = (huffNode[lowS].count < huffNode[lowN].count) ? 
lowS-- : lowN++; + huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count; + huffNode[n1].parent = huffNode[n2].parent = nodeNb; + nodeNb++; + } + + /* distribute weights (unlimited tree height) */ + huffNode[nodeRoot].nbBits = 0; + for (n=nodeRoot-1; n>=STARTNODE; n--) + huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1; + for (n=0; n<=nonNullRank; n++) + huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1; + + /* enforce maxTableLog */ + maxNbBits = HUF_setMaxHeight(huffNode, nonNullRank, maxNbBits); + + /* fill result into tree (val, nbBits) */ + { U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0}; + U16 valPerRank[HUF_TABLELOG_MAX+1] = {0}; + if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC); /* check fit into table */ + for (n=0; n<=nonNullRank; n++) + nbPerRank[huffNode[n].nbBits]++; + /* determine stating value per rank */ + { U16 min = 0; + for (n=maxNbBits; n>0; n--) { + valPerRank[n] = min; /* get starting value within each rank */ + min += nbPerRank[n]; + min >>= 1; + } } + for (n=0; n<=maxSymbolValue; n++) + tree[huffNode[n].byte].nbBits = huffNode[n].nbBits; /* push nbBits per symbol, symbol order */ + for (n=0; n<=maxSymbolValue; n++) + tree[n].val = valPerRank[tree[n].nbBits]++; /* assign value within rank, symbol order */ + } + + return maxNbBits; +} + +/** HUF_buildCTable() : + * @return : maxNbBits + * Note : count is used before tree is written, so they can safely overlap + */ +size_t HUF_buildCTable (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits) +{ + huffNodeTable nodeTable; + return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, nodeTable, sizeof(nodeTable)); +} + +static size_t HUF_estimateCompressedSize(HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) +{ + size_t nbBits = 0; + int s; + for (s = 0; s <= (int)maxSymbolValue; ++s) { + nbBits += CTable[s].nbBits * count[s]; + } + return nbBits >> 3; +} + +static int HUF_validateCTable(const HUF_CElt* CTable, const 
unsigned* count, unsigned maxSymbolValue) { + int bad = 0; + int s; + for (s = 0; s <= (int)maxSymbolValue; ++s) { + bad |= (count[s] != 0) & (CTable[s].nbBits == 0); + } + return !bad; +} + +size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); } + +FORCE_INLINE_TEMPLATE void +HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable) +{ + BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits); +} + +#define HUF_FLUSHBITS(s) BIT_flushBits(s) + +#define HUF_FLUSHBITS_1(stream) \ + if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*2+7) HUF_FLUSHBITS(stream) + +#define HUF_FLUSHBITS_2(stream) \ + if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*4+7) HUF_FLUSHBITS(stream) + +FORCE_INLINE_TEMPLATE size_t +HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize, + const void* src, size_t srcSize, + const HUF_CElt* CTable) +{ + const BYTE* ip = (const BYTE*) src; + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstSize; + BYTE* op = ostart; + size_t n; + BIT_CStream_t bitC; + + /* init */ + if (dstSize < 8) return 0; /* not enough space to compress */ + { size_t const initErr = BIT_initCStream(&bitC, op, oend-op); + if (HUF_isError(initErr)) return 0; } + + n = srcSize & ~3; /* join to mod 4 */ + switch (srcSize & 3) + { + case 3 : HUF_encodeSymbol(&bitC, ip[n+ 2], CTable); + HUF_FLUSHBITS_2(&bitC); + /* fall-through */ + case 2 : HUF_encodeSymbol(&bitC, ip[n+ 1], CTable); + HUF_FLUSHBITS_1(&bitC); + /* fall-through */ + case 1 : HUF_encodeSymbol(&bitC, ip[n+ 0], CTable); + HUF_FLUSHBITS(&bitC); + /* fall-through */ + case 0 : /* fall-through */ + default: break; + } + + for (; n>0; n-=4) { /* note : n&3==0 at this stage */ + HUF_encodeSymbol(&bitC, ip[n- 1], CTable); + HUF_FLUSHBITS_1(&bitC); + HUF_encodeSymbol(&bitC, ip[n- 2], CTable); + HUF_FLUSHBITS_2(&bitC); + HUF_encodeSymbol(&bitC, ip[n- 3], CTable); + HUF_FLUSHBITS_1(&bitC); + HUF_encodeSymbol(&bitC, ip[n- 4], CTable); + 
HUF_FLUSHBITS(&bitC); + } + + return BIT_closeCStream(&bitC); +} + +#if DYNAMIC_BMI2 + +static TARGET_ATTRIBUTE("bmi2") size_t +HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize, + const void* src, size_t srcSize, + const HUF_CElt* CTable) +{ + return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable); +} + +static size_t +HUF_compress1X_usingCTable_internal_default(void* dst, size_t dstSize, + const void* src, size_t srcSize, + const HUF_CElt* CTable) +{ + return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable); +} + +static size_t +HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize, + const void* src, size_t srcSize, + const HUF_CElt* CTable, const int bmi2) +{ + if (bmi2) { + return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable); + } + return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable); +} + +#else + +static size_t +HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize, + const void* src, size_t srcSize, + const HUF_CElt* CTable, const int bmi2) +{ + (void)bmi2; + return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable); +} + +#endif + +size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) +{ + return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0); +} + + +static size_t +HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize, + const void* src, size_t srcSize, + const HUF_CElt* CTable, int bmi2) +{ + size_t const segmentSize = (srcSize+3)/4; /* first 3 segments */ + const BYTE* ip = (const BYTE*) src; + const BYTE* const iend = ip + srcSize; + BYTE* const ostart = (BYTE*) dst; + BYTE* const oend = ostart + dstSize; + BYTE* op = ostart; + + if (dstSize < 6 + 1 + 1 + 1 + 8) return 0; /* minimum space to compress successfully */ + if (srcSize < 12) return 0; /* no saving 
possible : too small input */ + op += 6; /* jumpTable */ + + { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) ); + if (cSize==0) return 0; + assert(cSize <= 65535); + MEM_writeLE16(ostart, (U16)cSize); + op += cSize; + } + + ip += segmentSize; + { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) ); + if (cSize==0) return 0; + assert(cSize <= 65535); + MEM_writeLE16(ostart+2, (U16)cSize); + op += cSize; + } + + ip += segmentSize; + { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) ); + if (cSize==0) return 0; + assert(cSize <= 65535); + MEM_writeLE16(ostart+4, (U16)cSize); + op += cSize; + } + + ip += segmentSize; + { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, iend-ip, CTable, bmi2) ); + if (cSize==0) return 0; + op += cSize; + } + + return op-ostart; +} + +size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) +{ + return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0); +} + + +static size_t HUF_compressCTable_internal( + BYTE* const ostart, BYTE* op, BYTE* const oend, + const void* src, size_t srcSize, + unsigned singleStream, const HUF_CElt* CTable, const int bmi2) +{ + size_t const cSize = singleStream ? 
+ HUF_compress1X_usingCTable_internal(op, oend - op, src, srcSize, CTable, bmi2) : + HUF_compress4X_usingCTable_internal(op, oend - op, src, srcSize, CTable, bmi2); + if (HUF_isError(cSize)) { return cSize; } + if (cSize==0) { return 0; } /* uncompressible */ + op += cSize; + /* check compressibility */ + if ((size_t)(op-ostart) >= srcSize-1) { return 0; } + return op-ostart; +} + +typedef struct { + U32 count[HUF_SYMBOLVALUE_MAX + 1]; + HUF_CElt CTable[HUF_SYMBOLVALUE_MAX + 1]; + huffNodeTable nodeTable; +} HUF_compress_tables_t; + +/* HUF_compress_internal() : + * `workSpace` must a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */ +static size_t HUF_compress_internal ( + void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog, + unsigned singleStream, + void* workSpace, size_t wkspSize, + HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat, + const int bmi2) +{ + HUF_compress_tables_t* const table = (HUF_compress_tables_t*)workSpace; + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstSize; + BYTE* op = ostart; + + /* checks & inits */ + if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */ + if (wkspSize < sizeof(*table)) return ERROR(workSpace_tooSmall); + if (!srcSize) return 0; /* Uncompressed */ + if (!dstSize) return 0; /* cannot fit anything within dst budget */ + if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); /* current block size limit */ + if (huffLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); + if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge); + if (!maxSymbolValue) maxSymbolValue = HUF_SYMBOLVALUE_MAX; + if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT; + + /* Heuristic : If old table is valid, use it for small inputs */ + if (preferRepeat && repeat && *repeat == HUF_repeat_valid) { + return HUF_compressCTable_internal(ostart, op, oend, + src, srcSize, + singleStream, 
oldHufTable, bmi2); + } + + /* Scan input and build symbol stats */ + { CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, table->count) ); + if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */ + if (largest <= (srcSize >> 7)+4) return 0; /* heuristic : probably not compressible enough */ + } + + /* Check validity of previous table */ + if ( repeat + && *repeat == HUF_repeat_check + && !HUF_validateCTable(oldHufTable, table->count, maxSymbolValue)) { + *repeat = HUF_repeat_none; + } + /* Heuristic : use existing table for small inputs */ + if (preferRepeat && repeat && *repeat != HUF_repeat_none) { + return HUF_compressCTable_internal(ostart, op, oend, + src, srcSize, + singleStream, oldHufTable, bmi2); + } + + /* Build Huffman Tree */ + huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue); + { CHECK_V_F(maxBits, HUF_buildCTable_wksp(table->CTable, table->count, + maxSymbolValue, huffLog, + table->nodeTable, sizeof(table->nodeTable)) ); + huffLog = (U32)maxBits; + /* Zero unused symbols in CTable, so we can check it for validity */ + memset(table->CTable + (maxSymbolValue + 1), 0, + sizeof(table->CTable) - ((maxSymbolValue + 1) * sizeof(HUF_CElt))); + } + + /* Write table description header */ + { CHECK_V_F(hSize, HUF_writeCTable (op, dstSize, table->CTable, maxSymbolValue, huffLog) ); + /* Check if using previous huffman table is beneficial */ + if (repeat && *repeat != HUF_repeat_none) { + size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, table->count, maxSymbolValue); + size_t const newSize = HUF_estimateCompressedSize(table->CTable, table->count, maxSymbolValue); + if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) { + return HUF_compressCTable_internal(ostart, op, oend, + src, srcSize, + singleStream, oldHufTable, bmi2); + } } + + /* Use the new huffman table */ + if (hSize + 12ul >= srcSize) { return 0; } + op += hSize; + if (repeat) { *repeat 
= HUF_repeat_none; } + if (oldHufTable) + memcpy(oldHufTable, table->CTable, sizeof(table->CTable)); /* Save new table */ + } + return HUF_compressCTable_internal(ostart, op, oend, + src, srcSize, + singleStream, table->CTable, bmi2); +} + + +size_t HUF_compress1X_wksp (void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog, + void* workSpace, size_t wkspSize) +{ + return HUF_compress_internal(dst, dstSize, src, srcSize, + maxSymbolValue, huffLog, 1 /*single stream*/, + workSpace, wkspSize, + NULL, NULL, 0, 0 /*bmi2*/); +} + +size_t HUF_compress1X_repeat (void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog, + void* workSpace, size_t wkspSize, + HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2) +{ + return HUF_compress_internal(dst, dstSize, src, srcSize, + maxSymbolValue, huffLog, 1 /*single stream*/, + workSpace, wkspSize, hufTable, + repeat, preferRepeat, bmi2); +} + +size_t HUF_compress1X (void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog) +{ + unsigned workSpace[HUF_WORKSPACE_SIZE_U32]; + return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace)); +} + +/* HUF_compress4X_repeat(): + * compress input using 4 streams. + * provide workspace to generate compression tables */ +size_t HUF_compress4X_wksp (void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog, + void* workSpace, size_t wkspSize) +{ + return HUF_compress_internal(dst, dstSize, src, srcSize, + maxSymbolValue, huffLog, 0 /*4 streams*/, + workSpace, wkspSize, + NULL, NULL, 0, 0 /*bmi2*/); +} + +/* HUF_compress4X_repeat(): + * compress input using 4 streams. 
+ * re-use an existing huffman compression table */ +size_t HUF_compress4X_repeat (void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog, + void* workSpace, size_t wkspSize, + HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2) +{ + return HUF_compress_internal(dst, dstSize, src, srcSize, + maxSymbolValue, huffLog, 0 /* 4 streams */, + workSpace, wkspSize, + hufTable, repeat, preferRepeat, bmi2); +} + +size_t HUF_compress2 (void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog) +{ + unsigned workSpace[HUF_WORKSPACE_SIZE_U32]; + return HUF_compress4X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace)); +} + +size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize) +{ + return HUF_compress2(dst, maxDstSize, src, srcSize, 255, HUF_TABLELOG_DEFAULT); +} diff --git a/deps/SZ/zstd/compress/zstd_compress.c b/deps/SZ/zstd/compress/zstd_compress.c new file mode 100644 index 0000000000000000000000000000000000000000..7592bf623d581334ae1f01c099d42370bda770ba --- /dev/null +++ b/deps/SZ/zstd/compress/zstd_compress.c @@ -0,0 +1,3900 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +/*-************************************* +* Dependencies +***************************************/ +#include /* memset */ +#include "cpu.h" +#include "mem.h" +#include "hist.h" /* HIST_countFast_wksp */ +#define FSE_STATIC_LINKING_ONLY /* FSE_encodeSymbol */ +#include "fse.h" +#define HUF_STATIC_LINKING_ONLY +#include "huf.h" +#include "zstd_compress_internal.h" +#include "zstd_fast.h" +#include "zstd_double_fast.h" +#include "zstd_lazy.h" +#include "zstd_opt.h" +#include "zstd_ldm.h" + +void showme(){ + printf(" hello show me."); +} + +/*-************************************* +* Helper functions +***************************************/ +size_t ZSTD_compressBound(size_t srcSize) { + return ZSTD_COMPRESSBOUND(srcSize); +} + + +/*-************************************* +* Context memory management +***************************************/ +struct ZSTD_CDict_s { + void* dictBuffer; + const void* dictContent; + size_t dictContentSize; + void* workspace; + size_t workspaceSize; + ZSTD_matchState_t matchState; + ZSTD_compressedBlockState_t cBlockState; + ZSTD_compressionParameters cParams; + ZSTD_customMem customMem; + U32 dictID; +}; /* typedef'd to ZSTD_CDict within "zstd.h" */ + +ZSTD_CCtx* ZSTD_createCCtx(void) +{ + return ZSTD_createCCtx_advanced(ZSTD_defaultCMem); +} + +static void ZSTD_initCCtx(ZSTD_CCtx* cctx, ZSTD_customMem memManager) +{ + assert(cctx != NULL); + memset(cctx, 0, sizeof(*cctx)); + cctx->customMem = memManager; + cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid()); + { size_t const err = ZSTD_CCtx_resetParameters(cctx); + assert(!ZSTD_isError(err)); + (void)err; + } +} + +ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem) +{ + ZSTD_STATIC_ASSERT(zcss_init==0); + ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN==(0ULL - 1)); + if (!customMem.customAlloc ^ !customMem.customFree) return NULL; + { ZSTD_CCtx* const cctx = (ZSTD_CCtx*)ZSTD_malloc(sizeof(ZSTD_CCtx), customMem); + if (!cctx) return NULL; + ZSTD_initCCtx(cctx, customMem); + return 
cctx; + } +} + +ZSTD_CCtx* ZSTD_initStaticCCtx(void *workspace, size_t workspaceSize) +{ + ZSTD_CCtx* const cctx = (ZSTD_CCtx*) workspace; + if (workspaceSize <= sizeof(ZSTD_CCtx)) return NULL; /* minimum size */ + if ((size_t)workspace & 7) return NULL; /* must be 8-aligned */ + memset(workspace, 0, workspaceSize); /* may be a bit generous, could memset be smaller ? */ + cctx->staticSize = workspaceSize; + cctx->workSpace = (void*)(cctx+1); + cctx->workSpaceSize = workspaceSize - sizeof(ZSTD_CCtx); + + /* statically sized space. entropyWorkspace never moves (but prev/next block swap places) */ + if (cctx->workSpaceSize < HUF_WORKSPACE_SIZE + 2 * sizeof(ZSTD_compressedBlockState_t)) return NULL; + assert(((size_t)cctx->workSpace & (sizeof(void*)-1)) == 0); /* ensure correct alignment */ + cctx->blockState.prevCBlock = (ZSTD_compressedBlockState_t*)cctx->workSpace; + cctx->blockState.nextCBlock = cctx->blockState.prevCBlock + 1; + { + void* const ptr = cctx->blockState.nextCBlock + 1; + cctx->entropyWorkspace = (U32*)ptr; + } + cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid()); + return cctx; +} + +static void ZSTD_freeCCtxContent(ZSTD_CCtx* cctx) +{ + assert(cctx != NULL); + assert(cctx->staticSize == 0); + ZSTD_free(cctx->workSpace, cctx->customMem); cctx->workSpace = NULL; + ZSTD_freeCDict(cctx->cdictLocal); cctx->cdictLocal = NULL; +#ifdef ZSTD_MULTITHREAD + ZSTDMT_freeCCtx(cctx->mtctx); cctx->mtctx = NULL; +#endif +} + +size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx) +{ + if (cctx==NULL) return 0; /* support free on NULL */ + if (cctx->staticSize) return ERROR(memory_allocation); /* not compatible with static CCtx */ + ZSTD_freeCCtxContent(cctx); + ZSTD_free(cctx, cctx->customMem); + return 0; +} + + +static size_t ZSTD_sizeof_mtctx(const ZSTD_CCtx* cctx) +{ +#ifdef ZSTD_MULTITHREAD + return ZSTDMT_sizeof_CCtx(cctx->mtctx); +#else + (void) cctx; + return 0; +#endif +} + + +size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx) +{ + if (cctx==NULL) return 0; /* support sizeof on NULL */ 
+ return sizeof(*cctx) + cctx->workSpaceSize + + ZSTD_sizeof_CDict(cctx->cdictLocal) + + ZSTD_sizeof_mtctx(cctx); +} + +size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs) +{ + return ZSTD_sizeof_CCtx(zcs); /* same object */ +} + +/* private API call, for dictBuilder only */ +const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) { return &(ctx->seqStore); } + +static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams( + ZSTD_compressionParameters cParams) +{ + ZSTD_CCtx_params cctxParams; + memset(&cctxParams, 0, sizeof(cctxParams)); + cctxParams.cParams = cParams; + cctxParams.compressionLevel = ZSTD_CLEVEL_DEFAULT; /* should not matter, as all cParams are presumed properly defined */ + assert(!ZSTD_checkCParams(cParams)); + cctxParams.fParams.contentSizeFlag = 1; + return cctxParams; +} + +static ZSTD_CCtx_params* ZSTD_createCCtxParams_advanced( + ZSTD_customMem customMem) +{ + ZSTD_CCtx_params* params; + if (!customMem.customAlloc ^ !customMem.customFree) return NULL; + params = (ZSTD_CCtx_params*)ZSTD_calloc( + sizeof(ZSTD_CCtx_params), customMem); + if (!params) { return NULL; } + params->customMem = customMem; + params->compressionLevel = ZSTD_CLEVEL_DEFAULT; + params->fParams.contentSizeFlag = 1; + return params; +} + +ZSTD_CCtx_params* ZSTD_createCCtxParams(void) +{ + return ZSTD_createCCtxParams_advanced(ZSTD_defaultCMem); +} + +size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params) +{ + if (params == NULL) { return 0; } + ZSTD_free(params, params->customMem); + return 0; +} + +size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params) +{ + return ZSTD_CCtxParams_init(params, ZSTD_CLEVEL_DEFAULT); +} + +size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel) { + if (!cctxParams) { return ERROR(GENERIC); } + memset(cctxParams, 0, sizeof(*cctxParams)); + cctxParams->compressionLevel = compressionLevel; + cctxParams->fParams.contentSizeFlag = 1; + return 0; +} + +size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, 
ZSTD_parameters params) +{ + if (!cctxParams) { return ERROR(GENERIC); } + CHECK_F( ZSTD_checkCParams(params.cParams) ); + memset(cctxParams, 0, sizeof(*cctxParams)); + cctxParams->cParams = params.cParams; + cctxParams->fParams = params.fParams; + cctxParams->compressionLevel = ZSTD_CLEVEL_DEFAULT; /* should not matter, as all cParams are presumed properly defined */ + assert(!ZSTD_checkCParams(params.cParams)); + return 0; +} + +/* ZSTD_assignParamsToCCtxParams() : + * params is presumed valid at this stage */ +static ZSTD_CCtx_params ZSTD_assignParamsToCCtxParams( + ZSTD_CCtx_params cctxParams, ZSTD_parameters params) +{ + ZSTD_CCtx_params ret = cctxParams; + ret.cParams = params.cParams; + ret.fParams = params.fParams; + ret.compressionLevel = ZSTD_CLEVEL_DEFAULT; /* should not matter, as all cParams are presumed properly defined */ + assert(!ZSTD_checkCParams(params.cParams)); + return ret; +} + +#define CLAMPCHECK(val,min,max) { \ + if (((val)<(min)) | ((val)>(max))) { \ + return ERROR(parameter_outOfBound); \ +} } + + +static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param) +{ + switch(param) + { + case ZSTD_p_compressionLevel: + case ZSTD_p_hashLog: + case ZSTD_p_chainLog: + case ZSTD_p_searchLog: + case ZSTD_p_minMatch: + case ZSTD_p_targetLength: + case ZSTD_p_compressionStrategy: + return 1; + + case ZSTD_p_format: + case ZSTD_p_windowLog: + case ZSTD_p_contentSizeFlag: + case ZSTD_p_checksumFlag: + case ZSTD_p_dictIDFlag: + case ZSTD_p_forceMaxWindow : + case ZSTD_p_nbWorkers: + case ZSTD_p_jobSize: + case ZSTD_p_overlapSizeLog: + case ZSTD_p_enableLongDistanceMatching: + case ZSTD_p_ldmHashLog: + case ZSTD_p_ldmMinMatch: + case ZSTD_p_ldmBucketSizeLog: + case ZSTD_p_ldmHashEveryLog: + case ZSTD_p_forceAttachDict: + default: + return 0; + } +} + +size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned value) +{ + DEBUGLOG(4, "ZSTD_CCtx_setParameter (%u, %u)", (U32)param, value); + if (cctx->streamStage != zcss_init) { + if 
(ZSTD_isUpdateAuthorized(param)) { + cctx->cParamsChanged = 1; + } else { + return ERROR(stage_wrong); + } } + + switch(param) + { + case ZSTD_p_format : + return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); + + case ZSTD_p_compressionLevel: + if (cctx->cdict) return ERROR(stage_wrong); + return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); + + case ZSTD_p_windowLog: + case ZSTD_p_hashLog: + case ZSTD_p_chainLog: + case ZSTD_p_searchLog: + case ZSTD_p_minMatch: + case ZSTD_p_targetLength: + case ZSTD_p_compressionStrategy: + if (cctx->cdict) return ERROR(stage_wrong); + return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); + + case ZSTD_p_contentSizeFlag: + case ZSTD_p_checksumFlag: + case ZSTD_p_dictIDFlag: + return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); + + case ZSTD_p_forceMaxWindow : /* Force back-references to remain < windowSize, + * even when referencing into Dictionary content. + * default : 0 when using a CDict, 1 when using a Prefix */ + return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); + + case ZSTD_p_forceAttachDict: + return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); + + case ZSTD_p_nbWorkers: + if ((value>0) && cctx->staticSize) { + return ERROR(parameter_unsupported); /* MT not compatible with static alloc */ + } + return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); + + case ZSTD_p_jobSize: + case ZSTD_p_overlapSizeLog: + return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); + + case ZSTD_p_enableLongDistanceMatching: + case ZSTD_p_ldmHashLog: + case ZSTD_p_ldmMinMatch: + case ZSTD_p_ldmBucketSizeLog: + case ZSTD_p_ldmHashEveryLog: + if (cctx->cdict) return ERROR(stage_wrong); + return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); + + default: return ERROR(parameter_unsupported); + } +} + +size_t ZSTD_CCtxParam_setParameter( + ZSTD_CCtx_params* 
CCtxParams, ZSTD_cParameter param, unsigned value) +{ + DEBUGLOG(4, "ZSTD_CCtxParam_setParameter (%u, %u)", (U32)param, value); + switch(param) + { + case ZSTD_p_format : + if (value > (unsigned)ZSTD_f_zstd1_magicless) + return ERROR(parameter_unsupported); + CCtxParams->format = (ZSTD_format_e)value; + return (size_t)CCtxParams->format; + + case ZSTD_p_compressionLevel : { + int cLevel = (int)value; /* cast expected to restore negative sign */ + if (cLevel > ZSTD_maxCLevel()) cLevel = ZSTD_maxCLevel(); + if (cLevel) { /* 0 : does not change current level */ + CCtxParams->compressionLevel = cLevel; + } + if (CCtxParams->compressionLevel >= 0) return CCtxParams->compressionLevel; + return 0; /* return type (size_t) cannot represent negative values */ + } + + case ZSTD_p_windowLog : + if (value>0) /* 0 => use default */ + CLAMPCHECK(value, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX); + CCtxParams->cParams.windowLog = value; + return CCtxParams->cParams.windowLog; + + case ZSTD_p_hashLog : + if (value>0) /* 0 => use default */ + CLAMPCHECK(value, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX); + CCtxParams->cParams.hashLog = value; + return CCtxParams->cParams.hashLog; + + case ZSTD_p_chainLog : + if (value>0) /* 0 => use default */ + CLAMPCHECK(value, ZSTD_CHAINLOG_MIN, ZSTD_CHAINLOG_MAX); + CCtxParams->cParams.chainLog = value; + return CCtxParams->cParams.chainLog; + + case ZSTD_p_searchLog : + if (value>0) /* 0 => use default */ + CLAMPCHECK(value, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX); + CCtxParams->cParams.searchLog = value; + return value; + + case ZSTD_p_minMatch : + if (value>0) /* 0 => use default */ + CLAMPCHECK(value, ZSTD_SEARCHLENGTH_MIN, ZSTD_SEARCHLENGTH_MAX); + CCtxParams->cParams.searchLength = value; + return CCtxParams->cParams.searchLength; + + case ZSTD_p_targetLength : + /* all values are valid. 
0 => use default */ + CCtxParams->cParams.targetLength = value; + return CCtxParams->cParams.targetLength; + + case ZSTD_p_compressionStrategy : + if (value>0) /* 0 => use default */ + CLAMPCHECK(value, (unsigned)ZSTD_fast, (unsigned)ZSTD_btultra); + CCtxParams->cParams.strategy = (ZSTD_strategy)value; + return (size_t)CCtxParams->cParams.strategy; + + case ZSTD_p_contentSizeFlag : + /* Content size written in frame header _when known_ (default:1) */ + DEBUGLOG(4, "set content size flag = %u", (value>0)); + CCtxParams->fParams.contentSizeFlag = value > 0; + return CCtxParams->fParams.contentSizeFlag; + + case ZSTD_p_checksumFlag : + /* A 32-bits content checksum will be calculated and written at end of frame (default:0) */ + CCtxParams->fParams.checksumFlag = value > 0; + return CCtxParams->fParams.checksumFlag; + + case ZSTD_p_dictIDFlag : /* When applicable, dictionary's dictID is provided in frame header (default:1) */ + DEBUGLOG(4, "set dictIDFlag = %u", (value>0)); + CCtxParams->fParams.noDictIDFlag = !value; + return !CCtxParams->fParams.noDictIDFlag; + + case ZSTD_p_forceMaxWindow : + CCtxParams->forceWindow = (value > 0); + return CCtxParams->forceWindow; + + case ZSTD_p_forceAttachDict : + CCtxParams->attachDictPref = value ? + (value > 0 ? 
ZSTD_dictForceAttach : ZSTD_dictForceCopy) : + ZSTD_dictDefaultAttach; + return CCtxParams->attachDictPref; + + case ZSTD_p_nbWorkers : +#ifndef ZSTD_MULTITHREAD + if (value>0) return ERROR(parameter_unsupported); + return 0; +#else + return ZSTDMT_CCtxParam_setNbWorkers(CCtxParams, value); +#endif + + case ZSTD_p_jobSize : +#ifndef ZSTD_MULTITHREAD + return ERROR(parameter_unsupported); +#else + return ZSTDMT_CCtxParam_setMTCtxParameter(CCtxParams, ZSTDMT_p_jobSize, value); +#endif + + case ZSTD_p_overlapSizeLog : +#ifndef ZSTD_MULTITHREAD + return ERROR(parameter_unsupported); +#else + return ZSTDMT_CCtxParam_setMTCtxParameter(CCtxParams, ZSTDMT_p_overlapSectionLog, value); +#endif + + case ZSTD_p_enableLongDistanceMatching : + CCtxParams->ldmParams.enableLdm = (value>0); + return CCtxParams->ldmParams.enableLdm; + + case ZSTD_p_ldmHashLog : + if (value>0) /* 0 ==> auto */ + CLAMPCHECK(value, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX); + CCtxParams->ldmParams.hashLog = value; + return CCtxParams->ldmParams.hashLog; + + case ZSTD_p_ldmMinMatch : + if (value>0) /* 0 ==> default */ + CLAMPCHECK(value, ZSTD_LDM_MINMATCH_MIN, ZSTD_LDM_MINMATCH_MAX); + CCtxParams->ldmParams.minMatchLength = value; + return CCtxParams->ldmParams.minMatchLength; + + case ZSTD_p_ldmBucketSizeLog : + if (value > ZSTD_LDM_BUCKETSIZELOG_MAX) + return ERROR(parameter_outOfBound); + CCtxParams->ldmParams.bucketSizeLog = value; + return CCtxParams->ldmParams.bucketSizeLog; + + case ZSTD_p_ldmHashEveryLog : + if (value > ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN) + return ERROR(parameter_outOfBound); + CCtxParams->ldmParams.hashEveryLog = value; + return CCtxParams->ldmParams.hashEveryLog; + + default: return ERROR(parameter_unsupported); + } +} + +size_t ZSTD_CCtx_getParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned* value) +{ + return ZSTD_CCtxParam_getParameter(&cctx->requestedParams, param, value); +} + +size_t ZSTD_CCtxParam_getParameter( + ZSTD_CCtx_params* CCtxParams, ZSTD_cParameter param, 
unsigned* value) +{ + switch(param) + { + case ZSTD_p_format : + *value = CCtxParams->format; + break; + case ZSTD_p_compressionLevel : + *value = CCtxParams->compressionLevel; + break; + case ZSTD_p_windowLog : + *value = CCtxParams->cParams.windowLog; + break; + case ZSTD_p_hashLog : + *value = CCtxParams->cParams.hashLog; + break; + case ZSTD_p_chainLog : + *value = CCtxParams->cParams.chainLog; + break; + case ZSTD_p_searchLog : + *value = CCtxParams->cParams.searchLog; + break; + case ZSTD_p_minMatch : + *value = CCtxParams->cParams.searchLength; + break; + case ZSTD_p_targetLength : + *value = CCtxParams->cParams.targetLength; + break; + case ZSTD_p_compressionStrategy : + *value = (unsigned)CCtxParams->cParams.strategy; + break; + case ZSTD_p_contentSizeFlag : + *value = CCtxParams->fParams.contentSizeFlag; + break; + case ZSTD_p_checksumFlag : + *value = CCtxParams->fParams.checksumFlag; + break; + case ZSTD_p_dictIDFlag : + *value = !CCtxParams->fParams.noDictIDFlag; + break; + case ZSTD_p_forceMaxWindow : + *value = CCtxParams->forceWindow; + break; + case ZSTD_p_forceAttachDict : + *value = CCtxParams->attachDictPref; + break; + case ZSTD_p_nbWorkers : +#ifndef ZSTD_MULTITHREAD + assert(CCtxParams->nbWorkers == 0); +#endif + *value = CCtxParams->nbWorkers; + break; + case ZSTD_p_jobSize : +#ifndef ZSTD_MULTITHREAD + return ERROR(parameter_unsupported); +#else + *value = CCtxParams->jobSize; + break; +#endif + case ZSTD_p_overlapSizeLog : +#ifndef ZSTD_MULTITHREAD + return ERROR(parameter_unsupported); +#else + *value = CCtxParams->overlapSizeLog; + break; +#endif + case ZSTD_p_enableLongDistanceMatching : + *value = CCtxParams->ldmParams.enableLdm; + break; + case ZSTD_p_ldmHashLog : + *value = CCtxParams->ldmParams.hashLog; + break; + case ZSTD_p_ldmMinMatch : + *value = CCtxParams->ldmParams.minMatchLength; + break; + case ZSTD_p_ldmBucketSizeLog : + *value = CCtxParams->ldmParams.bucketSizeLog; + break; + case ZSTD_p_ldmHashEveryLog : + *value = 
CCtxParams->ldmParams.hashEveryLog; + break; + default: return ERROR(parameter_unsupported); + } + return 0; +} + +/** ZSTD_CCtx_setParametersUsingCCtxParams() : + * just applies `params` into `cctx` + * no action is performed, parameters are merely stored. + * If ZSTDMT is enabled, parameters are pushed to cctx->mtctx. + * This is possible even if a compression is ongoing. + * In which case, new parameters will be applied on the fly, starting with next compression job. + */ +size_t ZSTD_CCtx_setParametersUsingCCtxParams( + ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params) +{ + DEBUGLOG(4, "ZSTD_CCtx_setParametersUsingCCtxParams"); + if (cctx->streamStage != zcss_init) return ERROR(stage_wrong); + if (cctx->cdict) return ERROR(stage_wrong); + + cctx->requestedParams = *params; + return 0; +} + +ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize) +{ + DEBUGLOG(4, "ZSTD_CCtx_setPledgedSrcSize to %u bytes", (U32)pledgedSrcSize); + if (cctx->streamStage != zcss_init) return ERROR(stage_wrong); + cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1; + return 0; +} + +size_t ZSTD_CCtx_loadDictionary_advanced( + ZSTD_CCtx* cctx, const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType) +{ + if (cctx->streamStage != zcss_init) return ERROR(stage_wrong); + if (cctx->staticSize) return ERROR(memory_allocation); /* no malloc for static CCtx */ + DEBUGLOG(4, "ZSTD_CCtx_loadDictionary_advanced (size: %u)", (U32)dictSize); + ZSTD_freeCDict(cctx->cdictLocal); /* in case one already exists */ + if (dict==NULL || dictSize==0) { /* no dictionary mode */ + cctx->cdictLocal = NULL; + cctx->cdict = NULL; + } else { + ZSTD_compressionParameters const cParams = + ZSTD_getCParamsFromCCtxParams(&cctx->requestedParams, cctx->pledgedSrcSizePlusOne-1, dictSize); + cctx->cdictLocal = ZSTD_createCDict_advanced( + dict, dictSize, + dictLoadMethod, dictContentType, + cParams, cctx->customMem); + 
cctx->cdict = cctx->cdictLocal; + if (cctx->cdictLocal == NULL) + return ERROR(memory_allocation); + } + return 0; +} + +ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_byReference( + ZSTD_CCtx* cctx, const void* dict, size_t dictSize) +{ + return ZSTD_CCtx_loadDictionary_advanced( + cctx, dict, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto); +} + +ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize) +{ + return ZSTD_CCtx_loadDictionary_advanced( + cctx, dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto); +} + + +size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict) +{ + if (cctx->streamStage != zcss_init) return ERROR(stage_wrong); + cctx->cdict = cdict; + memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict)); /* exclusive */ + return 0; +} + +size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize) +{ + return ZSTD_CCtx_refPrefix_advanced(cctx, prefix, prefixSize, ZSTD_dct_rawContent); +} + +size_t ZSTD_CCtx_refPrefix_advanced( + ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType) +{ + if (cctx->streamStage != zcss_init) return ERROR(stage_wrong); + cctx->cdict = NULL; /* prefix discards any prior cdict */ + cctx->prefixDict.dict = prefix; + cctx->prefixDict.dictSize = prefixSize; + cctx->prefixDict.dictContentType = dictContentType; + return 0; +} + +/*! ZSTD_CCtx_reset() : + * Also dumps dictionary */ +void ZSTD_CCtx_reset(ZSTD_CCtx* cctx) +{ + cctx->streamStage = zcss_init; + cctx->pledgedSrcSizePlusOne = 0; +} + +size_t ZSTD_CCtx_resetParameters(ZSTD_CCtx* cctx) +{ + if (cctx->streamStage != zcss_init) return ERROR(stage_wrong); + cctx->cdict = NULL; + return ZSTD_CCtxParams_reset(&cctx->requestedParams); +} + +/** ZSTD_checkCParams() : + control CParam values remain within authorized range. 
+ @return : 0, or an error code if one value is beyond authorized range */ +size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams) +{ + CLAMPCHECK(cParams.windowLog, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX); + CLAMPCHECK(cParams.chainLog, ZSTD_CHAINLOG_MIN, ZSTD_CHAINLOG_MAX); + CLAMPCHECK(cParams.hashLog, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX); + CLAMPCHECK(cParams.searchLog, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX); + CLAMPCHECK(cParams.searchLength, ZSTD_SEARCHLENGTH_MIN, ZSTD_SEARCHLENGTH_MAX); + if ((U32)(cParams.strategy) > (U32)ZSTD_btultra) + return ERROR(parameter_unsupported); + return 0; +} + +/** ZSTD_clampCParams() : + * make CParam values within valid range. + * @return : valid CParams */ +static ZSTD_compressionParameters +ZSTD_clampCParams(ZSTD_compressionParameters cParams) +{ +# define CLAMP(val,min,max) { \ + if (valmax) val=max; \ + } + CLAMP(cParams.windowLog, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX); + CLAMP(cParams.chainLog, ZSTD_CHAINLOG_MIN, ZSTD_CHAINLOG_MAX); + CLAMP(cParams.hashLog, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX); + CLAMP(cParams.searchLog, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX); + CLAMP(cParams.searchLength, ZSTD_SEARCHLENGTH_MIN, ZSTD_SEARCHLENGTH_MAX); + CLAMP(cParams.strategy, ZSTD_fast, ZSTD_btultra); + return cParams; +} + +/** ZSTD_cycleLog() : + * condition for correct operation : hashLog > 1 */ +static U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat) +{ + U32 const btScale = ((U32)strat >= (U32)ZSTD_btlazy2); + return hashLog - btScale; +} + +/** ZSTD_adjustCParams_internal() : + optimize `cPar` for a given input (`srcSize` and `dictSize`). + mostly downsizing to reduce memory consumption and initialization latency. + Both `srcSize` and `dictSize` are optional (use 0 if unknown). + Note : cPar is assumed validated. Use ZSTD_checkCParams() to ensure this condition. 
*/ +static ZSTD_compressionParameters +ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar, + unsigned long long srcSize, + size_t dictSize) +{ + static const U64 minSrcSize = 513; /* (1<<9) + 1 */ + static const U64 maxWindowResize = 1ULL << (ZSTD_WINDOWLOG_MAX-1); + assert(ZSTD_checkCParams(cPar)==0); + + if (dictSize && (srcSize+1<2) /* srcSize unknown */ ) + srcSize = minSrcSize; /* presumed small when there is a dictionary */ + else if (srcSize == 0) + srcSize = ZSTD_CONTENTSIZE_UNKNOWN; /* 0 == unknown : presumed large */ + + /* resize windowLog if input is small enough, to use less memory */ + if ( (srcSize < maxWindowResize) + && (dictSize < maxWindowResize) ) { + U32 const tSize = (U32)(srcSize + dictSize); + static U32 const hashSizeMin = 1 << ZSTD_HASHLOG_MIN; + U32 const srcLog = (tSize < hashSizeMin) ? ZSTD_HASHLOG_MIN : + ZSTD_highbit32(tSize-1) + 1; + if (cPar.windowLog > srcLog) cPar.windowLog = srcLog; + } + if (cPar.hashLog > cPar.windowLog+1) cPar.hashLog = cPar.windowLog+1; + { U32 const cycleLog = ZSTD_cycleLog(cPar.chainLog, cPar.strategy); + if (cycleLog > cPar.windowLog) + cPar.chainLog -= (cycleLog - cPar.windowLog); + } + + if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN) + cPar.windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN; /* required for frame header */ + + return cPar; +} + +ZSTD_compressionParameters +ZSTD_adjustCParams(ZSTD_compressionParameters cPar, + unsigned long long srcSize, + size_t dictSize) +{ + cPar = ZSTD_clampCParams(cPar); + return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize); +} + +ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams( + const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize) +{ + ZSTD_compressionParameters cParams = ZSTD_getCParams(CCtxParams->compressionLevel, srcSizeHint, dictSize); + if (CCtxParams->ldmParams.enableLdm) cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG; + if (CCtxParams->cParams.windowLog) cParams.windowLog = CCtxParams->cParams.windowLog; + if 
(CCtxParams->cParams.hashLog) cParams.hashLog = CCtxParams->cParams.hashLog; + if (CCtxParams->cParams.chainLog) cParams.chainLog = CCtxParams->cParams.chainLog; + if (CCtxParams->cParams.searchLog) cParams.searchLog = CCtxParams->cParams.searchLog; + if (CCtxParams->cParams.searchLength) cParams.searchLength = CCtxParams->cParams.searchLength; + if (CCtxParams->cParams.targetLength) cParams.targetLength = CCtxParams->cParams.targetLength; + if (CCtxParams->cParams.strategy) cParams.strategy = CCtxParams->cParams.strategy; + assert(!ZSTD_checkCParams(cParams)); + return ZSTD_adjustCParams_internal(cParams, srcSizeHint, dictSize); +} + +static size_t +ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams, + const U32 forCCtx) +{ + size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog); + size_t const hSize = ((size_t)1) << cParams->hashLog; + U32 const hashLog3 = (forCCtx && cParams->searchLength==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0; + size_t const h3Size = ((size_t)1) << hashLog3; + size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32); + size_t const optPotentialSpace = ((MaxML+1) + (MaxLL+1) + (MaxOff+1) + (1<strategy == ZSTD_btopt) || + (cParams->strategy == ZSTD_btultra))) + ? optPotentialSpace + : 0; + DEBUGLOG(4, "chainSize: %u - hSize: %u - h3Size: %u", + (U32)chainSize, (U32)hSize, (U32)h3Size); + return tableSpace + optSpace; +} + +size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params) +{ + /* Estimate CCtx size is supported for single-threaded compression only. */ + if (params->nbWorkers > 0) { return ERROR(GENERIC); } + { ZSTD_compressionParameters const cParams = + ZSTD_getCParamsFromCCtxParams(params, 0, 0); + size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog); + U32 const divider = (cParams.searchLength==3) ? 
3 : 4; + size_t const maxNbSeq = blockSize / divider; + size_t const tokenSpace = blockSize + 11*maxNbSeq; + size_t const entropySpace = HUF_WORKSPACE_SIZE; + size_t const blockStateSpace = 2 * sizeof(ZSTD_compressedBlockState_t); + size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 1); + + size_t const ldmSpace = ZSTD_ldm_getTableSize(params->ldmParams); + size_t const ldmSeqSpace = ZSTD_ldm_getMaxNbSeq(params->ldmParams, blockSize) * sizeof(rawSeq); + + size_t const neededSpace = entropySpace + blockStateSpace + tokenSpace + + matchStateSize + ldmSpace + ldmSeqSpace; + + DEBUGLOG(5, "sizeof(ZSTD_CCtx) : %u", (U32)sizeof(ZSTD_CCtx)); + DEBUGLOG(5, "estimate workSpace : %u", (U32)neededSpace); + return sizeof(ZSTD_CCtx) + neededSpace; + } +} + +size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams) +{ + ZSTD_CCtx_params const params = ZSTD_makeCCtxParamsFromCParams(cParams); + return ZSTD_estimateCCtxSize_usingCCtxParams(¶ms); +} + +static size_t ZSTD_estimateCCtxSize_internal(int compressionLevel) +{ + ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, 0, 0); + return ZSTD_estimateCCtxSize_usingCParams(cParams); +} + +size_t ZSTD_estimateCCtxSize(int compressionLevel) +{ + int level; + size_t memBudget = 0; + for (level=1; level<=compressionLevel; level++) { + size_t const newMB = ZSTD_estimateCCtxSize_internal(level); + if (newMB > memBudget) memBudget = newMB; + } + return memBudget; +} + +size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params) +{ + if (params->nbWorkers > 0) { return ERROR(GENERIC); } + { size_t const CCtxSize = ZSTD_estimateCCtxSize_usingCCtxParams(params); + size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << params->cParams.windowLog); + size_t const inBuffSize = ((size_t)1 << params->cParams.windowLog) + blockSize; + size_t const outBuffSize = ZSTD_compressBound(blockSize) + 1; + size_t const streamingSize = inBuffSize + outBuffSize; 
+ + return CCtxSize + streamingSize; + } +} + +size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams) +{ + ZSTD_CCtx_params const params = ZSTD_makeCCtxParamsFromCParams(cParams); + return ZSTD_estimateCStreamSize_usingCCtxParams(¶ms); +} + +static size_t ZSTD_estimateCStreamSize_internal(int compressionLevel) +{ + ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, 0, 0); + return ZSTD_estimateCStreamSize_usingCParams(cParams); +} + +size_t ZSTD_estimateCStreamSize(int compressionLevel) +{ + int level; + size_t memBudget = 0; + for (level=1; level<=compressionLevel; level++) { + size_t const newMB = ZSTD_estimateCStreamSize_internal(level); + if (newMB > memBudget) memBudget = newMB; + } + return memBudget; +} + +/* ZSTD_getFrameProgression(): + * tells how much data has been consumed (input) and produced (output) for current frame. + * able to count progression inside worker threads (non-blocking mode). + */ +ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx) +{ +#ifdef ZSTD_MULTITHREAD + if (cctx->appliedParams.nbWorkers > 0) { + return ZSTDMT_getFrameProgression(cctx->mtctx); + } +#endif + { ZSTD_frameProgression fp; + size_t const buffered = (cctx->inBuff == NULL) ? 
0 : + cctx->inBuffPos - cctx->inToCompress; + if (buffered) assert(cctx->inBuffPos >= cctx->inToCompress); + assert(buffered <= ZSTD_BLOCKSIZE_MAX); + fp.ingested = cctx->consumedSrcSize + buffered; + fp.consumed = cctx->consumedSrcSize; + fp.produced = cctx->producedCSize; + return fp; +} } + + +static U32 ZSTD_equivalentCParams(ZSTD_compressionParameters cParams1, + ZSTD_compressionParameters cParams2) +{ + return (cParams1.hashLog == cParams2.hashLog) + & (cParams1.chainLog == cParams2.chainLog) + & (cParams1.strategy == cParams2.strategy) /* opt parser space */ + & ((cParams1.searchLength==3) == (cParams2.searchLength==3)); /* hashlog3 space */ +} + +/** The parameters are equivalent if ldm is not enabled in both sets or + * all the parameters are equivalent. */ +static U32 ZSTD_equivalentLdmParams(ldmParams_t ldmParams1, + ldmParams_t ldmParams2) +{ + return (!ldmParams1.enableLdm && !ldmParams2.enableLdm) || + (ldmParams1.enableLdm == ldmParams2.enableLdm && + ldmParams1.hashLog == ldmParams2.hashLog && + ldmParams1.bucketSizeLog == ldmParams2.bucketSizeLog && + ldmParams1.minMatchLength == ldmParams2.minMatchLength && + ldmParams1.hashEveryLog == ldmParams2.hashEveryLog); +} + +typedef enum { ZSTDb_not_buffered, ZSTDb_buffered } ZSTD_buffered_policy_e; + +/* ZSTD_sufficientBuff() : + * check internal buffers exist for streaming if buffPol == ZSTDb_buffered . + * Note : they are assumed to be correctly sized if ZSTD_equivalentCParams()==1 */ +static U32 ZSTD_sufficientBuff(size_t bufferSize1, size_t blockSize1, + ZSTD_buffered_policy_e buffPol2, + ZSTD_compressionParameters cParams2, + U64 pledgedSrcSize) +{ + size_t const windowSize2 = MAX(1, (size_t)MIN(((U64)1 << cParams2.windowLog), pledgedSrcSize)); + size_t const blockSize2 = MIN(ZSTD_BLOCKSIZE_MAX, windowSize2); + size_t const neededBufferSize2 = (buffPol2==ZSTDb_buffered) ? 
windowSize2 + blockSize2 : 0; + DEBUGLOG(4, "ZSTD_sufficientBuff: is windowSize2=%u <= wlog1=%u", + (U32)windowSize2, cParams2.windowLog); + DEBUGLOG(4, "ZSTD_sufficientBuff: is blockSize2=%u <= blockSize1=%u", + (U32)blockSize2, (U32)blockSize1); + return (blockSize2 <= blockSize1) /* seqStore space depends on blockSize */ + & (neededBufferSize2 <= bufferSize1); +} + +/** Equivalence for resetCCtx purposes */ +static U32 ZSTD_equivalentParams(ZSTD_CCtx_params params1, + ZSTD_CCtx_params params2, + size_t buffSize1, size_t blockSize1, + ZSTD_buffered_policy_e buffPol2, + U64 pledgedSrcSize) +{ + DEBUGLOG(4, "ZSTD_equivalentParams: pledgedSrcSize=%u", (U32)pledgedSrcSize); + return ZSTD_equivalentCParams(params1.cParams, params2.cParams) && + ZSTD_equivalentLdmParams(params1.ldmParams, params2.ldmParams) && + ZSTD_sufficientBuff(buffSize1, blockSize1, buffPol2, params2.cParams, pledgedSrcSize); +} + +static void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs) +{ + int i; + for (i = 0; i < ZSTD_REP_NUM; ++i) + bs->rep[i] = repStartValue[i]; + bs->entropy.huf.repeatMode = HUF_repeat_none; + bs->entropy.fse.offcode_repeatMode = FSE_repeat_none; + bs->entropy.fse.matchlength_repeatMode = FSE_repeat_none; + bs->entropy.fse.litlength_repeatMode = FSE_repeat_none; +} + +/*! ZSTD_invalidateMatchState() + * Invalidate all the matches in the match finder tables. + * Requires nextSrc and base to be set (can be NULL). + */ +static void ZSTD_invalidateMatchState(ZSTD_matchState_t* ms) +{ + ZSTD_window_clear(&ms->window); + + ms->nextToUpdate = ms->window.dictLimit + 1; + ms->nextToUpdate3 = ms->window.dictLimit + 1; + ms->loadedDictEnd = 0; + ms->opt.litLengthSum = 0; /* force reset of btopt stats */ + ms->dictMatchState = NULL; +} + +/*! 
ZSTD_continueCCtx() : + * reuse CCtx without reset (note : requires no dictionary) */ +static size_t ZSTD_continueCCtx(ZSTD_CCtx* cctx, ZSTD_CCtx_params params, U64 pledgedSrcSize) +{ + size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params.cParams.windowLog), pledgedSrcSize)); + size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize); + DEBUGLOG(4, "ZSTD_continueCCtx: re-use context in place"); + + cctx->blockSize = blockSize; /* previous block size could be different even for same windowLog, due to pledgedSrcSize */ + cctx->appliedParams = params; + cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1; + cctx->consumedSrcSize = 0; + cctx->producedCSize = 0; + if (pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN) + cctx->appliedParams.fParams.contentSizeFlag = 0; + DEBUGLOG(4, "pledged content size : %u ; flag : %u", + (U32)pledgedSrcSize, cctx->appliedParams.fParams.contentSizeFlag); + cctx->stage = ZSTDcs_init; + cctx->dictID = 0; + if (params.ldmParams.enableLdm) + ZSTD_window_clear(&cctx->ldmState.window); + ZSTD_referenceExternalSequences(cctx, NULL, 0); + ZSTD_invalidateMatchState(&cctx->blockState.matchState); + ZSTD_reset_compressedBlockState(cctx->blockState.prevCBlock); + XXH64_reset(&cctx->xxhState, 0); + return 0; +} + +typedef enum { ZSTDcrp_continue, ZSTDcrp_noMemset } ZSTD_compResetPolicy_e; + +static void* +ZSTD_reset_matchState(ZSTD_matchState_t* ms, + void* ptr, + const ZSTD_compressionParameters* cParams, + ZSTD_compResetPolicy_e const crp, U32 const forCCtx) +{ + size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog); + size_t const hSize = ((size_t)1) << cParams->hashLog; + U32 const hashLog3 = (forCCtx && cParams->searchLength==3) ? 
MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0; + size_t const h3Size = ((size_t)1) << hashLog3; + size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32); + + assert(((size_t)ptr & 3) == 0); + + ms->hashLog3 = hashLog3; + memset(&ms->window, 0, sizeof(ms->window)); + ZSTD_invalidateMatchState(ms); + + /* opt parser space */ + if (forCCtx && ((cParams->strategy == ZSTD_btopt) | (cParams->strategy == ZSTD_btultra))) { + DEBUGLOG(4, "reserving optimal parser space"); + ms->opt.litFreq = (U32*)ptr; + ms->opt.litLengthFreq = ms->opt.litFreq + (1<opt.matchLengthFreq = ms->opt.litLengthFreq + (MaxLL+1); + ms->opt.offCodeFreq = ms->opt.matchLengthFreq + (MaxML+1); + ptr = ms->opt.offCodeFreq + (MaxOff+1); + ms->opt.matchTable = (ZSTD_match_t*)ptr; + ptr = ms->opt.matchTable + ZSTD_OPT_NUM+1; + ms->opt.priceTable = (ZSTD_optimal_t*)ptr; + ptr = ms->opt.priceTable + ZSTD_OPT_NUM+1; + } + + /* table Space */ + DEBUGLOG(4, "reset table : %u", crp!=ZSTDcrp_noMemset); + assert(((size_t)ptr & 3) == 0); /* ensure ptr is properly aligned */ + if (crp!=ZSTDcrp_noMemset) memset(ptr, 0, tableSpace); /* reset tables only */ + ms->hashTable = (U32*)(ptr); + ms->chainTable = ms->hashTable + hSize; + ms->hashTable3 = ms->chainTable + chainSize; + ptr = ms->hashTable3 + h3Size; + + assert(((size_t)ptr & 3) == 0); + return ptr; +} + +#define ZSTD_WORKSPACETOOLARGE_FACTOR 3 /* define "workspace is too large" as this number of times larger than needed */ +#define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128 /* when workspace is continuously too large + * during at least this number of times, + * context's memory usage is considered wasteful, + * because it's sized to handle a worst case scenario which rarely happens. + * In which case, resize it down to free some memory */ + +/*! 
ZSTD_resetCCtx_internal() : + note : `params` are assumed fully validated at this stage */ +static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, + ZSTD_CCtx_params params, + U64 pledgedSrcSize, + ZSTD_compResetPolicy_e const crp, + ZSTD_buffered_policy_e const zbuff) +{ + DEBUGLOG(4, "ZSTD_resetCCtx_internal: pledgedSrcSize=%u, wlog=%u", + (U32)pledgedSrcSize, params.cParams.windowLog); + assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); + + if (crp == ZSTDcrp_continue) { + if (ZSTD_equivalentParams(zc->appliedParams, params, + zc->inBuffSize, zc->blockSize, + zbuff, pledgedSrcSize)) { + DEBUGLOG(4, "ZSTD_equivalentParams()==1 -> continue mode (wLog1=%u, blockSize1=%zu)", + zc->appliedParams.cParams.windowLog, zc->blockSize); + zc->workSpaceOversizedDuration += (zc->workSpaceOversizedDuration > 0); /* if it was too large, it still is */ + if (zc->workSpaceOversizedDuration <= ZSTD_WORKSPACETOOLARGE_MAXDURATION) + return ZSTD_continueCCtx(zc, params, pledgedSrcSize); + } } + DEBUGLOG(4, "ZSTD_equivalentParams()==0 -> reset CCtx"); + + if (params.ldmParams.enableLdm) { + /* Adjust long distance matching parameters */ + ZSTD_ldm_adjustParameters(¶ms.ldmParams, ¶ms.cParams); + assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog); + assert(params.ldmParams.hashEveryLog < 32); + zc->ldmState.hashPower = ZSTD_ldm_getHashPower(params.ldmParams.minMatchLength); + } + + { size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params.cParams.windowLog), pledgedSrcSize)); + size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize); + U32 const divider = (params.cParams.searchLength==3) ? 3 : 4; + size_t const maxNbSeq = blockSize / divider; + size_t const tokenSpace = blockSize + 11*maxNbSeq; + size_t const buffOutSize = (zbuff==ZSTDb_buffered) ? ZSTD_compressBound(blockSize)+1 : 0; + size_t const buffInSize = (zbuff==ZSTDb_buffered) ? 
windowSize + blockSize : 0; + size_t const matchStateSize = ZSTD_sizeof_matchState(¶ms.cParams, /* forCCtx */ 1); + size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(params.ldmParams, blockSize); + void* ptr; /* used to partition workSpace */ + + /* Check if workSpace is large enough, alloc a new one if needed */ + { size_t const entropySpace = HUF_WORKSPACE_SIZE; + size_t const blockStateSpace = 2 * sizeof(ZSTD_compressedBlockState_t); + size_t const bufferSpace = buffInSize + buffOutSize; + size_t const ldmSpace = ZSTD_ldm_getTableSize(params.ldmParams); + size_t const ldmSeqSpace = maxNbLdmSeq * sizeof(rawSeq); + + size_t const neededSpace = entropySpace + blockStateSpace + ldmSpace + + ldmSeqSpace + matchStateSize + tokenSpace + + bufferSpace; + + int const workSpaceTooSmall = zc->workSpaceSize < neededSpace; + int const workSpaceTooLarge = zc->workSpaceSize > ZSTD_WORKSPACETOOLARGE_FACTOR * neededSpace; + int const workSpaceWasteful = workSpaceTooLarge && (zc->workSpaceOversizedDuration > ZSTD_WORKSPACETOOLARGE_MAXDURATION); + zc->workSpaceOversizedDuration = workSpaceTooLarge ? zc->workSpaceOversizedDuration+1 : 0; + + DEBUGLOG(4, "Need %zuKB workspace, including %zuKB for match state, and %zuKB for buffers", + neededSpace>>10, matchStateSize>>10, bufferSpace>>10); + DEBUGLOG(4, "windowSize: %zu - blockSize: %zu", windowSize, blockSize); + + if (workSpaceTooSmall || workSpaceWasteful) { + DEBUGLOG(4, "Need to resize workSpaceSize from %zuKB to %zuKB", + zc->workSpaceSize >> 10, + neededSpace >> 10); + /* static cctx : no resize, error out */ + if (zc->staticSize) return ERROR(memory_allocation); + + zc->workSpaceSize = 0; + ZSTD_free(zc->workSpace, zc->customMem); + zc->workSpace = ZSTD_malloc(neededSpace, zc->customMem); + if (zc->workSpace == NULL) return ERROR(memory_allocation); + zc->workSpaceSize = neededSpace; + zc->workSpaceOversizedDuration = 0; + ptr = zc->workSpace; + + /* Statically sized space. 
+ * entropyWorkspace never moves, + * though prev/next block swap places */ + assert(((size_t)zc->workSpace & 3) == 0); /* ensure correct alignment */ + assert(zc->workSpaceSize >= 2 * sizeof(ZSTD_compressedBlockState_t)); + zc->blockState.prevCBlock = (ZSTD_compressedBlockState_t*)zc->workSpace; + zc->blockState.nextCBlock = zc->blockState.prevCBlock + 1; + ptr = zc->blockState.nextCBlock + 1; + zc->entropyWorkspace = (U32*)ptr; + } } + + /* init params */ + zc->appliedParams = params; + zc->pledgedSrcSizePlusOne = pledgedSrcSize+1; + zc->consumedSrcSize = 0; + zc->producedCSize = 0; + if (pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN) + zc->appliedParams.fParams.contentSizeFlag = 0; + DEBUGLOG(4, "pledged content size : %u ; flag : %u", + (U32)pledgedSrcSize, zc->appliedParams.fParams.contentSizeFlag); + zc->blockSize = blockSize; + + XXH64_reset(&zc->xxhState, 0); + zc->stage = ZSTDcs_init; + zc->dictID = 0; + + ZSTD_reset_compressedBlockState(zc->blockState.prevCBlock); + + ptr = zc->entropyWorkspace + HUF_WORKSPACE_SIZE_U32; + + /* ldm hash table */ + /* initialize bucketOffsets table later for pointer alignment */ + if (params.ldmParams.enableLdm) { + size_t const ldmHSize = ((size_t)1) << params.ldmParams.hashLog; + memset(ptr, 0, ldmHSize * sizeof(ldmEntry_t)); + assert(((size_t)ptr & 3) == 0); /* ensure ptr is properly aligned */ + zc->ldmState.hashTable = (ldmEntry_t*)ptr; + ptr = zc->ldmState.hashTable + ldmHSize; + zc->ldmSequences = (rawSeq*)ptr; + ptr = zc->ldmSequences + maxNbLdmSeq; + zc->maxNbLdmSequences = maxNbLdmSeq; + + memset(&zc->ldmState.window, 0, sizeof(zc->ldmState.window)); + } + assert(((size_t)ptr & 3) == 0); /* ensure ptr is properly aligned */ + + ptr = ZSTD_reset_matchState(&zc->blockState.matchState, ptr, ¶ms.cParams, crp, /* forCCtx */ 1); + + /* sequences storage */ + zc->seqStore.sequencesStart = (seqDef*)ptr; + ptr = zc->seqStore.sequencesStart + maxNbSeq; + zc->seqStore.llCode = (BYTE*) ptr; + zc->seqStore.mlCode = 
zc->seqStore.llCode + maxNbSeq; + zc->seqStore.ofCode = zc->seqStore.mlCode + maxNbSeq; + zc->seqStore.litStart = zc->seqStore.ofCode + maxNbSeq; + ptr = zc->seqStore.litStart + blockSize; + + /* ldm bucketOffsets table */ + if (params.ldmParams.enableLdm) { + size_t const ldmBucketSize = + ((size_t)1) << (params.ldmParams.hashLog - + params.ldmParams.bucketSizeLog); + memset(ptr, 0, ldmBucketSize); + zc->ldmState.bucketOffsets = (BYTE*)ptr; + ptr = zc->ldmState.bucketOffsets + ldmBucketSize; + ZSTD_window_clear(&zc->ldmState.window); + } + ZSTD_referenceExternalSequences(zc, NULL, 0); + + /* buffers */ + zc->inBuffSize = buffInSize; + zc->inBuff = (char*)ptr; + zc->outBuffSize = buffOutSize; + zc->outBuff = zc->inBuff + buffInSize; + + return 0; + } +} + +/* ZSTD_invalidateRepCodes() : + * ensures next compression will not use repcodes from previous block. + * Note : only works with regular variant; + * do not use with extDict variant ! */ +void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx) { + int i; + for (i=0; iblockState.prevCBlock->rep[i] = 0; + assert(!ZSTD_window_hasExtDict(cctx->blockState.matchState.window)); +} + +static size_t ZSTD_resetCCtx_usingCDict(ZSTD_CCtx* cctx, + const ZSTD_CDict* cdict, + ZSTD_CCtx_params params, + U64 pledgedSrcSize, + ZSTD_buffered_policy_e zbuff) +{ + /* We have a choice between copying the dictionary context into the working + * context, or referencing the dictionary context from the working context + * in-place. We decide here which strategy to use. 
*/ + const U64 attachDictSizeCutoffs[(unsigned)ZSTD_btultra+1] = { + 8 KB, /* unused */ + 8 KB, /* ZSTD_fast */ + 16 KB, /* ZSTD_dfast */ + 32 KB, /* ZSTD_greedy */ + 32 KB, /* ZSTD_lazy */ + 32 KB, /* ZSTD_lazy2 */ + 32 KB, /* ZSTD_btlazy2 */ + 32 KB, /* ZSTD_btopt */ + 8 KB /* ZSTD_btultra */ + }; + const int attachDict = ( pledgedSrcSize <= attachDictSizeCutoffs[cdict->cParams.strategy] + || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN + || params.attachDictPref == ZSTD_dictForceAttach ) + && params.attachDictPref != ZSTD_dictForceCopy + && !params.forceWindow /* dictMatchState isn't correctly + * handled in _enforceMaxDist */ + && ZSTD_equivalentCParams(cctx->appliedParams.cParams, + cdict->cParams); + + DEBUGLOG(4, "ZSTD_resetCCtx_usingCDict (pledgedSrcSize=%u)", (U32)pledgedSrcSize); + + + { unsigned const windowLog = params.cParams.windowLog; + assert(windowLog != 0); + /* Copy only compression parameters related to tables. */ + params.cParams = cdict->cParams; + params.cParams.windowLog = windowLog; + ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, + attachDict ? ZSTDcrp_continue : ZSTDcrp_noMemset, + zbuff); + assert(cctx->appliedParams.cParams.strategy == cdict->cParams.strategy); + assert(cctx->appliedParams.cParams.hashLog == cdict->cParams.hashLog); + assert(cctx->appliedParams.cParams.chainLog == cdict->cParams.chainLog); + } + + if (attachDict) { + const U32 cdictLen = (U32)( cdict->matchState.window.nextSrc + - cdict->matchState.window.base); + if (cdictLen == 0) { + /* don't even attach dictionaries with no contents */ + DEBUGLOG(4, "skipping attaching empty dictionary"); + } else { + DEBUGLOG(4, "attaching dictionary into context"); + cctx->blockState.matchState.dictMatchState = &cdict->matchState; + + /* prep working match state so dict matches never have negative indices + * when they are translated to the working context's index space. 
*/ + if (cctx->blockState.matchState.window.dictLimit < cdictLen) { + cctx->blockState.matchState.window.nextSrc = + cctx->blockState.matchState.window.base + cdictLen; + ZSTD_window_clear(&cctx->blockState.matchState.window); + } + cctx->blockState.matchState.loadedDictEnd = cctx->blockState.matchState.window.dictLimit; + } + } else { + DEBUGLOG(4, "copying dictionary into context"); + /* copy tables */ + { size_t const chainSize = (cdict->cParams.strategy == ZSTD_fast) ? 0 : ((size_t)1 << cdict->cParams.chainLog); + size_t const hSize = (size_t)1 << cdict->cParams.hashLog; + size_t const tableSpace = (chainSize + hSize) * sizeof(U32); + assert((U32*)cctx->blockState.matchState.chainTable == (U32*)cctx->blockState.matchState.hashTable + hSize); /* chainTable must follow hashTable */ + assert((U32*)cctx->blockState.matchState.hashTable3 == (U32*)cctx->blockState.matchState.chainTable + chainSize); + assert((U32*)cdict->matchState.chainTable == (U32*)cdict->matchState.hashTable + hSize); /* chainTable must follow hashTable */ + assert((U32*)cdict->matchState.hashTable3 == (U32*)cdict->matchState.chainTable + chainSize); + memcpy(cctx->blockState.matchState.hashTable, cdict->matchState.hashTable, tableSpace); /* presumes all tables follow each other */ + } + + /* Zero the hashTable3, since the cdict never fills it */ + { size_t const h3Size = (size_t)1 << cctx->blockState.matchState.hashLog3; + assert(cdict->matchState.hashLog3 == 0); + memset(cctx->blockState.matchState.hashTable3, 0, h3Size * sizeof(U32)); + } + + /* copy dictionary offsets */ + { + ZSTD_matchState_t const* srcMatchState = &cdict->matchState; + ZSTD_matchState_t* dstMatchState = &cctx->blockState.matchState; + dstMatchState->window = srcMatchState->window; + dstMatchState->nextToUpdate = srcMatchState->nextToUpdate; + dstMatchState->nextToUpdate3= srcMatchState->nextToUpdate3; + dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd; + } + } + + cctx->dictID = cdict->dictID; + + /* copy block 
state */ + memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState)); + + return 0; +} + +/*! ZSTD_copyCCtx_internal() : + * Duplicate an existing context `srcCCtx` into another one `dstCCtx`. + * Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()). + * The "context", in this case, refers to the hash and chain tables, + * entropy tables, and dictionary references. + * `windowLog` value is enforced if != 0, otherwise value is copied from srcCCtx. + * @return : 0, or an error code */ +static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx, + const ZSTD_CCtx* srcCCtx, + ZSTD_frameParameters fParams, + U64 pledgedSrcSize, + ZSTD_buffered_policy_e zbuff) +{ + DEBUGLOG(5, "ZSTD_copyCCtx_internal"); + if (srcCCtx->stage!=ZSTDcs_init) return ERROR(stage_wrong); + + memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem)); + { ZSTD_CCtx_params params = dstCCtx->requestedParams; + /* Copy only compression parameters related to tables. */ + params.cParams = srcCCtx->appliedParams.cParams; + params.fParams = fParams; + ZSTD_resetCCtx_internal(dstCCtx, params, pledgedSrcSize, + ZSTDcrp_noMemset, zbuff); + assert(dstCCtx->appliedParams.cParams.windowLog == srcCCtx->appliedParams.cParams.windowLog); + assert(dstCCtx->appliedParams.cParams.strategy == srcCCtx->appliedParams.cParams.strategy); + assert(dstCCtx->appliedParams.cParams.hashLog == srcCCtx->appliedParams.cParams.hashLog); + assert(dstCCtx->appliedParams.cParams.chainLog == srcCCtx->appliedParams.cParams.chainLog); + assert(dstCCtx->blockState.matchState.hashLog3 == srcCCtx->blockState.matchState.hashLog3); + } + + /* copy tables */ + { size_t const chainSize = (srcCCtx->appliedParams.cParams.strategy == ZSTD_fast) ? 
0 : ((size_t)1 << srcCCtx->appliedParams.cParams.chainLog); + size_t const hSize = (size_t)1 << srcCCtx->appliedParams.cParams.hashLog; + size_t const h3Size = (size_t)1 << srcCCtx->blockState.matchState.hashLog3; + size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32); + assert((U32*)dstCCtx->blockState.matchState.chainTable == (U32*)dstCCtx->blockState.matchState.hashTable + hSize); /* chainTable must follow hashTable */ + assert((U32*)dstCCtx->blockState.matchState.hashTable3 == (U32*)dstCCtx->blockState.matchState.chainTable + chainSize); + memcpy(dstCCtx->blockState.matchState.hashTable, srcCCtx->blockState.matchState.hashTable, tableSpace); /* presumes all tables follow each other */ + } + + /* copy dictionary offsets */ + { + const ZSTD_matchState_t* srcMatchState = &srcCCtx->blockState.matchState; + ZSTD_matchState_t* dstMatchState = &dstCCtx->blockState.matchState; + dstMatchState->window = srcMatchState->window; + dstMatchState->nextToUpdate = srcMatchState->nextToUpdate; + dstMatchState->nextToUpdate3= srcMatchState->nextToUpdate3; + dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd; + } + dstCCtx->dictID = srcCCtx->dictID; + + /* copy block state */ + memcpy(dstCCtx->blockState.prevCBlock, srcCCtx->blockState.prevCBlock, sizeof(*srcCCtx->blockState.prevCBlock)); + + return 0; +} + +/*! ZSTD_copyCCtx() : + * Duplicate an existing context `srcCCtx` into another one `dstCCtx`. + * Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()). + * pledgedSrcSize==0 means "unknown". 
+* @return : 0, or an error code */ +size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx, unsigned long long pledgedSrcSize) +{ + ZSTD_frameParameters fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ }; + ZSTD_buffered_policy_e const zbuff = (ZSTD_buffered_policy_e)(srcCCtx->inBuffSize>0); + ZSTD_STATIC_ASSERT((U32)ZSTDb_buffered==1); + if (pledgedSrcSize==0) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN; + fParams.contentSizeFlag = (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN); + + return ZSTD_copyCCtx_internal(dstCCtx, srcCCtx, + fParams, pledgedSrcSize, + zbuff); +} + + +#define ZSTD_ROWSIZE 16 +/*! ZSTD_reduceTable() : + * reduce table indexes by `reducerValue`, or squash to zero. + * PreserveMark preserves "unsorted mark" for btlazy2 strategy. + * It must be set to a clear 0/1 value, to remove branch during inlining. + * Presume table size is a multiple of ZSTD_ROWSIZE + * to help auto-vectorization */ +FORCE_INLINE_TEMPLATE void +ZSTD_reduceTable_internal (U32* const table, U32 const size, U32 const reducerValue, int const preserveMark) +{ + int const nbRows = (int)size / ZSTD_ROWSIZE; + int cellNb = 0; + int rowNb; + assert((size & (ZSTD_ROWSIZE-1)) == 0); /* multiple of ZSTD_ROWSIZE */ + assert(size < (1U<<31)); /* can be casted to int */ + for (rowNb=0 ; rowNb < nbRows ; rowNb++) { + int column; + for (column=0; columnblockState.matchState; + { U32 const hSize = (U32)1 << zc->appliedParams.cParams.hashLog; + ZSTD_reduceTable(ms->hashTable, hSize, reducerValue); + } + + if (zc->appliedParams.cParams.strategy != ZSTD_fast) { + U32 const chainSize = (U32)1 << zc->appliedParams.cParams.chainLog; + if (zc->appliedParams.cParams.strategy == ZSTD_btlazy2) + ZSTD_reduceTable_btlazy2(ms->chainTable, chainSize, reducerValue); + else + ZSTD_reduceTable(ms->chainTable, chainSize, reducerValue); + } + + if (ms->hashLog3) { + U32 const h3Size = (U32)1 << ms->hashLog3; + ZSTD_reduceTable(ms->hashTable3, h3Size, reducerValue); + } +} + + 
+/*-******************************************************* +* Block entropic compression +*********************************************************/ + +/* See doc/zstd_compression_format.md for detailed format description */ + +size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + if (srcSize + ZSTD_blockHeaderSize > dstCapacity) return ERROR(dstSize_tooSmall); + memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize); + MEM_writeLE24(dst, (U32)(srcSize << 2) + (U32)bt_raw); + return ZSTD_blockHeaderSize+srcSize; +} + + +static size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + BYTE* const ostart = (BYTE* const)dst; + U32 const flSize = 1 + (srcSize>31) + (srcSize>4095); + + if (srcSize + flSize > dstCapacity) return ERROR(dstSize_tooSmall); + + switch(flSize) + { + case 1: /* 2 - 1 - 5 */ + ostart[0] = (BYTE)((U32)set_basic + (srcSize<<3)); + break; + case 2: /* 2 - 2 - 12 */ + MEM_writeLE16(ostart, (U16)((U32)set_basic + (1<<2) + (srcSize<<4))); + break; + case 3: /* 2 - 2 - 20 */ + MEM_writeLE32(ostart, (U32)((U32)set_basic + (3<<2) + (srcSize<<4))); + break; + default: /* not necessary : flSize is {1,2,3} */ + assert(0); + } + + memcpy(ostart + flSize, src, srcSize); + return srcSize + flSize; +} + +static size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + BYTE* const ostart = (BYTE* const)dst; + U32 const flSize = 1 + (srcSize>31) + (srcSize>4095); + + (void)dstCapacity; /* dstCapacity already guaranteed to be >=4, hence large enough */ + + switch(flSize) + { + case 1: /* 2 - 1 - 5 */ + ostart[0] = (BYTE)((U32)set_rle + (srcSize<<3)); + break; + case 2: /* 2 - 2 - 12 */ + MEM_writeLE16(ostart, (U16)((U32)set_rle + (1<<2) + (srcSize<<4))); + break; + case 3: /* 2 - 2 - 20 */ + MEM_writeLE32(ostart, (U32)((U32)set_rle + (3<<2) + (srcSize<<4))); + break; + default: /* not necessary : flSize is {1,2,3} */ + 
assert(0); + } + + ostart[flSize] = *(const BYTE*)src; + return flSize+1; +} + + +/* ZSTD_minGain() : + * minimum compression required + * to generate a compress block or a compressed literals section. + * note : use same formula for both situations */ +static size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat) +{ + U32 const minlog = (strat==ZSTD_btultra) ? 7 : 6; + return (srcSize >> minlog) + 2; +} + +static size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf, + ZSTD_hufCTables_t* nextHuf, + ZSTD_strategy strategy, int disableLiteralCompression, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + U32* workspace, const int bmi2) +{ + size_t const minGain = ZSTD_minGain(srcSize, strategy); + size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB); + BYTE* const ostart = (BYTE*)dst; + U32 singleStream = srcSize < 256; + symbolEncodingType_e hType = set_compressed; + size_t cLitSize; + + DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i)", + disableLiteralCompression); + + /* Prepare nextEntropy assuming reusing the existing table */ + memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + + if (disableLiteralCompression) + return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); + + /* small ? don't even attempt compression (speed opt) */ +# define COMPRESS_LITERALS_SIZE_MIN 63 + { size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN; + if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); + } + + if (dstCapacity < lhSize+1) return ERROR(dstSize_tooSmall); /* not enough space for compression */ + { HUF_repeat repeat = prevHuf->repeatMode; + int const preferRepeat = strategy < ZSTD_lazy ? srcSize <= 1024 : 0; + if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1; + cLitSize = singleStream ? 
HUF_compress1X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, + workspace, HUF_WORKSPACE_SIZE, (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2) + : HUF_compress4X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, + workspace, HUF_WORKSPACE_SIZE, (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2); + if (repeat != HUF_repeat_none) { + /* reused the existing table */ + hType = set_repeat; + } + } + + if ((cLitSize==0) | (cLitSize >= srcSize - minGain) | ERR_isError(cLitSize)) { + memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); + } + if (cLitSize==1) { + memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize); + } + + if (hType == set_compressed) { + /* using a newly constructed table */ + nextHuf->repeatMode = HUF_repeat_check; + } + + /* Build header */ + switch(lhSize) + { + case 3: /* 2 - 2 - 10 - 10 */ + { U32 const lhc = hType + ((!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14); + MEM_writeLE24(ostart, lhc); + break; + } + case 4: /* 2 - 2 - 14 - 14 */ + { U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18); + MEM_writeLE32(ostart, lhc); + break; + } + case 5: /* 2 - 2 - 18 - 18 */ + { U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22); + MEM_writeLE32(ostart, lhc); + ostart[4] = (BYTE)(cLitSize >> 10); + break; + } + default: /* not possible : lhSize is {3,4,5} */ + assert(0); + } + return lhSize+cLitSize; +} + + +void ZSTD_seqToCodes(const seqStore_t* seqStorePtr) +{ + const seqDef* const sequences = seqStorePtr->sequencesStart; + BYTE* const llCodeTable = seqStorePtr->llCode; + BYTE* const ofCodeTable = seqStorePtr->ofCode; + BYTE* const mlCodeTable = seqStorePtr->mlCode; + U32 const nbSeq = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); + U32 u; + for (u=0; ulongLengthID==1) + llCodeTable[seqStorePtr->longLengthPos] = 
MaxLL; + if (seqStorePtr->longLengthID==2) + mlCodeTable[seqStorePtr->longLengthPos] = MaxML; +} + + +/** + * -log2(x / 256) lookup table for x in [0, 256). + * If x == 0: Return 0 + * Else: Return floor(-log2(x / 256) * 256) + */ +static unsigned const kInverseProbabiltyLog256[256] = { + 0, 2048, 1792, 1642, 1536, 1453, 1386, 1329, 1280, 1236, 1197, 1162, + 1130, 1100, 1073, 1047, 1024, 1001, 980, 960, 941, 923, 906, 889, + 874, 859, 844, 830, 817, 804, 791, 779, 768, 756, 745, 734, + 724, 714, 704, 694, 685, 676, 667, 658, 650, 642, 633, 626, + 618, 610, 603, 595, 588, 581, 574, 567, 561, 554, 548, 542, + 535, 529, 523, 517, 512, 506, 500, 495, 489, 484, 478, 473, + 468, 463, 458, 453, 448, 443, 438, 434, 429, 424, 420, 415, + 411, 407, 402, 398, 394, 390, 386, 382, 377, 373, 370, 366, + 362, 358, 354, 350, 347, 343, 339, 336, 332, 329, 325, 322, + 318, 315, 311, 308, 305, 302, 298, 295, 292, 289, 286, 282, + 279, 276, 273, 270, 267, 264, 261, 258, 256, 253, 250, 247, + 244, 241, 239, 236, 233, 230, 228, 225, 222, 220, 217, 215, + 212, 209, 207, 204, 202, 199, 197, 194, 192, 190, 187, 185, + 182, 180, 178, 175, 173, 171, 168, 166, 164, 162, 159, 157, + 155, 153, 151, 149, 146, 144, 142, 140, 138, 136, 134, 132, + 130, 128, 126, 123, 121, 119, 117, 115, 114, 112, 110, 108, + 106, 104, 102, 100, 98, 96, 94, 93, 91, 89, 87, 85, + 83, 82, 80, 78, 76, 74, 73, 71, 69, 67, 66, 64, + 62, 61, 59, 57, 55, 54, 52, 50, 49, 47, 46, 44, + 42, 41, 39, 37, 36, 34, 33, 31, 30, 28, 26, 25, + 23, 22, 20, 19, 17, 16, 14, 13, 11, 10, 8, 7, + 5, 4, 2, 1, +}; + + +/** + * Returns the cost in bits of encoding the distribution described by count + * using the entropy bound. 
+ */ +static size_t ZSTD_entropyCost(unsigned const* count, unsigned const max, size_t const total) +{ + unsigned cost = 0; + unsigned s; + for (s = 0; s <= max; ++s) { + unsigned norm = (unsigned)((256 * count[s]) / total); + if (count[s] != 0 && norm == 0) + norm = 1; + assert(count[s] < total); + cost += count[s] * kInverseProbabiltyLog256[norm]; + } + return cost >> 8; +} + + +/** + * Returns the cost in bits of encoding the distribution in count using the + * table described by norm. The max symbol support by norm is assumed >= max. + * norm must be valid for every symbol with non-zero probability in count. + */ +static size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog, + unsigned const* count, unsigned const max) +{ + unsigned const shift = 8 - accuracyLog; + size_t cost = 0; + unsigned s; + assert(accuracyLog <= 8); + for (s = 0; s <= max; ++s) { + unsigned const normAcc = norm[s] != -1 ? norm[s] : 1; + unsigned const norm256 = normAcc << shift; + assert(norm256 > 0); + assert(norm256 < 256); + cost += count[s] * kInverseProbabiltyLog256[norm256]; + } + return cost >> 8; +} + + +static unsigned ZSTD_getFSEMaxSymbolValue(FSE_CTable const* ctable) { + void const* ptr = ctable; + U16 const* u16ptr = (U16 const*)ptr; + U32 const maxSymbolValue = MEM_read16(u16ptr + 1); + return maxSymbolValue; +} + + +/** + * Returns the cost in bits of encoding the distribution in count using ctable. + * Returns an error if ctable cannot represent all the symbols in count. 
+ */ +static size_t ZSTD_fseBitCost( + FSE_CTable const* ctable, + unsigned const* count, + unsigned const max) +{ + unsigned const kAccuracyLog = 8; + size_t cost = 0; + unsigned s; + FSE_CState_t cstate; + FSE_initCState(&cstate, ctable); + if (ZSTD_getFSEMaxSymbolValue(ctable) < max) { + DEBUGLOG(5, "Repeat FSE_CTable has maxSymbolValue %u < %u", + ZSTD_getFSEMaxSymbolValue(ctable), max); + return ERROR(GENERIC); + } + for (s = 0; s <= max; ++s) { + unsigned const tableLog = cstate.stateLog; + unsigned const badCost = (tableLog + 1) << kAccuracyLog; + unsigned const bitCost = FSE_bitCost(cstate.symbolTT, tableLog, s, kAccuracyLog); + if (count[s] == 0) + continue; + if (bitCost >= badCost) { + DEBUGLOG(5, "Repeat FSE_CTable has Prob[%u] == 0", s); + return ERROR(GENERIC); + } + cost += count[s] * bitCost; + } + return cost >> kAccuracyLog; +} + +/** + * Returns the cost in bytes of encoding the normalized count header. + * Returns an error if any of the helper functions return an error. 
+ */ +static size_t ZSTD_NCountCost(unsigned const* count, unsigned const max, + size_t const nbSeq, unsigned const FSELog) +{ + BYTE wksp[FSE_NCOUNTBOUND]; + S16 norm[MaxSeq + 1]; + const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max); + CHECK_F(FSE_normalizeCount(norm, tableLog, count, nbSeq, max)); + return FSE_writeNCount(wksp, sizeof(wksp), norm, max, tableLog); +} + + +typedef enum { + ZSTD_defaultDisallowed = 0, + ZSTD_defaultAllowed = 1 +} ZSTD_defaultPolicy_e; + +MEM_STATIC symbolEncodingType_e +ZSTD_selectEncodingType( + FSE_repeat* repeatMode, unsigned const* count, unsigned const max, + size_t const mostFrequent, size_t nbSeq, unsigned const FSELog, + FSE_CTable const* prevCTable, + short const* defaultNorm, U32 defaultNormLog, + ZSTD_defaultPolicy_e const isDefaultAllowed, + ZSTD_strategy const strategy) +{ + ZSTD_STATIC_ASSERT(ZSTD_defaultDisallowed == 0 && ZSTD_defaultAllowed != 0); + if (mostFrequent == nbSeq) { + *repeatMode = FSE_repeat_none; + if (isDefaultAllowed && nbSeq <= 2) { + /* Prefer set_basic over set_rle when there are 2 or less symbols, + * since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol. + * If basic encoding isn't possible, always choose RLE. 
+ */ + DEBUGLOG(5, "Selected set_basic"); + return set_basic; + } + DEBUGLOG(5, "Selected set_rle"); + return set_rle; + } + if (strategy < ZSTD_lazy) { + if (isDefaultAllowed) { + size_t const staticFse_nbSeq_max = 1000; + size_t const mult = 10 - strategy; + size_t const baseLog = 3; + size_t const dynamicFse_nbSeq_min = (((size_t)1 << defaultNormLog) * mult) >> baseLog; /* 28-36 for offset, 56-72 for lengths */ + assert(defaultNormLog >= 5 && defaultNormLog <= 6); /* xx_DEFAULTNORMLOG */ + assert(mult <= 9 && mult >= 7); + if ( (*repeatMode == FSE_repeat_valid) + && (nbSeq < staticFse_nbSeq_max) ) { + DEBUGLOG(5, "Selected set_repeat"); + return set_repeat; + } + if ( (nbSeq < dynamicFse_nbSeq_min) + || (mostFrequent < (nbSeq >> (defaultNormLog-1))) ) { + DEBUGLOG(5, "Selected set_basic"); + /* The format allows default tables to be repeated, but it isn't useful. + * When using simple heuristics to select encoding type, we don't want + * to confuse these tables with dictionaries. When running more careful + * analysis, we don't need to waste time checking both repeating tables + * and default tables. + */ + *repeatMode = FSE_repeat_none; + return set_basic; + } + } + } else { + size_t const basicCost = isDefaultAllowed ? ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, count, max) : ERROR(GENERIC); + size_t const repeatCost = *repeatMode != FSE_repeat_none ? 
ZSTD_fseBitCost(prevCTable, count, max) : ERROR(GENERIC); + size_t const NCountCost = ZSTD_NCountCost(count, max, nbSeq, FSELog); + size_t const compressedCost = (NCountCost << 3) + ZSTD_entropyCost(count, max, nbSeq); + + if (isDefaultAllowed) { + assert(!ZSTD_isError(basicCost)); + assert(!(*repeatMode == FSE_repeat_valid && ZSTD_isError(repeatCost))); + } + assert(!ZSTD_isError(NCountCost)); + assert(compressedCost < ERROR(maxCode)); + DEBUGLOG(5, "Estimated bit costs: basic=%u\trepeat=%u\tcompressed=%u", + (U32)basicCost, (U32)repeatCost, (U32)compressedCost); + if (basicCost <= repeatCost && basicCost <= compressedCost) { + DEBUGLOG(5, "Selected set_basic"); + assert(isDefaultAllowed); + *repeatMode = FSE_repeat_none; + return set_basic; + } + if (repeatCost <= compressedCost) { + DEBUGLOG(5, "Selected set_repeat"); + assert(!ZSTD_isError(repeatCost)); + return set_repeat; + } + assert(compressedCost < basicCost && compressedCost < repeatCost); + } + DEBUGLOG(5, "Selected set_compressed"); + *repeatMode = FSE_repeat_check; + return set_compressed; +} + +MEM_STATIC size_t +ZSTD_buildCTable(void* dst, size_t dstCapacity, + FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type, + U32* count, U32 max, + const BYTE* codeTable, size_t nbSeq, + const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax, + const FSE_CTable* prevCTable, size_t prevCTableSize, + void* workspace, size_t workspaceSize) +{ + BYTE* op = (BYTE*)dst; + const BYTE* const oend = op + dstCapacity; + + switch (type) { + case set_rle: + *op = codeTable[0]; + CHECK_F(FSE_buildCTable_rle(nextCTable, (BYTE)max)); + return 1; + case set_repeat: + memcpy(nextCTable, prevCTable, prevCTableSize); + return 0; + case set_basic: + CHECK_F(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, workspace, workspaceSize)); /* note : could be pre-calculated */ + return 0; + case set_compressed: { + S16 norm[MaxSeq + 1]; + size_t nbSeq_1 = nbSeq; + const U32 tableLog = 
FSE_optimalTableLog(FSELog, nbSeq, max); + if (count[codeTable[nbSeq-1]] > 1) { + count[codeTable[nbSeq-1]]--; + nbSeq_1--; + } + assert(nbSeq_1 > 1); + CHECK_F(FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max)); + { size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog); /* overflow protected */ + if (FSE_isError(NCountSize)) return NCountSize; + CHECK_F(FSE_buildCTable_wksp(nextCTable, norm, max, tableLog, workspace, workspaceSize)); + return NCountSize; + } + } + default: return assert(0), ERROR(GENERIC); + } +} + +FORCE_INLINE_TEMPLATE size_t +ZSTD_encodeSequences_body( + void* dst, size_t dstCapacity, + FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, + FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, + FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, + seqDef const* sequences, size_t nbSeq, int longOffsets) +{ + BIT_CStream_t blockStream; + FSE_CState_t stateMatchLength; + FSE_CState_t stateOffsetBits; + FSE_CState_t stateLitLength; + + CHECK_E(BIT_initCStream(&blockStream, dst, dstCapacity), dstSize_tooSmall); /* not enough space remaining */ + + /* first symbols */ + FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]); + FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, ofCodeTable[nbSeq-1]); + FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]); + BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]); + if (MEM_32bits()) BIT_flushBits(&blockStream); + BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]); + if (MEM_32bits()) BIT_flushBits(&blockStream); + if (longOffsets) { + U32 const ofBits = ofCodeTable[nbSeq-1]; + int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1); + if (extraBits) { + BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits); + BIT_flushBits(&blockStream); + } + BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits, + 
ofBits - extraBits); + } else { + BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]); + } + BIT_flushBits(&blockStream); + + { size_t n; + for (n=nbSeq-2 ; n= 64-7-(LLFSELog+MLFSELog+OffFSELog))) + BIT_flushBits(&blockStream); /* (7)*/ + BIT_addBits(&blockStream, sequences[n].litLength, llBits); + if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream); + BIT_addBits(&blockStream, sequences[n].matchLength, mlBits); + if (MEM_32bits() || (ofBits+mlBits+llBits > 56)) BIT_flushBits(&blockStream); + if (longOffsets) { + int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1); + if (extraBits) { + BIT_addBits(&blockStream, sequences[n].offset, extraBits); + BIT_flushBits(&blockStream); /* (7)*/ + } + BIT_addBits(&blockStream, sequences[n].offset >> extraBits, + ofBits - extraBits); /* 31 */ + } else { + BIT_addBits(&blockStream, sequences[n].offset, ofBits); /* 31 */ + } + BIT_flushBits(&blockStream); /* (7)*/ + } } + + DEBUGLOG(6, "ZSTD_encodeSequences: flushing ML state with %u bits", stateMatchLength.stateLog); + FSE_flushCState(&blockStream, &stateMatchLength); + DEBUGLOG(6, "ZSTD_encodeSequences: flushing Off state with %u bits", stateOffsetBits.stateLog); + FSE_flushCState(&blockStream, &stateOffsetBits); + DEBUGLOG(6, "ZSTD_encodeSequences: flushing LL state with %u bits", stateLitLength.stateLog); + FSE_flushCState(&blockStream, &stateLitLength); + + { size_t const streamSize = BIT_closeCStream(&blockStream); + if (streamSize==0) return ERROR(dstSize_tooSmall); /* not enough space */ + return streamSize; + } +} + +static size_t +ZSTD_encodeSequences_default( + void* dst, size_t dstCapacity, + FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, + FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, + FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, + seqDef const* sequences, size_t nbSeq, int longOffsets) +{ + return ZSTD_encodeSequences_body(dst, dstCapacity, + CTable_MatchLength, 
mlCodeTable, + CTable_OffsetBits, ofCodeTable, + CTable_LitLength, llCodeTable, + sequences, nbSeq, longOffsets); +} + + +#if DYNAMIC_BMI2 + +static TARGET_ATTRIBUTE("bmi2") size_t +ZSTD_encodeSequences_bmi2( + void* dst, size_t dstCapacity, + FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, + FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, + FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, + seqDef const* sequences, size_t nbSeq, int longOffsets) +{ + return ZSTD_encodeSequences_body(dst, dstCapacity, + CTable_MatchLength, mlCodeTable, + CTable_OffsetBits, ofCodeTable, + CTable_LitLength, llCodeTable, + sequences, nbSeq, longOffsets); +} + +#endif + +size_t ZSTD_encodeSequences( + void* dst, size_t dstCapacity, + FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, + FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, + FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, + seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2) +{ +#if DYNAMIC_BMI2 + if (bmi2) { + return ZSTD_encodeSequences_bmi2(dst, dstCapacity, + CTable_MatchLength, mlCodeTable, + CTable_OffsetBits, ofCodeTable, + CTable_LitLength, llCodeTable, + sequences, nbSeq, longOffsets); + } +#endif + (void)bmi2; + return ZSTD_encodeSequences_default(dst, dstCapacity, + CTable_MatchLength, mlCodeTable, + CTable_OffsetBits, ofCodeTable, + CTable_LitLength, llCodeTable, + sequences, nbSeq, longOffsets); +} + +MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, + ZSTD_entropyCTables_t const* prevEntropy, + ZSTD_entropyCTables_t* nextEntropy, + ZSTD_CCtx_params const* cctxParams, + void* dst, size_t dstCapacity, U32* workspace, + const int bmi2) +{ + const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN; + ZSTD_strategy const strategy = cctxParams->cParams.strategy; + U32 count[MaxSeq+1]; + FSE_CTable* CTable_LitLength = nextEntropy->fse.litlengthCTable; + FSE_CTable* 
CTable_OffsetBits = nextEntropy->fse.offcodeCTable; + FSE_CTable* CTable_MatchLength = nextEntropy->fse.matchlengthCTable; + U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */ + const seqDef* const sequences = seqStorePtr->sequencesStart; + const BYTE* const ofCodeTable = seqStorePtr->ofCode; + const BYTE* const llCodeTable = seqStorePtr->llCode; + const BYTE* const mlCodeTable = seqStorePtr->mlCode; + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstCapacity; + BYTE* op = ostart; + size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart; + BYTE* seqHead; + BYTE* lastNCount = NULL; + + ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<litStart; + size_t const litSize = seqStorePtr->lit - literals; + int const disableLiteralCompression = (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0); + size_t const cSize = ZSTD_compressLiterals( + &prevEntropy->huf, &nextEntropy->huf, + cctxParams->cParams.strategy, disableLiteralCompression, + op, dstCapacity, + literals, litSize, + workspace, bmi2); + if (ZSTD_isError(cSize)) + return cSize; + assert(cSize <= dstCapacity); + op += cSize; + } + + /* Sequences Header */ + if ((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/) return ERROR(dstSize_tooSmall); + if (nbSeq < 0x7F) + *op++ = (BYTE)nbSeq; + else if (nbSeq < LONGNBSEQ) + op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2; + else + op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3; + if (nbSeq==0) { + /* Copy the old tables over as if we repeated them */ + memcpy(&nextEntropy->fse, &prevEntropy->fse, sizeof(prevEntropy->fse)); + return op - ostart; + } + + /* seqHead : flags for FSE encoding type */ + seqHead = op++; + + /* convert length/distances into codes */ + ZSTD_seqToCodes(seqStorePtr); + /* build CTable for Literal Lengths */ + { U32 max = MaxLL; + size_t const mostFrequent = HIST_countFast_wksp(count, &max, llCodeTable, nbSeq, workspace); /* can't fail */ + 
DEBUGLOG(5, "Building LL table"); + nextEntropy->fse.litlength_repeatMode = prevEntropy->fse.litlength_repeatMode; + LLtype = ZSTD_selectEncodingType(&nextEntropy->fse.litlength_repeatMode, count, max, mostFrequent, nbSeq, LLFSELog, prevEntropy->fse.litlengthCTable, LL_defaultNorm, LL_defaultNormLog, ZSTD_defaultAllowed, strategy); + assert(set_basic < set_compressed && set_rle < set_compressed); + assert(!(LLtype < set_compressed && nextEntropy->fse.litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ + { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype, + count, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL, + prevEntropy->fse.litlengthCTable, sizeof(prevEntropy->fse.litlengthCTable), + workspace, HUF_WORKSPACE_SIZE); + if (ZSTD_isError(countSize)) return countSize; + if (LLtype == set_compressed) + lastNCount = op; + op += countSize; + } } + /* build CTable for Offsets */ + { U32 max = MaxOff; + size_t const mostFrequent = HIST_countFast_wksp(count, &max, ofCodeTable, nbSeq, workspace); /* can't fail */ + /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */ + ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? 
ZSTD_defaultAllowed : ZSTD_defaultDisallowed; + DEBUGLOG(5, "Building OF table"); + nextEntropy->fse.offcode_repeatMode = prevEntropy->fse.offcode_repeatMode; + Offtype = ZSTD_selectEncodingType(&nextEntropy->fse.offcode_repeatMode, count, max, mostFrequent, nbSeq, OffFSELog, prevEntropy->fse.offcodeCTable, OF_defaultNorm, OF_defaultNormLog, defaultPolicy, strategy); + assert(!(Offtype < set_compressed && nextEntropy->fse.offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */ + { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype, + count, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, + prevEntropy->fse.offcodeCTable, sizeof(prevEntropy->fse.offcodeCTable), + workspace, HUF_WORKSPACE_SIZE); + if (ZSTD_isError(countSize)) return countSize; + if (Offtype == set_compressed) + lastNCount = op; + op += countSize; + } } + /* build CTable for MatchLengths */ + { U32 max = MaxML; + size_t const mostFrequent = HIST_countFast_wksp(count, &max, mlCodeTable, nbSeq, workspace); /* can't fail */ + DEBUGLOG(5, "Building ML table"); + nextEntropy->fse.matchlength_repeatMode = prevEntropy->fse.matchlength_repeatMode; + MLtype = ZSTD_selectEncodingType(&nextEntropy->fse.matchlength_repeatMode, count, max, mostFrequent, nbSeq, MLFSELog, prevEntropy->fse.matchlengthCTable, ML_defaultNorm, ML_defaultNormLog, ZSTD_defaultAllowed, strategy); + assert(!(MLtype < set_compressed && nextEntropy->fse.matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ + { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype, + count, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML, + prevEntropy->fse.matchlengthCTable, sizeof(prevEntropy->fse.matchlengthCTable), + workspace, HUF_WORKSPACE_SIZE); + if (ZSTD_isError(countSize)) return countSize; + if (MLtype == set_compressed) + lastNCount = op; + op += 
countSize; + } } + + *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2)); + + { size_t const bitstreamSize = ZSTD_encodeSequences( + op, oend - op, + CTable_MatchLength, mlCodeTable, + CTable_OffsetBits, ofCodeTable, + CTable_LitLength, llCodeTable, + sequences, nbSeq, + longOffsets, bmi2); + if (ZSTD_isError(bitstreamSize)) return bitstreamSize; + op += bitstreamSize; + /* zstd versions <= 1.3.4 mistakenly report corruption when + * FSE_readNCount() recieves a buffer < 4 bytes. + * Fixed by https://github.com/facebook/zstd/pull/1146. + * This can happen when the last set_compressed table present is 2 + * bytes and the bitstream is only one byte. + * In this exceedingly rare case, we will simply emit an uncompressed + * block, since it isn't worth optimizing. + */ + if (lastNCount && (op - lastNCount) < 4) { + /* NCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */ + assert(op - lastNCount == 3); + DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.3.4 by " + "emitting an uncompressed block."); + return 0; + } + } + + return op - ostart; +} + +MEM_STATIC size_t ZSTD_compressSequences(seqStore_t* seqStorePtr, + const ZSTD_entropyCTables_t* prevEntropy, + ZSTD_entropyCTables_t* nextEntropy, + const ZSTD_CCtx_params* cctxParams, + void* dst, size_t dstCapacity, + size_t srcSize, U32* workspace, int bmi2) +{ + size_t const cSize = ZSTD_compressSequences_internal( + seqStorePtr, prevEntropy, nextEntropy, cctxParams, dst, dstCapacity, + workspace, bmi2); + if (cSize == 0) return 0; + /* When srcSize <= dstCapacity, there is enough space to write a raw uncompressed block. + * Since we ran out of space, block must be not compressible, so fall back to raw uncompressed block. 
+ */ + if ((cSize == ERROR(dstSize_tooSmall)) & (srcSize <= dstCapacity)) + return 0; /* block not compressed */ + if (ZSTD_isError(cSize)) return cSize; + + /* Check compressibility */ + { size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, cctxParams->cParams.strategy); + if (cSize >= maxCSize) return 0; /* block not compressed */ + } + + /* We check that dictionaries have offset codes available for the first + * block. After the first block, the offcode table might not have large + * enough codes to represent the offsets in the data. + */ + if (nextEntropy->fse.offcode_repeatMode == FSE_repeat_valid) + nextEntropy->fse.offcode_repeatMode = FSE_repeat_check; + + return cSize; +} + +/* ZSTD_selectBlockCompressor() : + * Not static, but internal use only (used by long distance matcher) + * assumption : strat is a valid strategy */ +ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode) +{ + static const ZSTD_blockCompressor blockCompressor[3][(unsigned)ZSTD_btultra+1] = { + { ZSTD_compressBlock_fast /* default for 0 */, + ZSTD_compressBlock_fast, + ZSTD_compressBlock_doubleFast, + ZSTD_compressBlock_greedy, + ZSTD_compressBlock_lazy, + ZSTD_compressBlock_lazy2, + ZSTD_compressBlock_btlazy2, + ZSTD_compressBlock_btopt, + ZSTD_compressBlock_btultra }, + { ZSTD_compressBlock_fast_extDict /* default for 0 */, + ZSTD_compressBlock_fast_extDict, + ZSTD_compressBlock_doubleFast_extDict, + ZSTD_compressBlock_greedy_extDict, + ZSTD_compressBlock_lazy_extDict, + ZSTD_compressBlock_lazy2_extDict, + ZSTD_compressBlock_btlazy2_extDict, + ZSTD_compressBlock_btopt_extDict, + ZSTD_compressBlock_btultra_extDict }, + { ZSTD_compressBlock_fast_dictMatchState /* default for 0 */, + ZSTD_compressBlock_fast_dictMatchState, + ZSTD_compressBlock_doubleFast_dictMatchState, + ZSTD_compressBlock_greedy_dictMatchState, + ZSTD_compressBlock_lazy_dictMatchState, + ZSTD_compressBlock_lazy2_dictMatchState, + ZSTD_compressBlock_btlazy2_dictMatchState, + 
ZSTD_compressBlock_btopt_dictMatchState, + ZSTD_compressBlock_btultra_dictMatchState } + }; + ZSTD_blockCompressor selectedCompressor; + ZSTD_STATIC_ASSERT((unsigned)ZSTD_fast == 1); + + assert((U32)strat >= (U32)ZSTD_fast); + assert((U32)strat <= (U32)ZSTD_btultra); + selectedCompressor = blockCompressor[(int)dictMode][(U32)strat]; + assert(selectedCompressor != NULL); + return selectedCompressor; +} + +static void ZSTD_storeLastLiterals(seqStore_t* seqStorePtr, + const BYTE* anchor, size_t lastLLSize) +{ + memcpy(seqStorePtr->lit, anchor, lastLLSize); + seqStorePtr->lit += lastLLSize; +} + +void ZSTD_resetSeqStore(seqStore_t* ssPtr) +{ + ssPtr->lit = ssPtr->litStart; + ssPtr->sequences = ssPtr->sequencesStart; + ssPtr->longLengthID = 0; +} + +static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize) +{ + ZSTD_matchState_t* const ms = &zc->blockState.matchState; + DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%zu, dictLimit=%u, nextToUpdate=%u)", + dstCapacity, ms->window.dictLimit, ms->nextToUpdate); + + if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) { + ZSTD_ldm_skipSequences(&zc->externSeqStore, srcSize, zc->appliedParams.cParams.searchLength); + return 0; /* don't even attempt compression below a certain srcSize */ + } + ZSTD_resetSeqStore(&(zc->seqStore)); + ms->opt.symbolCosts = &zc->blockState.prevCBlock->entropy; /* required for optimal parser to read stats from dictionary */ + + /* a gap between an attached dict and the current window is not safe, + * they must remain adjacent, and when that stops being the case, the dict + * must be unset */ + assert(ms->dictMatchState == NULL || ms->loadedDictEnd == ms->window.dictLimit); + + /* limited update after a very long match */ + { const BYTE* const base = ms->window.base; + const BYTE* const istart = (const BYTE*)src; + const U32 current = (U32)(istart-base); + if (sizeof(ptrdiff_t)==8) assert(istart - base < 
(ptrdiff_t)(U32)(-1)); /* ensure no overflow */ + if (current > ms->nextToUpdate + 384) + ms->nextToUpdate = current - MIN(192, (U32)(current - ms->nextToUpdate - 384)); + } + + /* select and store sequences */ + { ZSTD_dictMode_e const dictMode = ZSTD_matchState_dictMode(ms); + size_t lastLLSize; + { int i; + for (i = 0; i < ZSTD_REP_NUM; ++i) + zc->blockState.nextCBlock->rep[i] = zc->blockState.prevCBlock->rep[i]; + } + if (zc->externSeqStore.pos < zc->externSeqStore.size) { + assert(!zc->appliedParams.ldmParams.enableLdm); + /* Updates ldmSeqStore.pos */ + lastLLSize = + ZSTD_ldm_blockCompress(&zc->externSeqStore, + ms, &zc->seqStore, + zc->blockState.nextCBlock->rep, + &zc->appliedParams.cParams, + src, srcSize); + assert(zc->externSeqStore.pos <= zc->externSeqStore.size); + } else if (zc->appliedParams.ldmParams.enableLdm) { + rawSeqStore_t ldmSeqStore = {NULL, 0, 0, 0}; + + ldmSeqStore.seq = zc->ldmSequences; + ldmSeqStore.capacity = zc->maxNbLdmSequences; + /* Updates ldmSeqStore.size */ + CHECK_F(ZSTD_ldm_generateSequences(&zc->ldmState, &ldmSeqStore, + &zc->appliedParams.ldmParams, + src, srcSize)); + /* Updates ldmSeqStore.pos */ + lastLLSize = + ZSTD_ldm_blockCompress(&ldmSeqStore, + ms, &zc->seqStore, + zc->blockState.nextCBlock->rep, + &zc->appliedParams.cParams, + src, srcSize); + assert(ldmSeqStore.pos == ldmSeqStore.size); + } else { /* not long range mode */ + ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, dictMode); + lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, &zc->appliedParams.cParams, src, srcSize); + } + { const BYTE* const lastLiterals = (const BYTE*)src + srcSize - lastLLSize; + ZSTD_storeLastLiterals(&zc->seqStore, lastLiterals, lastLLSize); + } } + + /* encode sequences and literals */ + { size_t const cSize = ZSTD_compressSequences(&zc->seqStore, + &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy, + &zc->appliedParams, 
+ dst, dstCapacity, + srcSize, zc->entropyWorkspace, zc->bmi2); + if (ZSTD_isError(cSize) || cSize == 0) return cSize; + /* confirm repcodes and entropy tables */ + { ZSTD_compressedBlockState_t* const tmp = zc->blockState.prevCBlock; + zc->blockState.prevCBlock = zc->blockState.nextCBlock; + zc->blockState.nextCBlock = tmp; + } + return cSize; + } +} + + +/*! ZSTD_compress_frameChunk() : +* Compress a chunk of data into one or multiple blocks. +* All blocks will be terminated, all input will be consumed. +* Function will issue an error if there is not enough `dstCapacity` to hold the compressed content. +* Frame is supposed already started (header already produced) +* @return : compressed size, or an error code +*/ +static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + U32 lastFrameChunk) +{ + size_t blockSize = cctx->blockSize; + size_t remaining = srcSize; + const BYTE* ip = (const BYTE*)src; + BYTE* const ostart = (BYTE*)dst; + BYTE* op = ostart; + U32 const maxDist = (U32)1 << cctx->appliedParams.cParams.windowLog; + assert(cctx->appliedParams.cParams.windowLog <= 31); + + DEBUGLOG(5, "ZSTD_compress_frameChunk (blockSize=%u)", (U32)blockSize); + if (cctx->appliedParams.fParams.checksumFlag && srcSize) + XXH64_update(&cctx->xxhState, src, srcSize); + + while (remaining) { + ZSTD_matchState_t* const ms = &cctx->blockState.matchState; + U32 const lastBlock = lastFrameChunk & (blockSize >= remaining); + + if (dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE) + return ERROR(dstSize_tooSmall); /* not enough space to store compressed block */ + if (remaining < blockSize) blockSize = remaining; + + if (ZSTD_window_needOverflowCorrection(ms->window, ip + blockSize)) { + U32 const cycleLog = ZSTD_cycleLog(cctx->appliedParams.cParams.chainLog, cctx->appliedParams.cParams.strategy); + U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, maxDist, ip); + 
ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30); + ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30); + ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31); + + ZSTD_reduceIndex(cctx, correction); + if (ms->nextToUpdate < correction) ms->nextToUpdate = 0; + else ms->nextToUpdate -= correction; + ms->loadedDictEnd = 0; + ms->dictMatchState = NULL; + } + ZSTD_window_enforceMaxDist(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd, &ms->dictMatchState); + if (ms->nextToUpdate < ms->window.lowLimit) ms->nextToUpdate = ms->window.lowLimit; + + { size_t cSize = ZSTD_compressBlock_internal(cctx, + op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize, + ip, blockSize); + if (ZSTD_isError(cSize)) return cSize; + + if (cSize == 0) { /* block is not compressible */ + U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(blockSize << 3); + if (blockSize + ZSTD_blockHeaderSize > dstCapacity) return ERROR(dstSize_tooSmall); + MEM_writeLE32(op, cBlockHeader24); /* 4th byte will be overwritten */ + memcpy(op + ZSTD_blockHeaderSize, ip, blockSize); + cSize = ZSTD_blockHeaderSize + blockSize; + } else { + U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); + MEM_writeLE24(op, cBlockHeader24); + cSize += ZSTD_blockHeaderSize; + } + + ip += blockSize; + assert(remaining >= blockSize); + remaining -= blockSize; + op += cSize; + assert(dstCapacity >= cSize); + dstCapacity -= cSize; + DEBUGLOG(5, "ZSTD_compress_frameChunk: adding a block of size %u", + (U32)cSize); + } } + + if (lastFrameChunk && (op>ostart)) cctx->stage = ZSTDcs_ending; + return op-ostart; +} + + +static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity, + ZSTD_CCtx_params params, U64 pledgedSrcSize, U32 dictID) +{ BYTE* const op = (BYTE*)dst; + U32 const dictIDSizeCodeLength = (dictID>0) + (dictID>=256) + (dictID>=65536); /* 0-3 */ + U32 const dictIDSizeCode = params.fParams.noDictIDFlag ? 
0 : dictIDSizeCodeLength; /* 0-3 */ + U32 const checksumFlag = params.fParams.checksumFlag>0; + U32 const windowSize = (U32)1 << params.cParams.windowLog; + U32 const singleSegment = params.fParams.contentSizeFlag && (windowSize >= pledgedSrcSize); + BYTE const windowLogByte = (BYTE)((params.cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) << 3); + U32 const fcsCode = params.fParams.contentSizeFlag ? + (pledgedSrcSize>=256) + (pledgedSrcSize>=65536+256) + (pledgedSrcSize>=0xFFFFFFFFU) : 0; /* 0-3 */ + BYTE const frameHeaderDecriptionByte = (BYTE)(dictIDSizeCode + (checksumFlag<<2) + (singleSegment<<5) + (fcsCode<<6) ); + size_t pos=0; + + assert(!(params.fParams.contentSizeFlag && pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN)); + if (dstCapacity < ZSTD_frameHeaderSize_max) return ERROR(dstSize_tooSmall); + DEBUGLOG(4, "ZSTD_writeFrameHeader : dictIDFlag : %u ; dictID : %u ; dictIDSizeCode : %u", + !params.fParams.noDictIDFlag, dictID, dictIDSizeCode); + + if (params.format == ZSTD_f_zstd1) { + MEM_writeLE32(dst, ZSTD_MAGICNUMBER); + pos = 4; + } + op[pos++] = frameHeaderDecriptionByte; + if (!singleSegment) op[pos++] = windowLogByte; + switch(dictIDSizeCode) + { + default: assert(0); /* impossible */ + case 0 : break; + case 1 : op[pos] = (BYTE)(dictID); pos++; break; + case 2 : MEM_writeLE16(op+pos, (U16)dictID); pos+=2; break; + case 3 : MEM_writeLE32(op+pos, dictID); pos+=4; break; + } + switch(fcsCode) + { + default: assert(0); /* impossible */ + case 0 : if (singleSegment) op[pos++] = (BYTE)(pledgedSrcSize); break; + case 1 : MEM_writeLE16(op+pos, (U16)(pledgedSrcSize-256)); pos+=2; break; + case 2 : MEM_writeLE32(op+pos, (U32)(pledgedSrcSize)); pos+=4; break; + case 3 : MEM_writeLE64(op+pos, (U64)(pledgedSrcSize)); pos+=8; break; + } + return pos; +} + +/* ZSTD_writeLastEmptyBlock() : + * output an empty Block with end-of-frame mark to complete a frame + * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h)) + * or an 
error code if `dstCapcity` is too small (stage != ZSTDcs_init) + return ERROR(stage_wrong); + if (cctx->appliedParams.ldmParams.enableLdm) + return ERROR(parameter_unsupported); + cctx->externSeqStore.seq = seq; + cctx->externSeqStore.size = nbSeq; + cctx->externSeqStore.capacity = nbSeq; + cctx->externSeqStore.pos = 0; + return 0; +} + + +static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + U32 frame, U32 lastFrameChunk) +{ + ZSTD_matchState_t* ms = &cctx->blockState.matchState; + size_t fhSize = 0; + + DEBUGLOG(5, "ZSTD_compressContinue_internal, stage: %u, srcSize: %u", + cctx->stage, (U32)srcSize); + if (cctx->stage==ZSTDcs_created) return ERROR(stage_wrong); /* missing init (ZSTD_compressBegin) */ + + if (frame && (cctx->stage==ZSTDcs_init)) { + fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->appliedParams, + cctx->pledgedSrcSizePlusOne-1, cctx->dictID); + if (ZSTD_isError(fhSize)) return fhSize; + dstCapacity -= fhSize; + dst = (char*)dst + fhSize; + cctx->stage = ZSTDcs_ongoing; + } + + if (!srcSize) return fhSize; /* do not generate an empty block if no input */ + + if (!ZSTD_window_update(&ms->window, src, srcSize)) { + ms->nextToUpdate = ms->window.dictLimit; + } + if (cctx->appliedParams.ldmParams.enableLdm) + ZSTD_window_update(&cctx->ldmState.window, src, srcSize); + + DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)", (U32)cctx->blockSize); + { size_t const cSize = frame ? 
+ ZSTD_compress_frameChunk (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) : + ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize); + if (ZSTD_isError(cSize)) return cSize; + cctx->consumedSrcSize += srcSize; + cctx->producedCSize += (cSize + fhSize); + assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0)); + if (cctx->pledgedSrcSizePlusOne != 0) { /* control src size */ + ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1); + if (cctx->consumedSrcSize+1 > cctx->pledgedSrcSizePlusOne) { + DEBUGLOG(4, "error : pledgedSrcSize = %u, while realSrcSize >= %u", + (U32)cctx->pledgedSrcSizePlusOne-1, (U32)cctx->consumedSrcSize); + return ERROR(srcSize_wrong); + } + } + return cSize + fhSize; + } +} + +size_t ZSTD_compressContinue (ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize) +{ + DEBUGLOG(5, "ZSTD_compressContinue (srcSize=%u)", (U32)srcSize); + return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1 /* frame mode */, 0 /* last chunk */); +} + + +size_t ZSTD_getBlockSize(const ZSTD_CCtx* cctx) +{ + ZSTD_compressionParameters const cParams = cctx->appliedParams.cParams; + assert(!ZSTD_checkCParams(cParams)); + return MIN (ZSTD_BLOCKSIZE_MAX, (U32)1 << cParams.windowLog); +} + +size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + size_t const blockSizeMax = ZSTD_getBlockSize(cctx); + if (srcSize > blockSizeMax) return ERROR(srcSize_wrong); + return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0 /* frame mode */, 0 /* last chunk */); +} + +/*! 
ZSTD_loadDictionaryContent() :
+ *  Passes `src` to ZSTD_window_update() for the match state, records the end
+ *  of the dictionary in ms->loadedDictEnd (0 when params->forceWindow is set),
+ *  then calls the table-loading helper selected by params->cParams.strategy.
+ *  Content of HASH_READ_SIZE bytes or fewer is not indexed, and in that case
+ *  ms->nextToUpdate is left untouched.
+ * @return : 0, or an error code (every path visible here returns 0)
+ */
+static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
+                                         ZSTD_CCtx_params const* params,
+                                         const void* src, size_t srcSize,
+                                         ZSTD_dictTableLoadMethod_e dtlm)
+{
+    const BYTE* const ip = (const BYTE*) src;
+    const BYTE* const iend = ip + srcSize;
+    ZSTD_compressionParameters const* cParams = &params->cParams;
+
+    ZSTD_window_update(&ms->window, src, srcSize);
+    ms->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ms->window.base);
+
+    /* too small to index : nothing else to do */
+    if (srcSize <= HASH_READ_SIZE) return 0;
+
+    switch(params->cParams.strategy)
+    {
+    case ZSTD_fast:
+        ZSTD_fillHashTable(ms, cParams, iend, dtlm);
+        break;
+    case ZSTD_dfast:
+        ZSTD_fillDoubleHashTable(ms, cParams, iend, dtlm);
+        break;
+
+    case ZSTD_greedy:
+    case ZSTD_lazy:
+    case ZSTD_lazy2:
+        /* always true after the early return above; kept as a local guard for iend-HASH_READ_SIZE */
+        if (srcSize >= HASH_READ_SIZE)
+            ZSTD_insertAndFindFirstIndex(ms, cParams, iend-HASH_READ_SIZE);
+        break;
+
+    case ZSTD_btlazy2:   /* we want the dictionary table fully sorted */
+    case ZSTD_btopt:
+    case ZSTD_btultra:
+        /* always true after the early return above; kept as a local guard for iend-HASH_READ_SIZE */
+        if (srcSize >= HASH_READ_SIZE)
+            ZSTD_updateTree(ms, cParams, iend-HASH_READ_SIZE, iend);
+        break;
+
+    default:
+        assert(0);  /* not possible : not a valid strategy id */
+    }
+
+    /* everything up to the end of the dictionary is now considered indexed */
+    ms->nextToUpdate = (U32)(iend - ms->window.base);
+    return 0;
+}
+
+
+/* Dictionaries that assign zero probability to symbols that show up cause problems
+   when FSE encoding.  Refuse dictionaries that assign zero probability to symbols
+   that we may encounter during compression.
+   NOTE: This behavior is not standard and could be improved in the future.
*/
+static size_t ZSTD_checkDictNCount(short* normalizedCounter, unsigned dictMaxSymbolValue, unsigned maxSymbolValue) {
+    U32 s;
+    /* the dictionary table must cover every symbol up to maxSymbolValue ... */
+    if (dictMaxSymbolValue < maxSymbolValue) return ERROR(dictionary_corrupted);
+    /* ... and none of those symbols may have a zero normalized count */
+    for (s = 0; s <= maxSymbolValue; ++s) {
+        if (normalizedCounter[s] == 0) return ERROR(dictionary_corrupted);
+    }
+    return 0;
+}
+
+
+/* Dictionary format :
+ * See :
+ * https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#dictionary-format
+ */
+/*! ZSTD_loadZstdDictionary() :
+ *  Parses, in order : magic number, dictID, Huffman table, then the offset,
+ *  match-length and literal-length FSE tables, then 3 repcodes (12 bytes);
+ *  whatever remains is handed to ZSTD_loadDictionaryContent().
+ *  Any parsing or validation failure yields ERROR(dictionary_corrupted).
+ * @return : dictID, or an error code
+ *  assumptions : magic number supposed already checked
+ *                dictSize supposed > 8
+ */
+static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs,
+                                      ZSTD_matchState_t* ms,
+                                      ZSTD_CCtx_params const* params,
+                                      const void* dict, size_t dictSize,
+                                      ZSTD_dictTableLoadMethod_e dtlm,
+                                      void* workspace)
+{
+    const BYTE* dictPtr = (const BYTE*)dict;
+    const BYTE* const dictEnd = dictPtr + dictSize;
+    short offcodeNCount[MaxOff+1];
+    unsigned offcodeMaxValue = MaxOff;
+    size_t dictID;
+
+    ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
+    assert(dictSize > 8);
+    assert(MEM_readLE32(dictPtr) == ZSTD_MAGIC_DICTIONARY);
+
+    dictPtr += 4;   /* skip magic number */
+    dictID = params->fParams.noDictIDFlag ? 0 :  MEM_readLE32(dictPtr);
+    dictPtr += 4;
+
+    {   unsigned maxSymbolValue = 255;
+        size_t const hufHeaderSize = HUF_readCTable((HUF_CElt*)bs->entropy.huf.CTable, &maxSymbolValue, dictPtr, dictEnd-dictPtr);
+        if (HUF_isError(hufHeaderSize)) return ERROR(dictionary_corrupted);
+        if (maxSymbolValue < 255) return ERROR(dictionary_corrupted);
+        dictPtr += hufHeaderSize;
+    }
+
+    {   unsigned offcodeLog;
+        size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr);
+        if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted);
+        if (offcodeLog > OffFSELog) return ERROR(dictionary_corrupted);
+        /* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */
+        /* fill all offset symbols to avoid garbage at end of table */
+        CHECK_E( FSE_buildCTable_wksp(bs->entropy.fse.offcodeCTable, offcodeNCount, MaxOff, offcodeLog, workspace, HUF_WORKSPACE_SIZE),
+                 dictionary_corrupted);
+        dictPtr += offcodeHeaderSize;
+    }
+
+    {   short matchlengthNCount[MaxML+1];
+        unsigned matchlengthMaxValue = MaxML, matchlengthLog;
+        size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr);
+        if (FSE_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted);
+        if (matchlengthLog > MLFSELog) return ERROR(dictionary_corrupted);
+        /* Every match length code must have non-zero probability */
+        CHECK_F( ZSTD_checkDictNCount(matchlengthNCount, matchlengthMaxValue, MaxML));
+        CHECK_E( FSE_buildCTable_wksp(bs->entropy.fse.matchlengthCTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog, workspace, HUF_WORKSPACE_SIZE),
+                 dictionary_corrupted);
+        dictPtr += matchlengthHeaderSize;
+    }
+
+    {   short litlengthNCount[MaxLL+1];
+        unsigned litlengthMaxValue = MaxLL, litlengthLog;
+        size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr);
+        if (FSE_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted);
+        if (litlengthLog > LLFSELog) return ERROR(dictionary_corrupted);
+        /* Every literal length code must have non-zero probability */
+        CHECK_F( ZSTD_checkDictNCount(litlengthNCount, litlengthMaxValue, MaxLL));
+        CHECK_E( FSE_buildCTable_wksp(bs->entropy.fse.litlengthCTable, litlengthNCount, litlengthMaxValue, litlengthLog, workspace, HUF_WORKSPACE_SIZE),
+                 dictionary_corrupted);
+        dictPtr += litlengthHeaderSize;
+    }
+
+    /* 3 repcodes, 4 bytes each, little-endian */
+    if (dictPtr+12 > dictEnd) return ERROR(dictionary_corrupted);
+    bs->rep[0] = MEM_readLE32(dictPtr+0);
+    bs->rep[1] = MEM_readLE32(dictPtr+4);
+    bs->rep[2] = MEM_readLE32(dictPtr+8);
+    dictPtr += 12;
+
+    {   size_t const dictContentSize = (size_t)(dictEnd - dictPtr);
+        U32 offcodeMax = MaxOff;
+        if (dictContentSize <= ((U32)-1) - 128 KB) {
+            U32 const maxOffset = (U32)dictContentSize + 128 KB; /* The maximum offset that must be supported */
+            offcodeMax = ZSTD_highbit32(maxOffset); /* Calculate minimum offset code required to represent maxOffset */
+        }
+        /* All offset values <= dictContentSize + 128 KB must be representable */
+        CHECK_F (ZSTD_checkDictNCount(offcodeNCount, offcodeMaxValue, MIN(offcodeMax, MaxOff)));
+        /* All repCodes must be <= dictContentSize and != 0*/
+        {   U32 u;
+            for (u=0; u<3; u++) {
+                if (bs->rep[u] == 0) return ERROR(dictionary_corrupted);
+                if (bs->rep[u] > dictContentSize) return ERROR(dictionary_corrupted);
+        }   }
+
+        /* tables were fully validated above : mark them directly reusable */
+        bs->entropy.huf.repeatMode = HUF_repeat_valid;
+        bs->entropy.fse.offcode_repeatMode = FSE_repeat_valid;
+        bs->entropy.fse.matchlength_repeatMode = FSE_repeat_valid;
+        bs->entropy.fse.litlength_repeatMode = FSE_repeat_valid;
+        CHECK_F(ZSTD_loadDictionaryContent(ms, params, dictPtr, dictContentSize, dtlm));
+        return dictID;
+    }
+}
+
+/** ZSTD_compress_insertDictionary() :
+*   Returns 0 without loading anything when `dict` is NULL or dictSize <= 8.
+*   Otherwise resets `bs`, then loads `dict` either as raw content or as a
+*   full zstd dictionary, depending on dictContentType and the magic number.
+*   @return : dictID, or an error code */
+static size_t
+ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs,
+                               ZSTD_matchState_t* ms,
+                         const ZSTD_CCtx_params* params,
+                         const void* dict, size_t dictSize,
+                               ZSTD_dictContentType_e dictContentType,
+                               ZSTD_dictTableLoadMethod_e dtlm,
+                               void* workspace)
+{
+    DEBUGLOG(4, "ZSTD_compress_insertDictionary (dictSize=%u)", (U32)dictSize);
+    if ((dict==NULL) || (dictSize<=8)) return 0;
+
+    ZSTD_reset_compressedBlockState(bs);
+
+    /* dict restricted modes */
+    if (dictContentType == ZSTD_dct_rawContent)
+        return ZSTD_loadDictionaryContent(ms, params, dict, dictSize, dtlm);
+
+    if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) {
+        if (dictContentType == ZSTD_dct_auto) {
+            DEBUGLOG(4, "raw content dictionary detected");
+            return ZSTD_loadDictionaryContent(ms, params, dict, dictSize, dtlm);
+        }
+        if (dictContentType == ZSTD_dct_fullDict)
+            return ERROR(dictionary_wrong);
+        assert(0);   /* impossible */
+    }
+
+    /* dict as full zstd dictionary */
+    return ZSTD_loadZstdDictionary(bs, ms, params, dict, dictSize, dtlm, workspace);
+}
+
+/*! ZSTD_compressBegin_internal() :
+ *  When `cdict` is provided and has content, resets the context from it and returns.
+ *  Otherwise resets the context from `params`, inserts `dict` (if any)
+ *  and stores the resulting dictID into cctx->dictID.
+ * @return : 0, or an error code */
+size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
+                             const void* dict, size_t dictSize,
+                             ZSTD_dictContentType_e dictContentType,
+                             ZSTD_dictTableLoadMethod_e dtlm,
+                             const ZSTD_CDict* cdict,
+                             ZSTD_CCtx_params params, U64 pledgedSrcSize,
+                             ZSTD_buffered_policy_e zbuff)
+{
+    DEBUGLOG(4, "ZSTD_compressBegin_internal: wlog=%u", params.cParams.windowLog);
+    /* params are supposed to be fully validated at this point */
+    assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
+    assert(!((dict) && (cdict)));  /* either dict or cdict, not both */
+
+    if (cdict && cdict->dictContentSize>0) {
+        return ZSTD_resetCCtx_usingCDict(cctx, cdict, params, pledgedSrcSize, zbuff);
+    }
+
+    CHECK_F( ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
+                                     ZSTDcrp_continue, zbuff) );
+    {
+        size_t const dictID = ZSTD_compress_insertDictionary(
+                cctx->blockState.prevCBlock, &cctx->blockState.matchState,
+                &params, dict, dictSize, dictContentType, dtlm, cctx->entropyWorkspace);
+        if (ZSTD_isError(dictID)) return dictID;
+        assert(dictID <= (size_t)(U32)-1);
+        cctx->dictID = (U32)dictID;
+    }
+    return 0;
+}
+
+/*! ZSTD_compressBegin_advanced_internal() :
+ *  Checks params.cParams with ZSTD_checkCParams(), then forwards everything
+ *  to ZSTD_compressBegin_internal() with ZSTDb_not_buffered.
+ * @return : 0, or an error code */
+size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
+                                    const void* dict, size_t dictSize,
+                                    ZSTD_dictContentType_e dictContentType,
+                                    ZSTD_dictTableLoadMethod_e dtlm,
+                                    const ZSTD_CDict* cdict,
+                                    ZSTD_CCtx_params params,
+                                    unsigned long long pledgedSrcSize)
+{
+    DEBUGLOG(4, "ZSTD_compressBegin_advanced_internal: wlog=%u", params.cParams.windowLog);
+    /* compression parameters verification and optimization */
+    CHECK_F( ZSTD_checkCParams(params.cParams) );
+    return ZSTD_compressBegin_internal(cctx,
+                                       dict, dictSize, dictContentType, dtlm,
+                                       cdict,
+                                       params, pledgedSrcSize,
+                                       ZSTDb_not_buffered);
+}
+
+/*! ZSTD_compressBegin_advanced() :
+*   Merges `params` into a copy of cctx->requestedParams, then begins with
+*   `dict` in ZSTD_dct_auto / ZSTD_dtlm_fast mode and no cdict.
+*   @return : 0, or an error code */
+size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx,
+                             const void* dict, size_t dictSize,
+                                   ZSTD_parameters params, unsigned long long pledgedSrcSize)
+{
+    ZSTD_CCtx_params const cctxParams =
+            ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params);
+    return ZSTD_compressBegin_advanced_internal(cctx,
+                                            dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast,
+                                            NULL /*cdict*/,
+                                            cctxParams, pledgedSrcSize);
+}
+
+/*! ZSTD_compressBegin_usingDict() :
+*   Gets parameters from ZSTD_getParams(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize)
+*   and begins with `dict` in ZSTD_dct_auto / ZSTD_dtlm_fast mode.
+*   Unlike the _advanced variants, no ZSTD_checkCParams() call is made on this path.
+*   @return : 0, or an error code */
+size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel)
+{
+    ZSTD_parameters const params = ZSTD_getParams(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize);
+    ZSTD_CCtx_params const cctxParams =
+            ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params);
+    DEBUGLOG(4, "ZSTD_compressBegin_usingDict (dictSize=%u)", (U32)dictSize);
+    return ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL,
+                                       cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, ZSTDb_not_buffered);
+}
+
+/*! ZSTD_compressBegin() :
+*   Same as ZSTD_compressBegin_usingDict() with no dictionary (NULL, 0).
+*   @return : 0, or an error code */
+size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel)
+{
+    return ZSTD_compressBegin_usingDict(cctx, NULL, 0, compressionLevel);
+}
+
+
+/*! ZSTD_writeEpilogue() :
+*   Ends a frame.
+* @return : nb of bytes written into dst (or an error code) */ +static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity) +{ + BYTE* const ostart = (BYTE*)dst; + BYTE* op = ostart; + size_t fhSize = 0; + + DEBUGLOG(4, "ZSTD_writeEpilogue"); + if (cctx->stage == ZSTDcs_created) return ERROR(stage_wrong); /* init missing */ + + /* special case : empty frame */ + if (cctx->stage == ZSTDcs_init) { + fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->appliedParams, 0, 0); + if (ZSTD_isError(fhSize)) return fhSize; + dstCapacity -= fhSize; + op += fhSize; + cctx->stage = ZSTDcs_ongoing; + } + + if (cctx->stage != ZSTDcs_ending) { + /* write one last empty block, make it the "last" block */ + U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1) + 0; + if (dstCapacity<4) return ERROR(dstSize_tooSmall); + MEM_writeLE32(op, cBlockHeader24); + op += ZSTD_blockHeaderSize; + dstCapacity -= ZSTD_blockHeaderSize; + } + + if (cctx->appliedParams.fParams.checksumFlag) { + U32 const checksum = (U32) XXH64_digest(&cctx->xxhState); + if (dstCapacity<4) return ERROR(dstSize_tooSmall); + DEBUGLOG(4, "ZSTD_writeEpilogue: write checksum : %08X", checksum); + MEM_writeLE32(op, checksum); + op += 4; + } + + cctx->stage = ZSTDcs_created; /* return to "created but no init" status */ + return op-ostart; +} + +size_t ZSTD_compressEnd (ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize) +{ + size_t endResult; + size_t const cSize = ZSTD_compressContinue_internal(cctx, + dst, dstCapacity, src, srcSize, + 1 /* frame mode */, 1 /* last chunk */); + if (ZSTD_isError(cSize)) return cSize; + endResult = ZSTD_writeEpilogue(cctx, (char*)dst + cSize, dstCapacity-cSize); + if (ZSTD_isError(endResult)) return endResult; + assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0)); + if (cctx->pledgedSrcSizePlusOne != 0) { /* control src size */ + ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == 
(unsigned long long)-1); + DEBUGLOG(4, "end of frame : controlling src size"); + if (cctx->pledgedSrcSizePlusOne != cctx->consumedSrcSize+1) { + DEBUGLOG(4, "error : pledgedSrcSize = %u, while realSrcSize = %u", + (U32)cctx->pledgedSrcSizePlusOne-1, (U32)cctx->consumedSrcSize); + return ERROR(srcSize_wrong); + } } + return cSize + endResult; +} + + +static size_t ZSTD_compress_internal (ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + ZSTD_parameters params) +{ + ZSTD_CCtx_params const cctxParams = + ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params); + DEBUGLOG(4, "ZSTD_compress_internal"); + return ZSTD_compress_advanced_internal(cctx, + dst, dstCapacity, + src, srcSize, + dict, dictSize, + cctxParams); +} + +size_t ZSTD_compress_advanced (ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + ZSTD_parameters params) +{ + DEBUGLOG(4, "ZSTD_compress_advanced"); + CHECK_F(ZSTD_checkCParams(params.cParams)); + return ZSTD_compress_internal(cctx, + dst, dstCapacity, + src, srcSize, + dict, dictSize, + params); +} + +/* Internal */ +size_t ZSTD_compress_advanced_internal( + ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + ZSTD_CCtx_params params) +{ + DEBUGLOG(4, "ZSTD_compress_advanced_internal (srcSize:%u)", (U32)srcSize); + CHECK_F( ZSTD_compressBegin_internal(cctx, + dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL, + params, srcSize, ZSTDb_not_buffered) ); + return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize); +} + +size_t ZSTD_compress_usingDict(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict, size_t dictSize, + int compressionLevel) +{ + ZSTD_parameters const params = ZSTD_getParams(compressionLevel, srcSize + (!srcSize), dict ? 
dictSize : 0); + ZSTD_CCtx_params cctxParams = ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params); + assert(params.fParams.contentSizeFlag == 1); + return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, cctxParams); +} + +size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + int compressionLevel) +{ + DEBUGLOG(4, "ZSTD_compressCCtx (srcSize=%u)", (U32)srcSize); + assert(cctx != NULL); + return ZSTD_compress_usingDict(cctx, dst, dstCapacity, src, srcSize, NULL, 0, compressionLevel); +} + +size_t ZSTD_compress(void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + int compressionLevel) +{ + size_t result; + ZSTD_CCtx ctxBody; + ZSTD_initCCtx(&ctxBody, ZSTD_defaultCMem); + result = ZSTD_compressCCtx(&ctxBody, dst, dstCapacity, src, srcSize, compressionLevel); + ZSTD_freeCCtxContent(&ctxBody); /* can't free ctxBody itself, as it's on stack; free only heap content */ + return result; +} + + +/* ===== Dictionary API ===== */ + +/*! ZSTD_estimateCDictSize_advanced() : + * Estimate amount of memory that will be needed to create a dictionary with following arguments */ +size_t ZSTD_estimateCDictSize_advanced( + size_t dictSize, ZSTD_compressionParameters cParams, + ZSTD_dictLoadMethod_e dictLoadMethod) +{ + DEBUGLOG(5, "sizeof(ZSTD_CDict) : %u", (U32)sizeof(ZSTD_CDict)); + return sizeof(ZSTD_CDict) + HUF_WORKSPACE_SIZE + ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0) + + (dictLoadMethod == ZSTD_dlm_byRef ? 
0 : dictSize); +} + +size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel) +{ + ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, 0, dictSize); + return ZSTD_estimateCDictSize_advanced(dictSize, cParams, ZSTD_dlm_byCopy); +} + +size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict) +{ + if (cdict==NULL) return 0; /* support sizeof on NULL */ + DEBUGLOG(5, "sizeof(*cdict) : %u", (U32)sizeof(*cdict)); + return cdict->workspaceSize + (cdict->dictBuffer ? cdict->dictContentSize : 0) + sizeof(*cdict); +} + +static size_t ZSTD_initCDict_internal( + ZSTD_CDict* cdict, + const void* dictBuffer, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_compressionParameters cParams) +{ + DEBUGLOG(3, "ZSTD_initCDict_internal (dictContentType:%u)", (U32)dictContentType); + assert(!ZSTD_checkCParams(cParams)); + cdict->cParams = cParams; + if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dictBuffer) || (!dictSize)) { + cdict->dictBuffer = NULL; + cdict->dictContent = dictBuffer; + } else { + void* const internalBuffer = ZSTD_malloc(dictSize, cdict->customMem); + cdict->dictBuffer = internalBuffer; + cdict->dictContent = internalBuffer; + if (!internalBuffer) return ERROR(memory_allocation); + memcpy(internalBuffer, dictBuffer, dictSize); + } + cdict->dictContentSize = dictSize; + + /* Reset the state to no dictionary */ + ZSTD_reset_compressedBlockState(&cdict->cBlockState); + { void* const end = ZSTD_reset_matchState( + &cdict->matchState, + (U32*)cdict->workspace + HUF_WORKSPACE_SIZE_U32, + &cParams, ZSTDcrp_continue, /* forCCtx */ 0); + assert(end == (char*)cdict->workspace + cdict->workspaceSize); + (void)end; + } + /* (Maybe) load the dictionary + * Skips loading the dictionary if it is <= 8 bytes. 
+ */ + { ZSTD_CCtx_params params; + memset(¶ms, 0, sizeof(params)); + params.compressionLevel = ZSTD_CLEVEL_DEFAULT; + params.fParams.contentSizeFlag = 1; + params.cParams = cParams; + { size_t const dictID = ZSTD_compress_insertDictionary( + &cdict->cBlockState, &cdict->matchState, ¶ms, + cdict->dictContent, cdict->dictContentSize, + dictContentType, ZSTD_dtlm_full, cdict->workspace); + if (ZSTD_isError(dictID)) return dictID; + assert(dictID <= (size_t)(U32)-1); + cdict->dictID = (U32)dictID; + } + } + + return 0; +} + +ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_compressionParameters cParams, ZSTD_customMem customMem) +{ + DEBUGLOG(3, "ZSTD_createCDict_advanced, mode %u", (U32)dictContentType); + if (!customMem.customAlloc ^ !customMem.customFree) return NULL; + + { ZSTD_CDict* const cdict = (ZSTD_CDict*)ZSTD_malloc(sizeof(ZSTD_CDict), customMem); + size_t const workspaceSize = HUF_WORKSPACE_SIZE + ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0); + void* const workspace = ZSTD_malloc(workspaceSize, customMem); + + if (!cdict || !workspace) { + ZSTD_free(cdict, customMem); + ZSTD_free(workspace, customMem); + return NULL; + } + cdict->customMem = customMem; + cdict->workspace = workspace; + cdict->workspaceSize = workspaceSize; + if (ZSTD_isError( ZSTD_initCDict_internal(cdict, + dictBuffer, dictSize, + dictLoadMethod, dictContentType, + cParams) )) { + ZSTD_freeCDict(cdict); + return NULL; + } + + return cdict; + } +} + +ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel) +{ + ZSTD_compressionParameters cParams = ZSTD_getCParams(compressionLevel, 0, dictSize); + return ZSTD_createCDict_advanced(dict, dictSize, + ZSTD_dlm_byCopy, ZSTD_dct_auto, + cParams, ZSTD_defaultCMem); +} + +ZSTD_CDict* ZSTD_createCDict_byReference(const void* dict, size_t dictSize, int compressionLevel) +{ + 
ZSTD_compressionParameters cParams = ZSTD_getCParams(compressionLevel, 0, dictSize); + return ZSTD_createCDict_advanced(dict, dictSize, + ZSTD_dlm_byRef, ZSTD_dct_auto, + cParams, ZSTD_defaultCMem); +} + +size_t ZSTD_freeCDict(ZSTD_CDict* cdict) +{ + if (cdict==NULL) return 0; /* support free on NULL */ + { ZSTD_customMem const cMem = cdict->customMem; + ZSTD_free(cdict->workspace, cMem); + ZSTD_free(cdict->dictBuffer, cMem); + ZSTD_free(cdict, cMem); + return 0; + } +} + +/*! ZSTD_initStaticCDict_advanced() : + * Generate a digested dictionary in provided memory area. + * workspace: The memory area to emplace the dictionary into. + * Provided pointer must 8-bytes aligned. + * It must outlive dictionary usage. + * workspaceSize: Use ZSTD_estimateCDictSize() + * to determine how large workspace must be. + * cParams : use ZSTD_getCParams() to transform a compression level + * into its relevants cParams. + * @return : pointer to ZSTD_CDict*, or NULL if error (size too small) + * Note : there is no corresponding "free" function. + * Since workspace was allocated externally, it must be freed externally. + */ +const ZSTD_CDict* ZSTD_initStaticCDict( + void* workspace, size_t workspaceSize, + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_compressionParameters cParams) +{ + size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0); + size_t const neededSize = sizeof(ZSTD_CDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 
0 : dictSize) + + HUF_WORKSPACE_SIZE + matchStateSize; + ZSTD_CDict* const cdict = (ZSTD_CDict*) workspace; + void* ptr; + if ((size_t)workspace & 7) return NULL; /* 8-aligned */ + DEBUGLOG(4, "(workspaceSize < neededSize) : (%u < %u) => %u", + (U32)workspaceSize, (U32)neededSize, (U32)(workspaceSize < neededSize)); + if (workspaceSize < neededSize) return NULL; + + if (dictLoadMethod == ZSTD_dlm_byCopy) { + memcpy(cdict+1, dict, dictSize); + dict = cdict+1; + ptr = (char*)workspace + sizeof(ZSTD_CDict) + dictSize; + } else { + ptr = cdict+1; + } + cdict->workspace = ptr; + cdict->workspaceSize = HUF_WORKSPACE_SIZE + matchStateSize; + + if (ZSTD_isError( ZSTD_initCDict_internal(cdict, + dict, dictSize, + ZSTD_dlm_byRef, dictContentType, + cParams) )) + return NULL; + + return cdict; +} + +ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict) +{ + assert(cdict != NULL); + return cdict->cParams; +} + +/* ZSTD_compressBegin_usingCDict_advanced() : + * cdict must be != NULL */ +size_t ZSTD_compressBegin_usingCDict_advanced( + ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, + ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize) +{ + DEBUGLOG(4, "ZSTD_compressBegin_usingCDict_advanced"); + if (cdict==NULL) return ERROR(dictionary_wrong); + { ZSTD_CCtx_params params = cctx->requestedParams; + params.cParams = ZSTD_getCParamsFromCDict(cdict); + /* Increase window log to fit the entire dictionary and source if the + * source size is known. Limit the increase to 19, which is the + * window log for compression level 1 with the largest source size. + */ + if (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN) { + U32 const limitedSrcSize = (U32)MIN(pledgedSrcSize, 1U << 19); + U32 const limitedSrcLog = limitedSrcSize > 1 ? 
ZSTD_highbit32(limitedSrcSize - 1) + 1 : 1; + params.cParams.windowLog = MAX(params.cParams.windowLog, limitedSrcLog); + } + params.fParams = fParams; + return ZSTD_compressBegin_internal(cctx, + NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast, + cdict, + params, pledgedSrcSize, + ZSTDb_not_buffered); + } +} + +/* ZSTD_compressBegin_usingCDict() : + * pledgedSrcSize=0 means "unknown" + * if pledgedSrcSize>0, it will enable contentSizeFlag */ +size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict) +{ + ZSTD_frameParameters const fParams = { 0 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ }; + DEBUGLOG(4, "ZSTD_compressBegin_usingCDict : dictIDFlag == %u", !fParams.noDictIDFlag); + return ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, ZSTD_CONTENTSIZE_UNKNOWN); +} + +size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict, ZSTD_frameParameters fParams) +{ + CHECK_F (ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, srcSize)); /* will check if cdict != NULL */ + return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize); +} + +/*! ZSTD_compress_usingCDict() : + * Compression using a digested Dictionary. + * Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times. 
+ * Note that compression parameters are decided at CDict creation time + * while frame parameters are hardcoded */ +size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict) +{ + ZSTD_frameParameters const fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ }; + return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, fParams); +} + + + +/* ****************************************************************** +* Streaming +********************************************************************/ + +ZSTD_CStream* ZSTD_createCStream(void) +{ + DEBUGLOG(3, "ZSTD_createCStream"); + return ZSTD_createCStream_advanced(ZSTD_defaultCMem); +} + +ZSTD_CStream* ZSTD_initStaticCStream(void *workspace, size_t workspaceSize) +{ + return ZSTD_initStaticCCtx(workspace, workspaceSize); +} + +ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem) +{ /* CStream and CCtx are now same object */ + return ZSTD_createCCtx_advanced(customMem); +} + +size_t ZSTD_freeCStream(ZSTD_CStream* zcs) +{ + return ZSTD_freeCCtx(zcs); /* same object */ +} + + + +/*====== Initialization ======*/ + +size_t ZSTD_CStreamInSize(void) { return ZSTD_BLOCKSIZE_MAX; } + +size_t ZSTD_CStreamOutSize(void) +{ + return ZSTD_compressBound(ZSTD_BLOCKSIZE_MAX) + ZSTD_blockHeaderSize + 4 /* 32-bits hash */ ; +} + +static size_t ZSTD_resetCStream_internal(ZSTD_CStream* cctx, + const void* const dict, size_t const dictSize, ZSTD_dictContentType_e const dictContentType, + const ZSTD_CDict* const cdict, + ZSTD_CCtx_params const params, unsigned long long const pledgedSrcSize) +{ + DEBUGLOG(4, "ZSTD_resetCStream_internal"); + /* params are supposed to be fully validated at this point */ + assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); + assert(!((dict) && (cdict))); /* either dict or cdict, not both */ + + CHECK_F( ZSTD_compressBegin_internal(cctx, + dict, dictSize, dictContentType, 
ZSTD_dtlm_fast, + cdict, + params, pledgedSrcSize, + ZSTDb_buffered) ); + + cctx->inToCompress = 0; + cctx->inBuffPos = 0; + cctx->inBuffTarget = cctx->blockSize + + (cctx->blockSize == pledgedSrcSize); /* for small input: avoid automatic flush on reaching end of block, since it would require to add a 3-bytes null block to end frame */ + cctx->outBuffContentSize = cctx->outBuffFlushedSize = 0; + cctx->streamStage = zcss_load; + cctx->frameEnded = 0; + return 0; /* ready to go */ +} + +/* ZSTD_resetCStream(): + * pledgedSrcSize == 0 means "unknown" */ +size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize) +{ + ZSTD_CCtx_params params = zcs->requestedParams; + DEBUGLOG(4, "ZSTD_resetCStream: pledgedSrcSize = %u", (U32)pledgedSrcSize); + if (pledgedSrcSize==0) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN; + params.fParams.contentSizeFlag = 1; + params.cParams = ZSTD_getCParamsFromCCtxParams(¶ms, pledgedSrcSize, 0); + return ZSTD_resetCStream_internal(zcs, NULL, 0, ZSTD_dct_auto, zcs->cdict, params, pledgedSrcSize); +} + +/*! ZSTD_initCStream_internal() : + * Note : for lib/compress only. Used by zstdmt_compress.c. 
+ * Assumption 1 : params are valid + * Assumption 2 : either dict, or cdict, is defined, not both */ +size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs, + const void* dict, size_t dictSize, const ZSTD_CDict* cdict, + ZSTD_CCtx_params params, unsigned long long pledgedSrcSize) +{ + DEBUGLOG(4, "ZSTD_initCStream_internal"); + assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); + assert(!((dict) && (cdict))); /* either dict or cdict, not both */ + + if (dict && dictSize >= 8) { + DEBUGLOG(4, "loading dictionary of size %u", (U32)dictSize); + if (zcs->staticSize) { /* static CCtx : never uses malloc */ + /* incompatible with internal cdict creation */ + return ERROR(memory_allocation); + } + ZSTD_freeCDict(zcs->cdictLocal); + zcs->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize, + ZSTD_dlm_byCopy, ZSTD_dct_auto, + params.cParams, zcs->customMem); + zcs->cdict = zcs->cdictLocal; + if (zcs->cdictLocal == NULL) return ERROR(memory_allocation); + } else { + if (cdict) { + params.cParams = ZSTD_getCParamsFromCDict(cdict); /* cParams are enforced from cdict; it includes windowLog */ + } + ZSTD_freeCDict(zcs->cdictLocal); + zcs->cdictLocal = NULL; + zcs->cdict = cdict; + } + + return ZSTD_resetCStream_internal(zcs, NULL, 0, ZSTD_dct_auto, zcs->cdict, params, pledgedSrcSize); +} + +/* ZSTD_initCStream_usingCDict_advanced() : + * same as ZSTD_initCStream_usingCDict(), with control over frame parameters */ +size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, + const ZSTD_CDict* cdict, + ZSTD_frameParameters fParams, + unsigned long long pledgedSrcSize) +{ + DEBUGLOG(4, "ZSTD_initCStream_usingCDict_advanced"); + if (!cdict) return ERROR(dictionary_wrong); /* cannot handle NULL cdict (does not know what to do) */ + { ZSTD_CCtx_params params = zcs->requestedParams; + params.cParams = ZSTD_getCParamsFromCDict(cdict); + params.fParams = fParams; + return ZSTD_initCStream_internal(zcs, + NULL, 0, cdict, + params, pledgedSrcSize); + } +} + +/* note : cdict must 
outlive compression session */ +size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict) +{ + ZSTD_frameParameters const fParams = { 0 /* contentSizeFlag */, 0 /* checksum */, 0 /* hideDictID */ }; + DEBUGLOG(4, "ZSTD_initCStream_usingCDict"); + return ZSTD_initCStream_usingCDict_advanced(zcs, cdict, fParams, ZSTD_CONTENTSIZE_UNKNOWN); /* note : will check that cdict != NULL */ +} + + +/* ZSTD_initCStream_advanced() : + * pledgedSrcSize must be exact. + * if srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN. + * dict is loaded with default parameters ZSTD_dm_auto and ZSTD_dlm_byCopy. */ +size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, + const void* dict, size_t dictSize, + ZSTD_parameters params, unsigned long long pledgedSrcSize) +{ + DEBUGLOG(4, "ZSTD_initCStream_advanced: pledgedSrcSize=%u, flag=%u", + (U32)pledgedSrcSize, params.fParams.contentSizeFlag); + CHECK_F( ZSTD_checkCParams(params.cParams) ); + if ((pledgedSrcSize==0) && (params.fParams.contentSizeFlag==0)) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN; /* for compatibility with older programs relying on this behavior. Users should now specify ZSTD_CONTENTSIZE_UNKNOWN. This line will be removed in the future. */ + { ZSTD_CCtx_params const cctxParams = ZSTD_assignParamsToCCtxParams(zcs->requestedParams, params); + return ZSTD_initCStream_internal(zcs, dict, dictSize, NULL /*cdict*/, cctxParams, pledgedSrcSize); + } +} + +size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel) +{ + ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, dictSize); + ZSTD_CCtx_params const cctxParams = + ZSTD_assignParamsToCCtxParams(zcs->requestedParams, params); + return ZSTD_initCStream_internal(zcs, dict, dictSize, NULL, cctxParams, ZSTD_CONTENTSIZE_UNKNOWN); +} + +size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pss) +{ + U64 const pledgedSrcSize = (pss==0) ? 
ZSTD_CONTENTSIZE_UNKNOWN : pss; /* temporary : 0 interpreted as "unknown" during transition period. Users willing to specify "unknown" **must** use ZSTD_CONTENTSIZE_UNKNOWN. `0` will be interpreted as "empty" in the future */ + ZSTD_parameters const params = ZSTD_getParams(compressionLevel, pledgedSrcSize, 0); + ZSTD_CCtx_params const cctxParams = ZSTD_assignParamsToCCtxParams(zcs->requestedParams, params); + return ZSTD_initCStream_internal(zcs, NULL, 0, NULL, cctxParams, pledgedSrcSize); +} + +size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel) +{ + DEBUGLOG(4, "ZSTD_initCStream"); + return ZSTD_initCStream_srcSize(zcs, compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN); +} + +/*====== Compression ======*/ + +MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, + const void* src, size_t srcSize) +{ + size_t const length = MIN(dstCapacity, srcSize); + if (length) memcpy(dst, src, length); + return length; +} + +/** ZSTD_compressStream_generic(): + * internal function for all *compressStream*() variants and *compress_generic() + * non-static, because can be called from zstdmt_compress.c + * @return : hint size for next input */ +size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs, + ZSTD_outBuffer* output, + ZSTD_inBuffer* input, + ZSTD_EndDirective const flushMode) +{ + const char* const istart = (const char*)input->src; + const char* const iend = istart + input->size; + const char* ip = istart + input->pos; + char* const ostart = (char*)output->dst; + char* const oend = ostart + output->size; + char* op = ostart + output->pos; + U32 someMoreWork = 1; + + /* check expectations */ + DEBUGLOG(5, "ZSTD_compressStream_generic, flush=%u", (U32)flushMode); + assert(zcs->inBuff != NULL); + assert(zcs->inBuffSize > 0); + assert(zcs->outBuff != NULL); + assert(zcs->outBuffSize > 0); + assert(output->pos <= output->size); + assert(input->pos <= input->size); + + while (someMoreWork) { + switch(zcs->streamStage) + { + case zcss_init: + /* call 
ZSTD_initCStream() first ! */ + return ERROR(init_missing); + + case zcss_load: + if ( (flushMode == ZSTD_e_end) + && ((size_t)(oend-op) >= ZSTD_compressBound(iend-ip)) /* enough dstCapacity */ + && (zcs->inBuffPos == 0) ) { + /* shortcut to compression pass directly into output buffer */ + size_t const cSize = ZSTD_compressEnd(zcs, + op, oend-op, ip, iend-ip); + DEBUGLOG(4, "ZSTD_compressEnd : %u", (U32)cSize); + if (ZSTD_isError(cSize)) return cSize; + ip = iend; + op += cSize; + zcs->frameEnded = 1; + ZSTD_CCtx_reset(zcs); + someMoreWork = 0; break; + } + /* complete loading into inBuffer */ + { size_t const toLoad = zcs->inBuffTarget - zcs->inBuffPos; + size_t const loaded = ZSTD_limitCopy( + zcs->inBuff + zcs->inBuffPos, toLoad, + ip, iend-ip); + zcs->inBuffPos += loaded; + ip += loaded; + if ( (flushMode == ZSTD_e_continue) + && (zcs->inBuffPos < zcs->inBuffTarget) ) { + /* not enough input to fill full block : stop here */ + someMoreWork = 0; break; + } + if ( (flushMode == ZSTD_e_flush) + && (zcs->inBuffPos == zcs->inToCompress) ) { + /* empty */ + someMoreWork = 0; break; + } + } + /* compress current block (note : this stage cannot be stopped in the middle) */ + DEBUGLOG(5, "stream compression stage (flushMode==%u)", flushMode); + { void* cDst; + size_t cSize; + size_t const iSize = zcs->inBuffPos - zcs->inToCompress; + size_t oSize = oend-op; + unsigned const lastBlock = (flushMode == ZSTD_e_end) && (ip==iend); + if (oSize >= ZSTD_compressBound(iSize)) + cDst = op; /* compress into output buffer, to skip flush stage */ + else + cDst = zcs->outBuff, oSize = zcs->outBuffSize; + cSize = lastBlock ? 
+ ZSTD_compressEnd(zcs, cDst, oSize, + zcs->inBuff + zcs->inToCompress, iSize) : + ZSTD_compressContinue(zcs, cDst, oSize, + zcs->inBuff + zcs->inToCompress, iSize); + if (ZSTD_isError(cSize)) return cSize; + zcs->frameEnded = lastBlock; + /* prepare next block */ + zcs->inBuffTarget = zcs->inBuffPos + zcs->blockSize; + if (zcs->inBuffTarget > zcs->inBuffSize) + zcs->inBuffPos = 0, zcs->inBuffTarget = zcs->blockSize; + DEBUGLOG(5, "inBuffTarget:%u / inBuffSize:%u", + (U32)zcs->inBuffTarget, (U32)zcs->inBuffSize); + if (!lastBlock) + assert(zcs->inBuffTarget <= zcs->inBuffSize); + zcs->inToCompress = zcs->inBuffPos; + if (cDst == op) { /* no need to flush */ + op += cSize; + if (zcs->frameEnded) { + DEBUGLOG(5, "Frame completed directly in outBuffer"); + someMoreWork = 0; + ZSTD_CCtx_reset(zcs); + } + break; + } + zcs->outBuffContentSize = cSize; + zcs->outBuffFlushedSize = 0; + zcs->streamStage = zcss_flush; /* pass-through to flush stage */ + } + /* fall-through */ + case zcss_flush: + DEBUGLOG(5, "flush stage"); + { size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize; + size_t const flushed = ZSTD_limitCopy(op, oend-op, + zcs->outBuff + zcs->outBuffFlushedSize, toFlush); + DEBUGLOG(5, "toFlush: %u into %u ==> flushed: %u", + (U32)toFlush, (U32)(oend-op), (U32)flushed); + op += flushed; + zcs->outBuffFlushedSize += flushed; + if (toFlush!=flushed) { + /* flush not fully completed, presumably because dst is too small */ + assert(op==oend); + someMoreWork = 0; + break; + } + zcs->outBuffContentSize = zcs->outBuffFlushedSize = 0; + if (zcs->frameEnded) { + DEBUGLOG(5, "Frame completed on flush"); + someMoreWork = 0; + ZSTD_CCtx_reset(zcs); + break; + } + zcs->streamStage = zcss_load; + break; + } + + default: /* impossible */ + assert(0); + } + } + + input->pos = ip - istart; + output->pos = op - ostart; + if (zcs->frameEnded) return 0; + { size_t hintInSize = zcs->inBuffTarget - zcs->inBuffPos; + if (hintInSize==0) hintInSize = zcs->blockSize; + 
return hintInSize; + } +} + +size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input) +{ + /* check conditions */ + if (output->pos > output->size) return ERROR(GENERIC); + if (input->pos > input->size) return ERROR(GENERIC); + + return ZSTD_compressStream_generic(zcs, output, input, ZSTD_e_continue); +} + + +size_t ZSTD_compress_generic (ZSTD_CCtx* cctx, + ZSTD_outBuffer* output, + ZSTD_inBuffer* input, + ZSTD_EndDirective endOp) +{ + DEBUGLOG(5, "ZSTD_compress_generic, endOp=%u ", (U32)endOp); + /* check conditions */ + if (output->pos > output->size) return ERROR(GENERIC); + if (input->pos > input->size) return ERROR(GENERIC); + assert(cctx!=NULL); + + /* transparent initialization stage */ + if (cctx->streamStage == zcss_init) { + ZSTD_CCtx_params params = cctx->requestedParams; + ZSTD_prefixDict const prefixDict = cctx->prefixDict; + memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict)); /* single usage */ + assert(prefixDict.dict==NULL || cctx->cdict==NULL); /* only one can be set */ + DEBUGLOG(4, "ZSTD_compress_generic : transparent init stage"); + if (endOp == ZSTD_e_end) cctx->pledgedSrcSizePlusOne = input->size + 1; /* auto-fix pledgedSrcSize */ + params.cParams = ZSTD_getCParamsFromCCtxParams( + &cctx->requestedParams, cctx->pledgedSrcSizePlusOne-1, 0 /*dictSize*/); + + +#ifdef ZSTD_MULTITHREAD + if ((cctx->pledgedSrcSizePlusOne-1) <= ZSTDMT_JOBSIZE_MIN) { + params.nbWorkers = 0; /* do not invoke multi-threading when src size is too small */ + } + if (params.nbWorkers > 0) { + /* mt context creation */ + if (cctx->mtctx == NULL) { + DEBUGLOG(4, "ZSTD_compress_generic: creating new mtctx for nbWorkers=%u", + params.nbWorkers); + cctx->mtctx = ZSTDMT_createCCtx_advanced(params.nbWorkers, cctx->customMem); + if (cctx->mtctx == NULL) return ERROR(memory_allocation); + } + /* mt compression */ + DEBUGLOG(4, "call ZSTDMT_initCStream_internal as nbWorkers=%u", params.nbWorkers); + CHECK_F( ZSTDMT_initCStream_internal( + 
cctx->mtctx, + prefixDict.dict, prefixDict.dictSize, ZSTD_dct_rawContent, + cctx->cdict, params, cctx->pledgedSrcSizePlusOne-1) ); + cctx->streamStage = zcss_load; + cctx->appliedParams.nbWorkers = params.nbWorkers; + } else +#endif + { CHECK_F( ZSTD_resetCStream_internal(cctx, + prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType, + cctx->cdict, + params, cctx->pledgedSrcSizePlusOne-1) ); + assert(cctx->streamStage == zcss_load); + assert(cctx->appliedParams.nbWorkers == 0); + } } + + /* compression stage */ +#ifdef ZSTD_MULTITHREAD + if (cctx->appliedParams.nbWorkers > 0) { + if (cctx->cParamsChanged) { + ZSTDMT_updateCParams_whileCompressing(cctx->mtctx, &cctx->requestedParams); + cctx->cParamsChanged = 0; + } + { size_t const flushMin = ZSTDMT_compressStream_generic(cctx->mtctx, output, input, endOp); + if ( ZSTD_isError(flushMin) + || (endOp == ZSTD_e_end && flushMin == 0) ) { /* compression completed */ + ZSTD_CCtx_reset(cctx); + } + return flushMin; + } } +#endif + CHECK_F( ZSTD_compressStream_generic(cctx, output, input, endOp) ); + DEBUGLOG(5, "completed ZSTD_compress_generic"); + return cctx->outBuffContentSize - cctx->outBuffFlushedSize; /* remaining to flush */ +} + +size_t ZSTD_compress_generic_simpleArgs ( + ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, size_t* dstPos, + const void* src, size_t srcSize, size_t* srcPos, + ZSTD_EndDirective endOp) +{ + ZSTD_outBuffer output = { dst, dstCapacity, *dstPos }; + ZSTD_inBuffer input = { src, srcSize, *srcPos }; + /* ZSTD_compress_generic() will check validity of dstPos and srcPos */ + size_t const cErr = ZSTD_compress_generic(cctx, &output, &input, endOp); + *dstPos = output.pos; + *srcPos = input.pos; + return cErr; +} + + +/*====== Finalize ======*/ + +/*! 
ZSTD_flushStream() : + * @return : amount of data remaining to flush */ +size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output) +{ + ZSTD_inBuffer input = { NULL, 0, 0 }; + if (output->pos > output->size) return ERROR(GENERIC); + CHECK_F( ZSTD_compressStream_generic(zcs, output, &input, ZSTD_e_flush) ); + return zcs->outBuffContentSize - zcs->outBuffFlushedSize; /* remaining to flush */ +} + + +size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output) +{ + ZSTD_inBuffer input = { NULL, 0, 0 }; + if (output->pos > output->size) return ERROR(GENERIC); + CHECK_F( ZSTD_compressStream_generic(zcs, output, &input, ZSTD_e_end) ); + { size_t const lastBlockSize = zcs->frameEnded ? 0 : ZSTD_BLOCKHEADERSIZE; + size_t const checksumSize = zcs->frameEnded ? 0 : zcs->appliedParams.fParams.checksumFlag * 4; + size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize + lastBlockSize + checksumSize; + DEBUGLOG(4, "ZSTD_endStream : remaining to flush : %u", (U32)toFlush); + return toFlush; + } +} + + +/*-===== Pre-defined compression levels =====-*/ + +#define ZSTD_MAX_CLEVEL 22 +int ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; } + +static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = { +{ /* "default" - guarantees a monotonically increasing memory budget */ + /* W, C, H, S, L, TL, strat */ + { 19, 12, 13, 1, 6, 1, ZSTD_fast }, /* base for negative levels */ + { 19, 13, 14, 1, 7, 0, ZSTD_fast }, /* level 1 */ + { 19, 15, 16, 1, 6, 0, ZSTD_fast }, /* level 2 */ + { 20, 16, 17, 1, 5, 1, ZSTD_dfast }, /* level 3 */ + { 20, 18, 18, 1, 5, 1, ZSTD_dfast }, /* level 4 */ + { 20, 18, 18, 2, 5, 2, ZSTD_greedy }, /* level 5 */ + { 21, 18, 19, 2, 5, 4, ZSTD_lazy }, /* level 6 */ + { 21, 18, 19, 3, 5, 8, ZSTD_lazy2 }, /* level 7 */ + { 21, 19, 19, 3, 5, 16, ZSTD_lazy2 }, /* level 8 */ + { 21, 19, 20, 4, 5, 16, ZSTD_lazy2 }, /* level 9 */ + { 21, 20, 21, 4, 5, 16, ZSTD_lazy2 }, /* level 10 */ + { 21, 21, 22, 4, 5, 16, 
ZSTD_lazy2 }, /* level 11 */ + { 22, 20, 22, 5, 5, 16, ZSTD_lazy2 }, /* level 12 */ + { 22, 21, 22, 4, 5, 32, ZSTD_btlazy2 }, /* level 13 */ + { 22, 21, 22, 5, 5, 32, ZSTD_btlazy2 }, /* level 14 */ + { 22, 22, 22, 6, 5, 32, ZSTD_btlazy2 }, /* level 15 */ + { 22, 21, 22, 4, 5, 48, ZSTD_btopt }, /* level 16 */ + { 23, 22, 22, 4, 4, 64, ZSTD_btopt }, /* level 17 */ + { 23, 23, 22, 6, 3,256, ZSTD_btopt }, /* level 18 */ + { 23, 24, 22, 7, 3,256, ZSTD_btultra }, /* level 19 */ + { 25, 25, 23, 7, 3,256, ZSTD_btultra }, /* level 20 */ + { 26, 26, 24, 7, 3,512, ZSTD_btultra }, /* level 21 */ + { 27, 27, 25, 9, 3,999, ZSTD_btultra }, /* level 22 */ +}, +{ /* for srcSize <= 256 KB */ + /* W, C, H, S, L, T, strat */ + { 18, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */ + { 18, 13, 14, 1, 6, 0, ZSTD_fast }, /* level 1 */ + { 18, 14, 14, 1, 5, 1, ZSTD_dfast }, /* level 2 */ + { 18, 16, 16, 1, 4, 1, ZSTD_dfast }, /* level 3 */ + { 18, 16, 17, 2, 5, 2, ZSTD_greedy }, /* level 4.*/ + { 18, 18, 18, 3, 5, 2, ZSTD_greedy }, /* level 5.*/ + { 18, 18, 19, 3, 5, 4, ZSTD_lazy }, /* level 6.*/ + { 18, 18, 19, 4, 4, 4, ZSTD_lazy }, /* level 7 */ + { 18, 18, 19, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */ + { 18, 18, 19, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */ + { 18, 18, 19, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */ + { 18, 18, 19, 5, 4, 16, ZSTD_btlazy2 }, /* level 11.*/ + { 18, 19, 19, 6, 4, 16, ZSTD_btlazy2 }, /* level 12.*/ + { 18, 19, 19, 8, 4, 16, ZSTD_btlazy2 }, /* level 13 */ + { 18, 18, 19, 4, 4, 24, ZSTD_btopt }, /* level 14.*/ + { 18, 18, 19, 4, 3, 24, ZSTD_btopt }, /* level 15.*/ + { 18, 19, 19, 6, 3, 64, ZSTD_btopt }, /* level 16.*/ + { 18, 19, 19, 8, 3,128, ZSTD_btopt }, /* level 17.*/ + { 18, 19, 19, 10, 3,256, ZSTD_btopt }, /* level 18.*/ + { 18, 19, 19, 10, 3,256, ZSTD_btultra }, /* level 19.*/ + { 18, 19, 19, 11, 3,512, ZSTD_btultra }, /* level 20.*/ + { 18, 19, 19, 12, 3,512, ZSTD_btultra }, /* level 21.*/ + { 18, 19, 19, 13, 3,999, ZSTD_btultra }, /* level 22.*/ +}, +{ 
/* for srcSize <= 128 KB */ + /* W, C, H, S, L, T, strat */ + { 17, 12, 12, 1, 5, 1, ZSTD_fast }, /* base for negative levels */ + { 17, 12, 13, 1, 6, 0, ZSTD_fast }, /* level 1 */ + { 17, 13, 15, 1, 5, 0, ZSTD_fast }, /* level 2 */ + { 17, 15, 16, 2, 5, 1, ZSTD_dfast }, /* level 3 */ + { 17, 17, 17, 2, 4, 1, ZSTD_dfast }, /* level 4 */ + { 17, 16, 17, 3, 4, 2, ZSTD_greedy }, /* level 5 */ + { 17, 17, 17, 3, 4, 4, ZSTD_lazy }, /* level 6 */ + { 17, 17, 17, 3, 4, 8, ZSTD_lazy2 }, /* level 7 */ + { 17, 17, 17, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */ + { 17, 17, 17, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */ + { 17, 17, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */ + { 17, 17, 17, 7, 4, 8, ZSTD_lazy2 }, /* level 11 */ + { 17, 18, 17, 6, 4, 16, ZSTD_btlazy2 }, /* level 12 */ + { 17, 18, 17, 8, 4, 16, ZSTD_btlazy2 }, /* level 13.*/ + { 17, 18, 17, 4, 4, 32, ZSTD_btopt }, /* level 14.*/ + { 17, 18, 17, 6, 3, 64, ZSTD_btopt }, /* level 15.*/ + { 17, 18, 17, 7, 3,128, ZSTD_btopt }, /* level 16.*/ + { 17, 18, 17, 7, 3,256, ZSTD_btopt }, /* level 17.*/ + { 17, 18, 17, 8, 3,256, ZSTD_btopt }, /* level 18.*/ + { 17, 18, 17, 8, 3,256, ZSTD_btultra }, /* level 19.*/ + { 17, 18, 17, 9, 3,256, ZSTD_btultra }, /* level 20.*/ + { 17, 18, 17, 10, 3,256, ZSTD_btultra }, /* level 21.*/ + { 17, 18, 17, 11, 3,512, ZSTD_btultra }, /* level 22.*/ +}, +{ /* for srcSize <= 16 KB */ + /* W, C, H, S, L, T, strat */ + { 14, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */ + { 14, 14, 15, 1, 5, 0, ZSTD_fast }, /* level 1 */ + { 14, 14, 15, 1, 4, 0, ZSTD_fast }, /* level 2 */ + { 14, 14, 14, 2, 4, 1, ZSTD_dfast }, /* level 3.*/ + { 14, 14, 14, 4, 4, 2, ZSTD_greedy }, /* level 4.*/ + { 14, 14, 14, 3, 4, 4, ZSTD_lazy }, /* level 5.*/ + { 14, 14, 14, 4, 4, 8, ZSTD_lazy2 }, /* level 6 */ + { 14, 14, 14, 6, 4, 8, ZSTD_lazy2 }, /* level 7 */ + { 14, 14, 14, 8, 4, 8, ZSTD_lazy2 }, /* level 8.*/ + { 14, 15, 14, 5, 4, 8, ZSTD_btlazy2 }, /* level 9.*/ + { 14, 15, 14, 9, 4, 8, ZSTD_btlazy2 }, /* level 10.*/ 
+ { 14, 15, 14, 3, 4, 12, ZSTD_btopt }, /* level 11.*/ + { 14, 15, 14, 6, 3, 16, ZSTD_btopt }, /* level 12.*/ + { 14, 15, 14, 6, 3, 24, ZSTD_btopt }, /* level 13.*/ + { 14, 15, 15, 6, 3, 48, ZSTD_btopt }, /* level 14.*/ + { 14, 15, 15, 6, 3, 64, ZSTD_btopt }, /* level 15.*/ + { 14, 15, 15, 6, 3, 96, ZSTD_btopt }, /* level 16.*/ + { 14, 15, 15, 6, 3,128, ZSTD_btopt }, /* level 17.*/ + { 14, 15, 15, 8, 3,256, ZSTD_btopt }, /* level 18.*/ + { 14, 15, 15, 6, 3,256, ZSTD_btultra }, /* level 19.*/ + { 14, 15, 15, 8, 3,256, ZSTD_btultra }, /* level 20.*/ + { 14, 15, 15, 9, 3,256, ZSTD_btultra }, /* level 21.*/ + { 14, 15, 15, 10, 3,512, ZSTD_btultra }, /* level 22.*/ +}, +}; + +/*! ZSTD_getCParams() : +* @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize. +* Size values are optional, provide 0 if not known or unused */ +ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) +{ + size_t const addedSize = srcSizeHint ? 0 : 500; + U64 const rSize = srcSizeHint+dictSize ? srcSizeHint+dictSize+addedSize : (U64)-1; + U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB); /* intentional underflow for srcSizeHint == 0 */ + int row = compressionLevel; + DEBUGLOG(5, "ZSTD_getCParams (cLevel=%i)", compressionLevel); + if (compressionLevel == 0) row = ZSTD_CLEVEL_DEFAULT; /* 0 == default */ + if (compressionLevel < 0) row = 0; /* entry 0 is baseline for fast mode */ + if (compressionLevel > ZSTD_MAX_CLEVEL) row = ZSTD_MAX_CLEVEL; + { ZSTD_compressionParameters cp = ZSTD_defaultCParameters[tableID][row]; + if (compressionLevel < 0) cp.targetLength = (unsigned)(-compressionLevel); /* acceleration factor */ + return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize); } + +} + +/*! ZSTD_getParams() : +* same as ZSTD_getCParams(), but @return a `ZSTD_parameters` object (instead of `ZSTD_compressionParameters`). 
+* All fields of `ZSTD_frameParameters` are set to default (0) */ +ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) { + ZSTD_parameters params; + ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, srcSizeHint, dictSize); + DEBUGLOG(5, "ZSTD_getParams (cLevel=%i)", compressionLevel); + memset(¶ms, 0, sizeof(params)); + params.cParams = cParams; + params.fParams.contentSizeFlag = 1; + return params; +} diff --git a/deps/SZ/zstd/compress/zstd_compress_internal.h b/deps/SZ/zstd/compress/zstd_compress_internal.h new file mode 100644 index 0000000000000000000000000000000000000000..d31542c69b62d4dabe75aa3a3d0589a7b22281fe --- /dev/null +++ b/deps/SZ/zstd/compress/zstd_compress_internal.h @@ -0,0 +1,795 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/* This header contains definitions + * that shall **only** be used by modules within lib/compress. + */ + +#ifndef ZSTD_COMPRESS_H +#define ZSTD_COMPRESS_H + +/*-************************************* +* Dependencies +***************************************/ +#include "zstd_internal.h" +#ifdef ZSTD_MULTITHREAD +# include "zstdmt_compress.h" +#endif + +#if defined (__cplusplus) +extern "C" { +#endif + + +/*-************************************* +* Constants +***************************************/ +#define kSearchStrength 8 +#define HASH_READ_SIZE 8 +#define ZSTD_DUBT_UNSORTED_MARK 1 /* For btlazy2 strategy, index 1 now means "unsorted". + It could be confused for a real successor at index "1", if sorted as larger than its predecessor. 
+ It's not a big deal though : candidate will just be sorted again. + Additionnally, candidate position 1 will be lost. + But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss. + The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be misdhandled after table re-use with a different strategy + Constant required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */ + + +/*-************************************* +* Context memory management +***************************************/ +typedef enum { ZSTDcs_created=0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZSTD_compressionStage_e; +typedef enum { zcss_init=0, zcss_load, zcss_flush } ZSTD_cStreamStage; + +typedef enum { + ZSTD_dictDefaultAttach = 0, + ZSTD_dictForceAttach = 1, + ZSTD_dictForceCopy = -1, +} ZSTD_dictAttachPref_e; + +typedef struct ZSTD_prefixDict_s { + const void* dict; + size_t dictSize; + ZSTD_dictContentType_e dictContentType; +} ZSTD_prefixDict; + +typedef struct { + U32 CTable[HUF_CTABLE_SIZE_U32(255)]; + HUF_repeat repeatMode; +} ZSTD_hufCTables_t; + +typedef struct { + FSE_CTable offcodeCTable[FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)]; + FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)]; + FSE_CTable litlengthCTable[FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)]; + FSE_repeat offcode_repeatMode; + FSE_repeat matchlength_repeatMode; + FSE_repeat litlength_repeatMode; +} ZSTD_fseCTables_t; + +typedef struct { + ZSTD_hufCTables_t huf; + ZSTD_fseCTables_t fse; +} ZSTD_entropyCTables_t; + +typedef struct { + U32 off; + U32 len; +} ZSTD_match_t; + +typedef struct { + int price; + U32 off; + U32 mlen; + U32 litlen; + U32 rep[ZSTD_REP_NUM]; +} ZSTD_optimal_t; + +typedef enum { zop_dynamic=0, zop_predef } ZSTD_OptPrice_e; + +typedef struct { + /* All tables are allocated inside cctx->workspace by ZSTD_resetCCtx_internal() */ + U32* litFreq; /* table of literals statistics, of size 256 */ + U32* litLengthFreq; /* table of litLength statistics, of size (MaxLL+1) 
*/ + U32* matchLengthFreq; /* table of matchLength statistics, of size (MaxML+1) */ + U32* offCodeFreq; /* table of offCode statistics, of size (MaxOff+1) */ + ZSTD_match_t* matchTable; /* list of found matches, of size ZSTD_OPT_NUM+1 */ + ZSTD_optimal_t* priceTable; /* All positions tracked by optimal parser, of size ZSTD_OPT_NUM+1 */ + + U32 litSum; /* nb of literals */ + U32 litLengthSum; /* nb of litLength codes */ + U32 matchLengthSum; /* nb of matchLength codes */ + U32 offCodeSum; /* nb of offset codes */ + U32 litSumBasePrice; /* to compare to log2(litfreq) */ + U32 litLengthSumBasePrice; /* to compare to log2(llfreq) */ + U32 matchLengthSumBasePrice;/* to compare to log2(mlfreq) */ + U32 offCodeSumBasePrice; /* to compare to log2(offreq) */ + ZSTD_OptPrice_e priceType; /* prices can be determined dynamically, or follow a pre-defined cost structure */ + const ZSTD_entropyCTables_t* symbolCosts; /* pre-calculated dictionary statistics */ +} optState_t; + +typedef struct { + ZSTD_entropyCTables_t entropy; + U32 rep[ZSTD_REP_NUM]; +} ZSTD_compressedBlockState_t; + +typedef struct { + BYTE const* nextSrc; /* next block here to continue on current prefix */ + BYTE const* base; /* All regular indexes relative to this position */ + BYTE const* dictBase; /* extDict indexes relative to this position */ + U32 dictLimit; /* below that point, need extDict */ + U32 lowLimit; /* below that point, no more data */ +} ZSTD_window_t; + +typedef struct ZSTD_matchState_t ZSTD_matchState_t; +struct ZSTD_matchState_t { + ZSTD_window_t window; /* State for window round buffer management */ + U32 loadedDictEnd; /* index of end of dictionary */ + U32 nextToUpdate; /* index from which to continue table update */ + U32 nextToUpdate3; /* index from which to continue table update */ + U32 hashLog3; /* dispatch table : larger == faster, more memory */ + U32* hashTable; + U32* hashTable3; + U32* chainTable; + optState_t opt; /* optimal parser state */ + const ZSTD_matchState_t 
*dictMatchState; +}; + +typedef struct { + ZSTD_compressedBlockState_t* prevCBlock; + ZSTD_compressedBlockState_t* nextCBlock; + ZSTD_matchState_t matchState; +} ZSTD_blockState_t; + +typedef struct { + U32 offset; + U32 checksum; +} ldmEntry_t; + +typedef struct { + ZSTD_window_t window; /* State for the window round buffer management */ + ldmEntry_t* hashTable; + BYTE* bucketOffsets; /* Next position in bucket to insert entry */ + U64 hashPower; /* Used to compute the rolling hash. + * Depends on ldmParams.minMatchLength */ +} ldmState_t; + +typedef struct { + U32 enableLdm; /* 1 if enable long distance matching */ + U32 hashLog; /* Log size of hashTable */ + U32 bucketSizeLog; /* Log bucket size for collision resolution, at most 8 */ + U32 minMatchLength; /* Minimum match length */ + U32 hashEveryLog; /* Log number of entries to skip */ + U32 windowLog; /* Window log for the LDM */ +} ldmParams_t; + +typedef struct { + U32 offset; + U32 litLength; + U32 matchLength; +} rawSeq; + +typedef struct { + rawSeq* seq; /* The start of the sequences */ + size_t pos; /* The position where reading stopped. <= size. */ + size_t size; /* The number of sequences. <= capacity. */ + size_t capacity; /* The capacity starting from `seq` pointer */ +} rawSeqStore_t; + +struct ZSTD_CCtx_params_s { + ZSTD_format_e format; + ZSTD_compressionParameters cParams; + ZSTD_frameParameters fParams; + + int compressionLevel; + int forceWindow; /* force back-references to respect limit of + * 1< 63) ? 
ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength]; +} + +/* ZSTD_MLcode() : + * note : mlBase = matchLength - MINMATCH; + * because it's the format it's stored in seqStore->sequences */ +MEM_STATIC U32 ZSTD_MLcode(U32 mlBase) +{ + static const BYTE ML_Code[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37, + 38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39, + 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, + 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 }; + static const U32 ML_deltaCode = 36; + return (mlBase > 127) ? ZSTD_highbit32(mlBase) + ML_deltaCode : ML_Code[mlBase]; +} + +/*! ZSTD_storeSeq() : + * Store a sequence (literal length, literals, offset code and match length code) into seqStore_t. + * `offsetCode` : distance to match + 3 (values 1-3 are repCodes). 
+ * `mlBase` : matchLength - MINMATCH +*/ +MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const void* literals, U32 offsetCode, size_t mlBase) +{ +#if defined(DEBUGLEVEL) && (DEBUGLEVEL >= 6) + static const BYTE* g_start = NULL; + if (g_start==NULL) g_start = (const BYTE*)literals; /* note : index only works for compression within a single segment */ + { U32 const pos = (U32)((const BYTE*)literals - g_start); + DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u", + pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)offsetCode); + } +#endif + /* copy Literals */ + assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + 128 KB); + ZSTD_wildcopy(seqStorePtr->lit, literals, litLength); + seqStorePtr->lit += litLength; + + /* literal Length */ + if (litLength>0xFFFF) { + assert(seqStorePtr->longLengthID == 0); /* there can only be a single long length */ + seqStorePtr->longLengthID = 1; + seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); + } + seqStorePtr->sequences[0].litLength = (U16)litLength; + + /* match offset */ + seqStorePtr->sequences[0].offset = offsetCode + 1; + + /* match Length */ + if (mlBase>0xFFFF) { + assert(seqStorePtr->longLengthID == 0); /* there can only be a single long length */ + seqStorePtr->longLengthID = 2; + seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); + } + seqStorePtr->sequences[0].matchLength = (U16)mlBase; + + seqStorePtr->sequences++; +} + + +/*-************************************* +* Match length counter +***************************************/ +static unsigned ZSTD_NbCommonBytes (size_t val) +{ + if (MEM_isLittleEndian()) { + if (MEM_64bits()) { +# if defined(_MSC_VER) && defined(_WIN64) + unsigned long r = 0; + _BitScanForward64( &r, (U64)val ); + return (unsigned)(r>>3); +# elif defined(__GNUC__) && (__GNUC__ >= 4) + return (__builtin_ctzll((U64)val) >> 3); +# else + static const int DeBruijnBytePos[64] = 
{ 0, 0, 0, 0, 0, 1, 1, 2, + 0, 3, 1, 3, 1, 4, 2, 7, + 0, 2, 3, 6, 1, 5, 3, 5, + 1, 3, 4, 4, 2, 5, 6, 7, + 7, 0, 1, 2, 3, 3, 4, 6, + 2, 6, 5, 5, 3, 4, 5, 6, + 7, 1, 2, 4, 6, 4, 4, 5, + 7, 2, 6, 5, 7, 6, 7, 7 }; + return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58]; +# endif + } else { /* 32 bits */ +# if defined(_MSC_VER) + unsigned long r=0; + _BitScanForward( &r, (U32)val ); + return (unsigned)(r>>3); +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (__builtin_ctz((U32)val) >> 3); +# else + static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, + 3, 2, 2, 1, 3, 2, 0, 1, + 3, 3, 1, 2, 2, 2, 2, 0, + 3, 1, 2, 0, 1, 0, 1, 1 }; + return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; +# endif + } + } else { /* Big Endian CPU */ + if (MEM_64bits()) { +# if defined(_MSC_VER) && defined(_WIN64) + unsigned long r = 0; + _BitScanReverse64( &r, val ); + return (unsigned)(r>>3); +# elif defined(__GNUC__) && (__GNUC__ >= 4) + return (__builtin_clzll(val) >> 3); +# else + unsigned r; + const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits mode */ + if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; } + if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } + r += (!val); + return r; +# endif + } else { /* 32 bits */ +# if defined(_MSC_VER) + unsigned long r = 0; + _BitScanReverse( &r, (unsigned long)val ); + return (unsigned)(r>>3); +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (__builtin_clz((U32)val) >> 3); +# else + unsigned r; + if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } + r += (!val); + return r; +# endif + } } +} + + +MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit) +{ + const BYTE* const pStart = pIn; + const BYTE* const pInLoopLimit = pInLimit - (sizeof(size_t)-1); + + if (pIn < pInLoopLimit) { + { size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn); + if (diff) return 
ZSTD_NbCommonBytes(diff); } + pIn+=sizeof(size_t); pMatch+=sizeof(size_t); + while (pIn < pInLoopLimit) { + size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn); + if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; } + pIn += ZSTD_NbCommonBytes(diff); + return (size_t)(pIn - pStart); + } } + if (MEM_64bits() && (pIn<(pInLimit-3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { pIn+=4; pMatch+=4; } + if ((pIn<(pInLimit-1)) && (MEM_read16(pMatch) == MEM_read16(pIn))) { pIn+=2; pMatch+=2; } + if ((pIn> (32-h) ; } +MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); } /* only in zstd_opt.h */ + +static const U32 prime4bytes = 2654435761U; +static U32 ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; } +static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_read32(ptr), h); } + +static const U64 prime5bytes = 889523592379ULL; +static size_t ZSTD_hash5(U64 u, U32 h) { return (size_t)(((u << (64-40)) * prime5bytes) >> (64-h)) ; } +static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h); } + +static const U64 prime6bytes = 227718039650203ULL; +static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; } +static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); } + +static const U64 prime7bytes = 58295818150454627ULL; +static size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)(((u << (64-56)) * prime7bytes) >> (64-h)) ; } +static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h); } + +static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL; +static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; } +static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); } + +MEM_STATIC size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls) +{ + switch(mls) + { + default: + case 4: return 
ZSTD_hash4Ptr(p, hBits); + case 5: return ZSTD_hash5Ptr(p, hBits); + case 6: return ZSTD_hash6Ptr(p, hBits); + case 7: return ZSTD_hash7Ptr(p, hBits); + case 8: return ZSTD_hash8Ptr(p, hBits); + } +} + +/*-************************************* +* Round buffer management +***************************************/ +/* Max current allowed */ +#define ZSTD_CURRENT_MAX ((3U << 29) + (1U << ZSTD_WINDOWLOG_MAX)) +/* Maximum chunk size before overflow correction needs to be called again */ +#define ZSTD_CHUNKSIZE_MAX \ + ( ((U32)-1) /* Maximum ending current index */ \ + - ZSTD_CURRENT_MAX) /* Maximum beginning lowLimit */ + +/** + * ZSTD_window_clear(): + * Clears the window containing the history by simply setting it to empty. + */ +MEM_STATIC void ZSTD_window_clear(ZSTD_window_t* window) +{ + size_t const endT = (size_t)(window->nextSrc - window->base); + U32 const end = (U32)endT; + + window->lowLimit = end; + window->dictLimit = end; +} + +/** + * ZSTD_window_hasExtDict(): + * Returns non-zero if the window has a non-empty extDict. + */ +MEM_STATIC U32 ZSTD_window_hasExtDict(ZSTD_window_t const window) +{ + return window.lowLimit < window.dictLimit; +} + +/** + * ZSTD_matchState_dictMode(): + * Inspects the provided matchState and figures out what dictMode should be + * passed to the compressor. + */ +MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_matchState_t *ms) +{ + return ZSTD_window_hasExtDict(ms->window) ? + ZSTD_extDict : + ms->dictMatchState != NULL ? + ZSTD_dictMatchState : + ZSTD_noDict; +} + +/** + * ZSTD_window_needOverflowCorrection(): + * Returns non-zero if the indices are getting too large and need overflow + * protection. + */ +MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window, + void const* srcEnd) +{ + U32 const current = (U32)((BYTE const*)srcEnd - window.base); + return current > ZSTD_CURRENT_MAX; +} + +/** + * ZSTD_window_correctOverflow(): + * Reduces the indices to protect from index overflow. 
+ * Returns the correction made to the indices, which must be applied to every + * stored index. + * + * The least significant cycleLog bits of the indices must remain the same, + * which may be 0. Every index up to maxDist in the past must be valid. + * NOTE: (maxDist & cycleMask) must be zero. + */ +MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog, + U32 maxDist, void const* src) +{ + /* preemptive overflow correction: + * 1. correction is large enough: + * lowLimit > (3<<29) ==> current > 3<<29 + 1< (3<<29 + 1< (3<<29) - (1< (3<<29) - (1<<30) (NOTE: chainLog <= 30) + * > 1<<29 + * + * 2. (ip+ZSTD_CHUNKSIZE_MAX - cctx->base) doesn't overflow: + * After correction, current is less than (1<base < 1<<32. + * 3. (cctx->lowLimit + 1< 3<<29 + 1<base); + U32 const newCurrent = (current & cycleMask) + maxDist; + U32 const correction = current - newCurrent; + assert((maxDist & cycleMask) == 0); + assert(current > newCurrent); + /* Loose bound, should be around 1<<29 (see above) */ + assert(correction > 1<<28); + + window->base += correction; + window->dictBase += correction; + window->lowLimit -= correction; + window->dictLimit -= correction; + + DEBUGLOG(4, "Correction of 0x%x bytes to lowLimit=0x%x", correction, + window->lowLimit); + return correction; +} + +/** + * ZSTD_window_enforceMaxDist(): + * Updates lowLimit so that: + * (srcEnd - base) - lowLimit == maxDist + loadedDictEnd + * + * This allows a simple check that index >= lowLimit to see if index is valid. + * This must be called before a block compression call, with srcEnd as the block + * source end. + * + * If loadedDictEndPtr is not NULL, we set it to zero once we update lowLimit. + * This is because dictionaries are allowed to be referenced as long as the last + * byte of the dictionary is in the window, but once they are out of range, + * they cannot be referenced. If loadedDictEndPtr is NULL, we use + * loadedDictEnd == 0. 
+ * + * In normal dict mode, the dict is between lowLimit and dictLimit. In + * dictMatchState mode, lowLimit and dictLimit are the same, and the dictionary + * is below them. forceWindow and dictMatchState are therefore incompatible. + */ +MEM_STATIC void ZSTD_window_enforceMaxDist(ZSTD_window_t* window, + void const* srcEnd, U32 maxDist, + U32* loadedDictEndPtr, + const ZSTD_matchState_t** dictMatchStatePtr) +{ + U32 const current = (U32)((BYTE const*)srcEnd - window->base); + U32 loadedDictEnd = loadedDictEndPtr != NULL ? *loadedDictEndPtr : 0; + DEBUGLOG(5, "ZSTD_window_enforceMaxDist: current=%u, maxDist=%u", current, maxDist); + if (current > maxDist + loadedDictEnd) { + U32 const newLowLimit = current - maxDist; + if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit; + if (window->dictLimit < window->lowLimit) { + DEBUGLOG(5, "Update dictLimit to match lowLimit, from %u to %u", + window->dictLimit, window->lowLimit); + window->dictLimit = window->lowLimit; + } + if (loadedDictEndPtr) + *loadedDictEndPtr = 0; + if (dictMatchStatePtr) + *dictMatchStatePtr = NULL; + } +} + +/** + * ZSTD_window_update(): + * Updates the window by appending [src, src + srcSize) to the window. + * If it is not contiguous, the current prefix becomes the extDict, and we + * forget about the extDict. Handles overlap of the prefix and extDict. + * Returns non-zero if the segment is contiguous. 
+ */ +MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window, + void const* src, size_t srcSize) +{ + BYTE const* const ip = (BYTE const*)src; + U32 contiguous = 1; + DEBUGLOG(5, "ZSTD_window_update"); + /* Check if blocks follow each other */ + if (src != window->nextSrc) { + /* not contiguous */ + size_t const distanceFromBase = (size_t)(window->nextSrc - window->base); + DEBUGLOG(5, "Non contiguous blocks, new segment starts at %u", window->dictLimit); + window->lowLimit = window->dictLimit; + assert(distanceFromBase == (size_t)(U32)distanceFromBase); /* should never overflow */ + window->dictLimit = (U32)distanceFromBase; + window->dictBase = window->base; + window->base = ip - distanceFromBase; + // ms->nextToUpdate = window->dictLimit; + if (window->dictLimit - window->lowLimit < HASH_READ_SIZE) window->lowLimit = window->dictLimit; /* too small extDict */ + contiguous = 0; + } + window->nextSrc = ip + srcSize; + /* if input and dictionary overlap : reduce dictionary (area presumed modified by input) */ + if ( (ip+srcSize > window->dictBase + window->lowLimit) + & (ip < window->dictBase + window->dictLimit)) { + ptrdiff_t const highInputIdx = (ip + srcSize) - window->dictBase; + U32 const lowLimitMax = (highInputIdx > (ptrdiff_t)window->dictLimit) ? 
window->dictLimit : (U32)highInputIdx; + window->lowLimit = lowLimitMax; + DEBUGLOG(5, "Overlapping extDict and input : new lowLimit = %u", window->lowLimit); + } + return contiguous; +} + + +/* debug functions */ + +MEM_STATIC double ZSTD_fWeight(U32 rawStat) +{ + U32 const fp_accuracy = 8; + U32 const fp_multiplier = (1 << fp_accuracy); + U32 const stat = rawStat + 1; + U32 const hb = ZSTD_highbit32(stat); + U32 const BWeight = hb * fp_multiplier; + U32 const FWeight = (stat << fp_accuracy) >> hb; + U32 const weight = BWeight + FWeight; + assert(hb + fp_accuracy < 31); + return (double)weight / fp_multiplier; +} + +MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max) +{ + unsigned u, sum; + for (u=0, sum=0; u<=max; u++) sum += table[u]; + DEBUGLOG(2, "total nb elts: %u", sum); + for (u=0; u<=max; u++) { + DEBUGLOG(2, "%2u: %5u (%.2f)", + u, table[u], ZSTD_fWeight(sum) - ZSTD_fWeight(table[u]) ); + } +} + +#if defined (__cplusplus) +} +#endif + + +/* ============================================================== + * Private declarations + * These prototypes shall only be called from within lib/compress + * ============================================================== */ + +/* ZSTD_getCParamsFromCCtxParams() : + * cParams are built depending on compressionLevel, src size hints, + * LDM and manually set compression parameters. + */ +ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams( + const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize); + +/*! ZSTD_initCStream_internal() : + * Private use only. Init streaming operation. + * expects params to be valid. + * must receive dict, or cdict, or none, but not both. + * @return : 0, or an error code */ +size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs, + const void* dict, size_t dictSize, + const ZSTD_CDict* cdict, + ZSTD_CCtx_params params, unsigned long long pledgedSrcSize); + +void ZSTD_resetSeqStore(seqStore_t* ssPtr); + +/*! ZSTD_compressStream_generic() : + * Private use only. 
To be called from zstdmt_compress.c in single-thread mode. */ +size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs, + ZSTD_outBuffer* output, + ZSTD_inBuffer* input, + ZSTD_EndDirective const flushMode); + +/*! ZSTD_getCParamsFromCDict() : + * as the name implies */ +ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict); + +/* ZSTD_compressBegin_advanced_internal() : + * Private use only. To be called from zstdmt_compress.c. */ +size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx, + const void* dict, size_t dictSize, + ZSTD_dictContentType_e dictContentType, + ZSTD_dictTableLoadMethod_e dtlm, + const ZSTD_CDict* cdict, + ZSTD_CCtx_params params, + unsigned long long pledgedSrcSize); + +/* ZSTD_compress_advanced_internal() : + * Private use only. To be called from zstdmt_compress.c. */ +size_t ZSTD_compress_advanced_internal(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + ZSTD_CCtx_params params); + + +/* ZSTD_writeLastEmptyBlock() : + * output an empty Block with end-of-frame mark to complete a frame + * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h)) + * or an error code if `dstCapcity` is too small (hashTable; + U32 const hBitsL = cParams->hashLog; + U32 const mls = cParams->searchLength; + U32* const hashSmall = ms->chainTable; + U32 const hBitsS = cParams->chainLog; + const BYTE* const base = ms->window.base; + const BYTE* ip = base + ms->nextToUpdate; + const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE; + const U32 fastHashFillStep = 3; + + /* Always insert every fastHashFillStep position into the hash tables. + * Insert the other positions into the large hash table if their entry + * is empty. 
+ */ + for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) { + U32 const current = (U32)(ip - base); + U32 i; + for (i = 0; i < fastHashFillStep; ++i) { + size_t const smHash = ZSTD_hashPtr(ip + i, hBitsS, mls); + size_t const lgHash = ZSTD_hashPtr(ip + i, hBitsL, 8); + if (i == 0) + hashSmall[smHash] = current + i; + if (i == 0 || hashLarge[lgHash] == 0) + hashLarge[lgHash] = current + i; + /* Only load extra positions for ZSTD_dtlm_full */ + if (dtlm == ZSTD_dtlm_fast) + break; + } + } +} + + +FORCE_INLINE_TEMPLATE +size_t ZSTD_compressBlock_doubleFast_generic( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize, + U32 const mls /* template */, ZSTD_dictMode_e const dictMode) +{ + U32* const hashLong = ms->hashTable; + const U32 hBitsL = cParams->hashLog; + U32* const hashSmall = ms->chainTable; + const U32 hBitsS = cParams->chainLog; + const BYTE* const base = ms->window.base; + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const U32 prefixLowestIndex = ms->window.dictLimit; + const BYTE* const prefixLowest = base + prefixLowestIndex; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - HASH_READ_SIZE; + U32 offset_1=rep[0], offset_2=rep[1]; + U32 offsetSaved = 0; + + const ZSTD_matchState_t* const dms = ms->dictMatchState; + const U32* const dictHashLong = dictMode == ZSTD_dictMatchState ? + dms->hashTable : NULL; + const U32* const dictHashSmall = dictMode == ZSTD_dictMatchState ? + dms->chainTable : NULL; + const U32 dictStartIndex = dictMode == ZSTD_dictMatchState ? + dms->window.dictLimit : 0; + const BYTE* const dictBase = dictMode == ZSTD_dictMatchState ? + dms->window.base : NULL; + const BYTE* const dictStart = dictMode == ZSTD_dictMatchState ? + dictBase + dictStartIndex : NULL; + const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState ? 
+ dms->window.nextSrc : NULL; + const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ? + prefixLowestIndex - (U32)(dictEnd - dictBase) : + 0; + const U32 dictAndPrefixLength = (U32)(ip - prefixLowest + dictEnd - dictStart); + + assert(dictMode == ZSTD_noDict || dictMode == ZSTD_dictMatchState); + + /* init */ + ip += (dictAndPrefixLength == 0); + if (dictMode == ZSTD_noDict) { + U32 const maxRep = (U32)(ip - prefixLowest); + if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; + if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; + } + if (dictMode == ZSTD_dictMatchState) { + /* dictMatchState repCode checks don't currently handle repCode == 0 + * disabling. */ + assert(offset_1 <= dictAndPrefixLength); + assert(offset_2 <= dictAndPrefixLength); + } + + /* Main Search Loop */ + while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ + size_t mLength; + U32 offset; + size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8); + size_t const h = ZSTD_hashPtr(ip, hBitsS, mls); + U32 const current = (U32)(ip-base); + U32 const matchIndexL = hashLong[h2]; + U32 matchIndexS = hashSmall[h]; + const BYTE* matchLong = base + matchIndexL; + const BYTE* match = base + matchIndexS; + const U32 repIndex = current + 1 - offset_1; + const BYTE* repMatch = (dictMode == ZSTD_dictMatchState + && repIndex < prefixLowestIndex) ? + dictBase + (repIndex - dictIndexDelta) : + base + repIndex; + hashLong[h2] = hashSmall[h] = current; /* update hash tables */ + + /* check dictMatchState repcode */ + if (dictMode == ZSTD_dictMatchState + && ((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) + && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { + const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? 
dictEnd : iend; + mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; + ip++; + ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH); + goto _match_stored; + } + + /* check noDict repcode */ + if ( dictMode == ZSTD_noDict + && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) { + mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; + ip++; + ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH); + goto _match_stored; + } + + /* check prefix long match */ + if ( (matchIndexL > prefixLowestIndex) && (MEM_read64(matchLong) == MEM_read64(ip)) ) { + mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8; + offset = (U32)(ip-matchLong); + while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ + goto _match_found; + } + + /* check dictMatchState long match */ + if (dictMode == ZSTD_dictMatchState) { + U32 const dictMatchIndexL = dictHashLong[h2]; + const BYTE* dictMatchL = dictBase + dictMatchIndexL; + assert(dictMatchL < dictEnd); + + if (dictMatchL > dictStart && MEM_read64(dictMatchL) == MEM_read64(ip)) { + mLength = ZSTD_count_2segments(ip+8, dictMatchL+8, iend, dictEnd, prefixLowest) + 8; + offset = (U32)(current - dictMatchIndexL - dictIndexDelta); + while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; } /* catch up */ + goto _match_found; + } + } + + /* check prefix short match */ + if ( (matchIndexS > prefixLowestIndex) && (MEM_read32(match) == MEM_read32(ip)) ) { + goto _search_next_long; + } + + /* check dictMatchState short match */ + if (dictMode == ZSTD_dictMatchState) { + U32 const dictMatchIndexS = dictHashSmall[h]; + match = dictBase + dictMatchIndexS; + matchIndexS = dictMatchIndexS + dictIndexDelta; + + if (match > dictStart && MEM_read32(match) == MEM_read32(ip)) { + goto _search_next_long; + } + } + + ip += ((ip-anchor) >> kSearchStrength) + 1; + 
continue; + +_search_next_long: + + { + size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8); + U32 const matchIndexL3 = hashLong[hl3]; + const BYTE* matchL3 = base + matchIndexL3; + hashLong[hl3] = current + 1; + + /* check prefix long +1 match */ + if ( (matchIndexL3 > prefixLowestIndex) && (MEM_read64(matchL3) == MEM_read64(ip+1)) ) { + mLength = ZSTD_count(ip+9, matchL3+8, iend) + 8; + ip++; + offset = (U32)(ip-matchL3); + while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */ + goto _match_found; + } + + /* check dict long +1 match */ + if (dictMode == ZSTD_dictMatchState) { + U32 const dictMatchIndexL3 = dictHashLong[hl3]; + const BYTE* dictMatchL3 = dictBase + dictMatchIndexL3; + assert(dictMatchL3 < dictEnd); + if (dictMatchL3 > dictStart && MEM_read64(dictMatchL3) == MEM_read64(ip+1)) { + mLength = ZSTD_count_2segments(ip+1+8, dictMatchL3+8, iend, dictEnd, prefixLowest) + 8; + ip++; + offset = (U32)(current + 1 - dictMatchIndexL3 - dictIndexDelta); + while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; } /* catch up */ + goto _match_found; + } + } + } + + /* if no long +1 match, explore the short match we found */ + if (dictMode == ZSTD_dictMatchState && matchIndexS < prefixLowestIndex) { + mLength = ZSTD_count_2segments(ip+4, match+4, iend, dictEnd, prefixLowest) + 4; + offset = (U32)(current - matchIndexS); + while (((ip>anchor) & (match>dictStart)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ + } else { + mLength = ZSTD_count(ip+4, match+4, iend) + 4; + offset = (U32)(ip - match); + while (((ip>anchor) & (match>prefixLowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ + } + + /* fall-through */ + +_match_found: + offset_2 = offset_1; + offset_1 = offset; + + ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + +_match_stored: + /* match found */ 
+ ip += mLength; + anchor = ip; + + if (ip <= ilimit) { + /* Fill Table */ + hashLong[ZSTD_hashPtr(base+current+2, hBitsL, 8)] = + hashSmall[ZSTD_hashPtr(base+current+2, hBitsS, mls)] = current+2; /* here because current+2 could be > iend-8 */ + hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = + hashSmall[ZSTD_hashPtr(ip-2, hBitsS, mls)] = (U32)(ip-2-base); + + /* check immediate repcode */ + if (dictMode == ZSTD_dictMatchState) { + while (ip <= ilimit) { + U32 const current2 = (U32)(ip-base); + U32 const repIndex2 = current2 - offset_2; + const BYTE* repMatch2 = dictMode == ZSTD_dictMatchState + && repIndex2 < prefixLowestIndex ? + dictBase - dictIndexDelta + repIndex2 : + base + repIndex2; + if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */) + && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { + const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend; + size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4; + U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ + ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH); + hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2; + hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2; + ip += repLength2; + anchor = ip; + continue; + } + break; + } + } + + if (dictMode == ZSTD_noDict) { + while ( (ip <= ilimit) + && ( (offset_2>0) + & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) { + /* store sequence */ + size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; + U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */ + hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base); + hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base); + ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH); + ip += rLength; + anchor = ip; + continue; /* faster when present ... (?) 
*/ + } } } } + + /* save reps for next block */ + rep[0] = offset_1 ? offset_1 : offsetSaved; + rep[1] = offset_2 ? offset_2 : offsetSaved; + + /* Return the last literals size */ + return iend - anchor; +} + + +size_t ZSTD_compressBlock_doubleFast( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize) +{ + const U32 mls = cParams->searchLength; + switch(mls) + { + default: /* includes case 3 */ + case 4 : + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, cParams, src, srcSize, 4, ZSTD_noDict); + case 5 : + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, cParams, src, srcSize, 5, ZSTD_noDict); + case 6 : + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, cParams, src, srcSize, 6, ZSTD_noDict); + case 7 : + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, cParams, src, srcSize, 7, ZSTD_noDict); + } +} + + +size_t ZSTD_compressBlock_doubleFast_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize) +{ + const U32 mls = cParams->searchLength; + switch(mls) + { + default: /* includes case 3 */ + case 4 : + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, cParams, src, srcSize, 4, ZSTD_dictMatchState); + case 5 : + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, cParams, src, srcSize, 5, ZSTD_dictMatchState); + case 6 : + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, cParams, src, srcSize, 6, ZSTD_dictMatchState); + case 7 : + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, cParams, src, srcSize, 7, ZSTD_dictMatchState); + } +} + + +static size_t ZSTD_compressBlock_doubleFast_extDict_generic( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize, + U32 
const mls /* template */) +{ + U32* const hashLong = ms->hashTable; + U32 const hBitsL = cParams->hashLog; + U32* const hashSmall = ms->chainTable; + U32 const hBitsS = cParams->chainLog; + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + const U32 prefixStartIndex = ms->window.dictLimit; + const BYTE* const base = ms->window.base; + const BYTE* const prefixStart = base + prefixStartIndex; + const U32 dictStartIndex = ms->window.lowLimit; + const BYTE* const dictBase = ms->window.dictBase; + const BYTE* const dictStart = dictBase + dictStartIndex; + const BYTE* const dictEnd = dictBase + prefixStartIndex; + U32 offset_1=rep[0], offset_2=rep[1]; + + DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_extDict_generic (srcSize=%zu)", srcSize); + + /* Search Loop */ + while (ip < ilimit) { /* < instead of <=, because (ip+1) */ + const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls); + const U32 matchIndex = hashSmall[hSmall]; + const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base; + const BYTE* match = matchBase + matchIndex; + + const size_t hLong = ZSTD_hashPtr(ip, hBitsL, 8); + const U32 matchLongIndex = hashLong[hLong]; + const BYTE* const matchLongBase = matchLongIndex < prefixStartIndex ? dictBase : base; + const BYTE* matchLong = matchLongBase + matchLongIndex; + + const U32 current = (U32)(ip-base); + const U32 repIndex = current + 1 - offset_1; /* offset_1 expected <= current +1 */ + const BYTE* const repBase = repIndex < prefixStartIndex ? 
dictBase : base; + const BYTE* const repMatch = repBase + repIndex; + size_t mLength; + hashSmall[hSmall] = hashLong[hLong] = current; /* update hash table */ + + if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */ + & (repIndex > dictStartIndex)) + && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { + const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; + mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4; + ip++; + ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH); + } else { + if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) { + const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend; + const BYTE* const lowMatchPtr = matchLongIndex < prefixStartIndex ? dictStart : prefixStart; + U32 offset; + mLength = ZSTD_count_2segments(ip+8, matchLong+8, iend, matchEnd, prefixStart) + 8; + offset = current - matchLongIndex; + while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ + offset_2 = offset_1; + offset_1 = offset; + ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + + } else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) { + size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8); + U32 const matchIndex3 = hashLong[h3]; + const BYTE* const match3Base = matchIndex3 < prefixStartIndex ? dictBase : base; + const BYTE* match3 = match3Base + matchIndex3; + U32 offset; + hashLong[h3] = current + 1; + if ( (matchIndex3 > dictStartIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) { + const BYTE* const matchEnd = matchIndex3 < prefixStartIndex ? dictEnd : iend; + const BYTE* const lowMatchPtr = matchIndex3 < prefixStartIndex ? 
dictStart : prefixStart; + mLength = ZSTD_count_2segments(ip+9, match3+8, iend, matchEnd, prefixStart) + 8; + ip++; + offset = current+1 - matchIndex3; + while (((ip>anchor) & (match3>lowMatchPtr)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */ + } else { + const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend; + const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart; + mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4; + offset = current - matchIndex; + while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ + } + offset_2 = offset_1; + offset_1 = offset; + ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + + } else { + ip += ((ip-anchor) >> kSearchStrength) + 1; + continue; + } } + + /* found a match : store it */ + ip += mLength; + anchor = ip; + + if (ip <= ilimit) { + /* Fill Table */ + hashSmall[ZSTD_hashPtr(base+current+2, hBitsS, mls)] = current+2; + hashLong[ZSTD_hashPtr(base+current+2, hBitsL, 8)] = current+2; + hashSmall[ZSTD_hashPtr(ip-2, hBitsS, mls)] = (U32)(ip-2-base); + hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base); + /* check immediate repcode */ + while (ip <= ilimit) { + U32 const current2 = (U32)(ip-base); + U32 const repIndex2 = current2 - offset_2; + const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2; + if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) /* intentional overflow : ensure repIndex2 doesn't overlap dict + prefix */ + & (repIndex2 > dictStartIndex)) + && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { + const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? 
dictEnd : iend; + size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; + U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ + ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH); + hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2; + hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2; + ip += repLength2; + anchor = ip; + continue; + } + break; + } } } + + /* save reps for next block */ + rep[0] = offset_1; + rep[1] = offset_2; + + /* Return the last literals size */ + return iend - anchor; +} + + +size_t ZSTD_compressBlock_doubleFast_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize) +{ + U32 const mls = cParams->searchLength; + switch(mls) + { + default: /* includes case 3 */ + case 4 : + return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, cParams, src, srcSize, 4); + case 5 : + return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, cParams, src, srcSize, 5); + case 6 : + return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, cParams, src, srcSize, 6); + case 7 : + return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, cParams, src, srcSize, 7); + } +} diff --git a/deps/SZ/zstd/compress/zstd_double_fast.h b/deps/SZ/zstd/compress/zstd_double_fast.h new file mode 100644 index 0000000000000000000000000000000000000000..c475021d29daefcd7e9f826aff28e06c90479cda --- /dev/null +++ b/deps/SZ/zstd/compress/zstd_double_fast.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). 
+ * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_DOUBLE_FAST_H +#define ZSTD_DOUBLE_FAST_H + +#if defined (__cplusplus) +extern "C" { +#endif + +#include "mem.h" /* U32 */ +#include "zstd_compress_internal.h" /* ZSTD_CCtx, size_t */ +/* ZSTD_fillDoubleHashTable() : seeds the two tables used by the doubleFast match finder (one hashed on 8 bytes, one hashed on cParams->searchLength bytes and stored in ms->chainTable) with positions from ms->nextToUpdate up to `end` - HASH_READ_SIZE. Positions are visited in steps of 3; with dtlm == ZSTD_dtlm_fast only the first position of each step is inserted. */ +void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms, + ZSTD_compressionParameters const* cParams, + void const* end, ZSTD_dictTableLoadMethod_e dtlm); +/* ZSTD_compressBlock_doubleFast() : block compressor for the ZSTD_noDict mode. Dispatches on cParams->searchLength (5, 6 or 7; any other value is handled as 4). Found sequences are appended to seqStore and rep[] is updated for the next block. @return : number of bytes at the end of `src` not covered by a stored sequence (last literals). */ size_t ZSTD_compressBlock_doubleFast( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize); +/* ZSTD_compressBlock_doubleFast_dictMatchState() : same dispatch and return value as ZSTD_compressBlock_doubleFast(), but runs in ZSTD_dictMatchState mode, where the tables of ms->dictMatchState are searched in addition to those of ms. */ size_t ZSTD_compressBlock_doubleFast_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize); +/* ZSTD_compressBlock_doubleFast_extDict() : same dispatch and return value, for a window split in two segments : indices below ms->window.dictLimit are read through ms->window.dictBase, the others through ms->window.base. */ size_t ZSTD_compressBlock_doubleFast_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize); + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_DOUBLE_FAST_H */ diff --git a/deps/SZ/zstd/compress/zstd_fast.c b/deps/SZ/zstd/compress/zstd_fast.c new file mode 100644 index 0000000000000000000000000000000000000000..37a715167c62732fcbf5f4215322bafe89c45ec1 --- /dev/null +++ b/deps/SZ/zstd/compress/zstd_fast.c @@ -0,0 +1,380 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses.
+ */ + +#include "zstd_compress_internal.h" +#include "zstd_fast.h" + +/* ZSTD_fillHashTable() : loads positions from ms->nextToUpdate up to `end` - HASH_READ_SIZE into ms->hashTable, walking in steps of fastHashFillStep. The first position of each step is always written; the following ones are written only into empty (== 0) slots, and are not visited at all when dtlm == ZSTD_dtlm_fast. */ +void ZSTD_fillHashTable(ZSTD_matchState_t* ms, + ZSTD_compressionParameters const* cParams, + void const* end, ZSTD_dictTableLoadMethod_e dtlm) +{ + U32* const hashTable = ms->hashTable; + U32 const hBits = cParams->hashLog; + U32 const mls = cParams->searchLength; + const BYTE* const base = ms->window.base; + const BYTE* ip = base + ms->nextToUpdate; + const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE; + const U32 fastHashFillStep = 3; + + /* Always insert every fastHashFillStep position into the hash table. + * Insert the other positions if their hash entry is empty. + */ + for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) { + U32 const current = (U32)(ip - base); + U32 i; + for (i = 0; i < fastHashFillStep; ++i) { + size_t const hash = ZSTD_hashPtr(ip + i, hBits, mls); + if (i == 0 || hashTable[hash] == 0) + hashTable[hash] = current + i; + /* Only load extra positions for ZSTD_dtlm_full */ + if (dtlm == ZSTD_dtlm_fast) + break; + } + } +} +/* ZSTD_compressBlock_fast_generic() : single-hash-table match finder shared by the ZSTD_noDict and ZSTD_dictMatchState entry points, which pass `mls` and `dictMode` as literal constants. At each position it tries, in order : the repcode at ip+1, the candidate from ms->hashTable, and (dictMatchState only) the candidate from dms->hashTable. Matches are recorded through ZSTD_storeSeq(); rep[] is rewritten on exit. @return : number of bytes at the end of `src` not covered by a stored sequence. */ +FORCE_INLINE_TEMPLATE +size_t ZSTD_compressBlock_fast_generic( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize, + U32 const hlog, U32 stepSize, U32 const mls, + ZSTD_dictMode_e const dictMode) +{ + U32* const hashTable = ms->hashTable; + const BYTE* const base = ms->window.base; + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const U32 prefixStartIndex = ms->window.dictLimit; + const BYTE* const prefixStart = base + prefixStartIndex; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - HASH_READ_SIZE; + U32 offset_1=rep[0], offset_2=rep[1]; + U32 offsetSaved = 0; + + const ZSTD_matchState_t* const dms = ms->dictMatchState; + const U32* const dictHashTable = dictMode == ZSTD_dictMatchState ? + dms->hashTable : NULL; + const U32 dictStartIndex = dictMode == ZSTD_dictMatchState ?
+ dms->window.dictLimit : 0; + const BYTE* const dictBase = dictMode == ZSTD_dictMatchState ? + dms->window.base : NULL; + const BYTE* const dictStart = dictMode == ZSTD_dictMatchState ? + dictBase + dictStartIndex : NULL; + const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState ? + dms->window.nextSrc : NULL; + const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ? + prefixStartIndex - (U32)(dictEnd - dictBase) : + 0; + const U32 dictAndPrefixLength = (U32)(ip - prefixStart + dictEnd - dictStart); + + assert(dictMode == ZSTD_noDict || dictMode == ZSTD_dictMatchState); + + /* otherwise, we would get index underflow when translating a dict index + * into a local index */ + assert(dictMode != ZSTD_dictMatchState + || prefixStartIndex >= (U32)(dictEnd - dictBase)); + + /* init */ + stepSize += !stepSize; /* support stepSize of 0 */ + ip += (dictAndPrefixLength == 0); /* no history at all (empty dict and empty prefix) : begin at the 2nd byte */ + if (dictMode == ZSTD_noDict) { + U32 const maxRep = (U32)(ip - prefixStart); + if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; + if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; /* a repcode larger than the distance to prefixStart is zeroed here; offsetSaved is what gets written back to rep[] for a zeroed slot at the end */ + } + if (dictMode == ZSTD_dictMatchState) { + /* dictMatchState repCode checks don't currently handle repCode == 0 + * disabling. */ + assert(offset_1 <= dictAndPrefixLength); + assert(offset_2 <= dictAndPrefixLength); + } + + /* Main Search Loop */ + while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ + size_t mLength; + size_t const h = ZSTD_hashPtr(ip, hlog, mls); + U32 const current = (U32)(ip-base); + U32 const matchIndex = hashTable[h]; + const BYTE* match = base + matchIndex; + const U32 repIndex = current + 1 - offset_1; + const BYTE* repMatch = (dictMode == ZSTD_dictMatchState + && repIndex < prefixStartIndex) ?
+ dictBase + (repIndex - dictIndexDelta) : + base + repIndex; + hashTable[h] = current; /* update hash table */ + + if ( (dictMode == ZSTD_dictMatchState) + && ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */ + && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { + const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; + mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4; + ip++; + ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH); + } else if ( dictMode == ZSTD_noDict + && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) { + mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; + ip++; + ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH); + } else if ( (matchIndex <= prefixStartIndex) + || (MEM_read32(match) != MEM_read32(ip)) ) { + if (dictMode == ZSTD_dictMatchState) { + U32 const dictMatchIndex = dictHashTable[h]; + const BYTE* dictMatch = dictBase + dictMatchIndex; + if (dictMatchIndex <= dictStartIndex || + MEM_read32(dictMatch) != MEM_read32(ip)) { + assert(stepSize >= 1); + ip += ((ip-anchor) >> kSearchStrength) + stepSize; /* no match : the stride grows with the distance since the last stored sequence */ + continue; + } else { + /* found a dict match */ + U32 const offset = (U32)(current-dictMatchIndex-dictIndexDelta); + mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, prefixStart) + 4; + while (((ip>anchor) & (dictMatch>dictStart)) + && (ip[-1] == dictMatch[-1])) { + ip--; dictMatch--; mLength++; + } /* catch up */ + offset_2 = offset_1; + offset_1 = offset; + ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + } + } else { + assert(stepSize >= 1); + ip += ((ip-anchor) >> kSearchStrength) + stepSize; + continue; + } + } else { + /* found a regular match */ + U32 const offset = (U32)(ip-match); + mLength = ZSTD_count(ip+4, match+4, iend) + 4; + while (((ip>anchor) & (match>prefixStart)) + && (ip[-1] ==
match[-1])) { ip--; match--; mLength++; } /* catch up */ + offset_2 = offset_1; + offset_1 = offset; + ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + } + + /* match found */ + ip += mLength; + anchor = ip; + + if (ip <= ilimit) { + /* Fill Table */ + hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2; /* here because current+2 could be > iend-8 */ + hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base); + + /* check immediate repcode */ + if (dictMode == ZSTD_dictMatchState) { + while (ip <= ilimit) { + U32 const current2 = (U32)(ip-base); + U32 const repIndex2 = current2 - offset_2; + const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? + dictBase - dictIndexDelta + repIndex2 : + base + repIndex2; + if ( ((U32)((prefixStartIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */) + && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { + const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; + size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; + U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ + ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH); + hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2; + ip += repLength2; + anchor = ip; + continue; + } + break; + } + } + + if (dictMode == ZSTD_noDict) { + while ( (ip <= ilimit) + && ( (offset_2>0) + & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) { + /* store sequence */ + size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; + U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */ + hashTable[ZSTD_hashPtr(ip, hlog, mls)] = (U32)(ip-base); + ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH); + ip += rLength; + anchor = ip; + continue; /* faster when present ... (?) */ + } } } } + + /* save reps for next block */ + rep[0] = offset_1 ?
offset_1 : offsetSaved; + rep[1] = offset_2 ? offset_2 : offsetSaved; + + /* Return the last literals size */ + return iend - anchor; +} + +/* ZSTD_compressBlock_fast() : entry point for the ZSTD_noDict mode; asserts that ms->dictMatchState is NULL. Reads hashLog, searchLength and targetLength (used as the step size) from cParams and dispatches on searchLength : 5, 6 or 7, any other value being handled as 4. @return : value of ZSTD_compressBlock_fast_generic(). */ +size_t ZSTD_compressBlock_fast( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize) +{ + U32 const hlog = cParams->hashLog; + U32 const mls = cParams->searchLength; + U32 const stepSize = cParams->targetLength; + assert(ms->dictMatchState == NULL); + switch(mls) + { + default: /* includes case 3 */ + case 4 : + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 4, ZSTD_noDict); + case 5 : + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 5, ZSTD_noDict); + case 6 : + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 6, ZSTD_noDict); + case 7 : + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 7, ZSTD_noDict); + } +} +/* ZSTD_compressBlock_fast_dictMatchState() : entry point for the ZSTD_dictMatchState mode; asserts that ms->dictMatchState is not NULL. Same parameter handling and searchLength dispatch as ZSTD_compressBlock_fast(). */ +size_t ZSTD_compressBlock_fast_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize) +{ + U32 const hlog = cParams->hashLog; + U32 const mls = cParams->searchLength; + U32 const stepSize = cParams->targetLength; + assert(ms->dictMatchState != NULL); + switch(mls) + { + default: /* includes case 3 */ + case 4 : + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 4, ZSTD_dictMatchState); + case 5 : + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 5, ZSTD_dictMatchState); + case 6 : + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 6, ZSTD_dictMatchState); + case 7 : + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 7, ZSTD_dictMatchState); + } +} + +/* ZSTD_compressBlock_fast_extDict_generic() : match finder for a window made of two segments. Indices in [ms->window.lowLimit, ms->window.dictLimit) are read through ms->window.dictBase, indices from dictLimit upward through ms->window.base; match lengths are measured with ZSTD_count_2segments() so that a match may run from the first segment into the second. rep[] is written back unconditionally on exit. @return : number of bytes at the end of `src` not covered by a stored sequence. */ +static size_t
ZSTD_compressBlock_fast_extDict_generic( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize, + U32 const hlog, U32 stepSize, U32 const mls) +{ + U32* hashTable = ms->hashTable; + const BYTE* const base = ms->window.base; + const BYTE* const dictBase = ms->window.dictBase; + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const U32 dictStartIndex = ms->window.lowLimit; + const BYTE* const dictStart = dictBase + dictStartIndex; + const U32 prefixStartIndex = ms->window.dictLimit; + const BYTE* const prefixStart = base + prefixStartIndex; + const BYTE* const dictEnd = dictBase + prefixStartIndex; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + U32 offset_1=rep[0], offset_2=rep[1]; + + stepSize += !stepSize; /* support stepSize == 0 */ + + /* Search Loop */ + while (ip < ilimit) { /* < instead of <=, because (ip+1) */ + const size_t h = ZSTD_hashPtr(ip, hlog, mls); + const U32 matchIndex = hashTable[h]; + const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base; + const BYTE* match = matchBase + matchIndex; + const U32 current = (U32)(ip-base); + const U32 repIndex = current + 1 - offset_1; + const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base; + const BYTE* const repMatch = repBase + repIndex; + size_t mLength; + hashTable[h] = current; /* update hash table */ + assert(offset_1 <= current +1); /* check repIndex */ + + if ( (((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > dictStartIndex)) + && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { + const BYTE* repMatchEnd = repIndex < prefixStartIndex ?
dictEnd : iend; + mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4; + ip++; + ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH); + } else { + if ( (matchIndex < dictStartIndex) || + (MEM_read32(match) != MEM_read32(ip)) ) { + assert(stepSize >= 1); + ip += ((ip-anchor) >> kSearchStrength) + stepSize; /* no match : the stride grows with the distance since the last stored sequence */ + continue; + } + { const BYTE* matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend; + const BYTE* lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart; + U32 offset; + mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4; + while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ + offset = current - matchIndex; + offset_2 = offset_1; + offset_1 = offset; + ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + } } + + /* found a match : store it */ + ip += mLength; + anchor = ip; + + if (ip <= ilimit) { + /* Fill Table */ + hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2; + hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base); + /* check immediate repcode */ + while (ip <= ilimit) { + U32 const current2 = (U32)(ip-base); + U32 const repIndex2 = current2 - offset_2; + const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2; + if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (repIndex2 > dictStartIndex)) /* intentional overflow */ + && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { + const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ?
dictEnd : iend; + size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; + U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ + ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH); + hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2; + ip += repLength2; + anchor = ip; + continue; + } + break; + } } } + + /* save reps for next block */ + rep[0] = offset_1; + rep[1] = offset_2; + + /* Return the last literals size */ + return iend - anchor; +} + +/* ZSTD_compressBlock_fast_extDict() : entry point for the two-segment (extDict) window. Reads hashLog, searchLength and targetLength (used as the step size) from cParams and dispatches on searchLength : 5, 6 or 7, any other value being handled as 4. */ +size_t ZSTD_compressBlock_fast_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize) +{ + U32 const hlog = cParams->hashLog; + U32 const mls = cParams->searchLength; + U32 const stepSize = cParams->targetLength; + switch(mls) + { + default: /* includes case 3 */ + case 4 : + return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 4); + case 5 : + return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 5); + case 6 : + return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 6); + case 7 : + return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 7); + } +} diff --git a/deps/SZ/zstd/compress/zstd_fast.h b/deps/SZ/zstd/compress/zstd_fast.h new file mode 100644 index 0000000000000000000000000000000000000000..7e7435f8c60fabee248bf6b685d0f36e8077e0e3 --- /dev/null +++ b/deps/SZ/zstd/compress/zstd_fast.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_FAST_H +#define ZSTD_FAST_H + +#if defined (__cplusplus) +extern "C" { +#endif + +#include "mem.h" /* U32 */ +#include "zstd_compress_internal.h" + +void ZSTD_fillHashTable(ZSTD_matchState_t* ms, + ZSTD_compressionParameters const* cParams, + void const* end, ZSTD_dictTableLoadMethod_e dtlm); +size_t ZSTD_compressBlock_fast( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize); +size_t ZSTD_compressBlock_fast_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize); +size_t ZSTD_compressBlock_fast_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize); + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_FAST_H */ diff --git a/deps/SZ/zstd/compress/zstd_lazy.c b/deps/SZ/zstd/compress/zstd_lazy.c new file mode 100644 index 0000000000000000000000000000000000000000..bfe944928202807aec1fbefc8a1493d1ecb8d097 --- /dev/null +++ b/deps/SZ/zstd/compress/zstd_lazy.c @@ -0,0 +1,1090 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +#include "zstd_compress_internal.h" +#include "zstd_lazy.h" + + +/*-************************************* +* Binary Tree search +***************************************/ + +void ZSTD_updateDUBT( + ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams, + const BYTE* ip, const BYTE* iend, + U32 mls) +{ + U32* const hashTable = ms->hashTable; + U32 const hashLog = cParams->hashLog; + + U32* const bt = ms->chainTable; + U32 const btLog = cParams->chainLog - 1; + U32 const btMask = (1 << btLog) - 1; + + const BYTE* const base = ms->window.base; + U32 const target = (U32)(ip - base); + U32 idx = ms->nextToUpdate; + + if (idx != target) + DEBUGLOG(7, "ZSTD_updateDUBT, from %u to %u (dictLimit:%u)", + idx, target, ms->window.dictLimit); + assert(ip + 8 <= iend); /* condition for ZSTD_hashPtr */ + (void)iend; + + assert(idx >= ms->window.dictLimit); /* condition for valid base+idx */ + for ( ; idx < target ; idx++) { + size_t const h = ZSTD_hashPtr(base + idx, hashLog, mls); /* assumption : ip + 8 <= iend */ + U32 const matchIndex = hashTable[h]; + + U32* const nextCandidatePtr = bt + 2*(idx&btMask); + U32* const sortMarkPtr = nextCandidatePtr + 1; + + DEBUGLOG(8, "ZSTD_updateDUBT: insert %u", idx); + hashTable[h] = idx; /* Update Hash Table */ + *nextCandidatePtr = matchIndex; /* update BT like a chain */ + *sortMarkPtr = ZSTD_DUBT_UNSORTED_MARK; + } + ms->nextToUpdate = target; +} + + +/** ZSTD_insertDUBT1() : + * sort one already inserted but unsorted position + * assumption : current >= btlow == (current - btmask) + * doesn't fail */ +static void ZSTD_insertDUBT1( + ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams, + U32 current, const BYTE* inputEnd, + U32 nbCompares, U32 btLow, const ZSTD_dictMode_e dictMode) +{ + U32* const bt = ms->chainTable; + U32 const btLog = cParams->chainLog - 1; + U32 const btMask = (1 << btLog) - 1; + size_t commonLengthSmaller=0, commonLengthLarger=0; + const BYTE* const base = ms->window.base; + const 
BYTE* const dictBase = ms->window.dictBase; + const U32 dictLimit = ms->window.dictLimit; + const BYTE* const ip = (current>=dictLimit) ? base + current : dictBase + current; + const BYTE* const iend = (current>=dictLimit) ? inputEnd : dictBase + dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const prefixStart = base + dictLimit; + const BYTE* match; + U32* smallerPtr = bt + 2*(current&btMask); + U32* largerPtr = smallerPtr + 1; + U32 matchIndex = *smallerPtr; + U32 dummy32; /* to be nullified at the end */ + U32 const windowLow = ms->window.lowLimit; + + DEBUGLOG(8, "ZSTD_insertDUBT1(%u) (dictLimit=%u, lowLimit=%u)", + current, dictLimit, windowLow); + assert(current >= btLow); + assert(ip < iend); /* condition for ZSTD_count */ + + while (nbCompares-- && (matchIndex > windowLow)) { + U32* const nextPtr = bt + 2*(matchIndex & btMask); + size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ + assert(matchIndex < current); + + if ( (dictMode != ZSTD_extDict) + || (matchIndex+matchLength >= dictLimit) /* both in current segment*/ + || (current < dictLimit) /* both in extDict */) { + const BYTE* const mBase = ( (dictMode != ZSTD_extDict) + || (matchIndex+matchLength >= dictLimit)) ? 
+ base : dictBase; + assert( (matchIndex+matchLength >= dictLimit) /* might be wrong if extDict is incorrectly set to 0 */ + || (current < dictLimit) ); + match = mBase + matchIndex; + matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend); + } else { + match = dictBase + matchIndex; + matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); + if (matchIndex+matchLength >= dictLimit) + match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ + } + + DEBUGLOG(8, "ZSTD_insertDUBT1: comparing %u with %u : found %u common bytes ", + current, matchIndex, (U32)matchLength); + + if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */ + break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */ + } + + if (match[matchLength] < ip[matchLength]) { /* necessarily within buffer */ + /* match is smaller than current */ + *smallerPtr = matchIndex; /* update smaller idx */ + commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ + if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop searching */ + DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is smaller : next => %u", + matchIndex, btLow, nextPtr[1]); + smallerPtr = nextPtr+1; /* new "candidate" => larger than match, which was smaller than target */ + matchIndex = nextPtr[1]; /* new matchIndex, larger than previous and closer to current */ + } else { + /* match is larger than current */ + *largerPtr = matchIndex; + commonLengthLarger = matchLength; + if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop searching */ + DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is larger => %u", + matchIndex, btLow, nextPtr[0]); + largerPtr = nextPtr; + matchIndex = nextPtr[0]; + } } + + *smallerPtr = *largerPtr = 0; +} + + +static size_t ZSTD_DUBT_findBetterDictMatch ( + ZSTD_matchState_t* 
ms, ZSTD_compressionParameters const* cParams, + const BYTE* const ip, const BYTE* const iend, + size_t* offsetPtr, + size_t bestLength, + U32 nbCompares, + U32 const mls, + const ZSTD_dictMode_e dictMode) { + const ZSTD_matchState_t * const dms = ms->dictMatchState; + const U32 * const dictHashTable = dms->hashTable; + U32 const hashLog = cParams->hashLog; + size_t const h = ZSTD_hashPtr(ip, hashLog, mls); + U32 dictMatchIndex = dictHashTable[h]; + + const BYTE* const base = ms->window.base; + const BYTE* const prefixStart = base + ms->window.dictLimit; + U32 const current = (U32)(ip-base); + const BYTE* const dictBase = dms->window.base; + const BYTE* const dictEnd = dms->window.nextSrc; + U32 const dictHighLimit = (U32)(dms->window.nextSrc - dms->window.base); + U32 const dictLowLimit = dms->window.lowLimit; + U32 const dictIndexDelta = ms->window.lowLimit - dictHighLimit; + + U32* const dictBt = dms->chainTable; + U32 const btLog = cParams->chainLog - 1; + U32 const btMask = (1 << btLog) - 1; + U32 const btLow = (btMask >= dictHighLimit - dictLowLimit) ? 
dictLowLimit : dictHighLimit - btMask; + + size_t commonLengthSmaller=0, commonLengthLarger=0; + U32 matchEndIdx = current+8+1; + + (void)dictMode; + assert(dictMode == ZSTD_dictMatchState); + + while (nbCompares-- && (dictMatchIndex > dictLowLimit)) { + U32* const nextPtr = dictBt + 2*(dictMatchIndex & btMask); + size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ + const BYTE* match = dictBase + dictMatchIndex; + matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); + if (dictMatchIndex+matchLength >= dictHighLimit) + match = base + dictMatchIndex + dictIndexDelta; /* to prepare for next usage of match[matchLength] */ + + if (matchLength > bestLength) { + U32 matchIndex = dictMatchIndex + dictIndexDelta; + if (matchLength > matchEndIdx - matchIndex) + matchEndIdx = matchIndex + (U32)matchLength; + if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) { + DEBUGLOG(9, "ZSTD_DUBT_findBestDictMatch(%u) : found better match length %u -> %u and offsetCode %u -> %u (dictMatchIndex %u, matchIndex %u)", + current, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, ZSTD_REP_MOVE + current - matchIndex, dictMatchIndex, matchIndex); + bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex; + } + if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */ + break; /* drop, to guarantee consistency (miss a little bit of compression) */ + } + } + + if (match[matchLength] < ip[matchLength]) { + if (dictMatchIndex <= btLow) { break; } /* beyond tree size, stop the search */ + commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ + dictMatchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ + } else { + /* match is larger than current */ + if (dictMatchIndex <= btLow) { break; } /* beyond 
tree size, stop the search */ + commonLengthLarger = matchLength; + dictMatchIndex = nextPtr[0]; + } + } + + if (bestLength >= MINMATCH) { + U32 const mIndex = current - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex; + DEBUGLOG(8, "ZSTD_DUBT_findBestDictMatch(%u) : found match of length %u and offsetCode %u (pos %u)", + current, (U32)bestLength, (U32)*offsetPtr, mIndex); + } + return bestLength; + +} + + +static size_t ZSTD_DUBT_findBestMatch ( + ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams, + const BYTE* const ip, const BYTE* const iend, + size_t* offsetPtr, + U32 const mls, + const ZSTD_dictMode_e dictMode) +{ + U32* const hashTable = ms->hashTable; + U32 const hashLog = cParams->hashLog; + size_t const h = ZSTD_hashPtr(ip, hashLog, mls); + U32 matchIndex = hashTable[h]; + + const BYTE* const base = ms->window.base; + U32 const current = (U32)(ip-base); + U32 const windowLow = ms->window.lowLimit; + + U32* const bt = ms->chainTable; + U32 const btLog = cParams->chainLog - 1; + U32 const btMask = (1 << btLog) - 1; + U32 const btLow = (btMask >= current) ? 
0 : current - btMask; + U32 const unsortLimit = MAX(btLow, windowLow); + + U32* nextCandidate = bt + 2*(matchIndex&btMask); + U32* unsortedMark = bt + 2*(matchIndex&btMask) + 1; + U32 nbCompares = 1U << cParams->searchLog; + U32 nbCandidates = nbCompares; + U32 previousCandidate = 0; + + DEBUGLOG(7, "ZSTD_DUBT_findBestMatch (%u) ", current); + assert(ip <= iend-8); /* required for h calculation */ + + /* reach end of unsorted candidates list */ + while ( (matchIndex > unsortLimit) + && (*unsortedMark == ZSTD_DUBT_UNSORTED_MARK) + && (nbCandidates > 1) ) { + DEBUGLOG(8, "ZSTD_DUBT_findBestMatch: candidate %u is unsorted", + matchIndex); + *unsortedMark = previousCandidate; + previousCandidate = matchIndex; + matchIndex = *nextCandidate; + nextCandidate = bt + 2*(matchIndex&btMask); + unsortedMark = bt + 2*(matchIndex&btMask) + 1; + nbCandidates --; + } + + if ( (matchIndex > unsortLimit) + && (*unsortedMark==ZSTD_DUBT_UNSORTED_MARK) ) { + DEBUGLOG(7, "ZSTD_DUBT_findBestMatch: nullify last unsorted candidate %u", + matchIndex); + *nextCandidate = *unsortedMark = 0; /* nullify next candidate if it's still unsorted (note : simplification, detrimental to compression ratio, beneficial for speed) */ + } + + /* batch sort stacked candidates */ + matchIndex = previousCandidate; + while (matchIndex) { /* will end on matchIndex == 0 */ + U32* const nextCandidateIdxPtr = bt + 2*(matchIndex&btMask) + 1; + U32 const nextCandidateIdx = *nextCandidateIdxPtr; + ZSTD_insertDUBT1(ms, cParams, matchIndex, iend, + nbCandidates, unsortLimit, dictMode); + matchIndex = nextCandidateIdx; + nbCandidates++; + } + + /* find longest match */ + { size_t commonLengthSmaller=0, commonLengthLarger=0; + const BYTE* const dictBase = ms->window.dictBase; + const U32 dictLimit = ms->window.dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const prefixStart = base + dictLimit; + U32* smallerPtr = bt + 2*(current&btMask); + U32* largerPtr = bt + 2*(current&btMask) + 1; + U32 
matchEndIdx = current+8+1; + U32 dummy32; /* to be nullified at the end */ + size_t bestLength = 0; + + matchIndex = hashTable[h]; + hashTable[h] = current; /* Update Hash Table */ + + while (nbCompares-- && (matchIndex > windowLow)) { + U32* const nextPtr = bt + 2*(matchIndex & btMask); + size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ + const BYTE* match; + + if ((dictMode != ZSTD_extDict) || (matchIndex+matchLength >= dictLimit)) { + match = base + matchIndex; + matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend); + } else { + match = dictBase + matchIndex; + matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); + if (matchIndex+matchLength >= dictLimit) + match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ + } + + if (matchLength > bestLength) { + if (matchLength > matchEndIdx - matchIndex) + matchEndIdx = matchIndex + (U32)matchLength; + if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) + bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex; + if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */ + break; /* drop, to guarantee consistency (miss a little bit of compression) */ + } + } + + if (match[matchLength] < ip[matchLength]) { + /* match is smaller than current */ + *smallerPtr = matchIndex; /* update smaller idx */ + commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ + if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + smallerPtr = nextPtr+1; /* new "smaller" => larger of match */ + matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ + } else { + /* match is larger than current */ + *largerPtr = matchIndex; + commonLengthLarger = matchLength; + if 
(matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + largerPtr = nextPtr; + matchIndex = nextPtr[0]; + } } + + *smallerPtr = *largerPtr = 0; + + if (dictMode == ZSTD_dictMatchState && nbCompares) { + bestLength = ZSTD_DUBT_findBetterDictMatch(ms, cParams, ip, iend, offsetPtr, bestLength, nbCompares, mls, dictMode); + } + + assert(matchEndIdx > current+8); /* ensure nextToUpdate is increased */ + ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */ + if (bestLength >= MINMATCH) { + U32 const mIndex = current - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex; + DEBUGLOG(8, "ZSTD_DUBT_findBestMatch(%u) : found match of length %u and offsetCode %u (pos %u)", + current, (U32)bestLength, (U32)*offsetPtr, mIndex); + } + return bestLength; + } +} + + +/** ZSTD_BtFindBestMatch() : Tree updater, providing best match */ +FORCE_INLINE_TEMPLATE size_t ZSTD_BtFindBestMatch ( + ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams, + const BYTE* const ip, const BYTE* const iLimit, + size_t* offsetPtr, + const U32 mls /* template */, + const ZSTD_dictMode_e dictMode) +{ + DEBUGLOG(7, "ZSTD_BtFindBestMatch"); + if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */ + ZSTD_updateDUBT(ms, cParams, ip, iLimit, mls); + return ZSTD_DUBT_findBestMatch(ms, cParams, ip, iLimit, offsetPtr, mls, dictMode); +} + + +static size_t ZSTD_BtFindBestMatch_selectMLS ( + ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) +{ + switch(cParams->searchLength) + { + default : /* includes case 3 */ + case 4 : return ZSTD_BtFindBestMatch(ms, cParams, ip, iLimit, offsetPtr, 4, ZSTD_noDict); + case 5 : return ZSTD_BtFindBestMatch(ms, cParams, ip, iLimit, offsetPtr, 5, ZSTD_noDict); + case 7 : + case 6 : return ZSTD_BtFindBestMatch(ms, cParams, ip, iLimit, offsetPtr, 6, ZSTD_noDict); + } +} + + +static size_t 
ZSTD_BtFindBestMatch_dictMatchState_selectMLS ( + ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) +{ + switch(cParams->searchLength) + { + default : /* includes case 3 */ + case 4 : return ZSTD_BtFindBestMatch(ms, cParams, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState); + case 5 : return ZSTD_BtFindBestMatch(ms, cParams, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState); + case 7 : + case 6 : return ZSTD_BtFindBestMatch(ms, cParams, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState); + } +} + + +static size_t ZSTD_BtFindBestMatch_extDict_selectMLS ( + ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) +{ + switch(cParams->searchLength) + { + default : /* includes case 3 */ + case 4 : return ZSTD_BtFindBestMatch(ms, cParams, ip, iLimit, offsetPtr, 4, ZSTD_extDict); + case 5 : return ZSTD_BtFindBestMatch(ms, cParams, ip, iLimit, offsetPtr, 5, ZSTD_extDict); + case 7 : + case 6 : return ZSTD_BtFindBestMatch(ms, cParams, ip, iLimit, offsetPtr, 6, ZSTD_extDict); + } +} + + + +/* ********************************* +* Hash Chain +***********************************/ +#define NEXT_IN_CHAIN(d, mask) chainTable[(d) & mask] + +/* Update chains up to ip (excluded) + Assumption : always within prefix (i.e. 
not within extDict) */ +static U32 ZSTD_insertAndFindFirstIndex_internal( + ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams, + const BYTE* ip, U32 const mls) +{ + U32* const hashTable = ms->hashTable; + const U32 hashLog = cParams->hashLog; + U32* const chainTable = ms->chainTable; + const U32 chainMask = (1 << cParams->chainLog) - 1; + const BYTE* const base = ms->window.base; + const U32 target = (U32)(ip - base); + U32 idx = ms->nextToUpdate; + + while(idx < target) { /* catch up */ + size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls); + NEXT_IN_CHAIN(idx, chainMask) = hashTable[h]; + hashTable[h] = idx; + idx++; + } + + ms->nextToUpdate = target; + return hashTable[ZSTD_hashPtr(ip, hashLog, mls)]; +} + +U32 ZSTD_insertAndFindFirstIndex( + ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams, + const BYTE* ip) +{ + return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, cParams->searchLength); +} + + +/* inlining is important to hardwire a hot branch (template emulation) */ +FORCE_INLINE_TEMPLATE +size_t ZSTD_HcFindBestMatch_generic ( + ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams, + const BYTE* const ip, const BYTE* const iLimit, + size_t* offsetPtr, + const U32 mls, const ZSTD_dictMode_e dictMode) +{ + U32* const chainTable = ms->chainTable; + const U32 chainSize = (1 << cParams->chainLog); + const U32 chainMask = chainSize-1; + const BYTE* const base = ms->window.base; + const BYTE* const dictBase = ms->window.dictBase; + const U32 dictLimit = ms->window.dictLimit; + const BYTE* const prefixStart = base + dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const U32 lowLimit = ms->window.lowLimit; + const U32 current = (U32)(ip-base); + const U32 minChain = current > chainSize ? 
current - chainSize : 0; + U32 nbAttempts = 1U << cParams->searchLog; + size_t ml=4-1; + + /* HC4 match finder */ + U32 matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls); + + for ( ; (matchIndex>lowLimit) & (nbAttempts>0) ; nbAttempts--) { + size_t currentMl=0; + if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) { + const BYTE* const match = base + matchIndex; + if (match[ml] == ip[ml]) /* potentially better */ + currentMl = ZSTD_count(ip, match, iLimit); + } else { + const BYTE* const match = dictBase + matchIndex; + assert(match+4 <= dictEnd); + if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */ + currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dictEnd, prefixStart) + 4; + } + + /* save best solution */ + if (currentMl > ml) { + ml = currentMl; + *offsetPtr = current - matchIndex + ZSTD_REP_MOVE; + if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ + } + + if (matchIndex <= minChain) break; + matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask); + } + + if (dictMode == ZSTD_dictMatchState) { + const ZSTD_matchState_t* const dms = ms->dictMatchState; + const U32* const dmsChainTable = dms->chainTable; + const U32 dmsLowestIndex = dms->window.dictLimit; + const BYTE* const dmsBase = dms->window.base; + const BYTE* const dmsEnd = dms->window.nextSrc; + const U32 dmsSize = (U32)(dmsEnd - dmsBase); + const U32 dmsIndexDelta = dictLimit - dmsSize; + const U32 dmsMinChain = dmsSize > chainSize ? 
dmsSize - chainSize : 0; + + matchIndex = dms->hashTable[ZSTD_hashPtr(ip, cParams->hashLog, mls)]; + + for ( ; (matchIndex>dmsLowestIndex) & (nbAttempts>0) ; nbAttempts--) { + size_t currentMl=0; + const BYTE* const match = dmsBase + matchIndex; + assert(match+4 <= dmsEnd); + if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */ + currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dmsEnd, prefixStart) + 4; + + /* save best solution */ + if (currentMl > ml) { + ml = currentMl; + *offsetPtr = current - (matchIndex + dmsIndexDelta) + ZSTD_REP_MOVE; + if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ + } + + if (matchIndex <= dmsMinChain) break; + matchIndex = dmsChainTable[matchIndex & chainMask]; + } + } + + return ml; +} + + +FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_selectMLS ( + ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) +{ + switch(cParams->searchLength) + { + default : /* includes case 3 */ + case 4 : return ZSTD_HcFindBestMatch_generic(ms, cParams, ip, iLimit, offsetPtr, 4, ZSTD_noDict); + case 5 : return ZSTD_HcFindBestMatch_generic(ms, cParams, ip, iLimit, offsetPtr, 5, ZSTD_noDict); + case 7 : + case 6 : return ZSTD_HcFindBestMatch_generic(ms, cParams, ip, iLimit, offsetPtr, 6, ZSTD_noDict); + } +} + + +static size_t ZSTD_HcFindBestMatch_dictMatchState_selectMLS ( + ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) +{ + switch(cParams->searchLength) + { + default : /* includes case 3 */ + case 4 : return ZSTD_HcFindBestMatch_generic(ms, cParams, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState); + case 5 : return ZSTD_HcFindBestMatch_generic(ms, cParams, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState); + case 7 : + case 6 : return ZSTD_HcFindBestMatch_generic(ms, cParams, ip, iLimit, 
offsetPtr, 6, ZSTD_dictMatchState); + } +} + + +FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS ( + ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) +{ + switch(cParams->searchLength) + { + default : /* includes case 3 */ + case 4 : return ZSTD_HcFindBestMatch_generic(ms, cParams, ip, iLimit, offsetPtr, 4, ZSTD_extDict); + case 5 : return ZSTD_HcFindBestMatch_generic(ms, cParams, ip, iLimit, offsetPtr, 5, ZSTD_extDict); + case 7 : + case 6 : return ZSTD_HcFindBestMatch_generic(ms, cParams, ip, iLimit, offsetPtr, 6, ZSTD_extDict); + } +} + + +/* ******************************* +* Common parser - lazy strategy +*********************************/ +FORCE_INLINE_TEMPLATE +size_t ZSTD_compressBlock_lazy_generic( + ZSTD_matchState_t* ms, seqStore_t* seqStore, + U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, + const void* src, size_t srcSize, + const U32 searchMethod, const U32 depth, + ZSTD_dictMode_e const dictMode) +{ + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + const BYTE* const base = ms->window.base; + const U32 prefixLowestIndex = ms->window.dictLimit; + const BYTE* const prefixLowest = base + prefixLowestIndex; + + typedef size_t (*searchMax_f)( + ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams, + const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr); + searchMax_f const searchMax = dictMode == ZSTD_dictMatchState ? + (searchMethod ? ZSTD_BtFindBestMatch_dictMatchState_selectMLS : ZSTD_HcFindBestMatch_dictMatchState_selectMLS) : + (searchMethod ? 
ZSTD_BtFindBestMatch_selectMLS : ZSTD_HcFindBestMatch_selectMLS); + U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0; + + const ZSTD_matchState_t* const dms = ms->dictMatchState; + const U32 dictLowestIndex = dictMode == ZSTD_dictMatchState ? + dms->window.dictLimit : 0; + const BYTE* const dictBase = dictMode == ZSTD_dictMatchState ? + dms->window.base : NULL; + const BYTE* const dictLowest = dictMode == ZSTD_dictMatchState ? + dictBase + dictLowestIndex : NULL; + const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState ? + dms->window.nextSrc : NULL; + const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ? + prefixLowestIndex - (U32)(dictEnd - dictBase) : + 0; + const U32 dictAndPrefixLength = (U32)(ip - prefixLowest + dictEnd - dictLowest); + + (void)dictMode; + + /* init */ + ip += (dictAndPrefixLength == 0); + ms->nextToUpdate3 = ms->nextToUpdate; + if (dictMode == ZSTD_noDict) { + U32 const maxRep = (U32)(ip - prefixLowest); + if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0; + if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0; + } + if (dictMode == ZSTD_dictMatchState) { + /* dictMatchState repCode checks don't currently handle repCode == 0 + * disabling. */ + assert(offset_1 <= dictAndPrefixLength); + assert(offset_2 <= dictAndPrefixLength); + } + + /* Match Loop */ + while (ip < ilimit) { + size_t matchLength=0; + size_t offset=0; + const BYTE* start=ip+1; + + /* check repCode */ + if (dictMode == ZSTD_dictMatchState) { + const U32 repIndex = (U32)(ip - base) + 1 - offset_1; + const BYTE* repMatch = (dictMode == ZSTD_dictMatchState + && repIndex < prefixLowestIndex) ? + dictBase + (repIndex - dictIndexDelta) : + base + repIndex; + if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) + && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { + const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? 
dictEnd : iend; + matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; + if (depth==0) goto _storeSequence; + } + } + if ( dictMode == ZSTD_noDict + && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) { + matchLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; + if (depth==0) goto _storeSequence; + } + + /* first search (depth 0) */ + { size_t offsetFound = 99999999; + size_t const ml2 = searchMax(ms, cParams, ip, iend, &offsetFound); + if (ml2 > matchLength) + matchLength = ml2, start = ip, offset=offsetFound; + } + + if (matchLength < 4) { + ip += ((ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */ + continue; + } + + /* let's try to find a better solution */ + if (depth>=1) + while (ip0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) { + size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4; + int const gain2 = (int)(mlRep * 3); + int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1); + if ((mlRep >= 4) && (gain2 > gain1)) + matchLength = mlRep, offset = 0, start = ip; + } + if (dictMode == ZSTD_dictMatchState) { + const U32 repIndex = (U32)(ip - base) - offset_1; + const BYTE* repMatch = repIndex < prefixLowestIndex ? + dictBase + (repIndex - dictIndexDelta) : + base + repIndex; + if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) + && (MEM_read32(repMatch) == MEM_read32(ip)) ) { + const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? 
dictEnd : iend; + size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; + int const gain2 = (int)(mlRep * 3); + int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1); + if ((mlRep >= 4) && (gain2 > gain1)) + matchLength = mlRep, offset = 0, start = ip; + } + } + { size_t offset2=99999999; + size_t const ml2 = searchMax(ms, cParams, ip, iend, &offset2); + int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4); + if ((ml2 >= 4) && (gain2 > gain1)) { + matchLength = ml2, offset = offset2, start = ip; + continue; /* search a better one */ + } } + + /* let's find an even better one */ + if ((depth==2) && (ip0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) { + size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4; + int const gain2 = (int)(mlRep * 4); + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1); + if ((mlRep >= 4) && (gain2 > gain1)) + matchLength = mlRep, offset = 0, start = ip; + } + if (dictMode == ZSTD_dictMatchState) { + const U32 repIndex = (U32)(ip - base) - offset_1; + const BYTE* repMatch = repIndex < prefixLowestIndex ? + dictBase + (repIndex - dictIndexDelta) : + base + repIndex; + if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) + && (MEM_read32(repMatch) == MEM_read32(ip)) ) { + const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? 
dictEnd : iend; + size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; + int const gain2 = (int)(mlRep * 4); + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1); + if ((mlRep >= 4) && (gain2 > gain1)) + matchLength = mlRep, offset = 0, start = ip; + } + } + { size_t offset2=99999999; + size_t const ml2 = searchMax(ms, cParams, ip, iend, &offset2); + int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7); + if ((ml2 >= 4) && (gain2 > gain1)) { + matchLength = ml2, offset = offset2, start = ip; + continue; + } } } + break; /* nothing found : store previous solution */ + } + + /* NOTE: + * start[-offset+ZSTD_REP_MOVE-1] is undefined behavior. + * (-offset+ZSTD_REP_MOVE-1) is unsigned, and is added to start, which + * overflows the pointer, which is undefined behavior. + */ + /* catch up */ + if (offset) { + if (dictMode == ZSTD_noDict) { + while ( ((start > anchor) & (start - (offset-ZSTD_REP_MOVE) > prefixLowest)) + && (start[-1] == (start-(offset-ZSTD_REP_MOVE))[-1]) ) /* only search for offset within prefix */ + { start--; matchLength++; } + } + if (dictMode == ZSTD_dictMatchState) { + U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE)); + const BYTE* match = (matchIndex < prefixLowestIndex) ? dictBase + matchIndex - dictIndexDelta : base + matchIndex; + const BYTE* const mStart = (matchIndex < prefixLowestIndex) ? 
dictLowest : prefixLowest; + while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */ + } + offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE); + } + /* store sequence */ +_storeSequence: + { size_t const litLength = start - anchor; + ZSTD_storeSeq(seqStore, litLength, anchor, (U32)offset, matchLength-MINMATCH); + anchor = ip = start + matchLength; + } + + /* check immediate repcode */ + if (dictMode == ZSTD_dictMatchState) { + while (ip <= ilimit) { + U32 const current2 = (U32)(ip-base); + U32 const repIndex = current2 - offset_2; + const BYTE* repMatch = dictMode == ZSTD_dictMatchState + && repIndex < prefixLowestIndex ? + dictBase - dictIndexDelta + repIndex : + base + repIndex; + if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex) >= 3 /* intentional overflow */) + && (MEM_read32(repMatch) == MEM_read32(ip)) ) { + const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? dictEnd : iend; + matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4; + offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset_2 <=> offset_1 */ + ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH); + ip += matchLength; + anchor = ip; + continue; + } + break; + } + } + + if (dictMode == ZSTD_noDict) { + while ( ((ip <= ilimit) & (offset_2>0)) + && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) { + /* store sequence */ + matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; + offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */ + ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH); + ip += matchLength; + anchor = ip; + continue; /* faster when present ... (?) */ + } } } + + /* Save reps for next block */ + rep[0] = offset_1 ? offset_1 : savedOffset; + rep[1] = offset_2 ? 
offset_2 : savedOffset; + + /* Return the last literals size */ + return iend - anchor; +} + + +size_t ZSTD_compressBlock_btlazy2( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, cParams, src, srcSize, 1, 2, ZSTD_noDict); +} + +size_t ZSTD_compressBlock_lazy2( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, cParams, src, srcSize, 0, 2, ZSTD_noDict); +} + +size_t ZSTD_compressBlock_lazy( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, cParams, src, srcSize, 0, 1, ZSTD_noDict); +} + +size_t ZSTD_compressBlock_greedy( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, cParams, src, srcSize, 0, 0, ZSTD_noDict); +} + +size_t ZSTD_compressBlock_btlazy2_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, cParams, src, srcSize, 1, 2, ZSTD_dictMatchState); +} + +size_t ZSTD_compressBlock_lazy2_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, cParams, src, srcSize, 0, 2, ZSTD_dictMatchState); +} + +size_t ZSTD_compressBlock_lazy_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, 
U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, cParams, src, srcSize, 0, 1, ZSTD_dictMatchState); +} + +size_t ZSTD_compressBlock_greedy_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, cParams, src, srcSize, 0, 0, ZSTD_dictMatchState); +} + + +FORCE_INLINE_TEMPLATE +size_t ZSTD_compressBlock_lazy_extDict_generic( + ZSTD_matchState_t* ms, seqStore_t* seqStore, + U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, + const void* src, size_t srcSize, + const U32 searchMethod, const U32 depth) +{ + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + const BYTE* const base = ms->window.base; + const U32 dictLimit = ms->window.dictLimit; + const U32 lowestIndex = ms->window.lowLimit; + const BYTE* const prefixStart = base + dictLimit; + const BYTE* const dictBase = ms->window.dictBase; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const dictStart = dictBase + lowestIndex; + + typedef size_t (*searchMax_f)( + ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams, + const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr); + searchMax_f searchMax = searchMethod ? 
ZSTD_BtFindBestMatch_extDict_selectMLS : ZSTD_HcFindBestMatch_extDict_selectMLS; + + U32 offset_1 = rep[0], offset_2 = rep[1]; + + /* init */ + ms->nextToUpdate3 = ms->nextToUpdate; + ip += (ip == prefixStart); + + /* Match Loop */ + while (ip < ilimit) { + size_t matchLength=0; + size_t offset=0; + const BYTE* start=ip+1; + U32 current = (U32)(ip-base); + + /* check repCode */ + { const U32 repIndex = (U32)(current+1 - offset_1); + const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; + const BYTE* const repMatch = repBase + repIndex; + if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */ + if (MEM_read32(ip+1) == MEM_read32(repMatch)) { + /* repcode detected we should take it */ + const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; + matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repEnd, prefixStart) + 4; + if (depth==0) goto _storeSequence; + } } + + /* first search (depth 0) */ + { size_t offsetFound = 99999999; + size_t const ml2 = searchMax(ms, cParams, ip, iend, &offsetFound); + if (ml2 > matchLength) + matchLength = ml2, start = ip, offset=offsetFound; + } + + if (matchLength < 4) { + ip += ((ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */ + continue; + } + + /* let's try to find a better solution */ + if (depth>=1) + while (ip= 3) & (repIndex > lowestIndex)) /* intentional overflow */ + if (MEM_read32(ip) == MEM_read32(repMatch)) { + /* repcode detected */ + const BYTE* const repEnd = repIndex < dictLimit ? 
dictEnd : iend; + size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; + int const gain2 = (int)(repLength * 3); + int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1); + if ((repLength >= 4) && (gain2 > gain1)) + matchLength = repLength, offset = 0, start = ip; + } } + + /* search match, depth 1 */ + { size_t offset2=99999999; + size_t const ml2 = searchMax(ms, cParams, ip, iend, &offset2); + int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4); + if ((ml2 >= 4) && (gain2 > gain1)) { + matchLength = ml2, offset = offset2, start = ip; + continue; /* search a better one */ + } } + + /* let's find an even better one */ + if ((depth==2) && (ip= 3) & (repIndex > lowestIndex)) /* intentional overflow */ + if (MEM_read32(ip) == MEM_read32(repMatch)) { + /* repcode detected */ + const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; + size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; + int const gain2 = (int)(repLength * 4); + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1); + if ((repLength >= 4) && (gain2 > gain1)) + matchLength = repLength, offset = 0, start = ip; + } } + + /* search match, depth 2 */ + { size_t offset2=99999999; + size_t const ml2 = searchMax(ms, cParams, ip, iend, &offset2); + int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7); + if ((ml2 >= 4) && (gain2 > gain1)) { + matchLength = ml2, offset = offset2, start = ip; + continue; + } } } + break; /* nothing found : store previous solution */ + } + + /* catch up */ + if (offset) { + U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE)); + const BYTE* match = (matchIndex < dictLimit) ? 
dictBase + matchIndex : base + matchIndex; + const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart; + while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */ + offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE); + } + + /* store sequence */ +_storeSequence: + { size_t const litLength = start - anchor; + ZSTD_storeSeq(seqStore, litLength, anchor, (U32)offset, matchLength-MINMATCH); + anchor = ip = start + matchLength; + } + + /* check immediate repcode */ + while (ip <= ilimit) { + const U32 repIndex = (U32)((ip-base) - offset_2); + const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; + const BYTE* const repMatch = repBase + repIndex; + if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */ + if (MEM_read32(ip) == MEM_read32(repMatch)) { + /* repcode detected we should take it */ + const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; + matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; + offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset history */ + ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH); + ip += matchLength; + anchor = ip; + continue; /* faster when present ... (?) 
*/ + } + break; + } } + + /* Save reps for next block */ + rep[0] = offset_1; + rep[1] = offset_2; + + /* Return the last literals size */ + return iend - anchor; +} + + +size_t ZSTD_compressBlock_greedy_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, cParams, src, srcSize, 0, 0); +} + +size_t ZSTD_compressBlock_lazy_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize) + +{ + return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, cParams, src, srcSize, 0, 1); +} + +size_t ZSTD_compressBlock_lazy2_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize) + +{ + return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, cParams, src, srcSize, 0, 2); +} + +size_t ZSTD_compressBlock_btlazy2_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize) + +{ + return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, cParams, src, srcSize, 1, 2); +} diff --git a/deps/SZ/zstd/compress/zstd_lazy.h b/deps/SZ/zstd/compress/zstd_lazy.h new file mode 100644 index 0000000000000000000000000000000000000000..c299de6dcabe191def69731ffa24f66fb0f9c35a --- /dev/null +++ b/deps/SZ/zstd/compress/zstd_lazy.h @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). 
+ * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_LAZY_H +#define ZSTD_LAZY_H + +#if defined (__cplusplus) +extern "C" { +#endif + +#include "zstd_compress_internal.h" + +U32 ZSTD_insertAndFindFirstIndex( + ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams, + const BYTE* ip); + +void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue); /*! used in ZSTD_reduceIndex(). pre-emptively increase value of ZSTD_DUBT_UNSORTED_MARK */ + +size_t ZSTD_compressBlock_btlazy2( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy2( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize); +size_t ZSTD_compressBlock_greedy( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize); + +size_t ZSTD_compressBlock_btlazy2_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy2_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize); +size_t ZSTD_compressBlock_greedy_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters 
const* cParams, void const* src, size_t srcSize); + +size_t ZSTD_compressBlock_greedy_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy2_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize); +size_t ZSTD_compressBlock_btlazy2_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize); + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_LAZY_H */ diff --git a/deps/SZ/zstd/compress/zstd_ldm.c b/deps/SZ/zstd/compress/zstd_ldm.c new file mode 100644 index 0000000000000000000000000000000000000000..215f55cf451d130aed3e58147bdc96880ba81caf --- /dev/null +++ b/deps/SZ/zstd/compress/zstd_ldm.c @@ -0,0 +1,648 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). 
+ */ + +#include "zstd_ldm.h" + +#include "debug.h" +#include "zstd_fast.h" /* ZSTD_fillHashTable() */ +#include "zstd_double_fast.h" /* ZSTD_fillDoubleHashTable() */ + +#define LDM_BUCKET_SIZE_LOG 3 +#define LDM_MIN_MATCH_LENGTH 64 +#define LDM_HASH_RLOG 7 +#define LDM_HASH_CHAR_OFFSET 10 + +void ZSTD_ldm_adjustParameters(ldmParams_t* params, + ZSTD_compressionParameters const* cParams) +{ + params->windowLog = cParams->windowLog; + ZSTD_STATIC_ASSERT(LDM_BUCKET_SIZE_LOG <= ZSTD_LDM_BUCKETSIZELOG_MAX); + DEBUGLOG(4, "ZSTD_ldm_adjustParameters"); + if (!params->bucketSizeLog) params->bucketSizeLog = LDM_BUCKET_SIZE_LOG; + if (!params->minMatchLength) params->minMatchLength = LDM_MIN_MATCH_LENGTH; + if (cParams->strategy >= ZSTD_btopt) { + /* Get out of the way of the optimal parser */ + U32 const minMatch = MAX(cParams->targetLength, params->minMatchLength); + assert(minMatch >= ZSTD_LDM_MINMATCH_MIN); + assert(minMatch <= ZSTD_LDM_MINMATCH_MAX); + params->minMatchLength = minMatch; + } + if (params->hashLog == 0) { + params->hashLog = MAX(ZSTD_HASHLOG_MIN, params->windowLog - LDM_HASH_RLOG); + assert(params->hashLog <= ZSTD_HASHLOG_MAX); + } + if (params->hashEveryLog == 0) { + params->hashEveryLog = params->windowLog < params->hashLog + ? 0 + : params->windowLog - params->hashLog; + } + params->bucketSizeLog = MIN(params->bucketSizeLog, params->hashLog); +} + +size_t ZSTD_ldm_getTableSize(ldmParams_t params) +{ + size_t const ldmHSize = ((size_t)1) << params.hashLog; + size_t const ldmBucketSizeLog = MIN(params.bucketSizeLog, params.hashLog); + size_t const ldmBucketSize = + ((size_t)1) << (params.hashLog - ldmBucketSizeLog); + size_t const totalSize = ldmBucketSize + ldmHSize * sizeof(ldmEntry_t); + return params.enableLdm ? totalSize : 0; +} + +size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize) +{ + return params.enableLdm ? 
(maxChunkSize / params.minMatchLength) : 0; +} + +/** ZSTD_ldm_getSmallHash() : + * numBits should be <= 32 + * If numBits==0, returns 0. + * @return : the most significant numBits of value. */ +static U32 ZSTD_ldm_getSmallHash(U64 value, U32 numBits) +{ + assert(numBits <= 32); + return numBits == 0 ? 0 : (U32)(value >> (64 - numBits)); +} + +/** ZSTD_ldm_getChecksum() : + * numBitsToDiscard should be <= 32 + * @return : the next most significant 32 bits after numBitsToDiscard */ +static U32 ZSTD_ldm_getChecksum(U64 hash, U32 numBitsToDiscard) +{ + assert(numBitsToDiscard <= 32); + return (hash >> (64 - 32 - numBitsToDiscard)) & 0xFFFFFFFF; +} + +/** ZSTD_ldm_getTag() ; + * Given the hash, returns the most significant numTagBits bits + * after (32 + hbits) bits. + * + * If there are not enough bits remaining, return the last + * numTagBits bits. */ +static U32 ZSTD_ldm_getTag(U64 hash, U32 hbits, U32 numTagBits) +{ + assert(numTagBits < 32 && hbits <= 32); + if (32 - hbits < numTagBits) { + return hash & (((U32)1 << numTagBits) - 1); + } else { + return (hash >> (32 - hbits - numTagBits)) & (((U32)1 << numTagBits) - 1); + } +} + +/** ZSTD_ldm_getBucket() : + * Returns a pointer to the start of the bucket associated with hash. 
*/ +static ldmEntry_t* ZSTD_ldm_getBucket( + ldmState_t* ldmState, size_t hash, ldmParams_t const ldmParams) +{ + return ldmState->hashTable + (hash << ldmParams.bucketSizeLog); +} + +/** ZSTD_ldm_insertEntry() : + * Insert the entry with corresponding hash into the hash table */ +static void ZSTD_ldm_insertEntry(ldmState_t* ldmState, + size_t const hash, const ldmEntry_t entry, + ldmParams_t const ldmParams) +{ + BYTE* const bucketOffsets = ldmState->bucketOffsets; + *(ZSTD_ldm_getBucket(ldmState, hash, ldmParams) + bucketOffsets[hash]) = entry; + bucketOffsets[hash]++; + bucketOffsets[hash] &= ((U32)1 << ldmParams.bucketSizeLog) - 1; +} + +/** ZSTD_ldm_makeEntryAndInsertByTag() : + * + * Gets the small hash, checksum, and tag from the rollingHash. + * + * If the tag matches (1 << ldmParams.hashEveryLog)-1, then + * creates an ldmEntry from the offset, and inserts it into the hash table. + * + * hBits is the length of the small hash, which is the most significant hBits + * of rollingHash. The checksum is the next 32 most significant bits, followed + * by ldmParams.hashEveryLog bits that make up the tag. */ +static void ZSTD_ldm_makeEntryAndInsertByTag(ldmState_t* ldmState, + U64 const rollingHash, + U32 const hBits, + U32 const offset, + ldmParams_t const ldmParams) +{ + U32 const tag = ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashEveryLog); + U32 const tagMask = ((U32)1 << ldmParams.hashEveryLog) - 1; + if (tag == tagMask) { + U32 const hash = ZSTD_ldm_getSmallHash(rollingHash, hBits); + U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits); + ldmEntry_t entry; + entry.offset = offset; + entry.checksum = checksum; + ZSTD_ldm_insertEntry(ldmState, hash, entry, ldmParams); + } +} + +/** ZSTD_ldm_getRollingHash() : + * Get a 64-bit hash using the first len bytes from buf. + * + * Giving bytes s = s_1, s_2, ... s_k, the hash is defined to be + * H(s) = s_1*(a^(k-1)) + s_2*(a^(k-2)) + ... 
+ s_k*(a^0) + * + * where the constant a is defined to be prime8bytes. + * + * The implementation adds an offset to each byte, so + * H(s) = (s_1 + HASH_CHAR_OFFSET)*(a^(k-1)) + ... */ +static U64 ZSTD_ldm_getRollingHash(const BYTE* buf, U32 len) +{ + U64 ret = 0; + U32 i; + for (i = 0; i < len; i++) { + ret *= prime8bytes; + ret += buf[i] + LDM_HASH_CHAR_OFFSET; + } + return ret; +} + +/** ZSTD_ldm_ipow() : + * Return base^exp. */ +static U64 ZSTD_ldm_ipow(U64 base, U64 exp) +{ + U64 ret = 1; + while (exp) { + if (exp & 1) { ret *= base; } + exp >>= 1; + base *= base; + } + return ret; +} + +U64 ZSTD_ldm_getHashPower(U32 minMatchLength) { + DEBUGLOG(4, "ZSTD_ldm_getHashPower: mml=%u", minMatchLength); + assert(minMatchLength >= ZSTD_LDM_MINMATCH_MIN); + return ZSTD_ldm_ipow(prime8bytes, minMatchLength - 1); +} + +/** ZSTD_ldm_updateHash() : + * Updates hash by removing toRemove and adding toAdd. */ +static U64 ZSTD_ldm_updateHash(U64 hash, BYTE toRemove, BYTE toAdd, U64 hashPower) +{ + hash -= ((toRemove + LDM_HASH_CHAR_OFFSET) * hashPower); + hash *= prime8bytes; + hash += toAdd + LDM_HASH_CHAR_OFFSET; + return hash; +} + +/** ZSTD_ldm_countBackwardsMatch() : + * Returns the number of bytes that match backwards before pIn and pMatch. + * + * We count only bytes where pMatch >= pBase and pIn >= pAnchor. */ +static size_t ZSTD_ldm_countBackwardsMatch( + const BYTE* pIn, const BYTE* pAnchor, + const BYTE* pMatch, const BYTE* pBase) +{ + size_t matchLength = 0; + while (pIn > pAnchor && pMatch > pBase && pIn[-1] == pMatch[-1]) { + pIn--; + pMatch--; + matchLength++; + } + return matchLength; +} + +/** ZSTD_ldm_fillFastTables() : + * + * Fills the relevant tables for the ZSTD_fast and ZSTD_dfast strategies. + * This is similar to ZSTD_loadDictionaryContent. + * + * The tables for the other strategies are filled within their + * block compressors. 
*/ +static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms, + ZSTD_compressionParameters const* cParams, + void const* end) +{ + const BYTE* const iend = (const BYTE*)end; + + switch(cParams->strategy) + { + case ZSTD_fast: + ZSTD_fillHashTable(ms, cParams, iend, ZSTD_dtlm_fast); + break; + + case ZSTD_dfast: + ZSTD_fillDoubleHashTable(ms, cParams, iend, ZSTD_dtlm_fast); + break; + + case ZSTD_greedy: + case ZSTD_lazy: + case ZSTD_lazy2: + case ZSTD_btlazy2: + case ZSTD_btopt: + case ZSTD_btultra: + break; + default: + assert(0); /* not possible : not a valid strategy id */ + } + + return 0; +} + +/** ZSTD_ldm_fillLdmHashTable() : + * + * Fills hashTable from (lastHashed + 1) to iend (non-inclusive). + * lastHash is the rolling hash that corresponds to lastHashed. + * + * Returns the rolling hash corresponding to position iend-1. */ +static U64 ZSTD_ldm_fillLdmHashTable(ldmState_t* state, + U64 lastHash, const BYTE* lastHashed, + const BYTE* iend, const BYTE* base, + U32 hBits, ldmParams_t const ldmParams) +{ + U64 rollingHash = lastHash; + const BYTE* cur = lastHashed + 1; + + while (cur < iend) { + rollingHash = ZSTD_ldm_updateHash(rollingHash, cur[-1], + cur[ldmParams.minMatchLength-1], + state->hashPower); + ZSTD_ldm_makeEntryAndInsertByTag(state, + rollingHash, hBits, + (U32)(cur - base), ldmParams); + ++cur; + } + return rollingHash; +} + + +/** ZSTD_ldm_limitTableUpdate() : + * + * Sets cctx->nextToUpdate to a position corresponding closer to anchor + * if it is far way + * (after a long match, only update tables a limited amount). 
*/ +static void ZSTD_ldm_limitTableUpdate(ZSTD_matchState_t* ms, const BYTE* anchor) +{ + U32 const current = (U32)(anchor - ms->window.base); + if (current > ms->nextToUpdate + 1024) { + ms->nextToUpdate = + current - MIN(512, current - ms->nextToUpdate - 1024); + } +} + +static size_t ZSTD_ldm_generateSequences_internal( + ldmState_t* ldmState, rawSeqStore_t* rawSeqStore, + ldmParams_t const* params, void const* src, size_t srcSize) +{ + /* LDM parameters */ + int const extDict = ZSTD_window_hasExtDict(ldmState->window); + U32 const minMatchLength = params->minMatchLength; + U64 const hashPower = ldmState->hashPower; + U32 const hBits = params->hashLog - params->bucketSizeLog; + U32 const ldmBucketSize = 1U << params->bucketSizeLog; + U32 const hashEveryLog = params->hashEveryLog; + U32 const ldmTagMask = (1U << params->hashEveryLog) - 1; + /* Prefix and extDict parameters */ + U32 const dictLimit = ldmState->window.dictLimit; + U32 const lowestIndex = extDict ? ldmState->window.lowLimit : dictLimit; + BYTE const* const base = ldmState->window.base; + BYTE const* const dictBase = extDict ? ldmState->window.dictBase : NULL; + BYTE const* const dictStart = extDict ? dictBase + lowestIndex : NULL; + BYTE const* const dictEnd = extDict ? 
dictBase + dictLimit : NULL; + BYTE const* const lowPrefixPtr = base + dictLimit; + /* Input bounds */ + BYTE const* const istart = (BYTE const*)src; + BYTE const* const iend = istart + srcSize; + BYTE const* const ilimit = iend - MAX(minMatchLength, HASH_READ_SIZE); + /* Input positions */ + BYTE const* anchor = istart; + BYTE const* ip = istart; + /* Rolling hash */ + BYTE const* lastHashed = NULL; + U64 rollingHash = 0; + + while (ip <= ilimit) { + size_t mLength; + U32 const current = (U32)(ip - base); + size_t forwardMatchLength = 0, backwardMatchLength = 0; + ldmEntry_t* bestEntry = NULL; + if (ip != istart) { + rollingHash = ZSTD_ldm_updateHash(rollingHash, lastHashed[0], + lastHashed[minMatchLength], + hashPower); + } else { + rollingHash = ZSTD_ldm_getRollingHash(ip, minMatchLength); + } + lastHashed = ip; + + /* Do not insert and do not look for a match */ + if (ZSTD_ldm_getTag(rollingHash, hBits, hashEveryLog) != ldmTagMask) { + ip++; + continue; + } + + /* Get the best entry and compute the match lengths */ + { + ldmEntry_t* const bucket = + ZSTD_ldm_getBucket(ldmState, + ZSTD_ldm_getSmallHash(rollingHash, hBits), + *params); + ldmEntry_t* cur; + size_t bestMatchLength = 0; + U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits); + + for (cur = bucket; cur < bucket + ldmBucketSize; ++cur) { + size_t curForwardMatchLength, curBackwardMatchLength, + curTotalMatchLength; + if (cur->checksum != checksum || cur->offset <= lowestIndex) { + continue; + } + if (extDict) { + BYTE const* const curMatchBase = + cur->offset < dictLimit ? dictBase : base; + BYTE const* const pMatch = curMatchBase + cur->offset; + BYTE const* const matchEnd = + cur->offset < dictLimit ? dictEnd : iend; + BYTE const* const lowMatchPtr = + cur->offset < dictLimit ? 
dictStart : lowPrefixPtr; + + curForwardMatchLength = ZSTD_count_2segments( + ip, pMatch, iend, + matchEnd, lowPrefixPtr); + if (curForwardMatchLength < minMatchLength) { + continue; + } + curBackwardMatchLength = + ZSTD_ldm_countBackwardsMatch(ip, anchor, pMatch, + lowMatchPtr); + curTotalMatchLength = curForwardMatchLength + + curBackwardMatchLength; + } else { /* !extDict */ + BYTE const* const pMatch = base + cur->offset; + curForwardMatchLength = ZSTD_count(ip, pMatch, iend); + if (curForwardMatchLength < minMatchLength) { + continue; + } + curBackwardMatchLength = + ZSTD_ldm_countBackwardsMatch(ip, anchor, pMatch, + lowPrefixPtr); + curTotalMatchLength = curForwardMatchLength + + curBackwardMatchLength; + } + + if (curTotalMatchLength > bestMatchLength) { + bestMatchLength = curTotalMatchLength; + forwardMatchLength = curForwardMatchLength; + backwardMatchLength = curBackwardMatchLength; + bestEntry = cur; + } + } + } + + /* No match found -- continue searching */ + if (bestEntry == NULL) { + ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, + hBits, current, + *params); + ip++; + continue; + } + + /* Match found */ + mLength = forwardMatchLength + backwardMatchLength; + ip -= backwardMatchLength; + + { + /* Store the sequence: + * ip = current - backwardMatchLength + * The match is at (bestEntry->offset - backwardMatchLength) + */ + U32 const matchIndex = bestEntry->offset; + U32 const offset = current - matchIndex; + rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size; + + /* Out of sequence storage */ + if (rawSeqStore->size == rawSeqStore->capacity) + return ERROR(dstSize_tooSmall); + seq->litLength = (U32)(ip - anchor); + seq->matchLength = (U32)mLength; + seq->offset = offset; + rawSeqStore->size++; + } + + /* Insert the current entry into the hash table */ + ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits, + (U32)(lastHashed - base), + *params); + + assert(ip + backwardMatchLength == lastHashed); + + /* Fill the hash table from 
lastHashed+1 to ip+mLength*/ + /* Heuristic: don't need to fill the entire table at end of block */ + if (ip + mLength <= ilimit) { + rollingHash = ZSTD_ldm_fillLdmHashTable( + ldmState, rollingHash, lastHashed, + ip + mLength, base, hBits, *params); + lastHashed = ip + mLength - 1; + } + ip += mLength; + anchor = ip; + } + return iend - anchor; +} + +/*! ZSTD_ldm_reduceTable() : + * reduce table indexes by `reducerValue` */ +static void ZSTD_ldm_reduceTable(ldmEntry_t* const table, U32 const size, + U32 const reducerValue) +{ + U32 u; + for (u = 0; u < size; u++) { + if (table[u].offset < reducerValue) table[u].offset = 0; + else table[u].offset -= reducerValue; + } +} + +size_t ZSTD_ldm_generateSequences( + ldmState_t* ldmState, rawSeqStore_t* sequences, + ldmParams_t const* params, void const* src, size_t srcSize) +{ + U32 const maxDist = 1U << params->windowLog; + BYTE const* const istart = (BYTE const*)src; + BYTE const* const iend = istart + srcSize; + size_t const kMaxChunkSize = 1 << 20; + size_t const nbChunks = (srcSize / kMaxChunkSize) + ((srcSize % kMaxChunkSize) != 0); + size_t chunk; + size_t leftoverSize = 0; + + assert(ZSTD_CHUNKSIZE_MAX >= kMaxChunkSize); + /* Check that ZSTD_window_update() has been called for this chunk prior + * to passing it to this function. + */ + assert(ldmState->window.nextSrc >= (BYTE const*)src + srcSize); + /* The input could be very large (in zstdmt), so it must be broken up into + * chunks to enforce the maximmum distance and handle overflow correction. + */ + assert(sequences->pos <= sequences->size); + assert(sequences->size <= sequences->capacity); + for (chunk = 0; chunk < nbChunks && sequences->size < sequences->capacity; ++chunk) { + BYTE const* const chunkStart = istart + chunk * kMaxChunkSize; + size_t const remaining = (size_t)(iend - chunkStart); + BYTE const *const chunkEnd = + (remaining < kMaxChunkSize) ? 
iend : chunkStart + kMaxChunkSize; + size_t const chunkSize = chunkEnd - chunkStart; + size_t newLeftoverSize; + size_t const prevSize = sequences->size; + + assert(chunkStart < iend); + /* 1. Perform overflow correction if necessary. */ + if (ZSTD_window_needOverflowCorrection(ldmState->window, chunkEnd)) { + U32 const ldmHSize = 1U << params->hashLog; + U32 const correction = ZSTD_window_correctOverflow( + &ldmState->window, /* cycleLog */ 0, maxDist, src); + ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction); + } + /* 2. We enforce the maximum offset allowed. + * + * kMaxChunkSize should be small enough that we don't lose too much of + * the window through early invalidation. + * TODO: * Test the chunk size. + * * Try invalidation after the sequence generation and test the + * the offset against maxDist directly. + */ + ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, NULL, NULL); + /* 3. Generate the sequences for the chunk, and get newLeftoverSize. */ + newLeftoverSize = ZSTD_ldm_generateSequences_internal( + ldmState, sequences, params, chunkStart, chunkSize); + if (ZSTD_isError(newLeftoverSize)) + return newLeftoverSize; + /* 4. We add the leftover literals from previous iterations to the first + * newly generated sequence, or add the `newLeftoverSize` if none are + * generated. 
+ */ + /* Prepend the leftover literals from the last call */ + if (prevSize < sequences->size) { + sequences->seq[prevSize].litLength += (U32)leftoverSize; + leftoverSize = newLeftoverSize; + } else { + assert(newLeftoverSize == chunkSize); + leftoverSize += chunkSize; + } + } + return 0; +} + +void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch) { + while (srcSize > 0 && rawSeqStore->pos < rawSeqStore->size) { + rawSeq* seq = rawSeqStore->seq + rawSeqStore->pos; + if (srcSize <= seq->litLength) { + /* Skip past srcSize literals */ + seq->litLength -= (U32)srcSize; + return; + } + srcSize -= seq->litLength; + seq->litLength = 0; + if (srcSize < seq->matchLength) { + /* Skip past the first srcSize of the match */ + seq->matchLength -= (U32)srcSize; + if (seq->matchLength < minMatch) { + /* The match is too short, omit it */ + if (rawSeqStore->pos + 1 < rawSeqStore->size) { + seq[1].litLength += seq[0].matchLength; + } + rawSeqStore->pos++; + } + return; + } + srcSize -= seq->matchLength; + seq->matchLength = 0; + rawSeqStore->pos++; + } +} + +/** + * If the sequence length is longer than remaining then the sequence is split + * between this block and the next. + * + * Returns the current sequence to handle, or if the rest of the block should + * be literals, it returns a sequence with offset == 0. + */ +static rawSeq maybeSplitSequence(rawSeqStore_t* rawSeqStore, + U32 const remaining, U32 const minMatch) +{ + rawSeq sequence = rawSeqStore->seq[rawSeqStore->pos]; + assert(sequence.offset > 0); + /* Likely: No partial sequence */ + if (remaining >= sequence.litLength + sequence.matchLength) { + rawSeqStore->pos++; + return sequence; + } + /* Cut the sequence short (offset == 0 ==> rest is literals). 
*/ + if (remaining <= sequence.litLength) { + sequence.offset = 0; + } else if (remaining < sequence.litLength + sequence.matchLength) { + sequence.matchLength = remaining - sequence.litLength; + if (sequence.matchLength < minMatch) { + sequence.offset = 0; + } + } + /* Skip past `remaining` bytes for the future sequences. */ + ZSTD_ldm_skipSequences(rawSeqStore, remaining, minMatch); + return sequence; +} + +size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize) +{ + unsigned const minMatch = cParams->searchLength; + ZSTD_blockCompressor const blockCompressor = + ZSTD_selectBlockCompressor(cParams->strategy, ZSTD_matchState_dictMode(ms)); + /* Input bounds */ + BYTE const* const istart = (BYTE const*)src; + BYTE const* const iend = istart + srcSize; + /* Input positions */ + BYTE const* ip = istart; + + DEBUGLOG(5, "ZSTD_ldm_blockCompress: srcSize=%zu", srcSize); + assert(rawSeqStore->pos <= rawSeqStore->size); + assert(rawSeqStore->size <= rawSeqStore->capacity); + /* Loop through each sequence and apply the block compressor to the lits */ + while (rawSeqStore->pos < rawSeqStore->size && ip < iend) { + /* maybeSplitSequence updates rawSeqStore->pos */ + rawSeq const sequence = maybeSplitSequence(rawSeqStore, + (U32)(iend - ip), minMatch); + int i; + /* End signal */ + if (sequence.offset == 0) + break; + + assert(sequence.offset <= (1U << cParams->windowLog)); + assert(ip + sequence.litLength + sequence.matchLength <= iend); + + /* Fill tables for block compressor */ + ZSTD_ldm_limitTableUpdate(ms, ip); + ZSTD_ldm_fillFastTables(ms, cParams, ip); + /* Run the block compressor */ + DEBUGLOG(5, "calling block compressor on segment of size %u", sequence.litLength); + { + size_t const newLitLength = + blockCompressor(ms, seqStore, rep, cParams, ip, + sequence.litLength); + ip += sequence.litLength; + /* Update the 
repcodes */ + for (i = ZSTD_REP_NUM - 1; i > 0; i--) + rep[i] = rep[i-1]; + rep[0] = sequence.offset; + /* Store the sequence */ + ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, + sequence.offset + ZSTD_REP_MOVE, + sequence.matchLength - MINMATCH); + ip += sequence.matchLength; + } + } + /* Fill the tables for the block compressor */ + ZSTD_ldm_limitTableUpdate(ms, ip); + ZSTD_ldm_fillFastTables(ms, cParams, ip); + /* Compress the last literals */ + return blockCompressor(ms, seqStore, rep, cParams, + ip, iend - ip); +} diff --git a/deps/SZ/zstd/compress/zstd_ldm.h b/deps/SZ/zstd/compress/zstd_ldm.h new file mode 100644 index 0000000000000000000000000000000000000000..96588adb0680e012d33e1491836fa8572f1f8403 --- /dev/null +++ b/deps/SZ/zstd/compress/zstd_ldm.h @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + */ + +#ifndef ZSTD_LDM_H +#define ZSTD_LDM_H + +#if defined (__cplusplus) +extern "C" { +#endif + +#include "zstd_compress_internal.h" /* ldmParams_t, U32 */ +#include "zstd.h" /* ZSTD_CCtx, size_t */ + +/*-************************************* +* Long distance matching +***************************************/ + +#define ZSTD_LDM_DEFAULT_WINDOW_LOG ZSTD_WINDOWLOG_DEFAULTMAX + +/** + * ZSTD_ldm_generateSequences(): + * + * Generates the sequences using the long distance match finder. + * Generates long range matching sequences in `sequences`, which parse a prefix + * of the source. `sequences` must be large enough to store every sequence, + * which can be checked with `ZSTD_ldm_getMaxNbSeq()`. + * @returns 0 or an error code. 
+ * + * NOTE: The user must have called ZSTD_window_update() for all of the input + * they have, even if they pass it to ZSTD_ldm_generateSequences() in chunks. + * NOTE: This function returns an error if it runs out of space to store + * sequences. + */ +size_t ZSTD_ldm_generateSequences( + ldmState_t* ldms, rawSeqStore_t* sequences, + ldmParams_t const* params, void const* src, size_t srcSize); + +/** + * ZSTD_ldm_blockCompress(): + * + * Compresses a block using the predefined sequences, along with a secondary + * block compressor. The literals section of every sequence is passed to the + * secondary block compressor, and those sequences are interspersed with the + * predefined sequences. Returns the length of the last literals. + * Updates `rawSeqStore.pos` to indicate how many sequences have been consumed. + * `rawSeqStore.seq` may also be updated to split the last sequence between two + * blocks. + * @return The length of the last literals. + * + * NOTE: The source must be at most the maximum block size, but the predefined + * sequences can be any size, and may be longer than the block. In the case that + * they are longer than the block, the last sequences may need to be split into + * two. We handle that case correctly, and update `rawSeqStore` appropriately. + * NOTE: This function does not return any errors. + */ +size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, + void const* src, size_t srcSize); + +/** + * ZSTD_ldm_skipSequences(): + * + * Skip past `srcSize` bytes worth of sequences in `rawSeqStore`. + * Avoids emitting matches less than `minMatch` bytes. + * Must be called for data with is not passed to ZSTD_ldm_blockCompress(). 
+ */ +void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, + U32 const minMatch); + + +/** ZSTD_ldm_getTableSize() : + * Estimate the space needed for long distance matching tables or 0 if LDM is + * disabled. + */ +size_t ZSTD_ldm_getTableSize(ldmParams_t params); + +/** ZSTD_ldm_getSeqSpace() : + * Return an upper bound on the number of sequences that can be produced by + * the long distance matcher, or 0 if LDM is disabled. + */ +size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize); + +/** ZSTD_ldm_getTableSize() : + * Return prime8bytes^(minMatchLength-1) */ +U64 ZSTD_ldm_getHashPower(U32 minMatchLength); + +/** ZSTD_ldm_adjustParameters() : + * If the params->hashEveryLog is not set, set it to its default value based on + * windowLog and params->hashLog. + * + * Ensures that params->bucketSizeLog is <= params->hashLog (setting it to + * params->hashLog if it is not). + * + * Ensures that the minMatchLength >= targetLength during optimal parsing. + */ +void ZSTD_ldm_adjustParameters(ldmParams_t* params, + ZSTD_compressionParameters const* cParams); + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_FAST_H */ diff --git a/deps/SZ/zstd/compress/zstd_opt.c b/deps/SZ/zstd/compress/zstd_opt.c new file mode 100644 index 0000000000000000000000000000000000000000..476cdc148920d355a7869a3929be0fc3297297a6 --- /dev/null +++ b/deps/SZ/zstd/compress/zstd_opt.c @@ -0,0 +1,1126 @@ +/* + * Copyright (c) 2016-present, Przemyslaw Skibinski, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +#include "zstd_compress_internal.h" +#include "hist.h" +#include "zstd_opt.h" + + +#define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats */ +#define ZSTD_FREQ_DIV 4 /* log factor when using previous stats to init next stats */ +#define ZSTD_MAX_PRICE (1<<30) + + +/*-************************************* +* Price functions for optimal parser +***************************************/ + +#if 0 /* approximation at bit level */ +# define BITCOST_ACCURACY 0 +# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY) +# define WEIGHT(stat) ((void)opt, ZSTD_bitWeight(stat)) +#elif 0 /* fractional bit accuracy */ +# define BITCOST_ACCURACY 8 +# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY) +# define WEIGHT(stat,opt) ((void)opt, ZSTD_fracWeight(stat)) +#else /* opt==approx, ultra==accurate */ +# define BITCOST_ACCURACY 8 +# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY) +# define WEIGHT(stat,opt) (opt ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat)) +#endif + +MEM_STATIC U32 ZSTD_bitWeight(U32 stat) +{ + return (ZSTD_highbit32(stat+1) * BITCOST_MULTIPLIER); +} + +MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat) +{ + U32 const stat = rawStat + 1; + U32 const hb = ZSTD_highbit32(stat); + U32 const BWeight = hb * BITCOST_MULTIPLIER; + U32 const FWeight = (stat << BITCOST_ACCURACY) >> hb; + U32 const weight = BWeight + FWeight; + assert(hb + BITCOST_ACCURACY < 31); + return weight; +} + +/* debugging function, @return price in bytes */ +MEM_STATIC double ZSTD_fCost(U32 price) +{ + return (double)price / (BITCOST_MULTIPLIER*8); +} + +static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel) +{ + optPtr->litSumBasePrice = WEIGHT(optPtr->litSum, optLevel); + optPtr->litLengthSumBasePrice = WEIGHT(optPtr->litLengthSum, optLevel); + optPtr->matchLengthSumBasePrice = WEIGHT(optPtr->matchLengthSum, optLevel); + optPtr->offCodeSumBasePrice = WEIGHT(optPtr->offCodeSum, optLevel); +} + + +static U32 ZSTD_downscaleStat(U32* table, 
U32 lastEltIndex, int malus) +{ + U32 s, sum=0; + assert(ZSTD_FREQ_DIV+malus > 0 && ZSTD_FREQ_DIV+malus < 31); + for (s=0; s<=lastEltIndex; s++) { + table[s] = 1 + (table[s] >> (ZSTD_FREQ_DIV+malus)); + sum += table[s]; + } + return sum; +} + +static void ZSTD_rescaleFreqs(optState_t* const optPtr, + const BYTE* const src, size_t const srcSize, + int optLevel) +{ + optPtr->priceType = zop_dynamic; + + if (optPtr->litLengthSum == 0) { /* first block : init */ + if (srcSize <= 1024) /* heuristic */ + optPtr->priceType = zop_predef; + + assert(optPtr->symbolCosts != NULL); + if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) { /* huffman table presumed generated by dictionary */ + optPtr->priceType = zop_dynamic; + + assert(optPtr->litFreq != NULL); + optPtr->litSum = 0; + { unsigned lit; + for (lit=0; lit<=MaxLit; lit++) { + U32 const scaleLog = 11; /* scale to 2K */ + U32 const bitCost = HUF_getNbBits(optPtr->symbolCosts->huf.CTable, lit); + assert(bitCost <= scaleLog); + optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/; + optPtr->litSum += optPtr->litFreq[lit]; + } } + + { unsigned ll; + FSE_CState_t llstate; + FSE_initCState(&llstate, optPtr->symbolCosts->fse.litlengthCTable); + optPtr->litLengthSum = 0; + for (ll=0; ll<=MaxLL; ll++) { + U32 const scaleLog = 10; /* scale to 1K */ + U32 const bitCost = FSE_getMaxNbBits(llstate.symbolTT, ll); + assert(bitCost < scaleLog); + optPtr->litLengthFreq[ll] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/; + optPtr->litLengthSum += optPtr->litLengthFreq[ll]; + } } + + { unsigned ml; + FSE_CState_t mlstate; + FSE_initCState(&mlstate, optPtr->symbolCosts->fse.matchlengthCTable); + optPtr->matchLengthSum = 0; + for (ml=0; ml<=MaxML; ml++) { + U32 const scaleLog = 10; + U32 const bitCost = FSE_getMaxNbBits(mlstate.symbolTT, ml); + assert(bitCost < scaleLog); + optPtr->matchLengthFreq[ml] = bitCost ? 
1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/; + optPtr->matchLengthSum += optPtr->matchLengthFreq[ml]; + } } + + { unsigned of; + FSE_CState_t ofstate; + FSE_initCState(&ofstate, optPtr->symbolCosts->fse.offcodeCTable); + optPtr->offCodeSum = 0; + for (of=0; of<=MaxOff; of++) { + U32 const scaleLog = 10; + U32 const bitCost = FSE_getMaxNbBits(ofstate.symbolTT, of); + assert(bitCost < scaleLog); + optPtr->offCodeFreq[of] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/; + optPtr->offCodeSum += optPtr->offCodeFreq[of]; + } } + + } else { /* not a dictionary */ + + assert(optPtr->litFreq != NULL); + { unsigned lit = MaxLit; + HIST_count_simple(optPtr->litFreq, &lit, src, srcSize); /* use raw first block to init statistics */ + } + optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1); + + { unsigned ll; + for (ll=0; ll<=MaxLL; ll++) + optPtr->litLengthFreq[ll] = 1; + } + optPtr->litLengthSum = MaxLL+1; + + { unsigned ml; + for (ml=0; ml<=MaxML; ml++) + optPtr->matchLengthFreq[ml] = 1; + } + optPtr->matchLengthSum = MaxML+1; + + { unsigned of; + for (of=0; of<=MaxOff; of++) + optPtr->offCodeFreq[of] = 1; + } + optPtr->offCodeSum = MaxOff+1; + + } + + } else { /* new block : re-use previous statistics, scaled down */ + + optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1); + optPtr->litLengthSum = ZSTD_downscaleStat(optPtr->litLengthFreq, MaxLL, 0); + optPtr->matchLengthSum = ZSTD_downscaleStat(optPtr->matchLengthFreq, MaxML, 0); + optPtr->offCodeSum = ZSTD_downscaleStat(optPtr->offCodeFreq, MaxOff, 0); + } + + ZSTD_setBasePrices(optPtr, optLevel); +} + +/* ZSTD_rawLiteralsCost() : + * price of literals (only) in specified segment (which length can be 0). 
+ * does not include price of literalLength symbol */ +static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength, + const optState_t* const optPtr, + int optLevel) +{ + if (litLength == 0) return 0; + if (optPtr->priceType == zop_predef) + return (litLength*6) * BITCOST_MULTIPLIER; /* 6 bit per literal - no statistic used */ + + /* dynamic statistics */ + { U32 price = litLength * optPtr->litSumBasePrice; + U32 u; + for (u=0; u < litLength; u++) { + assert(WEIGHT(optPtr->litFreq[literals[u]], optLevel) <= optPtr->litSumBasePrice); /* literal cost should never be negative */ + price -= WEIGHT(optPtr->litFreq[literals[u]], optLevel); + } + return price; + } +} + +/* ZSTD_litLengthPrice() : + * cost of literalLength symbol */ +static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr, int optLevel) +{ + if (optPtr->priceType == zop_predef) return WEIGHT(litLength, optLevel); + + /* dynamic statistics */ + { U32 const llCode = ZSTD_LLcode(litLength); + return (LL_bits[llCode] * BITCOST_MULTIPLIER) + (optPtr->litLengthSumBasePrice - WEIGHT(optPtr->litLengthFreq[llCode], optLevel)); + } +} + +/* ZSTD_litLengthContribution() : + * @return ( cost(litlength) - cost(0) ) + * this value can then be added to rawLiteralsCost() + * to provide a cost which is directly comparable to a match ending at same position */ +static int ZSTD_litLengthContribution(U32 const litLength, const optState_t* const optPtr, int optLevel) +{ + if (optPtr->priceType >= zop_predef) return WEIGHT(litLength, optLevel); + + /* dynamic statistics */ + { U32 const llCode = ZSTD_LLcode(litLength); + int const contribution = (LL_bits[llCode] * BITCOST_MULTIPLIER) + + WEIGHT(optPtr->litLengthFreq[0], optLevel) /* note: log2litLengthSum cancel out */ + - WEIGHT(optPtr->litLengthFreq[llCode], optLevel); +#if 1 + return contribution; +#else + return MAX(0, contribution); /* sometimes better, sometimes not ... 
*/ +#endif + } +} + +/* ZSTD_literalsContribution() : + * creates a fake cost for the literals part of a sequence + * which can be compared to the ending cost of a match + * should a new match start at this position */ +static int ZSTD_literalsContribution(const BYTE* const literals, U32 const litLength, + const optState_t* const optPtr, + int optLevel) +{ + int const contribution = ZSTD_rawLiteralsCost(literals, litLength, optPtr, optLevel) + + ZSTD_litLengthContribution(litLength, optPtr, optLevel); + return contribution; +} + +/* ZSTD_getMatchPrice() : + * Provides the cost of the match part (offset + matchLength) of a sequence + * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence. + * optLevel: when <2, favors small offset for decompression speed (improved cache efficiency) */ +FORCE_INLINE_TEMPLATE U32 +ZSTD_getMatchPrice(U32 const offset, + U32 const matchLength, + const optState_t* const optPtr, + int const optLevel) +{ + U32 price; + U32 const offCode = ZSTD_highbit32(offset+1); + U32 const mlBase = matchLength - MINMATCH; + assert(matchLength >= MINMATCH); + + if (optPtr->priceType == zop_predef) /* fixed scheme, do not use statistics */ + return WEIGHT(mlBase, optLevel) + ((16 + offCode) * BITCOST_MULTIPLIER); + + /* dynamic statistics */ + price = (offCode * BITCOST_MULTIPLIER) + (optPtr->offCodeSumBasePrice - WEIGHT(optPtr->offCodeFreq[offCode], optLevel)); + if ((optLevel<2) /*static*/ && offCode >= 20) + price += (offCode-19)*2 * BITCOST_MULTIPLIER; /* handicap for long distance offsets, favor decompression speed */ + + /* match Length */ + { U32 const mlCode = ZSTD_MLcode(mlBase); + price += (ML_bits[mlCode] * BITCOST_MULTIPLIER) + (optPtr->matchLengthSumBasePrice - WEIGHT(optPtr->matchLengthFreq[mlCode], optLevel)); + } + + price += BITCOST_MULTIPLIER / 5; /* heuristic : make matches a bit more costly to favor less sequences -> faster decompression speed */ + + DEBUGLOG(8, "ZSTD_getMatchPrice(ml:%u) = %u", matchLength, 
price); + return price; +} + +/* ZSTD_updateStats() : + * assumption : literals + litLengtn <= iend */ +static void ZSTD_updateStats(optState_t* const optPtr, + U32 litLength, const BYTE* literals, + U32 offsetCode, U32 matchLength) +{ + /* literals */ + { U32 u; + for (u=0; u < litLength; u++) + optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD; + optPtr->litSum += litLength*ZSTD_LITFREQ_ADD; + } + + /* literal Length */ + { U32 const llCode = ZSTD_LLcode(litLength); + optPtr->litLengthFreq[llCode]++; + optPtr->litLengthSum++; + } + + /* match offset code (0-2=>repCode; 3+=>offset+2) */ + { U32 const offCode = ZSTD_highbit32(offsetCode+1); + assert(offCode <= MaxOff); + optPtr->offCodeFreq[offCode]++; + optPtr->offCodeSum++; + } + + /* match Length */ + { U32 const mlBase = matchLength - MINMATCH; + U32 const mlCode = ZSTD_MLcode(mlBase); + optPtr->matchLengthFreq[mlCode]++; + optPtr->matchLengthSum++; + } +} + + +/* ZSTD_readMINMATCH() : + * function safe only for comparisons + * assumption : memPtr must be at least 4 bytes before end of buffer */ +MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length) +{ + switch (length) + { + default : + case 4 : return MEM_read32(memPtr); + case 3 : if (MEM_isLittleEndian()) + return MEM_read32(memPtr)<<8; + else + return MEM_read32(memPtr)>>8; + } +} + + +/* Update hashTable3 up to ip (excluded) + Assumption : always within prefix (i.e. 
not within extDict) */ +static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms, const BYTE* const ip) +{ + U32* const hashTable3 = ms->hashTable3; + U32 const hashLog3 = ms->hashLog3; + const BYTE* const base = ms->window.base; + U32 idx = ms->nextToUpdate3; + U32 const target = ms->nextToUpdate3 = (U32)(ip - base); + size_t const hash3 = ZSTD_hash3Ptr(ip, hashLog3); + assert(hashLog3 > 0); + + while(idx < target) { + hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx; + idx++; + } + + return hashTable3[hash3]; +} + + +/*-************************************* +* Binary Tree search +***************************************/ +/** ZSTD_insertBt1() : add one or multiple positions to tree. + * ip : assumed <= iend-8 . + * @return : nb of positions added */ +static U32 ZSTD_insertBt1( + ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams, + const BYTE* const ip, const BYTE* const iend, + U32 const mls, const int extDict) +{ + U32* const hashTable = ms->hashTable; + U32 const hashLog = cParams->hashLog; + size_t const h = ZSTD_hashPtr(ip, hashLog, mls); + U32* const bt = ms->chainTable; + U32 const btLog = cParams->chainLog - 1; + U32 const btMask = (1 << btLog) - 1; + U32 matchIndex = hashTable[h]; + size_t commonLengthSmaller=0, commonLengthLarger=0; + const BYTE* const base = ms->window.base; + const BYTE* const dictBase = ms->window.dictBase; + const U32 dictLimit = ms->window.dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const prefixStart = base + dictLimit; + const BYTE* match; + const U32 current = (U32)(ip-base); + const U32 btLow = btMask >= current ? 0 : current - btMask; + U32* smallerPtr = bt + 2*(current&btMask); + U32* largerPtr = smallerPtr + 1; + U32 dummy32; /* to be nullified at the end */ + U32 const windowLow = ms->window.lowLimit; + U32 const matchLow = windowLow ? 
windowLow : 1; + U32 matchEndIdx = current+8+1; + size_t bestLength = 8; + U32 nbCompares = 1U << cParams->searchLog; +#ifdef ZSTD_C_PREDICT + U32 predictedSmall = *(bt + 2*((current-1)&btMask) + 0); + U32 predictedLarge = *(bt + 2*((current-1)&btMask) + 1); + predictedSmall += (predictedSmall>0); + predictedLarge += (predictedLarge>0); +#endif /* ZSTD_C_PREDICT */ + + DEBUGLOG(8, "ZSTD_insertBt1 (%u)", current); + + assert(ip <= iend-8); /* required for h calculation */ + hashTable[h] = current; /* Update Hash Table */ + + while (nbCompares-- && (matchIndex >= matchLow)) { + U32* const nextPtr = bt + 2*(matchIndex & btMask); + size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ + assert(matchIndex < current); + +#ifdef ZSTD_C_PREDICT /* note : can create issues when hlog small <= 11 */ + const U32* predictPtr = bt + 2*((matchIndex-1) & btMask); /* written this way, as bt is a roll buffer */ + if (matchIndex == predictedSmall) { + /* no need to check length, result known */ + *smallerPtr = matchIndex; + if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + smallerPtr = nextPtr+1; /* new "smaller" => larger of match */ + matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ + predictedSmall = predictPtr[1] + (predictPtr[1]>0); + continue; + } + if (matchIndex == predictedLarge) { + *largerPtr = matchIndex; + if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + largerPtr = nextPtr; + matchIndex = nextPtr[0]; + predictedLarge = predictPtr[0] + (predictPtr[0]>0); + continue; + } +#endif + + if (!extDict || (matchIndex+matchLength >= dictLimit)) { + assert(matchIndex+matchLength >= dictLimit); /* might be wrong if actually extDict */ + match = base + matchIndex; + matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend); + } else { + match = dictBase + matchIndex; + matchLength += 
ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); + if (matchIndex+matchLength >= dictLimit) + match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ + } + + if (matchLength > bestLength) { + bestLength = matchLength; + if (matchLength > matchEndIdx - matchIndex) + matchEndIdx = matchIndex + (U32)matchLength; + } + + if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */ + break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */ + } + + if (match[matchLength] < ip[matchLength]) { /* necessarily within buffer */ + /* match is smaller than current */ + *smallerPtr = matchIndex; /* update smaller idx */ + commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ + if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop searching */ + smallerPtr = nextPtr+1; /* new "candidate" => larger than match, which was smaller than target */ + matchIndex = nextPtr[1]; /* new matchIndex, larger than previous and closer to current */ + } else { + /* match is larger than current */ + *largerPtr = matchIndex; + commonLengthLarger = matchLength; + if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop searching */ + largerPtr = nextPtr; + matchIndex = nextPtr[0]; + } } + + *smallerPtr = *largerPtr = 0; + if (bestLength > 384) return MIN(192, (U32)(bestLength - 384)); /* speed optimization */ + assert(matchEndIdx > current + 8); + return matchEndIdx - (current + 8); +} + +FORCE_INLINE_TEMPLATE +void ZSTD_updateTree_internal( + ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams, + const BYTE* const ip, const BYTE* const iend, + const U32 mls, const ZSTD_dictMode_e dictMode) +{ + const BYTE* const base = ms->window.base; + U32 const target = (U32)(ip - base); + U32 idx = ms->nextToUpdate; + DEBUGLOG(5, "ZSTD_updateTree_internal, from %u to %u 
(dictMode:%u)", + idx, target, dictMode); + + while(idx < target) + idx += ZSTD_insertBt1(ms, cParams, base+idx, iend, mls, dictMode == ZSTD_extDict); + ms->nextToUpdate = target; +} + +void ZSTD_updateTree( + ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams, + const BYTE* ip, const BYTE* iend) +{ + ZSTD_updateTree_internal(ms, cParams, ip, iend, cParams->searchLength, ZSTD_noDict); +} + +FORCE_INLINE_TEMPLATE +U32 ZSTD_insertBtAndGetAllMatches ( + ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams, + const BYTE* const ip, const BYTE* const iLimit, const ZSTD_dictMode_e dictMode, + U32 rep[ZSTD_REP_NUM], U32 const ll0, + ZSTD_match_t* matches, const U32 lengthToBeat, U32 const mls /* template */) +{ + U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1); + const BYTE* const base = ms->window.base; + U32 const current = (U32)(ip-base); + U32 const hashLog = cParams->hashLog; + U32 const minMatch = (mls==3) ? 3 : 4; + U32* const hashTable = ms->hashTable; + size_t const h = ZSTD_hashPtr(ip, hashLog, mls); + U32 matchIndex = hashTable[h]; + U32* const bt = ms->chainTable; + U32 const btLog = cParams->chainLog - 1; + U32 const btMask= (1U << btLog) - 1; + size_t commonLengthSmaller=0, commonLengthLarger=0; + const BYTE* const dictBase = ms->window.dictBase; + U32 const dictLimit = ms->window.dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const prefixStart = base + dictLimit; + U32 const btLow = btMask >= current ? 0 : current - btMask; + U32 const windowLow = ms->window.lowLimit; + U32 const matchLow = windowLow ? 
windowLow : 1; + U32* smallerPtr = bt + 2*(current&btMask); + U32* largerPtr = bt + 2*(current&btMask) + 1; + U32 matchEndIdx = current+8+1; /* farthest referenced position of any match => detects repetitive patterns */ + U32 dummy32; /* to be nullified at the end */ + U32 mnum = 0; + U32 nbCompares = 1U << cParams->searchLog; + + const ZSTD_matchState_t* dms = dictMode == ZSTD_dictMatchState ? ms->dictMatchState : NULL; + const BYTE* const dmsBase = dictMode == ZSTD_dictMatchState ? dms->window.base : NULL; + const BYTE* const dmsEnd = dictMode == ZSTD_dictMatchState ? dms->window.nextSrc : NULL; + U32 const dmsHighLimit = dictMode == ZSTD_dictMatchState ? (U32)(dmsEnd - dmsBase) : 0; + U32 const dmsLowLimit = dictMode == ZSTD_dictMatchState ? dms->window.lowLimit : 0; + U32 const dmsIndexDelta = dictMode == ZSTD_dictMatchState ? windowLow - dmsHighLimit : 0; + U32 const dmsBtLow = dictMode == ZSTD_dictMatchState && btMask < dmsHighLimit - dmsLowLimit ? dmsHighLimit - btMask : dmsLowLimit; + + size_t bestLength = lengthToBeat-1; + DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", current); + + /* check repCode */ + { U32 const lastR = ZSTD_REP_NUM + ll0; + U32 repCode; + for (repCode = ll0; repCode < lastR; repCode++) { + U32 const repOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode]; + U32 const repIndex = current - repOffset; + U32 repLen = 0; + assert(current >= dictLimit); + if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < current-dictLimit) { /* equivalent to `current > repIndex >= dictLimit` */ + if (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch)) { + repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch; + } + } else { /* repIndex < dictLimit || repIndex >= current */ + const BYTE* const repMatch = dictMode == ZSTD_dictMatchState ? 
+ dmsBase + repIndex - dmsIndexDelta : + dictBase + repIndex; + assert(current >= windowLow); + if ( dictMode == ZSTD_extDict + && ( ((repOffset-1) /*intentional overflow*/ < current - windowLow) /* equivalent to `current > repIndex >= windowLow` */ + & (((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */) + && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) { + repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch; + } + if (dictMode == ZSTD_dictMatchState + && ( ((repOffset-1) /*intentional overflow*/ < current - (dmsLowLimit + dmsIndexDelta)) /* equivalent to `current > repIndex >= dmsLowLimit` */ + & ((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */ + && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) { + repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dmsEnd, prefixStart) + minMatch; + } } + /* save longer solution */ + if (repLen > bestLength) { + DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u", + repCode, ll0, repOffset, repLen); + bestLength = repLen; + matches[mnum].off = repCode - ll0; + matches[mnum].len = (U32)repLen; + mnum++; + if ( (repLen > sufficient_len) + | (ip+repLen == iLimit) ) { /* best possible */ + return mnum; + } } } } + + /* HC3 match finder */ + if ((mls == 3) /*static*/ && (bestLength < mls)) { + U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, ip); + if ((matchIndex3 >= matchLow) + & (current - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) { + size_t mlen; + if ((dictMode == ZSTD_noDict) /*static*/ || (dictMode == ZSTD_dictMatchState) /*static*/ || (matchIndex3 >= dictLimit)) { + const BYTE* const match = base + matchIndex3; + mlen = ZSTD_count(ip, match, iLimit); + } else { + const BYTE* const 
match = dictBase + matchIndex3; + mlen = ZSTD_count_2segments(ip, match, iLimit, dictEnd, prefixStart); + } + + /* save best solution */ + if (mlen >= mls /* == 3 > bestLength */) { + DEBUGLOG(8, "found small match with hlog3, of length %u", + (U32)mlen); + bestLength = mlen; + assert(current > matchIndex3); + assert(mnum==0); /* no prior solution */ + matches[0].off = (current - matchIndex3) + ZSTD_REP_MOVE; + matches[0].len = (U32)mlen; + mnum = 1; + if ( (mlen > sufficient_len) | + (ip+mlen == iLimit) ) { /* best possible length */ + ms->nextToUpdate = current+1; /* skip insertion */ + return 1; + } + } + } + /* no dictMatchState lookup: dicts don't have a populated HC3 table */ + } + + hashTable[h] = current; /* Update Hash Table */ + + while (nbCompares-- && (matchIndex >= matchLow)) { + U32* const nextPtr = bt + 2*(matchIndex & btMask); + size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ + const BYTE* match; + assert(current > matchIndex); + + if ((dictMode == ZSTD_noDict) || (dictMode == ZSTD_dictMatchState) || (matchIndex+matchLength >= dictLimit)) { + assert(matchIndex+matchLength >= dictLimit); /* ensure the condition is correct when !extDict */ + match = base + matchIndex; + matchLength += ZSTD_count(ip+matchLength, match+matchLength, iLimit); + } else { + match = dictBase + matchIndex; + matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart); + if (matchIndex+matchLength >= dictLimit) + match = base + matchIndex; /* prepare for match[matchLength] */ + } + + if (matchLength > bestLength) { + DEBUGLOG(8, "found match of length %u at distance %u (offCode=%u)", + (U32)matchLength, current - matchIndex, current - matchIndex + ZSTD_REP_MOVE); + assert(matchEndIdx > matchIndex); + if (matchLength > matchEndIdx - matchIndex) + matchEndIdx = matchIndex + (U32)matchLength; + bestLength = matchLength; + matches[mnum].off = (current - matchIndex) + 
ZSTD_REP_MOVE; + matches[mnum].len = (U32)matchLength; + mnum++; + if ( (matchLength > ZSTD_OPT_NUM) + | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) { + if (dictMode == ZSTD_dictMatchState) nbCompares = 0; /* break should also skip searching dms */ + break; /* drop, to preserve bt consistency (miss a little bit of compression) */ + } + } + + if (match[matchLength] < ip[matchLength]) { + /* match smaller than current */ + *smallerPtr = matchIndex; /* update smaller idx */ + commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ + if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + smallerPtr = nextPtr+1; /* new candidate => larger than match, which was smaller than current */ + matchIndex = nextPtr[1]; /* new matchIndex, larger than previous, closer to current */ + } else { + *largerPtr = matchIndex; + commonLengthLarger = matchLength; + if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + largerPtr = nextPtr; + matchIndex = nextPtr[0]; + } } + + *smallerPtr = *largerPtr = 0; + + if (dictMode == ZSTD_dictMatchState && nbCompares) { + U32 dictMatchIndex = dms->hashTable[h]; + const U32* const dmsBt = dms->chainTable; + commonLengthSmaller = commonLengthLarger = 0; + while (nbCompares-- && (dictMatchIndex > dmsLowLimit)) { + const U32* const nextPtr = dmsBt + 2*(dictMatchIndex & btMask); + size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ + const BYTE* match = dmsBase + dictMatchIndex; + matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dmsEnd, prefixStart); + if (dictMatchIndex+matchLength >= dmsHighLimit) + match = base + dictMatchIndex + dmsIndexDelta; /* to prepare for next usage of match[matchLength] */ + + if (matchLength > bestLength) { + matchIndex = dictMatchIndex + dmsIndexDelta; + DEBUGLOG(8, "found 
dms match of length %u at distance %u (offCode=%u)", + (U32)matchLength, current - matchIndex, current - matchIndex + ZSTD_REP_MOVE); + if (matchLength > matchEndIdx - matchIndex) + matchEndIdx = matchIndex + (U32)matchLength; + bestLength = matchLength; + matches[mnum].off = (current - matchIndex) + ZSTD_REP_MOVE; + matches[mnum].len = (U32)matchLength; + mnum++; + if ( (matchLength > ZSTD_OPT_NUM) + | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) { + break; /* drop, to guarantee consistency (miss a little bit of compression) */ + } + } + + if (dictMatchIndex <= dmsBtLow) { break; } /* beyond tree size, stop the search */ + if (match[matchLength] < ip[matchLength]) { + commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ + dictMatchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ + } else { + /* match is larger than current */ + commonLengthLarger = matchLength; + dictMatchIndex = nextPtr[0]; + } + } + } + + assert(matchEndIdx > current+8); + ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */ + return mnum; +} + + +FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches ( + ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams, + const BYTE* ip, const BYTE* const iHighLimit, const ZSTD_dictMode_e dictMode, + U32 rep[ZSTD_REP_NUM], U32 const ll0, + ZSTD_match_t* matches, U32 const lengthToBeat) +{ + U32 const matchLengthSearch = cParams->searchLength; + DEBUGLOG(8, "ZSTD_BtGetAllMatches"); + if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */ + ZSTD_updateTree_internal(ms, cParams, ip, iHighLimit, matchLengthSearch, dictMode); + switch(matchLengthSearch) + { + case 3 : return ZSTD_insertBtAndGetAllMatches(ms, cParams, ip, iHighLimit, dictMode, rep, ll0, matches, lengthToBeat, 3); + default : + case 4 : return ZSTD_insertBtAndGetAllMatches(ms, cParams, ip, iHighLimit, dictMode, rep, ll0, matches, lengthToBeat, 4); + 
case 5 : return ZSTD_insertBtAndGetAllMatches(ms, cParams, ip, iHighLimit, dictMode, rep, ll0, matches, lengthToBeat, 5); + case 7 : + case 6 : return ZSTD_insertBtAndGetAllMatches(ms, cParams, ip, iHighLimit, dictMode, rep, ll0, matches, lengthToBeat, 6); + } +} + + +/*-******************************* +* Optimal parser +*********************************/ +typedef struct repcodes_s { + U32 rep[3]; +} repcodes_t; + +repcodes_t ZSTD_updateRep(U32 const rep[3], U32 const offset, U32 const ll0) +{ + repcodes_t newReps; + if (offset >= ZSTD_REP_NUM) { /* full offset */ + newReps.rep[2] = rep[1]; + newReps.rep[1] = rep[0]; + newReps.rep[0] = offset - ZSTD_REP_MOVE; + } else { /* repcode */ + U32 const repCode = offset + ll0; + if (repCode > 0) { /* note : if repCode==0, no change */ + U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode]; + newReps.rep[2] = (repCode >= 2) ? rep[1] : rep[2]; + newReps.rep[1] = rep[0]; + newReps.rep[0] = currentOffset; + } else { /* repCode == 0 */ + memcpy(&newReps, rep, sizeof(newReps)); + } + } + return newReps; +} + + +static U32 ZSTD_totalLen(ZSTD_optimal_t sol) +{ + return sol.litlen + sol.mlen; +} + +FORCE_INLINE_TEMPLATE size_t +ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, + seqStore_t* seqStore, + U32 rep[ZSTD_REP_NUM], + const ZSTD_compressionParameters* cParams, + const void* src, size_t srcSize, + const int optLevel, const ZSTD_dictMode_e dictMode) +{ + optState_t* const optStatePtr = &ms->opt; + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + const BYTE* const base = ms->window.base; + const BYTE* const prefixStart = base + ms->window.dictLimit; + + U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1); + U32 const minMatch = (cParams->searchLength == 3) ? 
3 : 4; + + ZSTD_optimal_t* const opt = optStatePtr->priceTable; + ZSTD_match_t* const matches = optStatePtr->matchTable; + ZSTD_optimal_t lastSequence; + + /* init */ + DEBUGLOG(5, "ZSTD_compressBlock_opt_generic"); + assert(optLevel <= 2); + ms->nextToUpdate3 = ms->nextToUpdate; + ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel); + ip += (ip==prefixStart); + + /* Match Loop */ + while (ip < ilimit) { + U32 cur, last_pos = 0; + + /* find first match */ + { U32 const litlen = (U32)(ip - anchor); + U32 const ll0 = !litlen; + U32 const nbMatches = ZSTD_BtGetAllMatches(ms, cParams, ip, iend, dictMode, rep, ll0, matches, minMatch); + if (!nbMatches) { ip++; continue; } + + /* initialize opt[0] */ + { U32 i ; for (i=0; i immediate encoding */ + { U32 const maxML = matches[nbMatches-1].len; + U32 const maxOffset = matches[nbMatches-1].off; + DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffCode=%u at cPos=%u => start new serie", + nbMatches, maxML, maxOffset, (U32)(ip-prefixStart)); + + if (maxML > sufficient_len) { + lastSequence.litlen = litlen; + lastSequence.mlen = maxML; + lastSequence.off = maxOffset; + DEBUGLOG(6, "large match (%u>%u), immediate encoding", + maxML, sufficient_len); + cur = 0; + last_pos = ZSTD_totalLen(lastSequence); + goto _shortestPath; + } } + + /* set prices for first matches starting position == 0 */ + { U32 const literalsPrice = opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel); + U32 pos; + U32 matchNb; + for (pos = 1; pos < minMatch; pos++) { + opt[pos].price = ZSTD_MAX_PRICE; /* mlen, litlen and price will be fixed during forward scanning */ + } + for (matchNb = 0; matchNb < nbMatches; matchNb++) { + U32 const offset = matches[matchNb].off; + U32 const end = matches[matchNb].len; + repcodes_t const repHistory = ZSTD_updateRep(rep, offset, ll0); + for ( ; pos <= end ; pos++ ) { + U32 const matchPrice = ZSTD_getMatchPrice(offset, pos, optStatePtr, optLevel); + U32 const sequencePrice = literalsPrice + 
matchPrice; + DEBUGLOG(7, "rPos:%u => set initial price : %.2f", + pos, ZSTD_fCost(sequencePrice)); + opt[pos].mlen = pos; + opt[pos].off = offset; + opt[pos].litlen = litlen; + opt[pos].price = sequencePrice; + ZSTD_STATIC_ASSERT(sizeof(opt[pos].rep) == sizeof(repHistory)); + memcpy(opt[pos].rep, &repHistory, sizeof(repHistory)); + } } + last_pos = pos-1; + } + } + + /* check further positions */ + for (cur = 1; cur <= last_pos; cur++) { + const BYTE* const inr = ip + cur; + assert(cur < ZSTD_OPT_NUM); + DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur) + + /* Fix current position with one literal if cheaper */ + { U32 const litlen = (opt[cur-1].mlen == 0) ? opt[cur-1].litlen + 1 : 1; + int const price = opt[cur-1].price + + ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel) + + ZSTD_litLengthPrice(litlen, optStatePtr, optLevel) + - ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel); + assert(price < 1000000000); /* overflow check */ + if (price <= opt[cur].price) { + DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)", + inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen, + opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]); + opt[cur].mlen = 0; + opt[cur].off = 0; + opt[cur].litlen = litlen; + opt[cur].price = price; + memcpy(opt[cur].rep, opt[cur-1].rep, sizeof(opt[cur].rep)); + } else { + DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f) (hist:%u,%u,%u)", + inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), + opt[cur].rep[0], opt[cur].rep[1], opt[cur].rep[2]); + } + } + + /* last match must start at a minimum distance of 8 from oend */ + if (inr > ilimit) continue; + + if (cur == last_pos) break; + + if ( (optLevel==0) /*static_test*/ + && (opt[cur+1].price <= opt[cur].price + (BITCOST_MULTIPLIER/2)) ) { + DEBUGLOG(7, "move to next rPos:%u : price is <=", cur+1); + continue; /* skip unpromising positions; about ~+6% speed, -0.01 ratio */ + } + + { 
U32 const ll0 = (opt[cur].mlen != 0); + U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0; + U32 const previousPrice = opt[cur].price; + U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel); + U32 const nbMatches = ZSTD_BtGetAllMatches(ms, cParams, inr, iend, dictMode, opt[cur].rep, ll0, matches, minMatch); + U32 matchNb; + if (!nbMatches) { + DEBUGLOG(7, "rPos:%u : no match found", cur); + continue; + } + + { U32 const maxML = matches[nbMatches-1].len; + DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of maxLength=%u", + inr-istart, cur, nbMatches, maxML); + + if ( (maxML > sufficient_len) + || (cur + maxML >= ZSTD_OPT_NUM) ) { + lastSequence.mlen = maxML; + lastSequence.off = matches[nbMatches-1].off; + lastSequence.litlen = litlen; + cur -= (opt[cur].mlen==0) ? opt[cur].litlen : 0; /* last sequence is actually only literals, fix cur to last match - note : may underflow, in which case, it's first sequence, and it's okay */ + last_pos = cur + ZSTD_totalLen(lastSequence); + if (cur > ZSTD_OPT_NUM) cur = 0; /* underflow => first match */ + goto _shortestPath; + } } + + /* set prices using matches found at position == cur */ + for (matchNb = 0; matchNb < nbMatches; matchNb++) { + U32 const offset = matches[matchNb].off; + repcodes_t const repHistory = ZSTD_updateRep(opt[cur].rep, offset, ll0); + U32 const lastML = matches[matchNb].len; + U32 const startML = (matchNb>0) ? 
matches[matchNb-1].len+1 : minMatch; + U32 mlen; + + DEBUGLOG(7, "testing match %u => offCode=%4u, mlen=%2u, llen=%2u", + matchNb, matches[matchNb].off, lastML, litlen); + + for (mlen = lastML; mlen >= startML; mlen--) { /* scan downward */ + U32 const pos = cur + mlen; + int const price = basePrice + ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel); + + if ((pos > last_pos) || (price < opt[pos].price)) { + DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)", + pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price)); + while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; } /* fill empty positions */ + opt[pos].mlen = mlen; + opt[pos].off = offset; + opt[pos].litlen = litlen; + opt[pos].price = price; + ZSTD_STATIC_ASSERT(sizeof(opt[pos].rep) == sizeof(repHistory)); + memcpy(opt[pos].rep, &repHistory, sizeof(repHistory)); + } else { + DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)", + pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price)); + if (optLevel==0) break; /* early update abort; gets ~+10% speed for about -0.01 ratio loss */ + } + } } } + } /* for (cur = 1; cur <= last_pos; cur++) */ + + lastSequence = opt[last_pos]; + cur = last_pos > ZSTD_totalLen(lastSequence) ? 
last_pos - ZSTD_totalLen(lastSequence) : 0; /* single sequence, and it starts before `ip` */ + assert(cur < ZSTD_OPT_NUM); /* control overflow*/ + +_shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */ + assert(opt[0].mlen == 0); + + { U32 const storeEnd = cur + 1; + U32 storeStart = storeEnd; + U32 seqPos = cur; + + DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)", + last_pos, cur); + assert(storeEnd < ZSTD_OPT_NUM); + DEBUGLOG(6, "last sequence copied into pos=%u (llen=%u,mlen=%u,ofc=%u)", + storeEnd, lastSequence.litlen, lastSequence.mlen, lastSequence.off); + opt[storeEnd] = lastSequence; + while (seqPos > 0) { + U32 const backDist = ZSTD_totalLen(opt[seqPos]); + storeStart--; + DEBUGLOG(6, "sequence from rPos=%u copied into pos=%u (llen=%u,mlen=%u,ofc=%u)", + seqPos, storeStart, opt[seqPos].litlen, opt[seqPos].mlen, opt[seqPos].off); + opt[storeStart] = opt[seqPos]; + seqPos = (seqPos > backDist) ? seqPos - backDist : 0; + } + + /* save sequences */ + DEBUGLOG(6, "sending selected sequences into seqStore") + { U32 storePos; + for (storePos=storeStart; storePos <= storeEnd; storePos++) { + U32 const llen = opt[storePos].litlen; + U32 const mlen = opt[storePos].mlen; + U32 const offCode = opt[storePos].off; + U32 const advance = llen + mlen; + DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u", + anchor - istart, llen, mlen); + + if (mlen==0) { /* only literals => must be last "sequence", actually starting a new stream of sequences */ + assert(storePos == storeEnd); /* must be last sequence */ + ip = anchor + llen; /* last "sequence" is a bunch of literals => don't progress anchor */ + continue; /* will finish */ + } + + /* repcodes update : like ZSTD_updateRep(), but update in place */ + if (offCode >= ZSTD_REP_NUM) { /* full offset */ + rep[2] = rep[1]; + rep[1] = rep[0]; + rep[0] = offCode - ZSTD_REP_MOVE; + } else { /* repcode */ + U32 const repCode = offCode + (llen==0); + if (repCode) { /* note : if repCode==0, no 
change */ + U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode]; + if (repCode >= 2) rep[2] = rep[1]; + rep[1] = rep[0]; + rep[0] = currentOffset; + } } + + assert(anchor + llen <= iend); + ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen); + ZSTD_storeSeq(seqStore, llen, anchor, offCode, mlen-MINMATCH); + anchor += advance; + ip = anchor; + } } + ZSTD_setBasePrices(optStatePtr, optLevel); + } + + } /* while (ip < ilimit) */ + + /* Return the last literals size */ + return iend - anchor; +} + + +size_t ZSTD_compressBlock_btopt( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + const ZSTD_compressionParameters* cParams, const void* src, size_t srcSize) +{ + DEBUGLOG(5, "ZSTD_compressBlock_btopt"); + return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, cParams, src, srcSize, 0 /*optLevel*/, ZSTD_noDict); +} + + +/* used in 2-pass strategy */ +static U32 ZSTD_upscaleStat(U32* table, U32 lastEltIndex, int bonus) +{ + U32 s, sum=0; + assert(ZSTD_FREQ_DIV+bonus > 0); + for (s=0; s<=lastEltIndex; s++) { + table[s] <<= ZSTD_FREQ_DIV+bonus; + table[s]--; + sum += table[s]; + } + return sum; +} + +/* used in 2-pass strategy */ +MEM_STATIC void ZSTD_upscaleStats(optState_t* optPtr) +{ + optPtr->litSum = ZSTD_upscaleStat(optPtr->litFreq, MaxLit, 0); + optPtr->litLengthSum = ZSTD_upscaleStat(optPtr->litLengthFreq, MaxLL, 1); + optPtr->matchLengthSum = ZSTD_upscaleStat(optPtr->matchLengthFreq, MaxML, 1); + optPtr->offCodeSum = ZSTD_upscaleStat(optPtr->offCodeFreq, MaxOff, 1); +} + +size_t ZSTD_compressBlock_btultra( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + const ZSTD_compressionParameters* cParams, const void* src, size_t srcSize) +{ + DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize); +#if 0 + /* 2-pass strategy (disabled) + * this strategy makes a first pass over first block to collect statistics + * and seed next round's statistics with it. 
+ * The compression ratio gain is generally small (~0.5% on first block), + * the cost is 2x cpu time on first block. */ + assert(srcSize <= ZSTD_BLOCKSIZE_MAX); + if ( (ms->opt.litLengthSum==0) /* first block */ + && (seqStore->sequences == seqStore->sequencesStart) /* no ldm */ + && (ms->window.dictLimit == ms->window.lowLimit) ) { /* no dictionary */ + U32 tmpRep[ZSTD_REP_NUM]; + DEBUGLOG(5, "ZSTD_compressBlock_btultra: first block: collecting statistics"); + assert(ms->nextToUpdate >= ms->window.dictLimit + && ms->nextToUpdate <= ms->window.dictLimit + 1); + memcpy(tmpRep, rep, sizeof(tmpRep)); + ZSTD_compressBlock_opt_generic(ms, seqStore, tmpRep, cParams, src, srcSize, 2 /*optLevel*/, ZSTD_noDict); /* generate stats into ms->opt*/ + ZSTD_resetSeqStore(seqStore); + /* invalidate first scan from history */ + ms->window.base -= srcSize; + ms->window.dictLimit += (U32)srcSize; + ms->window.lowLimit = ms->window.dictLimit; + ms->nextToUpdate = ms->window.dictLimit; + ms->nextToUpdate3 = ms->window.dictLimit; + /* re-inforce weight of collected statistics */ + ZSTD_upscaleStats(&ms->opt); + } +#endif + return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, cParams, src, srcSize, 2 /*optLevel*/, ZSTD_noDict); +} + +size_t ZSTD_compressBlock_btopt_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + const ZSTD_compressionParameters* cParams, const void* src, size_t srcSize) +{ + return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, cParams, src, srcSize, 0 /*optLevel*/, ZSTD_dictMatchState); +} + +size_t ZSTD_compressBlock_btultra_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + const ZSTD_compressionParameters* cParams, const void* src, size_t srcSize) +{ + return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, cParams, src, srcSize, 2 /*optLevel*/, ZSTD_dictMatchState); +} + +size_t ZSTD_compressBlock_btopt_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 
+ const ZSTD_compressionParameters* cParams, const void* src, size_t srcSize) +{ + return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, cParams, src, srcSize, 0 /*optLevel*/, ZSTD_extDict); +} + +size_t ZSTD_compressBlock_btultra_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + const ZSTD_compressionParameters* cParams, const void* src, size_t srcSize) +{ + return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, cParams, src, srcSize, 2 /*optLevel*/, ZSTD_extDict); +} diff --git a/deps/SZ/zstd/compress/zstd_opt.h b/deps/SZ/zstd/compress/zstd_opt.h new file mode 100644 index 0000000000000000000000000000000000000000..63dbe79a846d42e511e01847e6e30bae866f44cf --- /dev/null +++ b/deps/SZ/zstd/compress/zstd_opt.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +#ifndef ZSTD_OPT_H +#define ZSTD_OPT_H + +#if defined (__cplusplus) +extern "C" { +#endif + +#include "zstd_compress_internal.h" + +void ZSTD_updateTree( + ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams, + const BYTE* ip, const BYTE* iend); /* used in ZSTD_loadDictionaryContent() */ + +size_t ZSTD_compressBlock_btopt( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize); +size_t ZSTD_compressBlock_btultra( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize); + +size_t ZSTD_compressBlock_btopt_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize); +size_t ZSTD_compressBlock_btultra_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize); + +size_t ZSTD_compressBlock_btopt_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize); +size_t ZSTD_compressBlock_btultra_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize); + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_OPT_H */ diff --git a/deps/SZ/zstd/compress/zstdmt_compress.c b/deps/SZ/zstd/compress/zstdmt_compress.c new file mode 100644 index 0000000000000000000000000000000000000000..6daedca8b3d0a57af2ad48e5fa7934a521a92e6d --- /dev/null +++ b/deps/SZ/zstd/compress/zstdmt_compress.c @@ -0,0 +1,1906 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. 
+ * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + +/* ====== Tuning parameters ====== */ +#define ZSTDMT_NBWORKERS_MAX 200 +#define ZSTDMT_JOBSIZE_MAX (MEM_32bits() ? (512 MB) : (2 GB)) /* note : limited by `jobSize` type, which is `unsigned` */ +#define ZSTDMT_OVERLAPLOG_DEFAULT 6 + + +/* ====== Compiler specifics ====== */ +#if defined(_MSC_VER) +# pragma warning(disable : 4204) /* disable: C4204: non-constant aggregate initializer */ +#endif + + +/* ====== Dependencies ====== */ +#include /* memcpy, memset */ +#include /* INT_MAX */ +#include "pool.h" /* threadpool */ +#include "threading.h" /* mutex */ +#include "zstd_compress_internal.h" /* MIN, ERROR, ZSTD_*, ZSTD_highbit32 */ +#include "zstd_ldm.h" +#include "zstdmt_compress.h" + +/* Guards code to support resizing the SeqPool. + * We will want to resize the SeqPool to save memory in the future. + * Until then, comment the code out since it is unused. 
+ */ +#define ZSTD_RESIZE_SEQPOOL 0 + +/* ====== Debug ====== */ +#if defined(DEBUGLEVEL) && (DEBUGLEVEL>=2) && !defined(_MSC_VER) + +# include +# include +# include + +# define DEBUG_PRINTHEX(l,p,n) { \ + unsigned debug_u; \ + for (debug_u=0; debug_u<(n); debug_u++) \ + RAWLOG(l, "%02X ", ((const unsigned char*)(p))[debug_u]); \ + RAWLOG(l, " \n"); \ +} + +static unsigned long long GetCurrentClockTimeMicroseconds(void) +{ + static clock_t _ticksPerSecond = 0; + if (_ticksPerSecond <= 0) _ticksPerSecond = sysconf(_SC_CLK_TCK); + + { struct tms junk; clock_t newTicks = (clock_t) times(&junk); + return ((((unsigned long long)newTicks)*(1000000))/_ticksPerSecond); } +} + +#define MUTEX_WAIT_TIME_DLEVEL 6 +#define ZSTD_PTHREAD_MUTEX_LOCK(mutex) { \ + if (DEBUGLEVEL >= MUTEX_WAIT_TIME_DLEVEL) { \ + unsigned long long const beforeTime = GetCurrentClockTimeMicroseconds(); \ + ZSTD_pthread_mutex_lock(mutex); \ + { unsigned long long const afterTime = GetCurrentClockTimeMicroseconds(); \ + unsigned long long const elapsedTime = (afterTime-beforeTime); \ + if (elapsedTime > 1000) { /* or whatever threshold you like; I'm using 1 millisecond here */ \ + DEBUGLOG(MUTEX_WAIT_TIME_DLEVEL, "Thread took %llu microseconds to acquire mutex %s \n", \ + elapsedTime, #mutex); \ + } } \ + } else { \ + ZSTD_pthread_mutex_lock(mutex); \ + } \ +} + +#else + +# define ZSTD_PTHREAD_MUTEX_LOCK(m) ZSTD_pthread_mutex_lock(m) +# define DEBUG_PRINTHEX(l,p,n) {} + +#endif + + +/* ===== Buffer Pool ===== */ +/* a single Buffer Pool can be invoked from multiple threads in parallel */ + +typedef struct buffer_s { + void* start; + size_t capacity; +} buffer_t; + +static const buffer_t g_nullBuffer = { NULL, 0 }; + +typedef struct ZSTDMT_bufferPool_s { + ZSTD_pthread_mutex_t poolMutex; + size_t bufferSize; + unsigned totalBuffers; + unsigned nbBuffers; + ZSTD_customMem cMem; + buffer_t bTable[1]; /* variable size */ +} ZSTDMT_bufferPool; + +static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned 
nbWorkers, ZSTD_customMem cMem) +{ + unsigned const maxNbBuffers = 2*nbWorkers + 3; + ZSTDMT_bufferPool* const bufPool = (ZSTDMT_bufferPool*)ZSTD_calloc( + sizeof(ZSTDMT_bufferPool) + (maxNbBuffers-1) * sizeof(buffer_t), cMem); + if (bufPool==NULL) return NULL; + if (ZSTD_pthread_mutex_init(&bufPool->poolMutex, NULL)) { + ZSTD_free(bufPool, cMem); + return NULL; + } + bufPool->bufferSize = 64 KB; + bufPool->totalBuffers = maxNbBuffers; + bufPool->nbBuffers = 0; + bufPool->cMem = cMem; + return bufPool; +} + +static void ZSTDMT_freeBufferPool(ZSTDMT_bufferPool* bufPool) +{ + unsigned u; + DEBUGLOG(3, "ZSTDMT_freeBufferPool (address:%08X)", (U32)(size_t)bufPool); + if (!bufPool) return; /* compatibility with free on NULL */ + for (u=0; utotalBuffers; u++) { + DEBUGLOG(4, "free buffer %2u (address:%08X)", u, (U32)(size_t)bufPool->bTable[u].start); + ZSTD_free(bufPool->bTable[u].start, bufPool->cMem); + } + ZSTD_pthread_mutex_destroy(&bufPool->poolMutex); + ZSTD_free(bufPool, bufPool->cMem); +} + +/* only works at initialization, not during compression */ +static size_t ZSTDMT_sizeof_bufferPool(ZSTDMT_bufferPool* bufPool) +{ + size_t const poolSize = sizeof(*bufPool) + + (bufPool->totalBuffers - 1) * sizeof(buffer_t); + unsigned u; + size_t totalBufferSize = 0; + ZSTD_pthread_mutex_lock(&bufPool->poolMutex); + for (u=0; utotalBuffers; u++) + totalBufferSize += bufPool->bTable[u].capacity; + ZSTD_pthread_mutex_unlock(&bufPool->poolMutex); + + return poolSize + totalBufferSize; +} + +/* ZSTDMT_setBufferSize() : + * all future buffers provided by this buffer pool will have _at least_ this size + * note : it's better for all buffers to have same size, + * as they become freely interchangeable, reducing malloc/free usages and memory fragmentation */ +static void ZSTDMT_setBufferSize(ZSTDMT_bufferPool* const bufPool, size_t const bSize) +{ + ZSTD_pthread_mutex_lock(&bufPool->poolMutex); + DEBUGLOG(4, "ZSTDMT_setBufferSize: bSize = %u", (U32)bSize); + bufPool->bufferSize = 
bSize; + ZSTD_pthread_mutex_unlock(&bufPool->poolMutex); +} + + +static ZSTDMT_bufferPool* ZSTDMT_expandBufferPool(ZSTDMT_bufferPool* srcBufPool, U32 nbWorkers) +{ + unsigned const maxNbBuffers = 2*nbWorkers + 3; + if (srcBufPool==NULL) return NULL; + if (srcBufPool->totalBuffers >= maxNbBuffers) /* good enough */ + return srcBufPool; + /* need a larger buffer pool */ + { ZSTD_customMem const cMem = srcBufPool->cMem; + size_t const bSize = srcBufPool->bufferSize; /* forward parameters */ + ZSTDMT_bufferPool* newBufPool; + ZSTDMT_freeBufferPool(srcBufPool); + newBufPool = ZSTDMT_createBufferPool(nbWorkers, cMem); + if (newBufPool==NULL) return newBufPool; + ZSTDMT_setBufferSize(newBufPool, bSize); + return newBufPool; + } +} + +/** ZSTDMT_getBuffer() : + * assumption : bufPool must be valid + * @return : a buffer, with start pointer and size + * note: allocation may fail, in this case, start==NULL and size==0 */ +static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* bufPool) +{ + size_t const bSize = bufPool->bufferSize; + DEBUGLOG(5, "ZSTDMT_getBuffer: bSize = %u", (U32)bufPool->bufferSize); + ZSTD_pthread_mutex_lock(&bufPool->poolMutex); + if (bufPool->nbBuffers) { /* try to use an existing buffer */ + buffer_t const buf = bufPool->bTable[--(bufPool->nbBuffers)]; + size_t const availBufferSize = buf.capacity; + bufPool->bTable[bufPool->nbBuffers] = g_nullBuffer; + if ((availBufferSize >= bSize) & ((availBufferSize>>3) <= bSize)) { + /* large enough, but not too much */ + DEBUGLOG(5, "ZSTDMT_getBuffer: provide buffer %u of size %u", + bufPool->nbBuffers, (U32)buf.capacity); + ZSTD_pthread_mutex_unlock(&bufPool->poolMutex); + return buf; + } + /* size conditions not respected : scratch this buffer, create new one */ + DEBUGLOG(5, "ZSTDMT_getBuffer: existing buffer does not meet size conditions => freeing"); + ZSTD_free(buf.start, bufPool->cMem); + } + ZSTD_pthread_mutex_unlock(&bufPool->poolMutex); + /* create new buffer */ + DEBUGLOG(5, "ZSTDMT_getBuffer: create a 
new buffer"); + { buffer_t buffer; + void* const start = ZSTD_malloc(bSize, bufPool->cMem); + buffer.start = start; /* note : start can be NULL if malloc fails ! */ + buffer.capacity = (start==NULL) ? 0 : bSize; + if (start==NULL) { + DEBUGLOG(5, "ZSTDMT_getBuffer: buffer allocation failure !!"); + } else { + DEBUGLOG(5, "ZSTDMT_getBuffer: created buffer of size %u", (U32)bSize); + } + return buffer; + } +} + +#if ZSTD_RESIZE_SEQPOOL +/** ZSTDMT_resizeBuffer() : + * assumption : bufPool must be valid + * @return : a buffer that is at least the buffer pool buffer size. + * If a reallocation happens, the data in the input buffer is copied. + */ +static buffer_t ZSTDMT_resizeBuffer(ZSTDMT_bufferPool* bufPool, buffer_t buffer) +{ + size_t const bSize = bufPool->bufferSize; + if (buffer.capacity < bSize) { + void* const start = ZSTD_malloc(bSize, bufPool->cMem); + buffer_t newBuffer; + newBuffer.start = start; + newBuffer.capacity = start == NULL ? 0 : bSize; + if (start != NULL) { + assert(newBuffer.capacity >= buffer.capacity); + memcpy(newBuffer.start, buffer.start, buffer.capacity); + DEBUGLOG(5, "ZSTDMT_resizeBuffer: created buffer of size %u", (U32)bSize); + return newBuffer; + } + DEBUGLOG(5, "ZSTDMT_resizeBuffer: buffer allocation failure !!"); + } + return buffer; +} +#endif + +/* store buffer for later re-use, up to pool capacity */ +static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* bufPool, buffer_t buf) +{ + if (buf.start == NULL) return; /* compatible with release on NULL */ + DEBUGLOG(5, "ZSTDMT_releaseBuffer"); + ZSTD_pthread_mutex_lock(&bufPool->poolMutex); + if (bufPool->nbBuffers < bufPool->totalBuffers) { + bufPool->bTable[bufPool->nbBuffers++] = buf; /* stored for later use */ + DEBUGLOG(5, "ZSTDMT_releaseBuffer: stored buffer of size %u in slot %u", + (U32)buf.capacity, (U32)(bufPool->nbBuffers-1)); + ZSTD_pthread_mutex_unlock(&bufPool->poolMutex); + return; + } + ZSTD_pthread_mutex_unlock(&bufPool->poolMutex); + /* Reached bufferPool capacity 
(should not happen) */ + DEBUGLOG(5, "ZSTDMT_releaseBuffer: pool capacity reached => freeing "); + ZSTD_free(buf.start, bufPool->cMem); +} + + +/* ===== Seq Pool Wrapper ====== */ + +static rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0}; + +typedef ZSTDMT_bufferPool ZSTDMT_seqPool; + +static size_t ZSTDMT_sizeof_seqPool(ZSTDMT_seqPool* seqPool) +{ + return ZSTDMT_sizeof_bufferPool(seqPool); +} + +static rawSeqStore_t bufferToSeq(buffer_t buffer) +{ + rawSeqStore_t seq = {NULL, 0, 0, 0}; + seq.seq = (rawSeq*)buffer.start; + seq.capacity = buffer.capacity / sizeof(rawSeq); + return seq; +} + +static buffer_t seqToBuffer(rawSeqStore_t seq) +{ + buffer_t buffer; + buffer.start = seq.seq; + buffer.capacity = seq.capacity * sizeof(rawSeq); + return buffer; +} + +static rawSeqStore_t ZSTDMT_getSeq(ZSTDMT_seqPool* seqPool) +{ + if (seqPool->bufferSize == 0) { + return kNullRawSeqStore; + } + return bufferToSeq(ZSTDMT_getBuffer(seqPool)); +} + +#if ZSTD_RESIZE_SEQPOOL +static rawSeqStore_t ZSTDMT_resizeSeq(ZSTDMT_seqPool* seqPool, rawSeqStore_t seq) +{ + return bufferToSeq(ZSTDMT_resizeBuffer(seqPool, seqToBuffer(seq))); +} +#endif + +static void ZSTDMT_releaseSeq(ZSTDMT_seqPool* seqPool, rawSeqStore_t seq) +{ + ZSTDMT_releaseBuffer(seqPool, seqToBuffer(seq)); +} + +static void ZSTDMT_setNbSeq(ZSTDMT_seqPool* const seqPool, size_t const nbSeq) +{ + ZSTDMT_setBufferSize(seqPool, nbSeq * sizeof(rawSeq)); +} + +static ZSTDMT_seqPool* ZSTDMT_createSeqPool(unsigned nbWorkers, ZSTD_customMem cMem) +{ + ZSTDMT_seqPool* seqPool = ZSTDMT_createBufferPool(nbWorkers, cMem); + ZSTDMT_setNbSeq(seqPool, 0); + return seqPool; +} + +static void ZSTDMT_freeSeqPool(ZSTDMT_seqPool* seqPool) +{ + ZSTDMT_freeBufferPool(seqPool); +} + +static ZSTDMT_seqPool* ZSTDMT_expandSeqPool(ZSTDMT_seqPool* pool, U32 nbWorkers) +{ + return ZSTDMT_expandBufferPool(pool, nbWorkers); +} + + +/* ===== CCtx Pool ===== */ +/* a single CCtx Pool can be invoked from multiple threads in parallel */ + +typedef 
struct { + ZSTD_pthread_mutex_t poolMutex; + unsigned totalCCtx; + unsigned availCCtx; + ZSTD_customMem cMem; + ZSTD_CCtx* cctx[1]; /* variable size */ +} ZSTDMT_CCtxPool; + +/* note : all CCtx borrowed from the pool should be released back to the pool _before_ freeing the pool */ +static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool) +{ + unsigned u; + for (u=0; utotalCCtx; u++) + ZSTD_freeCCtx(pool->cctx[u]); /* note : compatible with free on NULL */ + ZSTD_pthread_mutex_destroy(&pool->poolMutex); + ZSTD_free(pool, pool->cMem); +} + +/* ZSTDMT_createCCtxPool() : + * implies nbWorkers >= 1 , checked by caller ZSTDMT_createCCtx() */ +static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(unsigned nbWorkers, + ZSTD_customMem cMem) +{ + ZSTDMT_CCtxPool* const cctxPool = (ZSTDMT_CCtxPool*) ZSTD_calloc( + sizeof(ZSTDMT_CCtxPool) + (nbWorkers-1)*sizeof(ZSTD_CCtx*), cMem); + assert(nbWorkers > 0); + if (!cctxPool) return NULL; + if (ZSTD_pthread_mutex_init(&cctxPool->poolMutex, NULL)) { + ZSTD_free(cctxPool, cMem); + return NULL; + } + cctxPool->cMem = cMem; + cctxPool->totalCCtx = nbWorkers; + cctxPool->availCCtx = 1; /* at least one cctx for single-thread mode */ + cctxPool->cctx[0] = ZSTD_createCCtx_advanced(cMem); + if (!cctxPool->cctx[0]) { ZSTDMT_freeCCtxPool(cctxPool); return NULL; } + DEBUGLOG(3, "cctxPool created, with %u workers", nbWorkers); + return cctxPool; +} + +static ZSTDMT_CCtxPool* ZSTDMT_expandCCtxPool(ZSTDMT_CCtxPool* srcPool, + unsigned nbWorkers) +{ + if (srcPool==NULL) return NULL; + if (nbWorkers <= srcPool->totalCCtx) return srcPool; /* good enough */ + /* need a larger cctx pool */ + { ZSTD_customMem const cMem = srcPool->cMem; + ZSTDMT_freeCCtxPool(srcPool); + return ZSTDMT_createCCtxPool(nbWorkers, cMem); + } +} + +/* only works during initialization phase, not during compression */ +static size_t ZSTDMT_sizeof_CCtxPool(ZSTDMT_CCtxPool* cctxPool) +{ + ZSTD_pthread_mutex_lock(&cctxPool->poolMutex); + { unsigned const nbWorkers = cctxPool->totalCCtx; + 
size_t const poolSize = sizeof(*cctxPool) + + (nbWorkers-1) * sizeof(ZSTD_CCtx*); + unsigned u; + size_t totalCCtxSize = 0; + for (u=0; ucctx[u]); + } + ZSTD_pthread_mutex_unlock(&cctxPool->poolMutex); + assert(nbWorkers > 0); + return poolSize + totalCCtxSize; + } +} + +static ZSTD_CCtx* ZSTDMT_getCCtx(ZSTDMT_CCtxPool* cctxPool) +{ + DEBUGLOG(5, "ZSTDMT_getCCtx"); + ZSTD_pthread_mutex_lock(&cctxPool->poolMutex); + if (cctxPool->availCCtx) { + cctxPool->availCCtx--; + { ZSTD_CCtx* const cctx = cctxPool->cctx[cctxPool->availCCtx]; + ZSTD_pthread_mutex_unlock(&cctxPool->poolMutex); + return cctx; + } } + ZSTD_pthread_mutex_unlock(&cctxPool->poolMutex); + DEBUGLOG(5, "create one more CCtx"); + return ZSTD_createCCtx_advanced(cctxPool->cMem); /* note : can be NULL, when creation fails ! */ +} + +static void ZSTDMT_releaseCCtx(ZSTDMT_CCtxPool* pool, ZSTD_CCtx* cctx) +{ + if (cctx==NULL) return; /* compatibility with release on NULL */ + ZSTD_pthread_mutex_lock(&pool->poolMutex); + if (pool->availCCtx < pool->totalCCtx) + pool->cctx[pool->availCCtx++] = cctx; + else { + /* pool overflow : should not happen, since totalCCtx==nbWorkers */ + DEBUGLOG(4, "CCtx pool overflow : free cctx"); + ZSTD_freeCCtx(cctx); + } + ZSTD_pthread_mutex_unlock(&pool->poolMutex); +} + +/* ==== Serial State ==== */ + +typedef struct { + void const* start; + size_t size; +} range_t; + +typedef struct { + /* All variables in the struct are protected by mutex. */ + ZSTD_pthread_mutex_t mutex; + ZSTD_pthread_cond_t cond; + ZSTD_CCtx_params params; + ldmState_t ldmState; + XXH64_state_t xxhState; + unsigned nextJobID; + /* Protects ldmWindow. + * Must be acquired after the main mutex when acquiring both. 
+ */ + ZSTD_pthread_mutex_t ldmWindowMutex; + ZSTD_pthread_cond_t ldmWindowCond; /* Signaled when ldmWindow is udpated */ + ZSTD_window_t ldmWindow; /* A thread-safe copy of ldmState.window */ +} serialState_t; + +static int ZSTDMT_serialState_reset(serialState_t* serialState, ZSTDMT_seqPool* seqPool, ZSTD_CCtx_params params, size_t jobSize) +{ + /* Adjust parameters */ + if (params.ldmParams.enableLdm) { + DEBUGLOG(4, "LDM window size = %u KB", (1U << params.cParams.windowLog) >> 10); + ZSTD_ldm_adjustParameters(¶ms.ldmParams, ¶ms.cParams); + assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog); + assert(params.ldmParams.hashEveryLog < 32); + serialState->ldmState.hashPower = + ZSTD_ldm_getHashPower(params.ldmParams.minMatchLength); + } else { + memset(¶ms.ldmParams, 0, sizeof(params.ldmParams)); + } + serialState->nextJobID = 0; + if (params.fParams.checksumFlag) + XXH64_reset(&serialState->xxhState, 0); + if (params.ldmParams.enableLdm) { + ZSTD_customMem cMem = params.customMem; + unsigned const hashLog = params.ldmParams.hashLog; + size_t const hashSize = ((size_t)1 << hashLog) * sizeof(ldmEntry_t); + unsigned const bucketLog = + params.ldmParams.hashLog - params.ldmParams.bucketSizeLog; + size_t const bucketSize = (size_t)1 << bucketLog; + unsigned const prevBucketLog = + serialState->params.ldmParams.hashLog - + serialState->params.ldmParams.bucketSizeLog; + /* Size the seq pool tables */ + ZSTDMT_setNbSeq(seqPool, ZSTD_ldm_getMaxNbSeq(params.ldmParams, jobSize)); + /* Reset the window */ + ZSTD_window_clear(&serialState->ldmState.window); + serialState->ldmWindow = serialState->ldmState.window; + /* Resize tables and output space if necessary. 
*/ + if (serialState->ldmState.hashTable == NULL || serialState->params.ldmParams.hashLog < hashLog) { + ZSTD_free(serialState->ldmState.hashTable, cMem); + serialState->ldmState.hashTable = (ldmEntry_t*)ZSTD_malloc(hashSize, cMem); + } + if (serialState->ldmState.bucketOffsets == NULL || prevBucketLog < bucketLog) { + ZSTD_free(serialState->ldmState.bucketOffsets, cMem); + serialState->ldmState.bucketOffsets = (BYTE*)ZSTD_malloc(bucketSize, cMem); + } + if (!serialState->ldmState.hashTable || !serialState->ldmState.bucketOffsets) + return 1; + /* Zero the tables */ + memset(serialState->ldmState.hashTable, 0, hashSize); + memset(serialState->ldmState.bucketOffsets, 0, bucketSize); + } + serialState->params = params; + serialState->params.jobSize = (U32)jobSize; + return 0; +} + +static int ZSTDMT_serialState_init(serialState_t* serialState) +{ + int initError = 0; + memset(serialState, 0, sizeof(*serialState)); + initError |= ZSTD_pthread_mutex_init(&serialState->mutex, NULL); + initError |= ZSTD_pthread_cond_init(&serialState->cond, NULL); + initError |= ZSTD_pthread_mutex_init(&serialState->ldmWindowMutex, NULL); + initError |= ZSTD_pthread_cond_init(&serialState->ldmWindowCond, NULL); + return initError; +} + +static void ZSTDMT_serialState_free(serialState_t* serialState) +{ + ZSTD_customMem cMem = serialState->params.customMem; + ZSTD_pthread_mutex_destroy(&serialState->mutex); + ZSTD_pthread_cond_destroy(&serialState->cond); + ZSTD_pthread_mutex_destroy(&serialState->ldmWindowMutex); + ZSTD_pthread_cond_destroy(&serialState->ldmWindowCond); + ZSTD_free(serialState->ldmState.hashTable, cMem); + ZSTD_free(serialState->ldmState.bucketOffsets, cMem); +} + +static void ZSTDMT_serialState_update(serialState_t* serialState, + ZSTD_CCtx* jobCCtx, rawSeqStore_t seqStore, + range_t src, unsigned jobID) +{ + /* Wait for our turn */ + ZSTD_PTHREAD_MUTEX_LOCK(&serialState->mutex); + while (serialState->nextJobID < jobID) { + ZSTD_pthread_cond_wait(&serialState->cond, 
&serialState->mutex); + } + /* A future job may error and skip our job */ + if (serialState->nextJobID == jobID) { + /* It is now our turn, do any processing necessary */ + if (serialState->params.ldmParams.enableLdm) { + size_t error; + assert(seqStore.seq != NULL && seqStore.pos == 0 && + seqStore.size == 0 && seqStore.capacity > 0); + assert(src.size <= serialState->params.jobSize); + ZSTD_window_update(&serialState->ldmState.window, src.start, src.size); + error = ZSTD_ldm_generateSequences( + &serialState->ldmState, &seqStore, + &serialState->params.ldmParams, src.start, src.size); + /* We provide a large enough buffer to never fail. */ + assert(!ZSTD_isError(error)); (void)error; + /* Update ldmWindow to match the ldmState.window and signal the main + * thread if it is waiting for a buffer. + */ + ZSTD_PTHREAD_MUTEX_LOCK(&serialState->ldmWindowMutex); + serialState->ldmWindow = serialState->ldmState.window; + ZSTD_pthread_cond_signal(&serialState->ldmWindowCond); + ZSTD_pthread_mutex_unlock(&serialState->ldmWindowMutex); + } + if (serialState->params.fParams.checksumFlag && src.size > 0) + XXH64_update(&serialState->xxhState, src.start, src.size); + } + /* Now it is the next jobs turn */ + serialState->nextJobID++; + ZSTD_pthread_cond_broadcast(&serialState->cond); + ZSTD_pthread_mutex_unlock(&serialState->mutex); + + if (seqStore.size > 0) { + size_t const err = ZSTD_referenceExternalSequences( + jobCCtx, seqStore.seq, seqStore.size); + assert(serialState->params.ldmParams.enableLdm); + assert(!ZSTD_isError(err)); + (void)err; + } +} + +static void ZSTDMT_serialState_ensureFinished(serialState_t* serialState, + unsigned jobID, size_t cSize) +{ + ZSTD_PTHREAD_MUTEX_LOCK(&serialState->mutex); + if (serialState->nextJobID <= jobID) { + assert(ZSTD_isError(cSize)); (void)cSize; + DEBUGLOG(5, "Skipping past job %u because of error", jobID); + serialState->nextJobID = jobID + 1; + ZSTD_pthread_cond_broadcast(&serialState->cond); + + 
ZSTD_PTHREAD_MUTEX_LOCK(&serialState->ldmWindowMutex); + ZSTD_window_clear(&serialState->ldmWindow); + ZSTD_pthread_cond_signal(&serialState->ldmWindowCond); + ZSTD_pthread_mutex_unlock(&serialState->ldmWindowMutex); + } + ZSTD_pthread_mutex_unlock(&serialState->mutex); + +} + + +/* ------------------------------------------ */ +/* ===== Worker thread ===== */ +/* ------------------------------------------ */ + +static const range_t kNullRange = { NULL, 0 }; + +typedef struct { + size_t consumed; /* SHARED - set0 by mtctx, then modified by worker AND read by mtctx */ + size_t cSize; /* SHARED - set0 by mtctx, then modified by worker AND read by mtctx, then set0 by mtctx */ + ZSTD_pthread_mutex_t job_mutex; /* Thread-safe - used by mtctx and worker */ + ZSTD_pthread_cond_t job_cond; /* Thread-safe - used by mtctx and worker */ + ZSTDMT_CCtxPool* cctxPool; /* Thread-safe - used by mtctx and (all) workers */ + ZSTDMT_bufferPool* bufPool; /* Thread-safe - used by mtctx and (all) workers */ + ZSTDMT_seqPool* seqPool; /* Thread-safe - used by mtctx and (all) workers */ + serialState_t* serial; /* Thread-safe - used by mtctx and (all) workers */ + buffer_t dstBuff; /* set by worker (or mtctx), then read by worker & mtctx, then modified by mtctx => no barrier */ + range_t prefix; /* set by mtctx, then read by worker & mtctx => no barrier */ + range_t src; /* set by mtctx, then read by worker & mtctx => no barrier */ + unsigned jobID; /* set by mtctx, then read by worker => no barrier */ + unsigned firstJob; /* set by mtctx, then read by worker => no barrier */ + unsigned lastJob; /* set by mtctx, then read by worker => no barrier */ + ZSTD_CCtx_params params; /* set by mtctx, then read by worker => no barrier */ + const ZSTD_CDict* cdict; /* set by mtctx, then read by worker => no barrier */ + unsigned long long fullFrameSize; /* set by mtctx, then read by worker => no barrier */ + size_t dstFlushed; /* used only by mtctx */ + unsigned frameChecksumNeeded; /* used only by 
mtctx */ +} ZSTDMT_jobDescription; + +/* ZSTDMT_compressionJob() is a POOL_function type */ +void ZSTDMT_compressionJob(void* jobDescription) +{ + ZSTDMT_jobDescription* const job = (ZSTDMT_jobDescription*)jobDescription; + ZSTD_CCtx_params jobParams = job->params; /* do not modify job->params ! copy it, modify the copy */ + ZSTD_CCtx* const cctx = ZSTDMT_getCCtx(job->cctxPool); + rawSeqStore_t rawSeqStore = ZSTDMT_getSeq(job->seqPool); + buffer_t dstBuff = job->dstBuff; + + /* ressources */ + if (cctx==NULL) { + job->cSize = ERROR(memory_allocation); + goto _endJob; + } + if (dstBuff.start == NULL) { /* streaming job : doesn't provide a dstBuffer */ + dstBuff = ZSTDMT_getBuffer(job->bufPool); + if (dstBuff.start==NULL) { + job->cSize = ERROR(memory_allocation); + goto _endJob; + } + job->dstBuff = dstBuff; /* this value can be read in ZSTDMT_flush, when it copies the whole job */ + } + if (jobParams.ldmParams.enableLdm && rawSeqStore.seq == NULL) { + job->cSize = ERROR(memory_allocation); + goto _endJob; + } + + /* Don't compute the checksum for chunks, since we compute it externally, + * but write it in the header. + */ + if (job->jobID != 0) jobParams.fParams.checksumFlag = 0; + /* Don't run LDM for the chunks, since we handle it externally */ + jobParams.ldmParams.enableLdm = 0; + + + /* init */ + if (job->cdict) { + size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast, job->cdict, jobParams, job->fullFrameSize); + assert(job->firstJob); /* only allowed for first job */ + if (ZSTD_isError(initError)) { job->cSize = initError; goto _endJob; } + } else { /* srcStart points at reloaded section */ + U64 const pledgedSrcSize = job->firstJob ? 
job->fullFrameSize : job->src.size; + { size_t const forceWindowError = ZSTD_CCtxParam_setParameter(&jobParams, ZSTD_p_forceMaxWindow, !job->firstJob); + if (ZSTD_isError(forceWindowError)) { + job->cSize = forceWindowError; + goto _endJob; + } } + { size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, + job->prefix.start, job->prefix.size, ZSTD_dct_rawContent, /* load dictionary in "content-only" mode (no header analysis) */ + ZSTD_dtlm_fast, + NULL, /*cdict*/ + jobParams, pledgedSrcSize); + if (ZSTD_isError(initError)) { + job->cSize = initError; + goto _endJob; + } } } + + /* Perform serial step as early as possible, but after CCtx initialization */ + ZSTDMT_serialState_update(job->serial, cctx, rawSeqStore, job->src, job->jobID); + + if (!job->firstJob) { /* flush and overwrite frame header when it's not first job */ + size_t const hSize = ZSTD_compressContinue(cctx, dstBuff.start, dstBuff.capacity, job->src.start, 0); + if (ZSTD_isError(hSize)) { job->cSize = hSize; /* save error code */ goto _endJob; } + DEBUGLOG(5, "ZSTDMT_compressionJob: flush and overwrite %u bytes of frame header (not first job)", (U32)hSize); + ZSTD_invalidateRepCodes(cctx); + } + + /* compress */ + { size_t const chunkSize = 4*ZSTD_BLOCKSIZE_MAX; + int const nbChunks = (int)((job->src.size + (chunkSize-1)) / chunkSize); + const BYTE* ip = (const BYTE*) job->src.start; + BYTE* const ostart = (BYTE*)dstBuff.start; + BYTE* op = ostart; + BYTE* oend = op + dstBuff.capacity; + int chunkNb; + if (sizeof(size_t) > sizeof(int)) assert(job->src.size < ((size_t)INT_MAX) * chunkSize); /* check overflow */ + DEBUGLOG(5, "ZSTDMT_compressionJob: compress %u bytes in %i blocks", (U32)job->src.size, nbChunks); + assert(job->cSize == 0); + for (chunkNb = 1; chunkNb < nbChunks; chunkNb++) { + size_t const cSize = ZSTD_compressContinue(cctx, op, oend-op, ip, chunkSize); + if (ZSTD_isError(cSize)) { job->cSize = cSize; goto _endJob; } + ip += chunkSize; + op += cSize; assert(op < oend); + /* 
stats */ + ZSTD_PTHREAD_MUTEX_LOCK(&job->job_mutex); + job->cSize += cSize; + job->consumed = chunkSize * chunkNb; + DEBUGLOG(5, "ZSTDMT_compressionJob: compress new block : cSize==%u bytes (total: %u)", + (U32)cSize, (U32)job->cSize); + ZSTD_pthread_cond_signal(&job->job_cond); /* warns some more data is ready to be flushed */ + ZSTD_pthread_mutex_unlock(&job->job_mutex); + } + /* last block */ + assert(chunkSize > 0); assert((chunkSize & (chunkSize - 1)) == 0); /* chunkSize must be power of 2 for mask==(chunkSize-1) to work */ + if ((nbChunks > 0) | job->lastJob /*must output a "last block" flag*/ ) { + size_t const lastBlockSize1 = job->src.size & (chunkSize-1); + size_t const lastBlockSize = ((lastBlockSize1==0) & (job->src.size>=chunkSize)) ? chunkSize : lastBlockSize1; + size_t const cSize = (job->lastJob) ? + ZSTD_compressEnd (cctx, op, oend-op, ip, lastBlockSize) : + ZSTD_compressContinue(cctx, op, oend-op, ip, lastBlockSize); + if (ZSTD_isError(cSize)) { job->cSize = cSize; goto _endJob; } + /* stats */ + ZSTD_PTHREAD_MUTEX_LOCK(&job->job_mutex); + job->cSize += cSize; + ZSTD_pthread_mutex_unlock(&job->job_mutex); + } } + +_endJob: + ZSTDMT_serialState_ensureFinished(job->serial, job->jobID, job->cSize); + if (job->prefix.size > 0) + DEBUGLOG(5, "Finished with prefix: %zx", (size_t)job->prefix.start); + DEBUGLOG(5, "Finished with source: %zx", (size_t)job->src.start); + /* release resources */ + ZSTDMT_releaseSeq(job->seqPool, rawSeqStore); + ZSTDMT_releaseCCtx(job->cctxPool, cctx); + /* report */ + ZSTD_PTHREAD_MUTEX_LOCK(&job->job_mutex); + job->consumed = job->src.size; + ZSTD_pthread_cond_signal(&job->job_cond); + ZSTD_pthread_mutex_unlock(&job->job_mutex); +} + + +/* ------------------------------------------ */ +/* ===== Multi-threaded compression ===== */ +/* ------------------------------------------ */ + +typedef struct { + range_t prefix; /* read-only non-owned prefix buffer */ + buffer_t buffer; + size_t filled; +} inBuff_t; + +typedef struct { 
+ BYTE* buffer; /* The round input buffer. All jobs get references + * to pieces of the buffer. ZSTDMT_tryGetInputRange() + * handles handing out job input buffers, and makes + * sure it doesn't overlap with any pieces still in use. + */ + size_t capacity; /* The capacity of buffer. */ + size_t pos; /* The position of the current inBuff in the round + * buffer. Updated past the end if the inBuff once + * the inBuff is sent to the worker thread. + * pos <= capacity. + */ +} roundBuff_t; + +static const roundBuff_t kNullRoundBuff = {NULL, 0, 0}; + +struct ZSTDMT_CCtx_s { + POOL_ctx* factory; + ZSTDMT_jobDescription* jobs; + ZSTDMT_bufferPool* bufPool; + ZSTDMT_CCtxPool* cctxPool; + ZSTDMT_seqPool* seqPool; + ZSTD_CCtx_params params; + size_t targetSectionSize; + size_t targetPrefixSize; + int jobReady; /* 1 => one job is already prepared, but pool has shortage of workers. Don't create a new job. */ + inBuff_t inBuff; + roundBuff_t roundBuff; + serialState_t serial; + unsigned singleBlockingThread; + unsigned jobIDMask; + unsigned doneJobID; + unsigned nextJobID; + unsigned frameEnded; + unsigned allJobsCompleted; + unsigned long long frameContentSize; + unsigned long long consumed; + unsigned long long produced; + ZSTD_customMem cMem; + ZSTD_CDict* cdictLocal; + const ZSTD_CDict* cdict; +}; + +static void ZSTDMT_freeJobsTable(ZSTDMT_jobDescription* jobTable, U32 nbJobs, ZSTD_customMem cMem) +{ + U32 jobNb; + if (jobTable == NULL) return; + for (jobNb=0; jobNb mtctx->jobIDMask+1) { /* need more job capacity */ + ZSTDMT_freeJobsTable(mtctx->jobs, mtctx->jobIDMask+1, mtctx->cMem); + mtctx->jobIDMask = 0; + mtctx->jobs = ZSTDMT_createJobsTable(&nbJobs, mtctx->cMem); + if (mtctx->jobs==NULL) return ERROR(memory_allocation); + assert((nbJobs != 0) && ((nbJobs & (nbJobs - 1)) == 0)); /* ensure nbJobs is a power of 2 */ + mtctx->jobIDMask = nbJobs - 1; + } + return 0; +} + + +/* ZSTDMT_CCtxParam_setNbWorkers(): + * Internal use only */ +size_t 
ZSTDMT_CCtxParam_setNbWorkers(ZSTD_CCtx_params* params, unsigned nbWorkers) +{ + if (nbWorkers > ZSTDMT_NBWORKERS_MAX) nbWorkers = ZSTDMT_NBWORKERS_MAX; + params->nbWorkers = nbWorkers; + params->overlapSizeLog = ZSTDMT_OVERLAPLOG_DEFAULT; + params->jobSize = 0; + return nbWorkers; +} + +ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers, ZSTD_customMem cMem) +{ + ZSTDMT_CCtx* mtctx; + U32 nbJobs = nbWorkers + 2; + int initError; + DEBUGLOG(3, "ZSTDMT_createCCtx_advanced (nbWorkers = %u)", nbWorkers); + + if (nbWorkers < 1) return NULL; + nbWorkers = MIN(nbWorkers , ZSTDMT_NBWORKERS_MAX); + if ((cMem.customAlloc!=NULL) ^ (cMem.customFree!=NULL)) + /* invalid custom allocator */ + return NULL; + + mtctx = (ZSTDMT_CCtx*) ZSTD_calloc(sizeof(ZSTDMT_CCtx), cMem); + if (!mtctx) return NULL; + ZSTDMT_CCtxParam_setNbWorkers(&mtctx->params, nbWorkers); + mtctx->cMem = cMem; + mtctx->allJobsCompleted = 1; + mtctx->factory = POOL_create_advanced(nbWorkers, 0, cMem); + mtctx->jobs = ZSTDMT_createJobsTable(&nbJobs, cMem); + assert(nbJobs > 0); assert((nbJobs & (nbJobs - 1)) == 0); /* ensure nbJobs is a power of 2 */ + mtctx->jobIDMask = nbJobs - 1; + mtctx->bufPool = ZSTDMT_createBufferPool(nbWorkers, cMem); + mtctx->cctxPool = ZSTDMT_createCCtxPool(nbWorkers, cMem); + mtctx->seqPool = ZSTDMT_createSeqPool(nbWorkers, cMem); + initError = ZSTDMT_serialState_init(&mtctx->serial); + mtctx->roundBuff = kNullRoundBuff; + if (!mtctx->factory | !mtctx->jobs | !mtctx->bufPool | !mtctx->cctxPool | !mtctx->seqPool | initError) { + ZSTDMT_freeCCtx(mtctx); + return NULL; + } + DEBUGLOG(3, "mt_cctx created, for %u threads", nbWorkers); + return mtctx; +} + +ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbWorkers) +{ + return ZSTDMT_createCCtx_advanced(nbWorkers, ZSTD_defaultCMem); +} + + +/* ZSTDMT_releaseAllJobResources() : + * note : ensure all workers are killed first ! 
*/ +static void ZSTDMT_releaseAllJobResources(ZSTDMT_CCtx* mtctx) +{ + unsigned jobID; + DEBUGLOG(3, "ZSTDMT_releaseAllJobResources"); + for (jobID=0; jobID <= mtctx->jobIDMask; jobID++) { + DEBUGLOG(4, "job%02u: release dst address %08X", jobID, (U32)(size_t)mtctx->jobs[jobID].dstBuff.start); + ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[jobID].dstBuff); + mtctx->jobs[jobID].dstBuff = g_nullBuffer; + mtctx->jobs[jobID].cSize = 0; + } + memset(mtctx->jobs, 0, (mtctx->jobIDMask+1)*sizeof(ZSTDMT_jobDescription)); + mtctx->inBuff.buffer = g_nullBuffer; + mtctx->inBuff.filled = 0; + mtctx->allJobsCompleted = 1; +} + +static void ZSTDMT_waitForAllJobsCompleted(ZSTDMT_CCtx* mtctx) +{ + DEBUGLOG(4, "ZSTDMT_waitForAllJobsCompleted"); + while (mtctx->doneJobID < mtctx->nextJobID) { + unsigned const jobID = mtctx->doneJobID & mtctx->jobIDMask; + ZSTD_PTHREAD_MUTEX_LOCK(&mtctx->jobs[jobID].job_mutex); + while (mtctx->jobs[jobID].consumed < mtctx->jobs[jobID].src.size) { + DEBUGLOG(5, "waiting for jobCompleted signal from job %u", mtctx->doneJobID); /* we want to block when waiting for data to flush */ + ZSTD_pthread_cond_wait(&mtctx->jobs[jobID].job_cond, &mtctx->jobs[jobID].job_mutex); + } + ZSTD_pthread_mutex_unlock(&mtctx->jobs[jobID].job_mutex); + mtctx->doneJobID++; + } +} + +size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx) +{ + if (mtctx==NULL) return 0; /* compatible with free on NULL */ + POOL_free(mtctx->factory); /* stop and free worker threads */ + ZSTDMT_releaseAllJobResources(mtctx); /* release job resources into pools first */ + ZSTDMT_freeJobsTable(mtctx->jobs, mtctx->jobIDMask+1, mtctx->cMem); + ZSTDMT_freeBufferPool(mtctx->bufPool); + ZSTDMT_freeCCtxPool(mtctx->cctxPool); + ZSTDMT_freeSeqPool(mtctx->seqPool); + ZSTDMT_serialState_free(&mtctx->serial); + ZSTD_freeCDict(mtctx->cdictLocal); + if (mtctx->roundBuff.buffer) + ZSTD_free(mtctx->roundBuff.buffer, mtctx->cMem); + ZSTD_free(mtctx, mtctx->cMem); + return 0; +} + +size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* 
mtctx) +{ + if (mtctx == NULL) return 0; /* supports sizeof NULL */ + return sizeof(*mtctx) + + POOL_sizeof(mtctx->factory) + + ZSTDMT_sizeof_bufferPool(mtctx->bufPool) + + (mtctx->jobIDMask+1) * sizeof(ZSTDMT_jobDescription) + + ZSTDMT_sizeof_CCtxPool(mtctx->cctxPool) + + ZSTDMT_sizeof_seqPool(mtctx->seqPool) + + ZSTD_sizeof_CDict(mtctx->cdictLocal) + + mtctx->roundBuff.capacity; +} + +/* Internal only */ +size_t ZSTDMT_CCtxParam_setMTCtxParameter(ZSTD_CCtx_params* params, + ZSTDMT_parameter parameter, unsigned value) { + DEBUGLOG(4, "ZSTDMT_CCtxParam_setMTCtxParameter"); + switch(parameter) + { + case ZSTDMT_p_jobSize : + DEBUGLOG(4, "ZSTDMT_CCtxParam_setMTCtxParameter : set jobSize to %u", value); + if ( (value > 0) /* value==0 => automatic job size */ + & (value < ZSTDMT_JOBSIZE_MIN) ) + value = ZSTDMT_JOBSIZE_MIN; + if (value > ZSTDMT_JOBSIZE_MAX) + value = ZSTDMT_JOBSIZE_MAX; + params->jobSize = value; + return value; + case ZSTDMT_p_overlapSectionLog : + if (value > 9) value = 9; + DEBUGLOG(4, "ZSTDMT_p_overlapSectionLog : %u", value); + params->overlapSizeLog = (value >= 9) ? 
9 : value; + return value; + default : + return ERROR(parameter_unsupported); + } +} + +size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, unsigned value) +{ + DEBUGLOG(4, "ZSTDMT_setMTCtxParameter"); + switch(parameter) + { + case ZSTDMT_p_jobSize : + return ZSTDMT_CCtxParam_setMTCtxParameter(&mtctx->params, parameter, value); + case ZSTDMT_p_overlapSectionLog : + return ZSTDMT_CCtxParam_setMTCtxParameter(&mtctx->params, parameter, value); + default : + return ERROR(parameter_unsupported); + } +} + +size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, unsigned* value) +{ + switch (parameter) { + case ZSTDMT_p_jobSize: + *value = mtctx->params.jobSize; + break; + case ZSTDMT_p_overlapSectionLog: + *value = mtctx->params.overlapSizeLog; + break; + default: + return ERROR(parameter_unsupported); + } + return 0; +} + +/* Sets parameters relevant to the compression job, + * initializing others to default values. */ +static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(ZSTD_CCtx_params const params) +{ + ZSTD_CCtx_params jobParams; + memset(&jobParams, 0, sizeof(jobParams)); + + jobParams.cParams = params.cParams; + jobParams.fParams = params.fParams; + jobParams.compressionLevel = params.compressionLevel; + + return jobParams; +} + + +/* ZSTDMT_resize() : + * @return : error code if fails, 0 on success */ +static size_t ZSTDMT_resize(ZSTDMT_CCtx* mtctx, unsigned nbWorkers) +{ + if (POOL_resize(mtctx->factory, nbWorkers)) return ERROR(memory_allocation); + CHECK_F( ZSTDMT_expandJobsTable(mtctx, nbWorkers) ); + mtctx->bufPool = ZSTDMT_expandBufferPool(mtctx->bufPool, nbWorkers); + if (mtctx->bufPool == NULL) return ERROR(memory_allocation); + mtctx->cctxPool = ZSTDMT_expandCCtxPool(mtctx->cctxPool, nbWorkers); + if (mtctx->cctxPool == NULL) return ERROR(memory_allocation); + mtctx->seqPool = ZSTDMT_expandSeqPool(mtctx->seqPool, nbWorkers); + if (mtctx->seqPool == NULL) return ERROR(memory_allocation); + 
ZSTDMT_CCtxParam_setNbWorkers(&mtctx->params, nbWorkers); + return 0; +} + + +/*! ZSTDMT_updateCParams_whileCompressing() : + * Updates only a selected set of compression parameters, to remain compatible with current frame. + * New parameters will be applied to next compression job. */ +void ZSTDMT_updateCParams_whileCompressing(ZSTDMT_CCtx* mtctx, const ZSTD_CCtx_params* cctxParams) +{ + U32 const saved_wlog = mtctx->params.cParams.windowLog; /* Do not modify windowLog while compressing */ + int const compressionLevel = cctxParams->compressionLevel; + DEBUGLOG(5, "ZSTDMT_updateCParams_whileCompressing (level:%i)", + compressionLevel); + mtctx->params.compressionLevel = compressionLevel; + { ZSTD_compressionParameters cParams = ZSTD_getCParamsFromCCtxParams(cctxParams, 0, 0); + cParams.windowLog = saved_wlog; + mtctx->params.cParams = cParams; + } +} + +/* ZSTDMT_getFrameProgression(): + * tells how much data has been consumed (input) and produced (output) for current frame. + * able to count progression inside worker threads. + * Note : mutex will be acquired during statistics collection. */ +ZSTD_frameProgression ZSTDMT_getFrameProgression(ZSTDMT_CCtx* mtctx) +{ + ZSTD_frameProgression fps; + DEBUGLOG(6, "ZSTDMT_getFrameProgression"); + fps.consumed = mtctx->consumed; + fps.produced = mtctx->produced; + fps.ingested = mtctx->consumed + mtctx->inBuff.filled; + { unsigned jobNb; + unsigned lastJobNb = mtctx->nextJobID + mtctx->jobReady; assert(mtctx->jobReady <= 1); + DEBUGLOG(6, "ZSTDMT_getFrameProgression: jobs: from %u to <%u (jobReady:%u)", + mtctx->doneJobID, lastJobNb, mtctx->jobReady) + for (jobNb = mtctx->doneJobID ; jobNb < lastJobNb ; jobNb++) { + unsigned const wJobID = jobNb & mtctx->jobIDMask; + ZSTD_pthread_mutex_lock(&mtctx->jobs[wJobID].job_mutex); + { size_t const cResult = mtctx->jobs[wJobID].cSize; + size_t const produced = ZSTD_isError(cResult) ? 
0 : cResult; + fps.consumed += mtctx->jobs[wJobID].consumed; + fps.ingested += mtctx->jobs[wJobID].src.size; + fps.produced += produced; + } + ZSTD_pthread_mutex_unlock(&mtctx->jobs[wJobID].job_mutex); + } + } + return fps; +} + + +/* ------------------------------------------ */ +/* ===== Multi-threaded compression ===== */ +/* ------------------------------------------ */ + +static size_t ZSTDMT_computeTargetJobLog(ZSTD_CCtx_params const params) +{ + if (params.ldmParams.enableLdm) + return MAX(21, params.cParams.chainLog + 4); + return MAX(20, params.cParams.windowLog + 2); +} + +static size_t ZSTDMT_computeOverlapLog(ZSTD_CCtx_params const params) +{ + unsigned const overlapRLog = (params.overlapSizeLog>9) ? 0 : 9-params.overlapSizeLog; + if (params.ldmParams.enableLdm) + return (MIN(params.cParams.windowLog, ZSTDMT_computeTargetJobLog(params) - 2) - overlapRLog); + return overlapRLog >= 9 ? 0 : (params.cParams.windowLog - overlapRLog); +} + +static unsigned ZSTDMT_computeNbJobs(ZSTD_CCtx_params params, size_t srcSize, unsigned nbWorkers) { + assert(nbWorkers>0); + { size_t const jobSizeTarget = (size_t)1 << ZSTDMT_computeTargetJobLog(params); + size_t const jobMaxSize = jobSizeTarget << 2; + size_t const passSizeMax = jobMaxSize * nbWorkers; + unsigned const multiplier = (unsigned)(srcSize / passSizeMax) + 1; + unsigned const nbJobsLarge = multiplier * nbWorkers; + unsigned const nbJobsMax = (unsigned)(srcSize / jobSizeTarget) + 1; + unsigned const nbJobsSmall = MIN(nbJobsMax, nbWorkers); + return (multiplier>1) ? nbJobsLarge : nbJobsSmall; +} } + +/* ZSTDMT_compress_advanced_internal() : + * This is a blocking function : it will only give back control to caller after finishing its compression job. 
+ */ +static size_t ZSTDMT_compress_advanced_internal( + ZSTDMT_CCtx* mtctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict, + ZSTD_CCtx_params params) +{ + ZSTD_CCtx_params const jobParams = ZSTDMT_initJobCCtxParams(params); + size_t const overlapSize = (size_t)1 << ZSTDMT_computeOverlapLog(params); + unsigned const nbJobs = ZSTDMT_computeNbJobs(params, srcSize, params.nbWorkers); + size_t const proposedJobSize = (srcSize + (nbJobs-1)) / nbJobs; + size_t const avgJobSize = (((proposedJobSize-1) & 0x1FFFF) < 0x7FFF) ? proposedJobSize + 0xFFFF : proposedJobSize; /* avoid too small last block */ + const char* const srcStart = (const char*)src; + size_t remainingSrcSize = srcSize; + unsigned const compressWithinDst = (dstCapacity >= ZSTD_compressBound(srcSize)) ? nbJobs : (unsigned)(dstCapacity / ZSTD_compressBound(avgJobSize)); /* presumes avgJobSize >= 256 KB, which should be the case */ + size_t frameStartPos = 0, dstBufferPos = 0; + assert(jobParams.nbWorkers == 0); + assert(mtctx->cctxPool->totalCCtx == params.nbWorkers); + + params.jobSize = (U32)avgJobSize; + DEBUGLOG(4, "ZSTDMT_compress_advanced_internal: nbJobs=%2u (rawSize=%u bytes; fixedSize=%u) ", + nbJobs, (U32)proposedJobSize, (U32)avgJobSize); + + if ((nbJobs==1) | (params.nbWorkers<=1)) { /* fallback to single-thread mode : this is a blocking invocation anyway */ + ZSTD_CCtx* const cctx = mtctx->cctxPool->cctx[0]; + DEBUGLOG(4, "ZSTDMT_compress_advanced_internal: fallback to single-thread mode"); + if (cdict) return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, jobParams.fParams); + return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, NULL, 0, jobParams); + } + + assert(avgJobSize >= 256 KB); /* condition for ZSTD_compressBound(A) + ZSTD_compressBound(B) <= ZSTD_compressBound(A+B), required to compress directly into Dst (no additional buffer) */ + ZSTDMT_setBufferSize(mtctx->bufPool, 
ZSTD_compressBound(avgJobSize) ); + if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params, avgJobSize)) + return ERROR(memory_allocation); + + CHECK_F( ZSTDMT_expandJobsTable(mtctx, nbJobs) ); /* only expands if necessary */ + + { unsigned u; + for (u=0; ujobs[u].prefix.start = srcStart + frameStartPos - dictSize; + mtctx->jobs[u].prefix.size = dictSize; + mtctx->jobs[u].src.start = srcStart + frameStartPos; + mtctx->jobs[u].src.size = jobSize; assert(jobSize > 0); /* avoid job.src.size == 0 */ + mtctx->jobs[u].consumed = 0; + mtctx->jobs[u].cSize = 0; + mtctx->jobs[u].cdict = (u==0) ? cdict : NULL; + mtctx->jobs[u].fullFrameSize = srcSize; + mtctx->jobs[u].params = jobParams; + /* do not calculate checksum within sections, but write it in header for first section */ + mtctx->jobs[u].dstBuff = dstBuffer; + mtctx->jobs[u].cctxPool = mtctx->cctxPool; + mtctx->jobs[u].bufPool = mtctx->bufPool; + mtctx->jobs[u].seqPool = mtctx->seqPool; + mtctx->jobs[u].serial = &mtctx->serial; + mtctx->jobs[u].jobID = u; + mtctx->jobs[u].firstJob = (u==0); + mtctx->jobs[u].lastJob = (u==nbJobs-1); + + DEBUGLOG(5, "ZSTDMT_compress_advanced_internal: posting job %u (%u bytes)", u, (U32)jobSize); + DEBUG_PRINTHEX(6, mtctx->jobs[u].prefix.start, 12); + POOL_add(mtctx->factory, ZSTDMT_compressionJob, &mtctx->jobs[u]); + + frameStartPos += jobSize; + dstBufferPos += dstBufferCapacity; + remainingSrcSize -= jobSize; + } } + + /* collect result */ + { size_t error = 0, dstPos = 0; + unsigned jobID; + for (jobID=0; jobIDjobs[jobID].job_mutex); + while (mtctx->jobs[jobID].consumed < mtctx->jobs[jobID].src.size) { + DEBUGLOG(5, "waiting for jobCompleted signal from job %u", jobID); + ZSTD_pthread_cond_wait(&mtctx->jobs[jobID].job_cond, &mtctx->jobs[jobID].job_mutex); + } + ZSTD_pthread_mutex_unlock(&mtctx->jobs[jobID].job_mutex); + DEBUGLOG(5, "ready to write job %u ", jobID); + + { size_t const cSize = mtctx->jobs[jobID].cSize; + if (ZSTD_isError(cSize)) error = cSize; + if 
((!error) && (dstPos + cSize > dstCapacity)) error = ERROR(dstSize_tooSmall); + if (jobID) { /* note : job 0 is written directly at dst, which is correct position */ + if (!error) + memmove((char*)dst + dstPos, mtctx->jobs[jobID].dstBuff.start, cSize); /* may overlap when job compressed within dst */ + if (jobID >= compressWithinDst) { /* job compressed into its own buffer, which must be released */ + DEBUGLOG(5, "releasing buffer %u>=%u", jobID, compressWithinDst); + ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[jobID].dstBuff); + } } + mtctx->jobs[jobID].dstBuff = g_nullBuffer; + mtctx->jobs[jobID].cSize = 0; + dstPos += cSize ; + } + } /* for (jobID=0; jobIDserial.xxhState); + if (dstPos + 4 > dstCapacity) { + error = ERROR(dstSize_tooSmall); + } else { + DEBUGLOG(4, "writing checksum : %08X \n", checksum); + MEM_writeLE32((char*)dst + dstPos, checksum); + dstPos += 4; + } } + + if (!error) DEBUGLOG(4, "compressed size : %u ", (U32)dstPos); + return error ? error : dstPos; + } +} + +size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict, + ZSTD_parameters params, + unsigned overlapLog) +{ + ZSTD_CCtx_params cctxParams = mtctx->params; + cctxParams.cParams = params.cParams; + cctxParams.fParams = params.fParams; + cctxParams.overlapSizeLog = overlapLog; + return ZSTDMT_compress_advanced_internal(mtctx, + dst, dstCapacity, + src, srcSize, + cdict, cctxParams); +} + + +size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + int compressionLevel) +{ + U32 const overlapLog = (compressionLevel >= ZSTD_maxCLevel()) ? 
9 : ZSTDMT_OVERLAPLOG_DEFAULT; + ZSTD_parameters params = ZSTD_getParams(compressionLevel, srcSize, 0); + params.fParams.contentSizeFlag = 1; + return ZSTDMT_compress_advanced(mtctx, dst, dstCapacity, src, srcSize, NULL, params, overlapLog); +} + + +/* ====================================== */ +/* ======= Streaming API ======= */ +/* ====================================== */ + +size_t ZSTDMT_initCStream_internal( + ZSTDMT_CCtx* mtctx, + const void* dict, size_t dictSize, ZSTD_dictContentType_e dictContentType, + const ZSTD_CDict* cdict, ZSTD_CCtx_params params, + unsigned long long pledgedSrcSize) +{ + DEBUGLOG(4, "ZSTDMT_initCStream_internal (pledgedSrcSize=%u, nbWorkers=%u, cctxPool=%u)", + (U32)pledgedSrcSize, params.nbWorkers, mtctx->cctxPool->totalCCtx); + + /* params supposed partially fully validated at this point */ + assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); + assert(!((dict) && (cdict))); /* either dict or cdict, not both */ + + /* init */ + if (params.nbWorkers != mtctx->params.nbWorkers) + CHECK_F( ZSTDMT_resize(mtctx, params.nbWorkers) ); + + if (params.jobSize > 0 && params.jobSize < ZSTDMT_JOBSIZE_MIN) params.jobSize = ZSTDMT_JOBSIZE_MIN; + if (params.jobSize > ZSTDMT_JOBSIZE_MAX) params.jobSize = ZSTDMT_JOBSIZE_MAX; + + mtctx->singleBlockingThread = (pledgedSrcSize <= ZSTDMT_JOBSIZE_MIN); /* do not trigger multi-threading when srcSize is too small */ + if (mtctx->singleBlockingThread) { + ZSTD_CCtx_params const singleThreadParams = ZSTDMT_initJobCCtxParams(params); + DEBUGLOG(5, "ZSTDMT_initCStream_internal: switch to single blocking thread mode"); + assert(singleThreadParams.nbWorkers == 0); + return ZSTD_initCStream_internal(mtctx->cctxPool->cctx[0], + dict, dictSize, cdict, + singleThreadParams, pledgedSrcSize); + } + + DEBUGLOG(4, "ZSTDMT_initCStream_internal: %u workers", params.nbWorkers); + + if (mtctx->allJobsCompleted == 0) { /* previous compression not correctly finished */ + ZSTDMT_waitForAllJobsCompleted(mtctx); + 
ZSTDMT_releaseAllJobResources(mtctx); + mtctx->allJobsCompleted = 1; + } + + mtctx->params = params; + mtctx->frameContentSize = pledgedSrcSize; + if (dict) { + ZSTD_freeCDict(mtctx->cdictLocal); + mtctx->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize, + ZSTD_dlm_byCopy, dictContentType, /* note : a loadPrefix becomes an internal CDict */ + params.cParams, mtctx->cMem); + mtctx->cdict = mtctx->cdictLocal; + if (mtctx->cdictLocal == NULL) return ERROR(memory_allocation); + } else { + ZSTD_freeCDict(mtctx->cdictLocal); + mtctx->cdictLocal = NULL; + mtctx->cdict = cdict; + } + + mtctx->targetPrefixSize = (size_t)1 << ZSTDMT_computeOverlapLog(params); + DEBUGLOG(4, "overlapLog=%u => %u KB", params.overlapSizeLog, (U32)(mtctx->targetPrefixSize>>10)); + mtctx->targetSectionSize = params.jobSize; + if (mtctx->targetSectionSize == 0) { + mtctx->targetSectionSize = 1ULL << ZSTDMT_computeTargetJobLog(params); + } + if (mtctx->targetSectionSize < mtctx->targetPrefixSize) mtctx->targetSectionSize = mtctx->targetPrefixSize; /* job size must be >= overlap size */ + DEBUGLOG(4, "Job Size : %u KB (note : set to %u)", (U32)(mtctx->targetSectionSize>>10), params.jobSize); + DEBUGLOG(4, "inBuff Size : %u KB", (U32)(mtctx->targetSectionSize>>10)); + ZSTDMT_setBufferSize(mtctx->bufPool, ZSTD_compressBound(mtctx->targetSectionSize)); + { + /* If ldm is enabled we need windowSize space. */ + size_t const windowSize = mtctx->params.ldmParams.enableLdm ? (1U << mtctx->params.cParams.windowLog) : 0; + /* Two buffers of slack, plus extra space for the overlap + * This is the minimum slack that LDM works with. One extra because + * flush might waste up to targetSectionSize-1 bytes. Another extra + * for the overlap (if > 0), then one to fill which doesn't overlap + * with the LDM window. 
+ */ + size_t const nbSlackBuffers = 2 + (mtctx->targetPrefixSize > 0); + size_t const slackSize = mtctx->targetSectionSize * nbSlackBuffers; + /* Compute the total size, and always have enough slack */ + size_t const nbWorkers = MAX(mtctx->params.nbWorkers, 1); + size_t const sectionsSize = mtctx->targetSectionSize * nbWorkers; + size_t const capacity = MAX(windowSize, sectionsSize) + slackSize; + if (mtctx->roundBuff.capacity < capacity) { + if (mtctx->roundBuff.buffer) + ZSTD_free(mtctx->roundBuff.buffer, mtctx->cMem); + mtctx->roundBuff.buffer = (BYTE*)ZSTD_malloc(capacity, mtctx->cMem); + if (mtctx->roundBuff.buffer == NULL) { + mtctx->roundBuff.capacity = 0; + return ERROR(memory_allocation); + } + mtctx->roundBuff.capacity = capacity; + } + } + DEBUGLOG(4, "roundBuff capacity : %u KB", (U32)(mtctx->roundBuff.capacity>>10)); + mtctx->roundBuff.pos = 0; + mtctx->inBuff.buffer = g_nullBuffer; + mtctx->inBuff.filled = 0; + mtctx->inBuff.prefix = kNullRange; + mtctx->doneJobID = 0; + mtctx->nextJobID = 0; + mtctx->frameEnded = 0; + mtctx->allJobsCompleted = 0; + mtctx->consumed = 0; + mtctx->produced = 0; + if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params, mtctx->targetSectionSize)) + return ERROR(memory_allocation); + return 0; +} + +size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx, + const void* dict, size_t dictSize, + ZSTD_parameters params, + unsigned long long pledgedSrcSize) +{ + ZSTD_CCtx_params cctxParams = mtctx->params; /* retrieve sticky params */ + DEBUGLOG(4, "ZSTDMT_initCStream_advanced (pledgedSrcSize=%u)", (U32)pledgedSrcSize); + cctxParams.cParams = params.cParams; + cctxParams.fParams = params.fParams; + return ZSTDMT_initCStream_internal(mtctx, dict, dictSize, ZSTD_dct_auto, NULL, + cctxParams, pledgedSrcSize); +} + +size_t ZSTDMT_initCStream_usingCDict(ZSTDMT_CCtx* mtctx, + const ZSTD_CDict* cdict, + ZSTD_frameParameters fParams, + unsigned long long pledgedSrcSize) +{ + ZSTD_CCtx_params cctxParams = mtctx->params; + 
if (cdict==NULL) return ERROR(dictionary_wrong); /* method incompatible with NULL cdict */ + cctxParams.cParams = ZSTD_getCParamsFromCDict(cdict); + cctxParams.fParams = fParams; + return ZSTDMT_initCStream_internal(mtctx, NULL, 0 /*dictSize*/, ZSTD_dct_auto, cdict, + cctxParams, pledgedSrcSize); +} + + +/* ZSTDMT_resetCStream() : + * pledgedSrcSize can be zero == unknown (for the time being) + * prefer using ZSTD_CONTENTSIZE_UNKNOWN, + * as `0` might mean "empty" in the future */ +size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* mtctx, unsigned long long pledgedSrcSize) +{ + if (!pledgedSrcSize) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN; + return ZSTDMT_initCStream_internal(mtctx, NULL, 0, ZSTD_dct_auto, 0, mtctx->params, + pledgedSrcSize); +} + +size_t ZSTDMT_initCStream(ZSTDMT_CCtx* mtctx, int compressionLevel) { + ZSTD_parameters const params = ZSTD_getParams(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0); + ZSTD_CCtx_params cctxParams = mtctx->params; /* retrieve sticky params */ + DEBUGLOG(4, "ZSTDMT_initCStream (cLevel=%i)", compressionLevel); + cctxParams.cParams = params.cParams; + cctxParams.fParams = params.fParams; + return ZSTDMT_initCStream_internal(mtctx, NULL, 0, ZSTD_dct_auto, NULL, cctxParams, ZSTD_CONTENTSIZE_UNKNOWN); +} + + +/* ZSTDMT_writeLastEmptyBlock() + * Write a single empty block with an end-of-frame to finish a frame. + * Job must be created from streaming variant. + * This function is always successful if expected conditions are fulfilled.
+ */ +static void ZSTDMT_writeLastEmptyBlock(ZSTDMT_jobDescription* job) +{ + assert(job->lastJob == 1); + assert(job->src.size == 0); /* last job is empty -> will be simplified into a last empty block */ + assert(job->firstJob == 0); /* cannot be first job, as it also needs to create frame header */ + assert(job->dstBuff.start == NULL); /* invoked from streaming variant only (otherwise, dstBuff might be user's output) */ + job->dstBuff = ZSTDMT_getBuffer(job->bufPool); + if (job->dstBuff.start == NULL) { + job->cSize = ERROR(memory_allocation); + return; + } + assert(job->dstBuff.capacity >= ZSTD_blockHeaderSize); /* no buffer should ever be that small */ + job->src = kNullRange; + job->cSize = ZSTD_writeLastEmptyBlock(job->dstBuff.start, job->dstBuff.capacity); + assert(!ZSTD_isError(job->cSize)); + assert(job->consumed == 0); +} + +static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* mtctx, size_t srcSize, ZSTD_EndDirective endOp) +{ + unsigned const jobID = mtctx->nextJobID & mtctx->jobIDMask; + int const endFrame = (endOp == ZSTD_e_end); + + if (mtctx->nextJobID > mtctx->doneJobID + mtctx->jobIDMask) { + DEBUGLOG(5, "ZSTDMT_createCompressionJob: will not create new job : table is full"); + assert((mtctx->nextJobID & mtctx->jobIDMask) == (mtctx->doneJobID & mtctx->jobIDMask)); + return 0; + } + + if (!mtctx->jobReady) { + BYTE const* src = (BYTE const*)mtctx->inBuff.buffer.start; + DEBUGLOG(5, "ZSTDMT_createCompressionJob: preparing job %u to compress %u bytes with %u preload ", + mtctx->nextJobID, (U32)srcSize, (U32)mtctx->inBuff.prefix.size); + mtctx->jobs[jobID].src.start = src; + mtctx->jobs[jobID].src.size = srcSize; + assert(mtctx->inBuff.filled >= srcSize); + mtctx->jobs[jobID].prefix = mtctx->inBuff.prefix; + mtctx->jobs[jobID].consumed = 0; + mtctx->jobs[jobID].cSize = 0; + mtctx->jobs[jobID].params = mtctx->params; + mtctx->jobs[jobID].cdict = mtctx->nextJobID==0 ? 
mtctx->cdict : NULL; + mtctx->jobs[jobID].fullFrameSize = mtctx->frameContentSize; + mtctx->jobs[jobID].dstBuff = g_nullBuffer; + mtctx->jobs[jobID].cctxPool = mtctx->cctxPool; + mtctx->jobs[jobID].bufPool = mtctx->bufPool; + mtctx->jobs[jobID].seqPool = mtctx->seqPool; + mtctx->jobs[jobID].serial = &mtctx->serial; + mtctx->jobs[jobID].jobID = mtctx->nextJobID; + mtctx->jobs[jobID].firstJob = (mtctx->nextJobID==0); + mtctx->jobs[jobID].lastJob = endFrame; + mtctx->jobs[jobID].frameChecksumNeeded = endFrame && (mtctx->nextJobID>0) && mtctx->params.fParams.checksumFlag; + mtctx->jobs[jobID].dstFlushed = 0; + + /* Update the round buffer pos and clear the input buffer to be reset */ + mtctx->roundBuff.pos += srcSize; + mtctx->inBuff.buffer = g_nullBuffer; + mtctx->inBuff.filled = 0; + /* Set the prefix */ + if (!endFrame) { + size_t const newPrefixSize = MIN(srcSize, mtctx->targetPrefixSize); + mtctx->inBuff.prefix.start = src + srcSize - newPrefixSize; + mtctx->inBuff.prefix.size = newPrefixSize; + } else { /* endFrame==1 => no need for another input buffer */ + mtctx->inBuff.prefix = kNullRange; + mtctx->frameEnded = endFrame; + if (mtctx->nextJobID == 0) { + /* single job exception : checksum is already calculated directly within worker thread */ + mtctx->params.fParams.checksumFlag = 0; + } } + + if ( (srcSize == 0) + && (mtctx->nextJobID>0)/*single job must also write frame header*/ ) { + DEBUGLOG(5, "ZSTDMT_createCompressionJob: creating a last empty block to end frame"); + assert(endOp == ZSTD_e_end); /* only possible case : need to end the frame with an empty last block */ + ZSTDMT_writeLastEmptyBlock(mtctx->jobs + jobID); + mtctx->nextJobID++; + return 0; + } + } + + DEBUGLOG(5, "ZSTDMT_createCompressionJob: posting job %u : %u bytes (end:%u, jobNb == %u (mod:%u))", + mtctx->nextJobID, + (U32)mtctx->jobs[jobID].src.size, + mtctx->jobs[jobID].lastJob, + mtctx->nextJobID, + jobID); + if (POOL_tryAdd(mtctx->factory, ZSTDMT_compressionJob, &mtctx->jobs[jobID])) { 
+ mtctx->nextJobID++; + mtctx->jobReady = 0; + } else { + DEBUGLOG(5, "ZSTDMT_createCompressionJob: no worker available for job %u", mtctx->nextJobID); + mtctx->jobReady = 1; + } + return 0; +} + + +/*! ZSTDMT_flushProduced() : + * `output` : `pos` will be updated with amount of data flushed . + * `blockToFlush` : if >0, the function will block and wait if there is no data available to flush . + * @return : amount of data remaining within internal buffer, 0 if no more, 1 if unknown but > 0, or an error code */ +static size_t ZSTDMT_flushProduced(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, unsigned blockToFlush, ZSTD_EndDirective end) +{ + unsigned const wJobID = mtctx->doneJobID & mtctx->jobIDMask; + DEBUGLOG(5, "ZSTDMT_flushProduced (blocking:%u , job %u <= %u)", + blockToFlush, mtctx->doneJobID, mtctx->nextJobID); + assert(output->size >= output->pos); + + ZSTD_PTHREAD_MUTEX_LOCK(&mtctx->jobs[wJobID].job_mutex); + if ( blockToFlush + && (mtctx->doneJobID < mtctx->nextJobID) ) { + assert(mtctx->jobs[wJobID].dstFlushed <= mtctx->jobs[wJobID].cSize); + while (mtctx->jobs[wJobID].dstFlushed == mtctx->jobs[wJobID].cSize) { /* nothing to flush */ + if (mtctx->jobs[wJobID].consumed == mtctx->jobs[wJobID].src.size) { + DEBUGLOG(5, "job %u is completely consumed (%u == %u) => don't wait for cond, there will be none", + mtctx->doneJobID, (U32)mtctx->jobs[wJobID].consumed, (U32)mtctx->jobs[wJobID].src.size); + break; + } + DEBUGLOG(5, "waiting for something to flush from job %u (currently flushed: %u bytes)", + mtctx->doneJobID, (U32)mtctx->jobs[wJobID].dstFlushed); + ZSTD_pthread_cond_wait(&mtctx->jobs[wJobID].job_cond, &mtctx->jobs[wJobID].job_mutex); /* block when nothing to flush but some to come */ + } } + + /* try to flush something */ + { size_t cSize = mtctx->jobs[wJobID].cSize; /* shared */ + size_t const srcConsumed = mtctx->jobs[wJobID].consumed; /* shared */ + size_t const srcSize = mtctx->jobs[wJobID].src.size; /* read-only, could be done after mutex lock, but 
no-declaration-after-statement */ + ZSTD_pthread_mutex_unlock(&mtctx->jobs[wJobID].job_mutex); + if (ZSTD_isError(cSize)) { + DEBUGLOG(5, "ZSTDMT_flushProduced: job %u : compression error detected : %s", + mtctx->doneJobID, ZSTD_getErrorName(cSize)); + ZSTDMT_waitForAllJobsCompleted(mtctx); + ZSTDMT_releaseAllJobResources(mtctx); + return cSize; + } + /* add frame checksum if necessary (can only happen once) */ + assert(srcConsumed <= srcSize); + if ( (srcConsumed == srcSize) /* job completed -> worker no longer active */ + && mtctx->jobs[wJobID].frameChecksumNeeded ) { + U32 const checksum = (U32)XXH64_digest(&mtctx->serial.xxhState); + DEBUGLOG(4, "ZSTDMT_flushProduced: writing checksum : %08X \n", checksum); + MEM_writeLE32((char*)mtctx->jobs[wJobID].dstBuff.start + mtctx->jobs[wJobID].cSize, checksum); + cSize += 4; + mtctx->jobs[wJobID].cSize += 4; /* can write this shared value, as worker is no longer active */ + mtctx->jobs[wJobID].frameChecksumNeeded = 0; + } + if (cSize > 0) { /* compression is ongoing or completed */ + size_t const toFlush = MIN(cSize - mtctx->jobs[wJobID].dstFlushed, output->size - output->pos); + DEBUGLOG(5, "ZSTDMT_flushProduced: Flushing %u bytes from job %u (completion:%u/%u, generated:%u)", + (U32)toFlush, mtctx->doneJobID, (U32)srcConsumed, (U32)srcSize, (U32)cSize); + assert(mtctx->doneJobID < mtctx->nextJobID); + assert(cSize >= mtctx->jobs[wJobID].dstFlushed); + assert(mtctx->jobs[wJobID].dstBuff.start != NULL); + memcpy((char*)output->dst + output->pos, + (const char*)mtctx->jobs[wJobID].dstBuff.start + mtctx->jobs[wJobID].dstFlushed, + toFlush); + output->pos += toFlush; + mtctx->jobs[wJobID].dstFlushed += toFlush; /* can write : this value is only used by mtctx */ + + if ( (srcConsumed == srcSize) /* job completed */ + && (mtctx->jobs[wJobID].dstFlushed == cSize) ) { /* output buffer fully flushed => free this job position */ + DEBUGLOG(5, "Job %u completed (%u bytes), moving to next one", + mtctx->doneJobID, 
(U32)mtctx->jobs[wJobID].dstFlushed); + ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[wJobID].dstBuff); + mtctx->jobs[wJobID].dstBuff = g_nullBuffer; + mtctx->jobs[wJobID].cSize = 0; /* ensure this job slot is considered "not started" in future check */ + mtctx->consumed += srcSize; + mtctx->produced += cSize; + mtctx->doneJobID++; + } } + + /* return value : how many bytes left in buffer ; fake it to 1 when unknown but >0 */ + if (cSize > mtctx->jobs[wJobID].dstFlushed) return (cSize - mtctx->jobs[wJobID].dstFlushed); + if (srcSize > srcConsumed) return 1; /* current job not completely compressed */ + } + if (mtctx->doneJobID < mtctx->nextJobID) return 1; /* some more jobs ongoing */ + if (mtctx->jobReady) return 1; /* one job is ready to push, just not yet in the list */ + if (mtctx->inBuff.filled > 0) return 1; /* input is not empty, and still needs to be converted into a job */ + mtctx->allJobsCompleted = mtctx->frameEnded; /* all jobs are entirely flushed => if this one is last one, frame is completed */ + if (end == ZSTD_e_end) return !mtctx->frameEnded; /* for ZSTD_e_end, question becomes : is frame completed ? instead of : are internal buffers fully flushed ? */ + return 0; /* internal buffers fully flushed */ +} + +/** + * Returns the range of data used by the earliest job that is not yet complete. + * If the data of the first job is broken up into two segments, we cover both + * sections. 
+ */ +static range_t ZSTDMT_getInputDataInUse(ZSTDMT_CCtx* mtctx) +{ + unsigned const firstJobID = mtctx->doneJobID; + unsigned const lastJobID = mtctx->nextJobID; + unsigned jobID; + + for (jobID = firstJobID; jobID < lastJobID; ++jobID) { + unsigned const wJobID = jobID & mtctx->jobIDMask; + size_t consumed; + + ZSTD_PTHREAD_MUTEX_LOCK(&mtctx->jobs[wJobID].job_mutex); + consumed = mtctx->jobs[wJobID].consumed; + ZSTD_pthread_mutex_unlock(&mtctx->jobs[wJobID].job_mutex); + + if (consumed < mtctx->jobs[wJobID].src.size) { + range_t range = mtctx->jobs[wJobID].prefix; + if (range.size == 0) { + /* Empty prefix */ + range = mtctx->jobs[wJobID].src; + } + /* Job source in multiple segments not supported yet */ + assert(range.start <= mtctx->jobs[wJobID].src.start); + return range; + } + } + return kNullRange; +} + +/** + * Returns non-zero iff buffer and range overlap. + */ +static int ZSTDMT_isOverlapped(buffer_t buffer, range_t range) +{ + BYTE const* const bufferStart = (BYTE const*)buffer.start; + BYTE const* const bufferEnd = bufferStart + buffer.capacity; + BYTE const* const rangeStart = (BYTE const*)range.start; + BYTE const* const rangeEnd = rangeStart + range.size; + + if (rangeStart == NULL || bufferStart == NULL) + return 0; + /* Empty ranges cannot overlap */ + if (bufferStart == bufferEnd || rangeStart == rangeEnd) + return 0; + + return bufferStart < rangeEnd && rangeStart < bufferEnd; +} + +static int ZSTDMT_doesOverlapWindow(buffer_t buffer, ZSTD_window_t window) +{ + range_t extDict; + range_t prefix; + + extDict.start = window.dictBase + window.lowLimit; + extDict.size = window.dictLimit - window.lowLimit; + + prefix.start = window.base + window.dictLimit; + prefix.size = window.nextSrc - (window.base + window.dictLimit); + DEBUGLOG(5, "extDict [0x%zx, 0x%zx)", + (size_t)extDict.start, + (size_t)extDict.start + extDict.size); + DEBUGLOG(5, "prefix [0x%zx, 0x%zx)", + (size_t)prefix.start, + (size_t)prefix.start + prefix.size); + + return 
ZSTDMT_isOverlapped(buffer, extDict) + || ZSTDMT_isOverlapped(buffer, prefix); +} + +static void ZSTDMT_waitForLdmComplete(ZSTDMT_CCtx* mtctx, buffer_t buffer) +{ + if (mtctx->params.ldmParams.enableLdm) { + ZSTD_pthread_mutex_t* mutex = &mtctx->serial.ldmWindowMutex; + DEBUGLOG(5, "source [0x%zx, 0x%zx)", + (size_t)buffer.start, + (size_t)buffer.start + buffer.capacity); + ZSTD_PTHREAD_MUTEX_LOCK(mutex); + while (ZSTDMT_doesOverlapWindow(buffer, mtctx->serial.ldmWindow)) { + DEBUGLOG(6, "Waiting for LDM to finish..."); + ZSTD_pthread_cond_wait(&mtctx->serial.ldmWindowCond, mutex); + } + DEBUGLOG(6, "Done waiting for LDM to finish"); + ZSTD_pthread_mutex_unlock(mutex); + } +} + +/** + * Attempts to set the inBuff to the next section to fill. + * If any part of the new section is still in use we give up. + * Returns non-zero if the buffer is filled. + */ +static int ZSTDMT_tryGetInputRange(ZSTDMT_CCtx* mtctx) +{ + range_t const inUse = ZSTDMT_getInputDataInUse(mtctx); + size_t const spaceLeft = mtctx->roundBuff.capacity - mtctx->roundBuff.pos; + size_t const target = mtctx->targetSectionSize; + buffer_t buffer; + + assert(mtctx->inBuff.buffer.start == NULL); + assert(mtctx->roundBuff.capacity >= target); + + if (spaceLeft < target) { + /* ZSTD_invalidateRepCodes() doesn't work for extDict variants. + * Simply copy the prefix to the beginning in that case. 
+ */ + BYTE* const start = (BYTE*)mtctx->roundBuff.buffer; + size_t const prefixSize = mtctx->inBuff.prefix.size; + + buffer.start = start; + buffer.capacity = prefixSize; + if (ZSTDMT_isOverlapped(buffer, inUse)) { + DEBUGLOG(6, "Waiting for buffer..."); + return 0; + } + ZSTDMT_waitForLdmComplete(mtctx, buffer); + memmove(start, mtctx->inBuff.prefix.start, prefixSize); + mtctx->inBuff.prefix.start = start; + mtctx->roundBuff.pos = prefixSize; + } + buffer.start = mtctx->roundBuff.buffer + mtctx->roundBuff.pos; + buffer.capacity = target; + + if (ZSTDMT_isOverlapped(buffer, inUse)) { + DEBUGLOG(6, "Waiting for buffer..."); + return 0; + } + assert(!ZSTDMT_isOverlapped(buffer, mtctx->inBuff.prefix)); + + ZSTDMT_waitForLdmComplete(mtctx, buffer); + + DEBUGLOG(5, "Using prefix range [%zx, %zx)", + (size_t)mtctx->inBuff.prefix.start, + (size_t)mtctx->inBuff.prefix.start + mtctx->inBuff.prefix.size); + DEBUGLOG(5, "Using source range [%zx, %zx)", + (size_t)buffer.start, + (size_t)buffer.start + buffer.capacity); + + + mtctx->inBuff.buffer = buffer; + mtctx->inBuff.filled = 0; + assert(mtctx->roundBuff.pos + buffer.capacity <= mtctx->roundBuff.capacity); + return 1; +} + + +/** ZSTDMT_compressStream_generic() : + * internal use only - exposed to be invoked from zstd_compress.c + * assumption : output and input are valid (pos <= size) + * @return : minimum amount of data remaining to flush, 0 if none */ +size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx, + ZSTD_outBuffer* output, + ZSTD_inBuffer* input, + ZSTD_EndDirective endOp) +{ + unsigned forwardInputProgress = 0; + DEBUGLOG(5, "ZSTDMT_compressStream_generic (endOp=%u, srcSize=%u)", + (U32)endOp, (U32)(input->size - input->pos)); + assert(output->pos <= output->size); + assert(input->pos <= input->size); + + if (mtctx->singleBlockingThread) { /* delegate to single-thread (synchronous) */ + return ZSTD_compressStream_generic(mtctx->cctxPool->cctx[0], output, input, endOp); + } + + if ((mtctx->frameEnded) && 
(endOp==ZSTD_e_continue)) { + /* current frame being ended. Only flush/end are allowed */ + return ERROR(stage_wrong); + } + + /* single-pass shortcut (note : synchronous-mode) */ + if ( (mtctx->nextJobID == 0) /* just started */ + && (mtctx->inBuff.filled == 0) /* nothing buffered */ + && (!mtctx->jobReady) /* no job already created */ + && (endOp == ZSTD_e_end) /* end order */ + && (output->size - output->pos >= ZSTD_compressBound(input->size - input->pos)) ) { /* enough space in dst */ + size_t const cSize = ZSTDMT_compress_advanced_internal(mtctx, + (char*)output->dst + output->pos, output->size - output->pos, + (const char*)input->src + input->pos, input->size - input->pos, + mtctx->cdict, mtctx->params); + if (ZSTD_isError(cSize)) return cSize; + input->pos = input->size; + output->pos += cSize; + mtctx->allJobsCompleted = 1; + mtctx->frameEnded = 1; + return 0; + } + + /* fill input buffer */ + if ( (!mtctx->jobReady) + && (input->size > input->pos) ) { /* support NULL input */ + if (mtctx->inBuff.buffer.start == NULL) { + assert(mtctx->inBuff.filled == 0); /* Can't fill an empty buffer */ + if (!ZSTDMT_tryGetInputRange(mtctx)) { + /* It is only possible for this operation to fail if there are + * still compression jobs ongoing. 
+ */ + assert(mtctx->doneJobID != mtctx->nextJobID); + } + } + if (mtctx->inBuff.buffer.start != NULL) { + size_t const toLoad = MIN(input->size - input->pos, mtctx->targetSectionSize - mtctx->inBuff.filled); + assert(mtctx->inBuff.buffer.capacity >= mtctx->targetSectionSize); + DEBUGLOG(5, "ZSTDMT_compressStream_generic: adding %u bytes on top of %u to buffer of size %u", + (U32)toLoad, (U32)mtctx->inBuff.filled, (U32)mtctx->targetSectionSize); + memcpy((char*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled, (const char*)input->src + input->pos, toLoad); + input->pos += toLoad; + mtctx->inBuff.filled += toLoad; + forwardInputProgress = toLoad>0; + } + if ((input->pos < input->size) && (endOp == ZSTD_e_end)) + endOp = ZSTD_e_flush; /* can't end now : not all input consumed */ + } + + if ( (mtctx->jobReady) + || (mtctx->inBuff.filled >= mtctx->targetSectionSize) /* filled enough : let's compress */ + || ((endOp != ZSTD_e_continue) && (mtctx->inBuff.filled > 0)) /* something to flush : let's go */ + || ((endOp == ZSTD_e_end) && (!mtctx->frameEnded)) ) { /* must finish the frame with a zero-size block */ + size_t const jobSize = mtctx->inBuff.filled; + assert(mtctx->inBuff.filled <= mtctx->targetSectionSize); + CHECK_F( ZSTDMT_createCompressionJob(mtctx, jobSize, endOp) ); + } + + /* check for potential compressed data ready to be flushed */ + { size_t const remainingToFlush = ZSTDMT_flushProduced(mtctx, output, !forwardInputProgress, endOp); /* block if there was no forward input progress */ + if (input->pos < input->size) return MAX(remainingToFlush, 1); /* input not consumed : do not end flush yet */ + return remainingToFlush; + } +} + + +size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input) +{ + CHECK_F( ZSTDMT_compressStream_generic(mtctx, output, input, ZSTD_e_continue) ); + + /* recommended next input size : fill current input buffer */ + return mtctx->targetSectionSize - mtctx->inBuff.filled; /* note : could be zero 
when input buffer is fully filled and no more availability to create new job */ +} + + +static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_EndDirective endFrame) +{ + size_t const srcSize = mtctx->inBuff.filled; + DEBUGLOG(5, "ZSTDMT_flushStream_internal"); + + if ( mtctx->jobReady /* one job ready for a worker to pick up */ + || (srcSize > 0) /* still some data within input buffer */ + || ((endFrame==ZSTD_e_end) && !mtctx->frameEnded)) { /* need a last 0-size block to end frame */ + DEBUGLOG(5, "ZSTDMT_flushStream_internal : create a new job (%u bytes, end:%u)", + (U32)srcSize, (U32)endFrame); + CHECK_F( ZSTDMT_createCompressionJob(mtctx, srcSize, endFrame) ); + } + + /* check if there is any data available to flush */ + return ZSTDMT_flushProduced(mtctx, output, 1 /* blockToFlush */, endFrame); +} + + +size_t ZSTDMT_flushStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output) +{ + DEBUGLOG(5, "ZSTDMT_flushStream"); + if (mtctx->singleBlockingThread) + return ZSTD_flushStream(mtctx->cctxPool->cctx[0], output); + return ZSTDMT_flushStream_internal(mtctx, output, ZSTD_e_flush); +} + +size_t ZSTDMT_endStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output) +{ + DEBUGLOG(4, "ZSTDMT_endStream"); + if (mtctx->singleBlockingThread) + return ZSTD_endStream(mtctx->cctxPool->cctx[0], output); + return ZSTDMT_flushStream_internal(mtctx, output, ZSTD_e_end); +} diff --git a/deps/SZ/zstd/compress/zstdmt_compress.h b/deps/SZ/zstd/compress/zstdmt_compress.h new file mode 100644 index 0000000000000000000000000000000000000000..34a475a42bffc0ed216b1f33e947270930375916 --- /dev/null +++ b/deps/SZ/zstd/compress/zstdmt_compress.h @@ -0,0 +1,156 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. 
+ * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + #ifndef ZSTDMT_COMPRESS_H + #define ZSTDMT_COMPRESS_H + + #if defined (__cplusplus) + extern "C" { + #endif + + +/* Note : This is an internal API. + * Some methods are still exposed (ZSTDLIB_API), + * because it used to be the only way to invoke MT compression. + * Now, it's recommended to use ZSTD_compress_generic() instead. + * These methods will stop being exposed in a future version */ + +/* === Dependencies === */ +#include /* size_t */ +#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_parameters */ +#include "zstd.h" /* ZSTD_inBuffer, ZSTD_outBuffer, ZSTDLIB_API */ + + +/* === Memory management === */ +typedef struct ZSTDMT_CCtx_s ZSTDMT_CCtx; +ZSTDLIB_API ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbWorkers); +ZSTDLIB_API ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers, + ZSTD_customMem cMem); +ZSTDLIB_API size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx); + +ZSTDLIB_API size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx); + + +/* === Simple one-pass compression function === */ + +ZSTDLIB_API size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + int compressionLevel); + + + +/* === Streaming functions === */ + +ZSTDLIB_API size_t ZSTDMT_initCStream(ZSTDMT_CCtx* mtctx, int compressionLevel); +ZSTDLIB_API size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* mtctx, unsigned long long pledgedSrcSize); /**< if srcSize is not known at reset time, use ZSTD_CONTENTSIZE_UNKNOWN. 
Note: for compatibility with older programs, 0 means the same as ZSTD_CONTENTSIZE_UNKNOWN, but it will change in the future to mean "empty" */ + +ZSTDLIB_API size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input); + +ZSTDLIB_API size_t ZSTDMT_flushStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output); /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */ +ZSTDLIB_API size_t ZSTDMT_endStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output); /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */ + + +/* === Advanced functions and parameters === */ + +#ifndef ZSTDMT_JOBSIZE_MIN +# define ZSTDMT_JOBSIZE_MIN (1U << 20) /* 1 MB - Minimum size of each compression job */ +#endif + +ZSTDLIB_API size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict, + ZSTD_parameters params, + unsigned overlapLog); + +ZSTDLIB_API size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx, + const void* dict, size_t dictSize, /* dict can be released after init, a local copy is preserved within zcs */ + ZSTD_parameters params, + unsigned long long pledgedSrcSize); /* pledgedSrcSize is optional and can be zero == unknown */ + +ZSTDLIB_API size_t ZSTDMT_initCStream_usingCDict(ZSTDMT_CCtx* mtctx, + const ZSTD_CDict* cdict, + ZSTD_frameParameters fparams, + unsigned long long pledgedSrcSize); /* note : zero means empty */ + +/* ZSTDMT_parameter : + * List of parameters that can be set using ZSTDMT_setMTCtxParameter() */ +typedef enum { + ZSTDMT_p_jobSize, /* Each job is compressed in parallel. By default, this value is dynamically determined depending on compression parameters. Can be set explicitly here. 
*/ + ZSTDMT_p_overlapSectionLog /* Each job may reload a part of previous job to enhance compression ratio; 0 == no overlap, 6(default) == use 1/8th of window, >=9 == use full window. This is a "sticky" parameter : its value will be re-used on next compression job */ +} ZSTDMT_parameter; + +/* ZSTDMT_setMTCtxParameter() : + * allow setting individual parameters, one at a time, among a list of enums defined in ZSTDMT_parameter. + * The function must be called typically after ZSTDMT_createCCtx() but __before ZSTDMT_init*() !__ + * Parameters not explicitly reset by ZSTDMT_init*() remain the same in consecutive compression sessions. + * @return : 0, or an error code (which can be tested using ZSTD_isError()) */ +ZSTDLIB_API size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, unsigned value); + +/* ZSTDMT_getMTCtxParameter() : + * Query the ZSTDMT_CCtx for a parameter value; the value is written through `value`. + * @return : 0, or an error code (which can be tested using ZSTD_isError()) */ +ZSTDLIB_API size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, unsigned* value); + + +/*! ZSTDMT_compressStream_generic() : + * Combines ZSTDMT_compressStream() with optional ZSTDMT_flushStream() or ZSTDMT_endStream() + * depending on flush directive. + * @return : minimum amount of data still to be flushed + * 0 if fully flushed + * or an error code + * note : needs to be init using any ZSTD_initCStream*() variant */ +ZSTDLIB_API size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx, + ZSTD_outBuffer* output, + ZSTD_inBuffer* input, + ZSTD_EndDirective endOp); + + +/* ======================================================== + * === Private interface, for use by ZSTD_compress.c === + * === Not exposed in libzstd.
Never invoke directly === + * ======================================================== */ + +size_t ZSTDMT_CCtxParam_setMTCtxParameter(ZSTD_CCtx_params* params, ZSTDMT_parameter parameter, unsigned value); + +/* ZSTDMT_CCtxParam_setNbWorkers() + * Set nbWorkers, and clamp it. + * Also reset jobSize and overlapLog */ +size_t ZSTDMT_CCtxParam_setNbWorkers(ZSTD_CCtx_params* params, unsigned nbWorkers); + +/*! ZSTDMT_updateCParams_whileCompressing() : + * Updates only a selected set of compression parameters, to remain compatible with current frame. + * New parameters will be applied to next compression job. */ +void ZSTDMT_updateCParams_whileCompressing(ZSTDMT_CCtx* mtctx, const ZSTD_CCtx_params* cctxParams); + +/* ZSTDMT_getFrameProgression(): + * tells how much data has been consumed (input) and produced (output) for current frame. + * able to count progression inside worker threads. + */ +ZSTD_frameProgression ZSTDMT_getFrameProgression(ZSTDMT_CCtx* mtctx); + + +/*! ZSTDMT_initCStream_internal() : + * Private use only. Init streaming operation. + * expects params to be valid. + * must receive dict, or cdict, or none, but not both. + * @return : 0, or an error code */ +size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs, + const void* dict, size_t dictSize, ZSTD_dictContentType_e dictContentType, + const ZSTD_CDict* cdict, + ZSTD_CCtx_params params, unsigned long long pledgedSrcSize); + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTDMT_COMPRESS_H */ diff --git a/deps/SZ/zstd/decompress/huf_decompress.c b/deps/SZ/zstd/decompress/huf_decompress.c new file mode 100644 index 0000000000000000000000000000000000000000..a696261bd6383785195e45205fdd5168a57807fc --- /dev/null +++ b/deps/SZ/zstd/decompress/huf_decompress.c @@ -0,0 +1,1096 @@ +/* ****************************************************************** + huff0 huffman decoder, + part of Finite State Entropy library + Copyright (C) 2013-present, Yann Collet. 
+ + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + You can contact the author at : + - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy +****************************************************************** */ + +/* ************************************************************** +* Dependencies +****************************************************************/ +#include /* memcpy, memset */ +#include "compiler.h" +#include "bitstream.h" /* BIT_* */ +#include "fse.h" /* to compress headers */ +#define HUF_STATIC_LINKING_ONLY +#include "huf.h" +#include "error_private.h" + + +/* ************************************************************** +* Error Management +****************************************************************/ +#define HUF_isError ERR_isError +#define CHECK_F(f) { size_t const err_ = (f); if (HUF_isError(err_)) return err_; } + + +/* ************************************************************** +* Byte alignment for workSpace management +****************************************************************/ +#define HUF_ALIGN(x, a) HUF_ALIGN_MASK((x), (a) - 1) +#define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask)) + + +/*-***************************/ +/* generic DTableDesc */ +/*-***************************/ +typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved; } DTableDesc; + +static DTableDesc HUF_getDTableDesc(const HUF_DTable* table) +{ + DTableDesc dtd; + memcpy(&dtd, table, sizeof(dtd)); + return dtd; +} + + +/*-***************************/ +/* single-symbol decoding */ +/*-***************************/ +typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX1; /* single-symbol decoding */ + +size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize) +{ + U32 tableLog = 0; + U32 nbSymbols = 0; + size_t iSize; + void* const dtPtr = DTable + 1; + HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr; + + U32* rankVal; + BYTE* huffWeight; + size_t spaceUsed32 = 0; + + rankVal = (U32 *)workSpace + 
spaceUsed32; + spaceUsed32 += HUF_TABLELOG_ABSOLUTEMAX + 1; + huffWeight = (BYTE *)((U32 *)workSpace + spaceUsed32); + spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2; + + if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge); + + DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable)); + /* memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... */ + + iSize = HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize); + if (HUF_isError(iSize)) return iSize; + + /* Table header */ + { DTableDesc dtd = HUF_getDTableDesc(DTable); + if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge); /* DTable too small, Huffman tree cannot fit in */ + dtd.tableType = 0; + dtd.tableLog = (BYTE)tableLog; + memcpy(DTable, &dtd, sizeof(dtd)); + } + + /* Calculate starting value for each rank */ + { U32 n, nextRankStart = 0; + for (n=1; n> 1; + U32 u; + HUF_DEltX1 D; + D.byte = (BYTE)n; D.nbBits = (BYTE)(tableLog + 1 - w); + for (u = rankVal[w]; u < rankVal[w] + length; u++) + dt[u] = D; + rankVal[w] += length; + } } + + return iSize; +} + +size_t HUF_readDTableX1(HUF_DTable* DTable, const void* src, size_t srcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_readDTableX1_wksp(DTable, src, srcSize, + workSpace, sizeof(workSpace)); +} + +FORCE_INLINE_TEMPLATE BYTE +HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog) +{ + size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */ + BYTE const c = dt[val].byte; + BIT_skipBits(Dstream, dt[val].nbBits); + return c; +} + +#define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \ + *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog) + +#define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr) \ + if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \ + HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) + +#define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \ + if 
(MEM_64bits()) \ + HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) + +HINT_INLINE size_t +HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog) +{ + BYTE* const pStart = p; + + /* up to 4 symbols at a time */ + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) { + HUF_DECODE_SYMBOLX1_2(p, bitDPtr); + HUF_DECODE_SYMBOLX1_1(p, bitDPtr); + HUF_DECODE_SYMBOLX1_2(p, bitDPtr); + HUF_DECODE_SYMBOLX1_0(p, bitDPtr); + } + + /* [0-3] symbols remaining */ + if (MEM_32bits()) + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd)) + HUF_DECODE_SYMBOLX1_0(p, bitDPtr); + + /* no more data to retrieve from bitstream, no need to reload */ + while (p < pEnd) + HUF_DECODE_SYMBOLX1_0(p, bitDPtr); + + return pEnd-pStart; +} + +FORCE_INLINE_TEMPLATE size_t +HUF_decompress1X1_usingDTable_internal_body( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + BYTE* op = (BYTE*)dst; + BYTE* const oend = op + dstSize; + const void* dtPtr = DTable + 1; + const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr; + BIT_DStream_t bitD; + DTableDesc const dtd = HUF_getDTableDesc(DTable); + U32 const dtLog = dtd.tableLog; + + CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) ); + + HUF_decodeStreamX1(op, &bitD, oend, dt, dtLog); + + if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected); + + return dstSize; +} + +FORCE_INLINE_TEMPLATE size_t +HUF_decompress4X1_usingDTable_internal_body( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + /* Check */ + if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ + + { const BYTE* const istart = (const BYTE*) cSrc; + BYTE* const ostart = (BYTE*) dst; + BYTE* const oend = ostart + dstSize; + const void* const dtPtr = DTable + 1; + const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr; + + /* Init */ + 
BIT_DStream_t bitD1; + BIT_DStream_t bitD2; + BIT_DStream_t bitD3; + BIT_DStream_t bitD4; + size_t const length1 = MEM_readLE16(istart); + size_t const length2 = MEM_readLE16(istart+2); + size_t const length3 = MEM_readLE16(istart+4); + size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6); + const BYTE* const istart1 = istart + 6; /* jumpTable */ + const BYTE* const istart2 = istart1 + length1; + const BYTE* const istart3 = istart2 + length2; + const BYTE* const istart4 = istart3 + length3; + const size_t segmentSize = (dstSize+3) / 4; + BYTE* const opStart2 = ostart + segmentSize; + BYTE* const opStart3 = opStart2 + segmentSize; + BYTE* const opStart4 = opStart3 + segmentSize; + BYTE* op1 = ostart; + BYTE* op2 = opStart2; + BYTE* op3 = opStart3; + BYTE* op4 = opStart4; + U32 endSignal = BIT_DStream_unfinished; + DTableDesc const dtd = HUF_getDTableDesc(DTable); + U32 const dtLog = dtd.tableLog; + + if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ + CHECK_F( BIT_initDStream(&bitD1, istart1, length1) ); + CHECK_F( BIT_initDStream(&bitD2, istart2, length2) ); + CHECK_F( BIT_initDStream(&bitD3, istart3, length3) ); + CHECK_F( BIT_initDStream(&bitD4, istart4, length4) ); + + /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */ + endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); + while ( (endSignal==BIT_DStream_unfinished) && (op4<(oend-3)) ) { + HUF_DECODE_SYMBOLX1_2(op1, &bitD1); + HUF_DECODE_SYMBOLX1_2(op2, &bitD2); + HUF_DECODE_SYMBOLX1_2(op3, &bitD3); + HUF_DECODE_SYMBOLX1_2(op4, &bitD4); + HUF_DECODE_SYMBOLX1_1(op1, &bitD1); + HUF_DECODE_SYMBOLX1_1(op2, &bitD2); + HUF_DECODE_SYMBOLX1_1(op3, &bitD3); + HUF_DECODE_SYMBOLX1_1(op4, &bitD4); + HUF_DECODE_SYMBOLX1_2(op1, &bitD1); + HUF_DECODE_SYMBOLX1_2(op2, &bitD2); + HUF_DECODE_SYMBOLX1_2(op3, &bitD3); + HUF_DECODE_SYMBOLX1_2(op4, &bitD4); + HUF_DECODE_SYMBOLX1_0(op1, &bitD1); + 
HUF_DECODE_SYMBOLX1_0(op2, &bitD2); + HUF_DECODE_SYMBOLX1_0(op3, &bitD3); + HUF_DECODE_SYMBOLX1_0(op4, &bitD4); + BIT_reloadDStream(&bitD1); + BIT_reloadDStream(&bitD2); + BIT_reloadDStream(&bitD3); + BIT_reloadDStream(&bitD4); + } + + /* check corruption */ + /* note : should not be necessary : op# advance in lock step, and we control op4. + * but curiously, binary generated by gcc 7.2 & 7.3 with -mbmi2 runs faster when >=1 test is present */ + if (op1 > opStart2) return ERROR(corruption_detected); + if (op2 > opStart3) return ERROR(corruption_detected); + if (op3 > opStart4) return ERROR(corruption_detected); + /* note : op4 supposed already verified within main loop */ + + /* finish bitStreams one by one */ + HUF_decodeStreamX1(op1, &bitD1, opStart2, dt, dtLog); + HUF_decodeStreamX1(op2, &bitD2, opStart3, dt, dtLog); + HUF_decodeStreamX1(op3, &bitD3, opStart4, dt, dtLog); + HUF_decodeStreamX1(op4, &bitD4, oend, dt, dtLog); + + /* check */ + { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); + if (!endCheck) return ERROR(corruption_detected); } + + /* decoded size */ + return dstSize; + } +} + + +typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize, + const void *cSrc, + size_t cSrcSize, + const HUF_DTable *DTable); +#if DYNAMIC_BMI2 + +#define HUF_DGEN(fn) \ + \ + static size_t fn##_default( \ + void* dst, size_t dstSize, \ + const void* cSrc, size_t cSrcSize, \ + const HUF_DTable* DTable) \ + { \ + return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ + } \ + \ + static TARGET_ATTRIBUTE("bmi2") size_t fn##_bmi2( \ + void* dst, size_t dstSize, \ + const void* cSrc, size_t cSrcSize, \ + const HUF_DTable* DTable) \ + { \ + return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ + } \ + \ + static size_t fn(void* dst, size_t dstSize, void const* cSrc, \ + size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \ + { \ + if (bmi2) { \ + return fn##_bmi2(dst, 
dstSize, cSrc, cSrcSize, DTable); \ + } \ + return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable); \ + } + +#else + +#define HUF_DGEN(fn) \ + static size_t fn(void* dst, size_t dstSize, void const* cSrc, \ + size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \ + { \ + (void)bmi2; \ + return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ + } + +#endif + +HUF_DGEN(HUF_decompress1X1_usingDTable_internal) +HUF_DGEN(HUF_decompress4X1_usingDTable_internal) + + + +size_t HUF_decompress1X1_usingDTable( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc dtd = HUF_getDTableDesc(DTable); + if (dtd.tableType != 0) return ERROR(GENERIC); + return HUF_decompress1X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +} + +size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize) +{ + const BYTE* ip = (const BYTE*) cSrc; + + size_t const hSize = HUF_readDTableX1_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; cSrcSize -= hSize; + + return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0); +} + + +size_t HUF_decompress1X1_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress1X1_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); +} + +size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX); + return HUF_decompress1X1_DCtx (DTable, dst, dstSize, cSrc, cSrcSize); +} + +size_t HUF_decompress4X1_usingDTable( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc dtd = 
HUF_getDTableDesc(DTable); + if (dtd.tableType != 0) return ERROR(GENERIC); + return HUF_decompress4X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +} + +static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize, int bmi2) +{ + const BYTE* ip = (const BYTE*) cSrc; + + size_t const hSize = HUF_readDTableX1_wksp (dctx, cSrc, cSrcSize, + workSpace, wkspSize); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; cSrcSize -= hSize; + + return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); +} + +size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize) +{ + return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0); +} + + +size_t HUF_decompress4X1_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); +} +size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX); + return HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); +} + + +/* *************************/ +/* double-symbols decoding */ +/* *************************/ + +typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX2; /* double-symbols decoding */ +typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t; +typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1]; +typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX]; + + +/* HUF_fillDTableX2Level2() : + * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */ +static void 
HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 sizeLog, const U32 consumed, + const U32* rankValOrigin, const int minWeight, + const sortedSymbol_t* sortedSymbols, const U32 sortedListSize, + U32 nbBitsBaseline, U16 baseSeq) +{ + HUF_DEltX2 DElt; + U32 rankVal[HUF_TABLELOG_MAX + 1]; + + /* get pre-calculated rankVal */ + memcpy(rankVal, rankValOrigin, sizeof(rankVal)); + + /* fill skipped values */ + if (minWeight>1) { + U32 i, skipSize = rankVal[minWeight]; + MEM_writeLE16(&(DElt.sequence), baseSeq); + DElt.nbBits = (BYTE)(consumed); + DElt.length = 1; + for (i = 0; i < skipSize; i++) + DTable[i] = DElt; + } + + /* fill DTable */ + { U32 s; for (s=0; s<sortedListSize; s++) { /* note : sortedSymbols already skipped */ + const U32 symbol = sortedSymbols[s].symbol; + const U32 weight = sortedSymbols[s].weight; + const U32 nbBits = nbBitsBaseline - weight; + const U32 length = 1 << (sizeLog-nbBits); + const U32 start = rankVal[weight]; + U32 i = start; + const U32 end = start + length; + + MEM_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8))); /* first symbol in low byte, second symbol in high byte */ + DElt.nbBits = (BYTE)(nbBits + consumed); + DElt.length = 2; + do { DTable[i++] = DElt; } while (i<end); /* since length >= 1 */ + + rankVal[weight] += length; + } } +} + + +/* HUF_fillDTableX2() : + * fills DTable from sortedList : a symbol for which (targetLog-nbBits >= minBits) has its cell range + * filled by HUF_fillDTableX2Level2() ; any other symbol gets single-symbol entries (length == 1) */ +static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog, + const sortedSymbol_t* sortedList, const U32 sortedListSize, + const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight, + const U32 nbBitsBaseline) +{ + U32 rankVal[HUF_TABLELOG_MAX + 1]; + const int scaleLog = nbBitsBaseline - targetLog; /* note : targetLog >= srcLog, hence scaleLog <= 1 */ + const U32 minBits = nbBitsBaseline - maxWeight; + U32 s; + + memcpy(rankVal, rankValOrigin, sizeof(rankVal)); + + /* fill DTable */ + for (s=0; s<sortedListSize; s++) { + const U16 symbol = sortedList[s].symbol; + const U32 weight = sortedList[s].weight; + const U32 nbBits = nbBitsBaseline - weight; + const U32 start = rankVal[weight]; + const U32 length = 1 << (targetLog-nbBits); + + if (targetLog-nbBits >= minBits) { /* enough room for a second symbol */ + U32 sortedRank; + int minWeight = nbBits + scaleLog; + if (minWeight < 1) minWeight = 1; + sortedRank = rankStart[minWeight]; + HUF_fillDTableX2Level2(DTable+start, targetLog-nbBits, nbBits, + rankValOrigin[nbBits], minWeight, + sortedList+sortedRank, sortedListSize-sortedRank, + nbBitsBaseline, symbol); + } else { + HUF_DEltX2 DElt; + MEM_writeLE16(&(DElt.sequence), symbol); + DElt.nbBits = (BYTE)(nbBits); + DElt.length = 1; + { U32 const end = start + length; + U32 u; + for (u = start; u < end; u++) DTable[u] = DElt; + } } + rankVal[weight] += length; + } +} + +size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, const void* src, + size_t srcSize, void* workSpace, + size_t wkspSize) +{ + U32 tableLog,
maxW, sizeOfSort, nbSymbols; + DTableDesc dtd = HUF_getDTableDesc(DTable); + U32 const maxTableLog = dtd.maxTableLog; + size_t iSize; + void* dtPtr = DTable+1; /* force compiler to avoid strict-aliasing */ + HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr; + U32 *rankStart; + + rankValCol_t* rankVal; + U32* rankStats; + U32* rankStart0; + sortedSymbol_t* sortedSymbol; + BYTE* weightList; + size_t spaceUsed32 = 0; + + rankVal = (rankValCol_t *)((U32 *)workSpace + spaceUsed32); + spaceUsed32 += (sizeof(rankValCol_t) * HUF_TABLELOG_MAX) >> 2; + rankStats = (U32 *)workSpace + spaceUsed32; + spaceUsed32 += HUF_TABLELOG_MAX + 1; + rankStart0 = (U32 *)workSpace + spaceUsed32; + spaceUsed32 += HUF_TABLELOG_MAX + 2; + sortedSymbol = (sortedSymbol_t *)workSpace + (spaceUsed32 * sizeof(U32)) / sizeof(sortedSymbol_t); + spaceUsed32 += HUF_ALIGN(sizeof(sortedSymbol_t) * (HUF_SYMBOLVALUE_MAX + 1), sizeof(U32)) >> 2; + weightList = (BYTE *)((U32 *)workSpace + spaceUsed32); + spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2; + + if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge); + + rankStart = rankStart0 + 1; + memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1)); + + DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable)); /* if compiler fails here, assertion is wrong */ + if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); + /* memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... 
*/ + + iSize = HUF_readStats(weightList, HUF_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize); + if (HUF_isError(iSize)) return iSize; + + /* check result */ + if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge); /* DTable can't fit code depth */ + + /* find maxWeight */ + for (maxW = tableLog; rankStats[maxW]==0; maxW--) {} /* necessarily finds a solution before 0 */ + + /* Get start index of each weight */ + { U32 w, nextRankStart = 0; + for (w=1; w> consumed; + } } } } + + HUF_fillDTableX2(dt, maxTableLog, + sortedSymbol, sizeOfSort, + rankStart0, rankVal, maxW, + tableLog+1); + + dtd.tableLog = (BYTE)maxTableLog; + dtd.tableType = 1; + memcpy(DTable, &dtd, sizeof(dtd)); + return iSize; +} + +size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_readDTableX2_wksp(DTable, src, srcSize, + workSpace, sizeof(workSpace)); +} + + +FORCE_INLINE_TEMPLATE U32 +HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog) +{ + size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */ + memcpy(op, dt+val, 2); + BIT_skipBits(DStream, dt[val].nbBits); + return dt[val].length; +} + +FORCE_INLINE_TEMPLATE U32 +HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog) +{ + size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */ + memcpy(op, dt+val, 1); + if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits); + else { + if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) { + BIT_skipBits(DStream, dt[val].nbBits); + if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8)) + /* ugly hack; works only because it's the last symbol. 
Note : can't easily extract nbBits from just this symbol */ + DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8); + } } + return 1; +} + +#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \ + ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog) + +#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \ + if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \ + ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog) + +#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \ + if (MEM_64bits()) \ + ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog) + +HINT_INLINE size_t +HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, + const HUF_DEltX2* const dt, const U32 dtLog) +{ + BYTE* const pStart = p; + + /* up to 8 symbols at a time */ + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) { + HUF_DECODE_SYMBOLX2_2(p, bitDPtr); + HUF_DECODE_SYMBOLX2_1(p, bitDPtr); + HUF_DECODE_SYMBOLX2_2(p, bitDPtr); + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); + } + + /* closer to end : up to 2 symbols at a time */ + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2)) + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); + + while (p <= pEnd-2) + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); /* no need to reload : reached the end of DStream */ + + if (p < pEnd) + p += HUF_decodeLastSymbolX2(p, bitDPtr, dt, dtLog); + + return p-pStart; +} + +FORCE_INLINE_TEMPLATE size_t +HUF_decompress1X2_usingDTable_internal_body( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + BIT_DStream_t bitD; + + /* Init */ + CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) ); + + /* decode */ + { BYTE* const ostart = (BYTE*) dst; + BYTE* const oend = ostart + dstSize; + const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */ + const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr; + DTableDesc const dtd = HUF_getDTableDesc(DTable); + HUF_decodeStreamX2(ostart, &bitD, oend, dt, 
dtd.tableLog); + } + + /* check */ + if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected); + + /* decoded size */ + return dstSize; +} + + +FORCE_INLINE_TEMPLATE size_t +HUF_decompress4X2_usingDTable_internal_body( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ + + { const BYTE* const istart = (const BYTE*) cSrc; + BYTE* const ostart = (BYTE*) dst; + BYTE* const oend = ostart + dstSize; + const void* const dtPtr = DTable+1; + const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr; + + /* Init */ + BIT_DStream_t bitD1; + BIT_DStream_t bitD2; + BIT_DStream_t bitD3; + BIT_DStream_t bitD4; + size_t const length1 = MEM_readLE16(istart); + size_t const length2 = MEM_readLE16(istart+2); + size_t const length3 = MEM_readLE16(istart+4); + size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6); + const BYTE* const istart1 = istart + 6; /* jumpTable */ + const BYTE* const istart2 = istart1 + length1; + const BYTE* const istart3 = istart2 + length2; + const BYTE* const istart4 = istart3 + length3; + size_t const segmentSize = (dstSize+3) / 4; + BYTE* const opStart2 = ostart + segmentSize; + BYTE* const opStart3 = opStart2 + segmentSize; + BYTE* const opStart4 = opStart3 + segmentSize; + BYTE* op1 = ostart; + BYTE* op2 = opStart2; + BYTE* op3 = opStart3; + BYTE* op4 = opStart4; + U32 endSignal; + DTableDesc const dtd = HUF_getDTableDesc(DTable); + U32 const dtLog = dtd.tableLog; + + if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ + CHECK_F( BIT_initDStream(&bitD1, istart1, length1) ); + CHECK_F( BIT_initDStream(&bitD2, istart2, length2) ); + CHECK_F( BIT_initDStream(&bitD3, istart3, length3) ); + CHECK_F( BIT_initDStream(&bitD4, istart4, length4) ); + + /* 16-32 symbols per loop (4-8 symbols per stream) */ + endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) 
| BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); + for ( ; (endSignal==BIT_DStream_unfinished) & (op4<(oend-(sizeof(bitD4.bitContainer)-1))) ; ) { + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_1(op1, &bitD1); + HUF_DECODE_SYMBOLX2_1(op2, &bitD2); + HUF_DECODE_SYMBOLX2_1(op3, &bitD3); + HUF_DECODE_SYMBOLX2_1(op4, &bitD4); + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_0(op1, &bitD1); + HUF_DECODE_SYMBOLX2_0(op2, &bitD2); + HUF_DECODE_SYMBOLX2_0(op3, &bitD3); + HUF_DECODE_SYMBOLX2_0(op4, &bitD4); + + endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); + } + + /* check corruption */ + if (op1 > opStart2) return ERROR(corruption_detected); + if (op2 > opStart3) return ERROR(corruption_detected); + if (op3 > opStart4) return ERROR(corruption_detected); + /* note : op4 already verified within main loop */ + + /* finish bitStreams one by one */ + HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog); + HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog); + HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog); + HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog); + + /* check */ + { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); + if (!endCheck) return ERROR(corruption_detected); } + + /* decoded size */ + return dstSize; + } +} + +HUF_DGEN(HUF_decompress1X2_usingDTable_internal) +HUF_DGEN(HUF_decompress4X2_usingDTable_internal) + +size_t HUF_decompress1X2_usingDTable( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc dtd = HUF_getDTableDesc(DTable); + if (dtd.tableType != 1) return ERROR(GENERIC); + return 
HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +} + +size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize) +{ + const BYTE* ip = (const BYTE*) cSrc; + + size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize, + workSpace, wkspSize); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; cSrcSize -= hSize; + + return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0); +} + + +size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); +} + +size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX); + return HUF_decompress1X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); +} + +size_t HUF_decompress4X2_usingDTable( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc dtd = HUF_getDTableDesc(DTable); + if (dtd.tableType != 1) return ERROR(GENERIC); + return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +} + +static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize, int bmi2) +{ + const BYTE* ip = (const BYTE*) cSrc; + + size_t hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize, + workSpace, wkspSize); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; cSrcSize -= hSize; + + return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); +} + +size_t 
HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize) +{ + return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0); +} + + +size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); +} + +size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX); + return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); +} + + +/* ***********************************/ +/* Universal decompression selectors */ +/* ***********************************/ + +size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc const dtd = HUF_getDTableDesc(DTable); + return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) : + HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +} + +size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc const dtd = HUF_getDTableDesc(DTable); + return dtd.tableType ? 
HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) : + HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +} + + +typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t; +static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] = +{ + /* single, double, quad */ + {{0,0}, {1,1}, {2,2}}, /* Q==0 : impossible */ + {{0,0}, {1,1}, {2,2}}, /* Q==1 : impossible */ + {{ 38,130}, {1313, 74}, {2151, 38}}, /* Q == 2 : 12-18% */ + {{ 448,128}, {1353, 74}, {2238, 41}}, /* Q == 3 : 18-25% */ + {{ 556,128}, {1353, 74}, {2238, 47}}, /* Q == 4 : 25-32% */ + {{ 714,128}, {1418, 74}, {2436, 53}}, /* Q == 5 : 32-38% */ + {{ 883,128}, {1437, 74}, {2464, 61}}, /* Q == 6 : 38-44% */ + {{ 897,128}, {1515, 75}, {2622, 68}}, /* Q == 7 : 44-50% */ + {{ 926,128}, {1613, 75}, {2730, 75}}, /* Q == 8 : 50-56% */ + {{ 947,128}, {1729, 77}, {3359, 77}}, /* Q == 9 : 56-62% */ + {{1107,128}, {2083, 81}, {4006, 84}}, /* Q ==10 : 62-69% */ + {{1177,128}, {2379, 87}, {4785, 88}}, /* Q ==11 : 69-75% */ + {{1242,128}, {2415, 93}, {5155, 84}}, /* Q ==12 : 75-81% */ + {{1349,128}, {2644,106}, {5260,106}}, /* Q ==13 : 81-87% */ + {{1455,128}, {2422,124}, {4174,124}}, /* Q ==14 : 87-93% */ + {{ 722,128}, {1891,145}, {1936,146}}, /* Q ==15 : 93-99% */ +}; + +/** HUF_selectDecoder() : + * Tells which decoder is likely to decode faster, + * based on a set of pre-computed metrics. + * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 . + * Assumption : 0 < dstSize <= 128 KB */ +U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize) +{ + assert(dstSize > 0); + assert(dstSize <= 128*1024); + /* decoder timing evaluation */ + { U32 const Q = (cSrcSize >= dstSize) ? 
15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */ + U32 const D256 = (U32)(dstSize >> 8); + U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256); + U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256); + DTime1 += DTime1 >> 3; /* advantage to algorithm using less memory, to reduce cache eviction */ + return DTime1 < DTime0; +} } + + +typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); + +size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 }; + + /* validation checks */ + if (dstSize == 0) return ERROR(dstSize_tooSmall); + if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ + if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ + if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ + + { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); + return decompress[algoNb](dst, dstSize, cSrc, cSrcSize); + } +} + +size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + /* validation checks */ + if (dstSize == 0) return ERROR(dstSize_tooSmall); + if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ + if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ + if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ + + { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); + return algoNb ? 
HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) : + HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ; + } +} + +size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress4X_hufOnly_wksp(dctx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); +} + + +size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, + size_t dstSize, const void* cSrc, + size_t cSrcSize, void* workSpace, + size_t wkspSize) +{ + /* validation checks */ + if (dstSize == 0) return ERROR(dstSize_tooSmall); + if (cSrcSize == 0) return ERROR(corruption_detected); + + { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); + return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize): + HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); + } +} + +size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize) +{ + /* validation checks */ + if (dstSize == 0) return ERROR(dstSize_tooSmall); + if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ + if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ + if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ + + { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); + return algoNb ? 
HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc, + cSrcSize, workSpace, wkspSize): + HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc, + cSrcSize, workSpace, wkspSize); + } +} + +size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); +} + + +size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2) +{ + DTableDesc const dtd = HUF_getDTableDesc(DTable); + return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) : + HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); +} + +size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2) +{ + const BYTE* ip = (const BYTE*) cSrc; + + size_t const hSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; cSrcSize -= hSize; + + return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); +} + +size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2) +{ + DTableDesc const dtd = HUF_getDTableDesc(DTable); + return dtd.tableType ? 
HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) : + HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); +} + +size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2) +{ + /* validation checks */ + if (dstSize == 0) return ERROR(dstSize_tooSmall); + if (cSrcSize == 0) return ERROR(corruption_detected); + + { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); + return algoNb ? HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) : + HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); + } +} diff --git a/deps/SZ/zstd/decompress/zstd_decompress.c b/deps/SZ/zstd/decompress/zstd_decompress.c new file mode 100644 index 0000000000000000000000000000000000000000..8f4589d13938df181170904f92c04e207b9f0ded --- /dev/null +++ b/deps/SZ/zstd/decompress/zstd_decompress.c @@ -0,0 +1,3040 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + +/* *************************************************************** +* Tuning parameters +*****************************************************************/ +/*! + * HEAPMODE : + * Select how default decompression function ZSTD_decompress() allocates its context, + * on stack (0), or into heap (1, default; requires malloc()). + * Note that functions with explicit context such as ZSTD_decompressDCtx() are unaffected. + */ +#ifndef ZSTD_HEAPMODE +# define ZSTD_HEAPMODE 1 +#endif + +/*! 
+* LEGACY_SUPPORT : +* if set to 1+, ZSTD_decompress() can decode older formats (v0.1+) +*/ +#ifndef ZSTD_LEGACY_SUPPORT +# define ZSTD_LEGACY_SUPPORT 0 +#endif + +/*! + * MAXWINDOWSIZE_DEFAULT : + * maximum window size accepted by DStream __by default__. + * Frames requiring more memory will be rejected. + * It's possible to set a different limit using ZSTD_DCtx_setMaxWindowSize(). + */ +#ifndef ZSTD_MAXWINDOWSIZE_DEFAULT +# define ZSTD_MAXWINDOWSIZE_DEFAULT (((U32)1 << ZSTD_WINDOWLOG_DEFAULTMAX) + 1) +#endif + + +/*! + * NO_FORWARD_PROGRESS_MAX : + * maximum allowed nb of calls to ZSTD_decompressStream() and ZSTD_decompress_generic() + * without any forward progress + * (defined as: no byte read from input, and no byte flushed to output) + * before triggering an error. + */ +#ifndef ZSTD_NO_FORWARD_PROGRESS_MAX +# define ZSTD_NO_FORWARD_PROGRESS_MAX 16 +#endif + +/*-******************************************************* +* Dependencies +*********************************************************/ +#include /* memcpy, memmove, memset */ +#include "cpu.h" +#include "mem.h" /* low level memory routines */ +#define FSE_STATIC_LINKING_ONLY +#include "fse.h" +#define HUF_STATIC_LINKING_ONLY +#include "huf.h" +#include "zstd_internal.h" + +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) +# include "zstd_legacy.h" +#endif + + +/*-************************************* +* Errors +***************************************/ +#define ZSTD_isError ERR_isError /* for inlining */ +#define FSE_isError ERR_isError +#define HUF_isError ERR_isError + + +/*_******************************************************* +* Memory operations +**********************************************************/ +static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); } + + +/*-************************************************************* +* Context management +***************************************************************/ +typedef enum { ZSTDds_getFrameHeaderSize, 
ZSTDds_decodeFrameHeader, + ZSTDds_decodeBlockHeader, ZSTDds_decompressBlock, + ZSTDds_decompressLastBlock, ZSTDds_checkChecksum, + ZSTDds_decodeSkippableHeader, ZSTDds_skipFrame } ZSTD_dStage; + +typedef enum { zdss_init=0, zdss_loadHeader, + zdss_read, zdss_load, zdss_flush } ZSTD_dStreamStage; + + +typedef struct { + U32 fastMode; + U32 tableLog; +} ZSTD_seqSymbol_header; + +typedef struct { + U16 nextState; + BYTE nbAdditionalBits; + BYTE nbBits; + U32 baseValue; +} ZSTD_seqSymbol; + +#define SEQSYMBOL_TABLE_SIZE(log) (1 + (1 << (log))) + +typedef struct { + ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; + ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)]; + ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; + HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */ + U32 workspace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + U32 rep[ZSTD_REP_NUM]; +} ZSTD_entropyDTables_t; + +struct ZSTD_DCtx_s +{ + const ZSTD_seqSymbol* LLTptr; + const ZSTD_seqSymbol* MLTptr; + const ZSTD_seqSymbol* OFTptr; + const HUF_DTable* HUFptr; + ZSTD_entropyDTables_t entropy; + const void* previousDstEnd; /* detect continuity */ + const void* prefixStart; /* start of current segment */ + const void* virtualStart; /* virtual start of previous segment if it was just before current one */ + const void* dictEnd; /* end of previous segment */ + size_t expected; + ZSTD_frameHeader fParams; + U64 decodedSize; + blockType_e bType; /* used in ZSTD_decompressContinue(), store blockType between block header decoding and block decompression stages */ + ZSTD_dStage stage; + U32 litEntropy; + U32 fseEntropy; + XXH64_state_t xxhState; + size_t headerSize; + U32 dictID; + ZSTD_format_e format; + const BYTE* litPtr; + ZSTD_customMem customMem; + size_t litSize; + size_t rleSize; + size_t staticSize; + int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. 
*/ + + /* streaming */ + ZSTD_DDict* ddictLocal; + const ZSTD_DDict* ddict; + ZSTD_dStreamStage streamStage; + char* inBuff; + size_t inBuffSize; + size_t inPos; + size_t maxWindowSize; + char* outBuff; + size_t outBuffSize; + size_t outStart; + size_t outEnd; + size_t lhSize; + void* legacyContext; + U32 previousLegacyVersion; + U32 legacyVersion; + U32 hostageByte; + int noForwardProgress; + + /* workspace */ + BYTE litBuffer[ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH]; + BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX]; +}; /* typedef'd to ZSTD_DCtx within "zstd.h" */ + +size_t ZSTD_sizeof_DCtx (const ZSTD_DCtx* dctx) +{ + if (dctx==NULL) return 0; /* support sizeof NULL */ + return sizeof(*dctx) + + ZSTD_sizeof_DDict(dctx->ddictLocal) + + dctx->inBuffSize + dctx->outBuffSize; +} + +size_t ZSTD_estimateDCtxSize(void) { return sizeof(ZSTD_DCtx); } + + +static size_t ZSTD_startingInputLength(ZSTD_format_e format) +{ + size_t const startingInputLength = (format==ZSTD_f_zstd1_magicless) ? + ZSTD_frameHeaderSize_prefix - ZSTD_frameIdSize : + ZSTD_frameHeaderSize_prefix; + ZSTD_STATIC_ASSERT(ZSTD_FRAMEHEADERSIZE_PREFIX >= ZSTD_FRAMEIDSIZE); + /* only supports formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless */ + assert( (format == ZSTD_f_zstd1) || (format == ZSTD_f_zstd1_magicless) ); + return startingInputLength; +} + +static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx) +{ + dctx->format = ZSTD_f_zstd1; /* ZSTD_decompressBegin() invokes ZSTD_startingInputLength() with argument dctx->format */ + dctx->staticSize = 0; + dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT; + dctx->ddict = NULL; + dctx->ddictLocal = NULL; + dctx->inBuff = NULL; + dctx->inBuffSize = 0; + dctx->outBuffSize = 0; + dctx->streamStage = zdss_init; + dctx->legacyContext = NULL; + dctx->previousLegacyVersion = 0; + dctx->noForwardProgress = 0; + dctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid()); +} + +ZSTD_DCtx* ZSTD_initStaticDCtx(void *workspace, size_t workspaceSize) +{ + ZSTD_DCtx* const dctx = (ZSTD_DCtx*) 
workspace; + + if ((size_t)workspace & 7) return NULL; /* 8-aligned */ + if (workspaceSize < sizeof(ZSTD_DCtx)) return NULL; /* minimum size */ + + ZSTD_initDCtx_internal(dctx); + dctx->staticSize = workspaceSize; + dctx->inBuff = (char*)(dctx+1); + return dctx; +} + +ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem) +{ + if (!customMem.customAlloc ^ !customMem.customFree) return NULL; + + { ZSTD_DCtx* const dctx = (ZSTD_DCtx*)ZSTD_malloc(sizeof(*dctx), customMem); + if (!dctx) return NULL; + dctx->customMem = customMem; + ZSTD_initDCtx_internal(dctx); + return dctx; + } +} + +ZSTD_DCtx* ZSTD_createDCtx(void) +{ + DEBUGLOG(3, "ZSTD_createDCtx"); + return ZSTD_createDCtx_advanced(ZSTD_defaultCMem); +} + +size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx) +{ + if (dctx==NULL) return 0; /* support free on NULL */ + if (dctx->staticSize) return ERROR(memory_allocation); /* not compatible with static DCtx */ + { ZSTD_customMem const cMem = dctx->customMem; + ZSTD_freeDDict(dctx->ddictLocal); + dctx->ddictLocal = NULL; + ZSTD_free(dctx->inBuff, cMem); + dctx->inBuff = NULL; +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) + if (dctx->legacyContext) + ZSTD_freeLegacyStreamContext(dctx->legacyContext, dctx->previousLegacyVersion); +#endif + ZSTD_free(dctx, cMem); + return 0; + } +} + +/* no longer useful */ +void ZSTD_copyDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx) +{ + size_t const toCopy = (size_t)((char*)(&dstDCtx->inBuff) - (char*)dstDCtx); + memcpy(dstDCtx, srcDCtx, toCopy); /* no need to copy workspace */ +} + + +/*-************************************************************* + * Frame header decoding + ***************************************************************/ + +/*! ZSTD_isFrame() : + * Tells if the content of `buffer` starts with a valid Frame Identifier. + * Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0. + * Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled. 
+ * Note 3 : Skippable Frame Identifiers are considered valid. */ +unsigned ZSTD_isFrame(const void* buffer, size_t size) +{ + if (size < ZSTD_frameIdSize) return 0; + { U32 const magic = MEM_readLE32(buffer); + if (magic == ZSTD_MAGICNUMBER) return 1; + if ((magic & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) return 1; + } +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) + if (ZSTD_isLegacy(buffer, size)) return 1; +#endif + return 0; +} + +/** ZSTD_frameHeaderSize_internal() : + * srcSize must be large enough to reach header size fields. + * note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless. + * @return : size of the Frame Header + * or an error code, which can be tested with ZSTD_isError() */ +static size_t ZSTD_frameHeaderSize_internal(const void* src, size_t srcSize, ZSTD_format_e format) +{ + size_t const minInputSize = ZSTD_startingInputLength(format); + if (srcSize < minInputSize) return ERROR(srcSize_wrong); + + { BYTE const fhd = ((const BYTE*)src)[minInputSize-1]; + U32 const dictID= fhd & 3; + U32 const singleSegment = (fhd >> 5) & 1; + U32 const fcsId = fhd >> 6; + return minInputSize + !singleSegment + + ZSTD_did_fieldSize[dictID] + ZSTD_fcs_fieldSize[fcsId] + + (singleSegment && !fcsId); + } +} + +/** ZSTD_frameHeaderSize() : + * srcSize must be >= ZSTD_frameHeaderSize_prefix. + * @return : size of the Frame Header, + * or an error code (if srcSize is too small) */ +size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize) +{ + return ZSTD_frameHeaderSize_internal(src, srcSize, ZSTD_f_zstd1); +} + + +/** ZSTD_getFrameHeader_advanced() : + * decode Frame Header, or require larger `srcSize`. 
+ * note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless + * @return : 0, `zfhPtr` is correctly filled, + * >0, `srcSize` is too small, value is wanted `srcSize` amount, + * or an error code, which can be tested using ZSTD_isError() */ +size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format) +{ + const BYTE* ip = (const BYTE*)src; + size_t const minInputSize = ZSTD_startingInputLength(format); + + if (srcSize < minInputSize) return minInputSize; + + if ( (format != ZSTD_f_zstd1_magicless) + && (MEM_readLE32(src) != ZSTD_MAGICNUMBER) ) { + if ((MEM_readLE32(src) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) { + /* skippable frame */ + if (srcSize < ZSTD_skippableHeaderSize) + return ZSTD_skippableHeaderSize; /* magic number + frame length */ + memset(zfhPtr, 0, sizeof(*zfhPtr)); + zfhPtr->frameContentSize = MEM_readLE32((const char *)src + ZSTD_frameIdSize); + zfhPtr->frameType = ZSTD_skippableFrame; + return 0; + } + return ERROR(prefix_unknown); + } + + /* ensure there is enough `srcSize` to fully read/decode frame header */ + { size_t const fhsize = ZSTD_frameHeaderSize_internal(src, srcSize, format); + if (srcSize < fhsize) return fhsize; + zfhPtr->headerSize = (U32)fhsize; + } + + { BYTE const fhdByte = ip[minInputSize-1]; + size_t pos = minInputSize; + U32 const dictIDSizeCode = fhdByte&3; + U32 const checksumFlag = (fhdByte>>2)&1; + U32 const singleSegment = (fhdByte>>5)&1; + U32 const fcsID = fhdByte>>6; + U64 windowSize = 0; + U32 dictID = 0; + U64 frameContentSize = ZSTD_CONTENTSIZE_UNKNOWN; + if ((fhdByte & 0x08) != 0) + return ERROR(frameParameter_unsupported); /* reserved bits, must be zero */ + + if (!singleSegment) { + BYTE const wlByte = ip[pos++]; + U32 const windowLog = (wlByte >> 3) + ZSTD_WINDOWLOG_ABSOLUTEMIN; + if (windowLog > ZSTD_WINDOWLOG_MAX) + return ERROR(frameParameter_windowTooLarge); + windowSize = (1ULL << windowLog); + windowSize += (windowSize >> 3) * 
(wlByte&7); + } + switch(dictIDSizeCode) + { + default: assert(0); /* impossible */ + case 0 : break; + case 1 : dictID = ip[pos]; pos++; break; + case 2 : dictID = MEM_readLE16(ip+pos); pos+=2; break; + case 3 : dictID = MEM_readLE32(ip+pos); pos+=4; break; + } + switch(fcsID) + { + default: assert(0); /* impossible */ + case 0 : if (singleSegment) frameContentSize = ip[pos]; break; + case 1 : frameContentSize = MEM_readLE16(ip+pos)+256; break; + case 2 : frameContentSize = MEM_readLE32(ip+pos); break; + case 3 : frameContentSize = MEM_readLE64(ip+pos); break; + } + if (singleSegment) windowSize = frameContentSize; + + zfhPtr->frameType = ZSTD_frame; + zfhPtr->frameContentSize = frameContentSize; + zfhPtr->windowSize = windowSize; + zfhPtr->blockSizeMax = (unsigned) MIN(windowSize, ZSTD_BLOCKSIZE_MAX); + zfhPtr->dictID = dictID; + zfhPtr->checksumFlag = checksumFlag; + } + return 0; +} + +/** ZSTD_getFrameHeader() : + * decode Frame Header, or require larger `srcSize`. + * note : this function does not consume input, it only reads it. + * @return : 0, `zfhPtr` is correctly filled, + * >0, `srcSize` is too small, value is wanted `srcSize` amount, + * or an error code, which can be tested using ZSTD_isError() */ +size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize) +{ + return ZSTD_getFrameHeader_advanced(zfhPtr, src, srcSize, ZSTD_f_zstd1); +} + + +/** ZSTD_getFrameContentSize() : + * compatible with legacy mode + * @return : decompressed size of the single frame pointed to be `src` if known, otherwise + * - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined + * - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. 
invalid magic number, srcSize too small) */ +unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize) +{ +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) + if (ZSTD_isLegacy(src, srcSize)) { + unsigned long long const ret = ZSTD_getDecompressedSize_legacy(src, srcSize); + return ret == 0 ? ZSTD_CONTENTSIZE_UNKNOWN : ret; + } +#endif + { ZSTD_frameHeader zfh; + if (ZSTD_getFrameHeader(&zfh, src, srcSize) != 0) + return ZSTD_CONTENTSIZE_ERROR; + if (zfh.frameType == ZSTD_skippableFrame) { + return 0; + } else { + return zfh.frameContentSize; + } } +} + +/** ZSTD_findDecompressedSize() : + * compatible with legacy mode + * `srcSize` must be the exact length of some number of ZSTD compressed and/or + * skippable frames + * @return : decompressed size of the frames contained */ +unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize) +{ + unsigned long long totalDstSize = 0; + + while (srcSize >= ZSTD_frameHeaderSize_prefix) { + U32 const magicNumber = MEM_readLE32(src); + + if ((magicNumber & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) { + size_t skippableSize; + if (srcSize < ZSTD_skippableHeaderSize) + return ERROR(srcSize_wrong); + skippableSize = MEM_readLE32((const BYTE *)src + ZSTD_frameIdSize) + + ZSTD_skippableHeaderSize; + if (srcSize < skippableSize) { + return ZSTD_CONTENTSIZE_ERROR; + } + + src = (const BYTE *)src + skippableSize; + srcSize -= skippableSize; + continue; + } + + { unsigned long long const ret = ZSTD_getFrameContentSize(src, srcSize); + if (ret >= ZSTD_CONTENTSIZE_ERROR) return ret; + + /* check for overflow */ + if (totalDstSize + ret < totalDstSize) return ZSTD_CONTENTSIZE_ERROR; + totalDstSize += ret; + } + { size_t const frameSrcSize = ZSTD_findFrameCompressedSize(src, srcSize); + if (ZSTD_isError(frameSrcSize)) { + return ZSTD_CONTENTSIZE_ERROR; + } + + src = (const BYTE *)src + frameSrcSize; + srcSize -= frameSrcSize; + } + } /* while (srcSize >= ZSTD_frameHeaderSize_prefix) */ + 
+ if (srcSize) return ZSTD_CONTENTSIZE_ERROR; + + return totalDstSize; +} + +/** ZSTD_getDecompressedSize() : +* compatible with legacy mode +* @return : decompressed size if known, 0 otherwise + note : 0 can mean any of the following : + - frame content is empty + - decompressed size field is not present in frame header + - frame header unknown / not supported + - frame header not complete (`srcSize` too small) */ +unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize) +{ + unsigned long long const ret = ZSTD_getFrameContentSize(src, srcSize); + ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_ERROR < ZSTD_CONTENTSIZE_UNKNOWN); + return (ret >= ZSTD_CONTENTSIZE_ERROR) ? 0 : ret; +} + + +/** ZSTD_decodeFrameHeader() : +* `headerSize` must be the size provided by ZSTD_frameHeaderSize(). +* @return : 0 if success, or an error code, which can be tested using ZSTD_isError() */ +static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t headerSize) +{ + size_t const result = ZSTD_getFrameHeader_advanced(&(dctx->fParams), src, headerSize, dctx->format); + if (ZSTD_isError(result)) return result; /* invalid header */ + if (result>0) return ERROR(srcSize_wrong); /* headerSize too small */ + if (dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID)) + return ERROR(dictionary_wrong); + if (dctx->fParams.checksumFlag) XXH64_reset(&dctx->xxhState, 0); + return 0; +} + + +/*-************************************************************* + * Block decoding + ***************************************************************/ + +/*! 
ZSTD_getcBlockSize() : +* Provides the size of compressed block from block header `src` */ +size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, + blockProperties_t* bpPtr) +{ + if (srcSize < ZSTD_blockHeaderSize) return ERROR(srcSize_wrong); + { U32 const cBlockHeader = MEM_readLE24(src); + U32 const cSize = cBlockHeader >> 3; + bpPtr->lastBlock = cBlockHeader & 1; + bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3); + bpPtr->origSize = cSize; /* only useful for RLE */ + if (bpPtr->blockType == bt_rle) return 1; + if (bpPtr->blockType == bt_reserved) return ERROR(corruption_detected); + return cSize; + } +} + + +static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity, + const void* src, size_t srcSize) +{ + if (srcSize > dstCapacity) return ERROR(dstSize_tooSmall); + memcpy(dst, src, srcSize); + return srcSize; +} + + +static size_t ZSTD_setRleBlock(void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + size_t regenSize) +{ + if (srcSize != 1) return ERROR(srcSize_wrong); + if (regenSize > dstCapacity) return ERROR(dstSize_tooSmall); + memset(dst, *(const BYTE*)src, regenSize); + return regenSize; +} + +/*! 
ZSTD_decodeLiteralsBlock() : + * @return : nb of bytes read from src (< srcSize ) + * note : symbol not declared but exposed for fullbench */ +size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, + const void* src, size_t srcSize) /* note : srcSize < BLOCKSIZE */ +{ + if (srcSize < MIN_CBLOCK_SIZE) return ERROR(corruption_detected); + + { const BYTE* const istart = (const BYTE*) src; + symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3); + + switch(litEncType) + { + case set_repeat: + if (dctx->litEntropy==0) return ERROR(dictionary_corrupted); + /* fall-through */ + case set_compressed: + if (srcSize < 5) return ERROR(corruption_detected); /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3 */ + { size_t lhSize, litSize, litCSize; + U32 singleStream=0; + U32 const lhlCode = (istart[0] >> 2) & 3; + U32 const lhc = MEM_readLE32(istart); + switch(lhlCode) + { + case 0: case 1: default: /* note : default is impossible, since lhlCode into [0..3] */ + /* 2 - 2 - 10 - 10 */ + singleStream = !lhlCode; + lhSize = 3; + litSize = (lhc >> 4) & 0x3FF; + litCSize = (lhc >> 14) & 0x3FF; + break; + case 2: + /* 2 - 2 - 14 - 14 */ + lhSize = 4; + litSize = (lhc >> 4) & 0x3FFF; + litCSize = lhc >> 18; + break; + case 3: + /* 2 - 2 - 18 - 18 */ + lhSize = 5; + litSize = (lhc >> 4) & 0x3FFFF; + litCSize = (lhc >> 22) + (istart[4] << 10); + break; + } + if (litSize > ZSTD_BLOCKSIZE_MAX) return ERROR(corruption_detected); + if (litCSize + lhSize > srcSize) return ERROR(corruption_detected); + + if (HUF_isError((litEncType==set_repeat) ? + ( singleStream ? + HUF_decompress1X_usingDTable_bmi2(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->HUFptr, dctx->bmi2) : + HUF_decompress4X_usingDTable_bmi2(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->HUFptr, dctx->bmi2) ) : + ( singleStream ? 
+ HUF_decompress1X1_DCtx_wksp_bmi2(dctx->entropy.hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize, + dctx->entropy.workspace, sizeof(dctx->entropy.workspace), dctx->bmi2) : + HUF_decompress4X_hufOnly_wksp_bmi2(dctx->entropy.hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize, + dctx->entropy.workspace, sizeof(dctx->entropy.workspace), dctx->bmi2)))) + return ERROR(corruption_detected); + + dctx->litPtr = dctx->litBuffer; + dctx->litSize = litSize; + dctx->litEntropy = 1; + if (litEncType==set_compressed) dctx->HUFptr = dctx->entropy.hufTable; + memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH); + return litCSize + lhSize; + } + + case set_basic: + { size_t litSize, lhSize; + U32 const lhlCode = ((istart[0]) >> 2) & 3; + switch(lhlCode) + { + case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */ + lhSize = 1; + litSize = istart[0] >> 3; + break; + case 1: + lhSize = 2; + litSize = MEM_readLE16(istart) >> 4; + break; + case 3: + lhSize = 3; + litSize = MEM_readLE24(istart) >> 4; + break; + } + + if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */ + if (litSize+lhSize > srcSize) return ERROR(corruption_detected); + memcpy(dctx->litBuffer, istart+lhSize, litSize); + dctx->litPtr = dctx->litBuffer; + dctx->litSize = litSize; + memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH); + return lhSize+litSize; + } + /* direct reference into compressed stream */ + dctx->litPtr = istart+lhSize; + dctx->litSize = litSize; + return lhSize+litSize; + } + + case set_rle: + { U32 const lhlCode = ((istart[0]) >> 2) & 3; + size_t litSize, lhSize; + switch(lhlCode) + { + case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */ + lhSize = 1; + litSize = istart[0] >> 3; + break; + case 1: + lhSize = 2; + litSize = MEM_readLE16(istart) >> 4; + break; + case 3: + lhSize = 3; + litSize = MEM_readLE24(istart) >> 4; + if (srcSize<4) 
return ERROR(corruption_detected); /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4 */ + break; + } + if (litSize > ZSTD_BLOCKSIZE_MAX) return ERROR(corruption_detected); + memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH); + dctx->litPtr = dctx->litBuffer; + dctx->litSize = litSize; + return lhSize+1; + } + default: + return ERROR(corruption_detected); /* impossible */ + } + } +} + +/* Default FSE distribution tables. + * These are pre-calculated FSE decoding tables using default distributions as defined in specification : + * https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#default-distributions + * They were generated programmatically with following method : + * - start from default distributions, present in /lib/common/zstd_internal.h + * - generate tables normally, using ZSTD_buildFSETable() + * - printout the content of tables + * - pretify output, report below, test with fuzzer to ensure it's correct */ + +/* Default FSE distribution table for Literal Lengths */ +static const ZSTD_seqSymbol LL_defaultDTable[(1<tableLog = 0; + DTableH->fastMode = 0; + + cell->nbBits = 0; + cell->nextState = 0; + assert(nbAddBits < 255); + cell->nbAdditionalBits = (BYTE)nbAddBits; + cell->baseValue = baseValue; +} + + +/* ZSTD_buildFSETable() : + * generate FSE decoding table for one symbol (ll, ml or off) */ +static void +ZSTD_buildFSETable(ZSTD_seqSymbol* dt, + const short* normalizedCounter, unsigned maxSymbolValue, + const U32* baseValue, const U32* nbAdditionalBits, + unsigned tableLog) +{ + ZSTD_seqSymbol* const tableDecode = dt+1; + U16 symbolNext[MaxSeq+1]; + + U32 const maxSV1 = maxSymbolValue + 1; + U32 const tableSize = 1 << tableLog; + U32 highThreshold = tableSize-1; + + /* Sanity Checks */ + assert(maxSymbolValue <= MaxSeq); + assert(tableLog <= MaxFSELog); + + /* Init, lay down lowprob symbols */ + { ZSTD_seqSymbol_header DTableH; + DTableH.tableLog = tableLog; + DTableH.fastMode = 1; + { S16 const 
largeLimit= (S16)(1 << (tableLog-1)); + U32 s; + for (s=0; s= largeLimit) DTableH.fastMode=0; + symbolNext[s] = normalizedCounter[s]; + } } } + memcpy(dt, &DTableH, sizeof(DTableH)); + } + + /* Spread symbols */ + { U32 const tableMask = tableSize-1; + U32 const step = FSE_TABLESTEP(tableSize); + U32 s, position = 0; + for (s=0; s highThreshold) position = (position + step) & tableMask; /* lowprob area */ + } } + assert(position == 0); /* position must reach all cells once, otherwise normalizedCounter is incorrect */ + } + + /* Build Decoding table */ + { U32 u; + for (u=0; u max) return ERROR(corruption_detected); + { U32 const symbol = *(const BYTE*)src; + U32 const baseline = baseValue[symbol]; + U32 const nbBits = nbAdditionalBits[symbol]; + ZSTD_buildSeqTable_rle(DTableSpace, baseline, nbBits); + } + *DTablePtr = DTableSpace; + return 1; + case set_basic : + *DTablePtr = defaultTable; + return 0; + case set_repeat: + if (!flagRepeatTable) return ERROR(corruption_detected); + return 0; + case set_compressed : + { U32 tableLog; + S16 norm[MaxSeq+1]; + size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize); + if (FSE_isError(headerSize)) return ERROR(corruption_detected); + if (tableLog > maxLog) return ERROR(corruption_detected); + ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog); + *DTablePtr = DTableSpace; + return headerSize; + } + default : /* impossible */ + assert(0); + return ERROR(GENERIC); + } +} + +static const U32 LL_base[MaxLL+1] = { + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 18, 20, 22, 24, 28, 32, 40, + 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, + 0x2000, 0x4000, 0x8000, 0x10000 }; + +static const U32 OF_base[MaxOff+1] = { + 0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D, + 0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD, + 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD, + 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 
0x3FFFFFFD, 0x7FFFFFFD }; + +static const U32 OF_bits[MaxOff+1] = { + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31 }; + +static const U32 ML_base[MaxML+1] = { + 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 24, 25, 26, + 27, 28, 29, 30, 31, 32, 33, 34, + 35, 37, 39, 41, 43, 47, 51, 59, + 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803, + 0x1003, 0x2003, 0x4003, 0x8003, 0x10003 }; + + +size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, + const void* src, size_t srcSize) +{ + const BYTE* const istart = (const BYTE* const)src; + const BYTE* const iend = istart + srcSize; + const BYTE* ip = istart; + DEBUGLOG(5, "ZSTD_decodeSeqHeaders"); + + /* check */ + if (srcSize < MIN_SEQUENCES_SIZE) return ERROR(srcSize_wrong); + + /* SeqHead */ + { int nbSeq = *ip++; + if (!nbSeq) { *nbSeqPtr=0; return 1; } + if (nbSeq > 0x7F) { + if (nbSeq == 0xFF) { + if (ip+2 > iend) return ERROR(srcSize_wrong); + nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2; + } else { + if (ip >= iend) return ERROR(srcSize_wrong); + nbSeq = ((nbSeq-0x80)<<8) + *ip++; + } + } + *nbSeqPtr = nbSeq; + } + + /* FSE table descriptors */ + if (ip+4 > iend) return ERROR(srcSize_wrong); /* minimum possible size */ + { symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6); + symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3); + symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3); + ip++; + + /* Build DTables */ + { size_t const llhSize = ZSTD_buildSeqTable(dctx->entropy.LLTable, &dctx->LLTptr, + LLtype, MaxLL, LLFSELog, + ip, iend-ip, + LL_base, LL_bits, + LL_defaultDTable, dctx->fseEntropy); + if (ZSTD_isError(llhSize)) return ERROR(corruption_detected); + ip += llhSize; + } + + { size_t const ofhSize = ZSTD_buildSeqTable(dctx->entropy.OFTable, &dctx->OFTptr, + OFtype, MaxOff, OffFSELog, + ip, iend-ip, + OF_base, OF_bits, + 
OF_defaultDTable, dctx->fseEntropy); + if (ZSTD_isError(ofhSize)) return ERROR(corruption_detected); + ip += ofhSize; + } + + { size_t const mlhSize = ZSTD_buildSeqTable(dctx->entropy.MLTable, &dctx->MLTptr, + MLtype, MaxML, MLFSELog, + ip, iend-ip, + ML_base, ML_bits, + ML_defaultDTable, dctx->fseEntropy); + if (ZSTD_isError(mlhSize)) return ERROR(corruption_detected); + ip += mlhSize; + } + } + + return ip-istart; +} + + +typedef struct { + size_t litLength; + size_t matchLength; + size_t offset; + const BYTE* match; +} seq_t; + +typedef struct { + size_t state; + const ZSTD_seqSymbol* table; +} ZSTD_fseState; + +typedef struct { + BIT_DStream_t DStream; + ZSTD_fseState stateLL; + ZSTD_fseState stateOffb; + ZSTD_fseState stateML; + size_t prevOffset[ZSTD_REP_NUM]; + const BYTE* prefixStart; + const BYTE* dictEnd; + size_t pos; +} seqState_t; + + +FORCE_NOINLINE +size_t ZSTD_execSequenceLast7(BYTE* op, + BYTE* const oend, seq_t sequence, + const BYTE** litPtr, const BYTE* const litLimit, + const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd) +{ + BYTE* const oLitEnd = op + sequence.litLength; + size_t const sequenceLength = sequence.litLength + sequence.matchLength; + BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */ + BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; + const BYTE* const iLitEnd = *litPtr + sequence.litLength; + const BYTE* match = oLitEnd - sequence.offset; + + /* check */ + if (oMatchEnd>oend) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */ + if (iLitEnd > litLimit) return ERROR(corruption_detected); /* over-read beyond lit buffer */ + if (oLitEnd <= oend_w) return ERROR(GENERIC); /* Precondition */ + + /* copy literals */ + if (op < oend_w) { + ZSTD_wildcopy(op, *litPtr, oend_w - op); + *litPtr += oend_w - op; + op = oend_w; + } + while (op < oLitEnd) *op++ = *(*litPtr)++; + + /* copy Match */ + if (sequence.offset > 
(size_t)(oLitEnd - base)) { + /* offset beyond prefix */ + if (sequence.offset > (size_t)(oLitEnd - vBase)) return ERROR(corruption_detected); + match = dictEnd - (base-match); + if (match + sequence.matchLength <= dictEnd) { + memmove(oLitEnd, match, sequence.matchLength); + return sequenceLength; + } + /* span extDict & currentPrefixSegment */ + { size_t const length1 = dictEnd - match; + memmove(oLitEnd, match, length1); + op = oLitEnd + length1; + sequence.matchLength -= length1; + match = base; + } } + while (op < oMatchEnd) *op++ = *match++; + return sequenceLength; +} + + +HINT_INLINE +size_t ZSTD_execSequence(BYTE* op, + BYTE* const oend, seq_t sequence, + const BYTE** litPtr, const BYTE* const litLimit, + const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd) +{ + BYTE* const oLitEnd = op + sequence.litLength; + size_t const sequenceLength = sequence.litLength + sequence.matchLength; + BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */ + BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; + const BYTE* const iLitEnd = *litPtr + sequence.litLength; + const BYTE* match = oLitEnd - sequence.offset; + + /* check */ + if (oMatchEnd>oend) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */ + if (iLitEnd > litLimit) return ERROR(corruption_detected); /* over-read beyond lit buffer */ + if (oLitEnd>oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd); + + /* copy Literals */ + ZSTD_copy8(op, *litPtr); + if (sequence.litLength > 8) + ZSTD_wildcopy(op+8, (*litPtr)+8, sequence.litLength - 8); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */ + op = oLitEnd; + *litPtr = iLitEnd; /* update for next sequence */ + + /* copy Match */ + if (sequence.offset > (size_t)(oLitEnd - prefixStart)) { + /* offset beyond prefix -> go into extDict */ + if 
(sequence.offset > (size_t)(oLitEnd - virtualStart)) + return ERROR(corruption_detected); + match = dictEnd + (match - prefixStart); + if (match + sequence.matchLength <= dictEnd) { + memmove(oLitEnd, match, sequence.matchLength); + return sequenceLength; + } + /* span extDict & currentPrefixSegment */ + { size_t const length1 = dictEnd - match; + memmove(oLitEnd, match, length1); + op = oLitEnd + length1; + sequence.matchLength -= length1; + match = prefixStart; + if (op > oend_w || sequence.matchLength < MINMATCH) { + U32 i; + for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i]; + return sequenceLength; + } + } } + /* Requirement: op <= oend_w && sequence.matchLength >= MINMATCH */ + + /* match within prefix */ + if (sequence.offset < 8) { + /* close range match, overlap */ + static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */ + static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */ + int const sub2 = dec64table[sequence.offset]; + op[0] = match[0]; + op[1] = match[1]; + op[2] = match[2]; + op[3] = match[3]; + match += dec32table[sequence.offset]; + ZSTD_copy4(op+4, match); + match -= sub2; + } else { + ZSTD_copy8(op, match); + } + op += 8; match += 8; + + if (oMatchEnd > oend-(16-MINMATCH)) { + if (op < oend_w) { + ZSTD_wildcopy(op, match, oend_w - op); + match += oend_w - op; + op = oend_w; + } + while (op < oMatchEnd) *op++ = *match++; + } else { + ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8); /* works even if matchLength < 8 */ + } + return sequenceLength; +} + + +HINT_INLINE +size_t ZSTD_execSequenceLong(BYTE* op, + BYTE* const oend, seq_t sequence, + const BYTE** litPtr, const BYTE* const litLimit, + const BYTE* const prefixStart, const BYTE* const dictStart, const BYTE* const dictEnd) +{ + BYTE* const oLitEnd = op + sequence.litLength; + size_t const sequenceLength = sequence.litLength + sequence.matchLength; + BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow 
(32-bits) */ + BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; + const BYTE* const iLitEnd = *litPtr + sequence.litLength; + const BYTE* match = sequence.match; + + /* check */ + if (oMatchEnd > oend) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */ + if (iLitEnd > litLimit) return ERROR(corruption_detected); /* over-read beyond lit buffer */ + if (oLitEnd > oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, dictStart, dictEnd); + + /* copy Literals */ + ZSTD_copy8(op, *litPtr); /* note : op <= oLitEnd <= oend_w == oend - 8 */ + if (sequence.litLength > 8) + ZSTD_wildcopy(op+8, (*litPtr)+8, sequence.litLength - 8); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */ + op = oLitEnd; + *litPtr = iLitEnd; /* update for next sequence */ + + /* copy Match */ + if (sequence.offset > (size_t)(oLitEnd - prefixStart)) { + /* offset beyond prefix */ + if (sequence.offset > (size_t)(oLitEnd - dictStart)) return ERROR(corruption_detected); + if (match + sequence.matchLength <= dictEnd) { + memmove(oLitEnd, match, sequence.matchLength); + return sequenceLength; + } + /* span extDict & currentPrefixSegment */ + { size_t const length1 = dictEnd - match; + memmove(oLitEnd, match, length1); + op = oLitEnd + length1; + sequence.matchLength -= length1; + match = prefixStart; + if (op > oend_w || sequence.matchLength < MINMATCH) { + U32 i; + for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i]; + return sequenceLength; + } + } } + assert(op <= oend_w); + assert(sequence.matchLength >= MINMATCH); + + /* match within prefix */ + if (sequence.offset < 8) { + /* close range match, overlap */ + static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */ + static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */ + int const sub2 = dec64table[sequence.offset]; + op[0] = match[0]; + op[1] = match[1]; + op[2] = 
match[2]; + op[3] = match[3]; + match += dec32table[sequence.offset]; + ZSTD_copy4(op+4, match); + match -= sub2; + } else { + ZSTD_copy8(op, match); + } + op += 8; match += 8; + + if (oMatchEnd > oend-(16-MINMATCH)) { + if (op < oend_w) { + ZSTD_wildcopy(op, match, oend_w - op); + match += oend_w - op; + op = oend_w; + } + while (op < oMatchEnd) *op++ = *match++; + } else { + ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8); /* works even if matchLength < 8 */ + } + return sequenceLength; +} + +static void +ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqSymbol* dt) +{ + const void* ptr = dt; + const ZSTD_seqSymbol_header* const DTableH = (const ZSTD_seqSymbol_header*)ptr; + DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog); + DEBUGLOG(6, "ZSTD_initFseState : val=%u using %u bits", + (U32)DStatePtr->state, DTableH->tableLog); + BIT_reloadDStream(bitD); + DStatePtr->table = dt + 1; +} + +FORCE_INLINE_TEMPLATE void +ZSTD_updateFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD) +{ + ZSTD_seqSymbol const DInfo = DStatePtr->table[DStatePtr->state]; + U32 const nbBits = DInfo.nbBits; + size_t const lowBits = BIT_readBits(bitD, nbBits); + DStatePtr->state = DInfo.nextState + lowBits; +} + +/* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum + * offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1) + * bits before reloading. This value is the maximum number of bytes we read + * after reloading when we are decoding long offets. + */ +#define LONG_OFFSETS_MAX_EXTRA_BITS_32 \ + (ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32 \ + ? 
ZSTD_WINDOWLOG_MAX_32 - STREAM_ACCUMULATOR_MIN_32 \ + : 0) + +typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e; + +FORCE_INLINE_TEMPLATE seq_t +ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets) +{ + seq_t seq; + U32 const llBits = seqState->stateLL.table[seqState->stateLL.state].nbAdditionalBits; + U32 const mlBits = seqState->stateML.table[seqState->stateML.state].nbAdditionalBits; + U32 const ofBits = seqState->stateOffb.table[seqState->stateOffb.state].nbAdditionalBits; + U32 const totalBits = llBits+mlBits+ofBits; + U32 const llBase = seqState->stateLL.table[seqState->stateLL.state].baseValue; + U32 const mlBase = seqState->stateML.table[seqState->stateML.state].baseValue; + U32 const ofBase = seqState->stateOffb.table[seqState->stateOffb.state].baseValue; + + /* sequence */ + { size_t offset; + if (!ofBits) + offset = 0; + else { + ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1); + ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5); + assert(ofBits <= MaxOff); + if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) { + U32 const extraBits = ofBits - MIN(ofBits, 32 - seqState->DStream.bitsConsumed); + offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits); + BIT_reloadDStream(&seqState->DStream); + if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits); + assert(extraBits <= LONG_OFFSETS_MAX_EXTRA_BITS_32); /* to avoid another reload */ + } else { + offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */ + if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); + } + } + + if (ofBits <= 1) { + offset += (llBase==0); + if (offset) { + size_t temp = (offset==3) ? 
seqState->prevOffset[0] - 1 : seqState->prevOffset[offset]; + temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */ + if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1]; + seqState->prevOffset[1] = seqState->prevOffset[0]; + seqState->prevOffset[0] = offset = temp; + } else { /* offset == 0 */ + offset = seqState->prevOffset[0]; + } + } else { + seqState->prevOffset[2] = seqState->prevOffset[1]; + seqState->prevOffset[1] = seqState->prevOffset[0]; + seqState->prevOffset[0] = offset; + } + seq.offset = offset; + } + + seq.matchLength = mlBase + + ((mlBits>0) ? BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/) : 0); /* <= 16 bits */ + if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32)) + BIT_reloadDStream(&seqState->DStream); + if (MEM_64bits() && (totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog))) + BIT_reloadDStream(&seqState->DStream); + /* Ensure there are enough bits to read the rest of data in 64-bit mode. */ + ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64); + + seq.litLength = llBase + + ((llBits>0) ? 
BIT_readBitsFast(&seqState->DStream, llBits/*>0*/) : 0); /* <= 16 bits */ + if (MEM_32bits()) + BIT_reloadDStream(&seqState->DStream); + + DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u", + (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset); + + /* ANS state update */ + ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */ + ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */ + if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ + ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */ + + return seq; +} + +FORCE_INLINE_TEMPLATE size_t +ZSTD_decompressSequences_body( ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset) +{ + const BYTE* ip = (const BYTE*)seqStart; + const BYTE* const iend = ip + seqSize; + BYTE* const ostart = (BYTE* const)dst; + BYTE* const oend = ostart + maxDstSize; + BYTE* op = ostart; + const BYTE* litPtr = dctx->litPtr; + const BYTE* const litEnd = litPtr + dctx->litSize; + const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart); + const BYTE* const vBase = (const BYTE*) (dctx->virtualStart); + const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); + DEBUGLOG(5, "ZSTD_decompressSequences_body"); + + /* Regen sequences */ + if (nbSeq) { + seqState_t seqState; + dctx->fseEntropy = 1; + { U32 i; for (i=0; ientropy.rep[i]; } + CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend-ip), corruption_detected); + ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr); + ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr); + ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); + + for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq ; ) { + nbSeq--; + { seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset); + size_t const oneSeqSize = ZSTD_execSequence(op, 
oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd); + DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize); + if (ZSTD_isError(oneSeqSize)) return oneSeqSize; + op += oneSeqSize; + } } + + /* check if reached exact end */ + DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq); + if (nbSeq) return ERROR(corruption_detected); + /* save reps for next block */ + { U32 i; for (i=0; ientropy.rep[i] = (U32)(seqState.prevOffset[i]); } + } + + /* last literal segment */ + { size_t const lastLLSize = litEnd - litPtr; + if (lastLLSize > (size_t)(oend-op)) return ERROR(dstSize_tooSmall); + memcpy(op, litPtr, lastLLSize); + op += lastLLSize; + } + + return op-ostart; +} + +static size_t +ZSTD_decompressSequences_default(ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset) +{ + return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); +} + + + +FORCE_INLINE_TEMPLATE seq_t +ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const longOffsets) +{ + seq_t seq; + U32 const llBits = seqState->stateLL.table[seqState->stateLL.state].nbAdditionalBits; + U32 const mlBits = seqState->stateML.table[seqState->stateML.state].nbAdditionalBits; + U32 const ofBits = seqState->stateOffb.table[seqState->stateOffb.state].nbAdditionalBits; + U32 const totalBits = llBits+mlBits+ofBits; + U32 const llBase = seqState->stateLL.table[seqState->stateLL.state].baseValue; + U32 const mlBase = seqState->stateML.table[seqState->stateML.state].baseValue; + U32 const ofBase = seqState->stateOffb.table[seqState->stateOffb.state].baseValue; + + /* sequence */ + { size_t offset; + if (!ofBits) + offset = 0; + else { + ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1); + ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5); + assert(ofBits <= MaxOff); + if (MEM_32bits() && longOffsets) { + U32 const extraBits = 
ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN_32-1); + offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits); + if (MEM_32bits() || extraBits) BIT_reloadDStream(&seqState->DStream); + if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits); + } else { + offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */ + if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); + } + } + + if (ofBits <= 1) { + offset += (llBase==0); + if (offset) { + size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset]; + temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */ + if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1]; + seqState->prevOffset[1] = seqState->prevOffset[0]; + seqState->prevOffset[0] = offset = temp; + } else { + offset = seqState->prevOffset[0]; + } + } else { + seqState->prevOffset[2] = seqState->prevOffset[1]; + seqState->prevOffset[1] = seqState->prevOffset[0]; + seqState->prevOffset[0] = offset; + } + seq.offset = offset; + } + + seq.matchLength = mlBase + ((mlBits>0) ? BIT_readBitsFast(&seqState->DStream, mlBits) : 0); /* <= 16 bits */ + if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32)) + BIT_reloadDStream(&seqState->DStream); + if (MEM_64bits() && (totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog))) + BIT_reloadDStream(&seqState->DStream); + /* Verify that there is enough bits to read the rest of the data in 64-bit mode. */ + ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64); + + seq.litLength = llBase + ((llBits>0) ? BIT_readBitsFast(&seqState->DStream, llBits) : 0); /* <= 16 bits */ + if (MEM_32bits()) + BIT_reloadDStream(&seqState->DStream); + + { size_t const pos = seqState->pos + seq.litLength; + const BYTE* const matchBase = (seq.offset > pos) ? 
seqState->dictEnd : seqState->prefixStart; + seq.match = matchBase + pos - seq.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted. + * No consequence though : no memory access will occur, overly large offset will be detected in ZSTD_execSequenceLong() */ + seqState->pos = pos + seq.matchLength; + } + + /* ANS state update */ + ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */ + ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */ + if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ + ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */ + + return seq; +} + +FORCE_INLINE_TEMPLATE size_t +ZSTD_decompressSequencesLong_body( + ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset) +{ + const BYTE* ip = (const BYTE*)seqStart; + const BYTE* const iend = ip + seqSize; + BYTE* const ostart = (BYTE* const)dst; + BYTE* const oend = ostart + maxDstSize; + BYTE* op = ostart; + const BYTE* litPtr = dctx->litPtr; + const BYTE* const litEnd = litPtr + dctx->litSize; + const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart); + const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart); + const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); + + /* Regen sequences */ + if (nbSeq) { +#define STORED_SEQS 4 +#define STOSEQ_MASK (STORED_SEQS-1) +#define ADVANCED_SEQS 4 + seq_t sequences[STORED_SEQS]; + int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS); + seqState_t seqState; + int seqNb; + dctx->fseEntropy = 1; + { U32 i; for (i=0; ientropy.rep[i]; } + seqState.prefixStart = prefixStart; + seqState.pos = (size_t)(op-prefixStart); + seqState.dictEnd = dictEnd; + CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend-ip), corruption_detected); + ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, 
dctx->LLTptr); + ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr); + ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); + + /* prepare in advance */ + for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNbentropy.rep[i] = (U32)(seqState.prevOffset[i]); } +#undef STORED_SEQS +#undef STOSEQ_MASK +#undef ADVANCED_SEQS + } + + /* last literal segment */ + { size_t const lastLLSize = litEnd - litPtr; + if (lastLLSize > (size_t)(oend-op)) return ERROR(dstSize_tooSmall); + memcpy(op, litPtr, lastLLSize); + op += lastLLSize; + } + + return op-ostart; +} + +static size_t +ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset) +{ + return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); +} + + + +#if DYNAMIC_BMI2 + +static TARGET_ATTRIBUTE("bmi2") size_t +ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset) +{ + return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); +} + +static TARGET_ATTRIBUTE("bmi2") size_t +ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset) +{ + return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); +} + +#endif + +typedef size_t (*ZSTD_decompressSequences_t)( + ZSTD_DCtx *dctx, void *dst, size_t maxDstSize, + const void *seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset); + +static size_t ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset) +{ + 
DEBUGLOG(5, "ZSTD_decompressSequences"); +#if DYNAMIC_BMI2 + if (dctx->bmi2) { + return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); + } +#endif + return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); +} + +static size_t ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset) +{ + DEBUGLOG(5, "ZSTD_decompressSequencesLong"); +#if DYNAMIC_BMI2 + if (dctx->bmi2) { + return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); + } +#endif + return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); +} + +/* ZSTD_getLongOffsetsShare() : + * condition : offTable must be valid + * @return : "share" of long offsets (arbitrarily defined as > (1<<23)) + * compared to maximum possible of (1< 22) total += 1; + } + + assert(tableLog <= OffFSELog); + total <<= (OffFSELog - tableLog); /* scale to OffFSELog */ + + return total; +} + + +static size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, const int frame) +{ /* blockType == blockCompressed */ + const BYTE* ip = (const BYTE*)src; + /* isLongOffset must be true if there are long offsets. + * Offsets are long if they are larger than 2^STREAM_ACCUMULATOR_MIN. + * We don't expect that to be the case in 64-bit mode. + * In block mode, window size is not known, so we have to be conservative. 
(note: but it could be evaluated from current-lowLimit) + */ + ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN))); + DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize); + + if (srcSize >= ZSTD_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); + + /* Decode literals section */ + { size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize); + DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : %u", (U32)litCSize); + if (ZSTD_isError(litCSize)) return litCSize; + ip += litCSize; + srcSize -= litCSize; + } + + /* Build Decoding Tables */ + { int nbSeq; + size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, srcSize); + if (ZSTD_isError(seqHSize)) return seqHSize; + ip += seqHSize; + srcSize -= seqHSize; + + if ( (!frame || dctx->fParams.windowSize > (1<<24)) + && (nbSeq>0) ) { /* could probably use a larger nbSeq limit */ + U32 const shareLongOffsets = ZSTD_getLongOffsetsShare(dctx->OFTptr); + U32 const minShare = MEM_64bits() ? 
7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */ + if (shareLongOffsets >= minShare) + return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset); + } + + return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset); + } +} + + +static void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst) +{ + if (dst != dctx->previousDstEnd) { /* not contiguous */ + dctx->dictEnd = dctx->previousDstEnd; + dctx->virtualStart = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart)); + dctx->prefixStart = dst; + dctx->previousDstEnd = dst; + } +} + +size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize) +{ + size_t dSize; + ZSTD_checkContinuity(dctx, dst); + dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0); + dctx->previousDstEnd = (char*)dst + dSize; + return dSize; +} + + +/** ZSTD_insertBlock() : + insert `src` block into `dctx` history. Useful to track uncompressed blocks. 
*/ +ZSTDLIB_API size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize) +{ + ZSTD_checkContinuity(dctx, blockStart); + dctx->previousDstEnd = (const char*)blockStart + blockSize; + return blockSize; +} + + +static size_t ZSTD_generateNxBytes(void* dst, size_t dstCapacity, BYTE byte, size_t length) +{ + if (length > dstCapacity) return ERROR(dstSize_tooSmall); + memset(dst, byte, length); + return length; +} + +/** ZSTD_findFrameCompressedSize() : + * compatible with legacy mode + * `src` must point to the start of a ZSTD frame, ZSTD legacy frame, or skippable frame + * `srcSize` must be at least as large as the frame contained + * @return : the compressed size of the frame starting at `src` */ +size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize) +{ +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) + if (ZSTD_isLegacy(src, srcSize)) + return ZSTD_findFrameCompressedSizeLegacy(src, srcSize); +#endif + if ( (srcSize >= ZSTD_skippableHeaderSize) + && (MEM_readLE32(src) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START ) { + return ZSTD_skippableHeaderSize + MEM_readLE32((const BYTE*)src + ZSTD_frameIdSize); + } else { + const BYTE* ip = (const BYTE*)src; + const BYTE* const ipstart = ip; + size_t remainingSize = srcSize; + ZSTD_frameHeader zfh; + + /* Extract Frame Header */ + { size_t const ret = ZSTD_getFrameHeader(&zfh, src, srcSize); + if (ZSTD_isError(ret)) return ret; + if (ret > 0) return ERROR(srcSize_wrong); + } + + ip += zfh.headerSize; + remainingSize -= zfh.headerSize; + + /* Loop on each block */ + while (1) { + blockProperties_t blockProperties; + size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties); + if (ZSTD_isError(cBlockSize)) return cBlockSize; + + if (ZSTD_blockHeaderSize + cBlockSize > remainingSize) + return ERROR(srcSize_wrong); + + ip += ZSTD_blockHeaderSize + cBlockSize; + remainingSize -= ZSTD_blockHeaderSize + cBlockSize; + + if (blockProperties.lastBlock) 
break; + } + + if (zfh.checksumFlag) { /* Final frame content checksum */ + if (remainingSize < 4) return ERROR(srcSize_wrong); + ip += 4; + remainingSize -= 4; + } + + return ip - ipstart; + } +} + +/*! ZSTD_decompressFrame() : +* @dctx must be properly initialized */ +static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void** srcPtr, size_t *srcSizePtr) +{ + const BYTE* ip = (const BYTE*)(*srcPtr); + BYTE* const ostart = (BYTE* const)dst; + BYTE* const oend = ostart + dstCapacity; + BYTE* op = ostart; + size_t remainingSize = *srcSizePtr; + + /* check */ + if (remainingSize < ZSTD_frameHeaderSize_min+ZSTD_blockHeaderSize) + return ERROR(srcSize_wrong); + + /* Frame Header */ + { size_t const frameHeaderSize = ZSTD_frameHeaderSize(ip, ZSTD_frameHeaderSize_prefix); + if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize; + if (remainingSize < frameHeaderSize+ZSTD_blockHeaderSize) + return ERROR(srcSize_wrong); + CHECK_F( ZSTD_decodeFrameHeader(dctx, ip, frameHeaderSize) ); + ip += frameHeaderSize; remainingSize -= frameHeaderSize; + } + + /* Loop on each block */ + while (1) { + size_t decodedSize; + blockProperties_t blockProperties; + size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties); + if (ZSTD_isError(cBlockSize)) return cBlockSize; + + ip += ZSTD_blockHeaderSize; + remainingSize -= ZSTD_blockHeaderSize; + if (cBlockSize > remainingSize) return ERROR(srcSize_wrong); + + switch(blockProperties.blockType) + { + case bt_compressed: + decodedSize = ZSTD_decompressBlock_internal(dctx, op, oend-op, ip, cBlockSize, /* frame */ 1); + break; + case bt_raw : + decodedSize = ZSTD_copyRawBlock(op, oend-op, ip, cBlockSize); + break; + case bt_rle : + decodedSize = ZSTD_generateNxBytes(op, oend-op, *ip, blockProperties.origSize); + break; + case bt_reserved : + default: + return ERROR(corruption_detected); + } + + if (ZSTD_isError(decodedSize)) return decodedSize; + if (dctx->fParams.checksumFlag) + 
XXH64_update(&dctx->xxhState, op, decodedSize); + op += decodedSize; + ip += cBlockSize; + remainingSize -= cBlockSize; + if (blockProperties.lastBlock) break; + } + + if (dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) { + if ((U64)(op-ostart) != dctx->fParams.frameContentSize) { + return ERROR(corruption_detected); + } } + if (dctx->fParams.checksumFlag) { /* Frame content checksum verification */ + U32 const checkCalc = (U32)XXH64_digest(&dctx->xxhState); + U32 checkRead; + if (remainingSize<4) return ERROR(checksum_wrong); + checkRead = MEM_readLE32(ip); + if (checkRead != checkCalc) return ERROR(checksum_wrong); + ip += 4; + remainingSize -= 4; + } + + /* Allow caller to get size read */ + *srcPtr = ip; + *srcSizePtr = remainingSize; + return op-ostart; +} + +static const void* ZSTD_DDictDictContent(const ZSTD_DDict* ddict); +static size_t ZSTD_DDictDictSize(const ZSTD_DDict* ddict); + +static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict, size_t dictSize, + const ZSTD_DDict* ddict) +{ + void* const dststart = dst; + int moreThan1Frame = 0; + assert(dict==NULL || ddict==NULL); /* either dict or ddict set, not both */ + + if (ddict) { + dict = ZSTD_DDictDictContent(ddict); + dictSize = ZSTD_DDictDictSize(ddict); + } + + while (srcSize >= ZSTD_frameHeaderSize_prefix) { + +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) + if (ZSTD_isLegacy(src, srcSize)) { + size_t decodedSize; + size_t const frameSize = ZSTD_findFrameCompressedSizeLegacy(src, srcSize); + if (ZSTD_isError(frameSize)) return frameSize; + /* legacy support is not compatible with static dctx */ + if (dctx->staticSize) return ERROR(memory_allocation); + + decodedSize = ZSTD_decompressLegacy(dst, dstCapacity, src, frameSize, dict, dictSize); + + dst = (BYTE*)dst + decodedSize; + dstCapacity -= decodedSize; + + src = (const BYTE*)src + frameSize; + srcSize -= frameSize; + + continue; + 
} +#endif + + { U32 const magicNumber = MEM_readLE32(src); + DEBUGLOG(4, "reading magic number %08X (expecting %08X)", + (U32)magicNumber, (U32)ZSTD_MAGICNUMBER); + if ((magicNumber & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) { + size_t skippableSize; + if (srcSize < ZSTD_skippableHeaderSize) + return ERROR(srcSize_wrong); + skippableSize = MEM_readLE32((const BYTE*)src + ZSTD_frameIdSize) + + ZSTD_skippableHeaderSize; + if (srcSize < skippableSize) return ERROR(srcSize_wrong); + + src = (const BYTE *)src + skippableSize; + srcSize -= skippableSize; + continue; + } } + + if (ddict) { + /* we were called from ZSTD_decompress_usingDDict */ + CHECK_F(ZSTD_decompressBegin_usingDDict(dctx, ddict)); + } else { + /* this will initialize correctly with no dict if dict == NULL, so + * use this in all cases but ddict */ + CHECK_F(ZSTD_decompressBegin_usingDict(dctx, dict, dictSize)); + } + ZSTD_checkContinuity(dctx, dst); + + { const size_t res = ZSTD_decompressFrame(dctx, dst, dstCapacity, + &src, &srcSize); + if ( (ZSTD_getErrorCode(res) == ZSTD_error_prefix_unknown) + && (moreThan1Frame==1) ) { + /* at least one frame successfully completed, + * but following bytes are garbage : + * it's more likely to be a srcSize error, + * specifying more bytes than compressed size of frame(s). + * This error message replaces ERROR(prefix_unknown), + * which would be confusing, as the first header is actually correct. + * Note that one could be unlucky, it might be a corruption error instead, + * happening right at the place where we expect zstd magic bytes. + * But this is _much_ less likely than a srcSize field error. 
*/ + return ERROR(srcSize_wrong); + } + if (ZSTD_isError(res)) return res; + /* no need to bound check, ZSTD_decompressFrame already has */ + dst = (BYTE*)dst + res; + dstCapacity -= res; + } + moreThan1Frame = 1; + } /* while (srcSize >= ZSTD_frameHeaderSize_prefix) */ + + if (srcSize) return ERROR(srcSize_wrong); /* input not entirely consumed */ + + return (BYTE*)dst - (BYTE*)dststart; +} + +size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict, size_t dictSize) +{ + return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize, dict, dictSize, NULL); +} + + +size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + return ZSTD_decompress_usingDict(dctx, dst, dstCapacity, src, srcSize, NULL, 0); +} + + +size_t ZSTD_decompress(void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ +#if defined(ZSTD_HEAPMODE) && (ZSTD_HEAPMODE>=1) + size_t regenSize; + ZSTD_DCtx* const dctx = ZSTD_createDCtx(); + if (dctx==NULL) return ERROR(memory_allocation); + regenSize = ZSTD_decompressDCtx(dctx, dst, dstCapacity, src, srcSize); + ZSTD_freeDCtx(dctx); + return regenSize; +#else /* stack mode */ + ZSTD_DCtx dctx; + ZSTD_initDCtx_internal(&dctx); + return ZSTD_decompressDCtx(&dctx, dst, dstCapacity, src, srcSize); +#endif +} + + +/*-************************************** +* Advanced Streaming Decompression API +* Bufferless and synchronous +****************************************/ +size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx) { return dctx->expected; } + +ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx) { + switch(dctx->stage) + { + default: /* should not happen */ + assert(0); + case ZSTDds_getFrameHeaderSize: + case ZSTDds_decodeFrameHeader: + return ZSTDnit_frameHeader; + case ZSTDds_decodeBlockHeader: + return ZSTDnit_blockHeader; + case ZSTDds_decompressBlock: + return ZSTDnit_block; + case 
ZSTDds_decompressLastBlock: + return ZSTDnit_lastBlock; + case ZSTDds_checkChecksum: + return ZSTDnit_checksum; + case ZSTDds_decodeSkippableHeader: + case ZSTDds_skipFrame: + return ZSTDnit_skippableFrame; + } +} + /* ZSTD_isSkipFrame() : non-zero when the context is in the ZSTDds_skipFrame stage */ +static int ZSTD_isSkipFrame(ZSTD_DCtx* dctx) { return dctx->stage == ZSTDds_skipFrame; } + +/** ZSTD_decompressContinue() : + * srcSize : must be the exact nb of bytes expected (see ZSTD_nextSrcSizeToDecompress()) + * @return : nb of bytes generated into `dst` (necessarily <= `dstCapacity`) + * or an error code, which can be tested using ZSTD_isError() */ +size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + DEBUGLOG(5, "ZSTD_decompressContinue (srcSize:%u)", (U32)srcSize); + /* Sanity check */ + if (srcSize != dctx->expected) return ERROR(srcSize_wrong); /* not allowed */ + if (dstCapacity) ZSTD_checkContinuity(dctx, dst); + + switch (dctx->stage) + { + case ZSTDds_getFrameHeaderSize : /* start of a frame : the bytes received are stored in headerBuffer, and the rest of the header is requested for the next call */ + assert(src != NULL); + if (dctx->format == ZSTD_f_zstd1) { /* allows header */ + assert(srcSize >= ZSTD_frameIdSize); /* to read skippable magic number */ + if ((MEM_readLE32(src) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */ + memcpy(dctx->headerBuffer, src, srcSize); + dctx->expected = ZSTD_skippableHeaderSize - srcSize; /* remaining to load to get full skippable frame header */ + dctx->stage = ZSTDds_decodeSkippableHeader; + return 0; + } } + dctx->headerSize = ZSTD_frameHeaderSize_internal(src, srcSize, dctx->format); + if (ZSTD_isError(dctx->headerSize)) return dctx->headerSize; + memcpy(dctx->headerBuffer, src, srcSize); + dctx->expected = dctx->headerSize - srcSize; + dctx->stage = ZSTDds_decodeFrameHeader; + return 0; + + case ZSTDds_decodeFrameHeader: /* the remaining header bytes are appended after those already stored, then the complete header is decoded */ + assert(src != NULL); + memcpy(dctx->headerBuffer + (dctx->headerSize - srcSize), src, srcSize); + CHECK_F(ZSTD_decodeFrameHeader(dctx, dctx->headerBuffer, dctx->headerSize)); + dctx->expected = ZSTD_blockHeaderSize; + 
dctx->stage = ZSTDds_decodeBlockHeader; + return 0; + + case ZSTDds_decodeBlockHeader: + { blockProperties_t bp; + size_t const cBlockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp); + if (ZSTD_isError(cBlockSize)) return cBlockSize; + dctx->expected = cBlockSize; /* the next call must provide exactly the block content (see sanity check above) */ + dctx->bType = bp.blockType; + dctx->rleSize = bp.origSize; + if (cBlockSize) { + dctx->stage = bp.lastBlock ? ZSTDds_decompressLastBlock : ZSTDds_decompressBlock; + return 0; + } + /* empty block */ + if (bp.lastBlock) { + if (dctx->fParams.checksumFlag) { + dctx->expected = 4; + dctx->stage = ZSTDds_checkChecksum; + } else { + dctx->expected = 0; /* end of frame */ + dctx->stage = ZSTDds_getFrameHeaderSize; + } + } else { + dctx->expected = ZSTD_blockHeaderSize; /* jump to next header */ + dctx->stage = ZSTDds_decodeBlockHeader; + } + return 0; + } + + case ZSTDds_decompressLastBlock: + case ZSTDds_decompressBlock: + DEBUGLOG(5, "ZSTD_decompressContinue: case ZSTDds_decompressBlock"); + { size_t rSize; + switch(dctx->bType) + { + case bt_compressed: + DEBUGLOG(5, "ZSTD_decompressContinue: case bt_compressed"); + rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 1); + break; + case bt_raw : + rSize = ZSTD_copyRawBlock(dst, dstCapacity, src, srcSize); + break; + case bt_rle : + rSize = ZSTD_setRleBlock(dst, dstCapacity, src, srcSize, dctx->rleSize); + break; + case bt_reserved : /* should never happen */ + default: + return ERROR(corruption_detected); + } + if (ZSTD_isError(rSize)) return rSize; + DEBUGLOG(5, "ZSTD_decompressContinue: decoded size from block : %u", (U32)rSize); + dctx->decodedSize += rSize; + if (dctx->fParams.checksumFlag) XXH64_update(&dctx->xxhState, dst, rSize); + + if (dctx->stage == ZSTDds_decompressLastBlock) { /* end of frame */ + DEBUGLOG(4, "ZSTD_decompressContinue: decoded size from frame : %u", (U32)dctx->decodedSize); + if (dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) { + if (dctx->decodedSize != 
dctx->fParams.frameContentSize) { + return ERROR(corruption_detected); + } } + if (dctx->fParams.checksumFlag) { /* another round for frame checksum */ + dctx->expected = 4; + dctx->stage = ZSTDds_checkChecksum; + } else { + dctx->expected = 0; /* ends here */ + dctx->stage = ZSTDds_getFrameHeaderSize; + } + } else { + dctx->stage = ZSTDds_decodeBlockHeader; + dctx->expected = ZSTD_blockHeaderSize; + dctx->previousDstEnd = (char*)dst + rSize; + } + return rSize; + } + + case ZSTDds_checkChecksum: + assert(srcSize == 4); /* guaranteed by dctx->expected */ + { U32 const h32 = (U32)XXH64_digest(&dctx->xxhState); + U32 const check32 = MEM_readLE32(src); + DEBUGLOG(4, "ZSTD_decompressContinue: checksum : calculated %08X :: %08X read", h32, check32); + if (check32 != h32) return ERROR(checksum_wrong); + dctx->expected = 0; + dctx->stage = ZSTDds_getFrameHeaderSize; + return 0; + } + + case ZSTDds_decodeSkippableHeader: + assert(src != NULL); + assert(srcSize <= ZSTD_skippableHeaderSize); + memcpy(dctx->headerBuffer + (ZSTD_skippableHeaderSize - srcSize), src, srcSize); /* complete skippable header */ + dctx->expected = MEM_readLE32(dctx->headerBuffer + ZSTD_frameIdSize); /* note : dctx->expected can grow seriously large, beyond local buffer size */ + dctx->stage = ZSTDds_skipFrame; + return 0; + + case ZSTDds_skipFrame: /* the skipped payload is not read ; the state machine returns to ZSTDds_getFrameHeaderSize with expected == 0 */ + dctx->expected = 0; + dctx->stage = ZSTDds_getFrameHeaderSize; + return 0; + + default: + return ERROR(GENERIC); /* impossible */ + } +} + + /* ZSTD_refDictContent() : makes `dict` the current prefix : the end of the previous prefix is recorded in dictEnd, virtualStart is placed (previousDstEnd - prefixStart) bytes before `dict`, and previousDstEnd is set to the end of `dict`. Always returns 0. */ +static size_t ZSTD_refDictContent(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) +{ + dctx->dictEnd = dctx->previousDstEnd; + dctx->virtualStart = (const char*)dict - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart)); + dctx->prefixStart = dict; + dctx->previousDstEnd = (const char*)dict + dictSize; + return 0; +} + +/* ZSTD_loadEntropy() : + * dict : must point at beginning of a valid zstd dictionary + * @return : size of entropy tables read */ +static size_t 
ZSTD_loadEntropy(ZSTD_entropyDTables_t* entropy, const void* const dict, size_t const dictSize) +{ + const BYTE* dictPtr = (const BYTE*)dict; + const BYTE* const dictEnd = dictPtr + dictSize; + + if (dictSize <= 8) return ERROR(dictionary_corrupted); + dictPtr += 8; /* skip header = magic + dictID */ + + /* Huffman decoding table, read into entropy->hufTable */ + { size_t const hSize = HUF_readDTableX2_wksp( + entropy->hufTable, dictPtr, dictEnd - dictPtr, + entropy->workspace, sizeof(entropy->workspace)); + if (HUF_isError(hSize)) return ERROR(dictionary_corrupted); + dictPtr += hSize; + } + /* offset codes : normalized counts are read, checked against MaxOff and OffFSELog, then turned into entropy->OFTable */ + { short offcodeNCount[MaxOff+1]; + U32 offcodeMaxValue = MaxOff, offcodeLog; + size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr); + if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted); + if (offcodeMaxValue > MaxOff) return ERROR(dictionary_corrupted); + if (offcodeLog > OffFSELog) return ERROR(dictionary_corrupted); + ZSTD_buildFSETable(entropy->OFTable, + offcodeNCount, offcodeMaxValue, + OF_base, OF_bits, + offcodeLog); + dictPtr += offcodeHeaderSize; + } + /* match lengths : same sequence, checked against MaxML and MLFSELog, result stored in entropy->MLTable */ + { short matchlengthNCount[MaxML+1]; + unsigned matchlengthMaxValue = MaxML, matchlengthLog; + size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr); + if (FSE_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted); + if (matchlengthMaxValue > MaxML) return ERROR(dictionary_corrupted); + if (matchlengthLog > MLFSELog) return ERROR(dictionary_corrupted); + ZSTD_buildFSETable(entropy->MLTable, + matchlengthNCount, matchlengthMaxValue, + ML_base, ML_bits, + matchlengthLog); + dictPtr += matchlengthHeaderSize; + } + /* literal lengths : same sequence, checked against MaxLL and LLFSELog, result stored in entropy->LLTable */ + { short litlengthNCount[MaxLL+1]; + unsigned litlengthMaxValue = MaxLL, litlengthLog; + size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr); + if (FSE_isError(litlengthHeaderSize)) return 
ERROR(dictionary_corrupted); + if (litlengthMaxValue > MaxLL) return ERROR(dictionary_corrupted); + if (litlengthLog > LLFSELog) return ERROR(dictionary_corrupted); + ZSTD_buildFSETable(entropy->LLTable, + litlengthNCount, litlengthMaxValue, + LL_base, LL_bits, + litlengthLog); + dictPtr += litlengthHeaderSize; + } + /* 3 starting repcodes, 4 bytes each, read as little-endian 32-bit values ; each one must be non-zero and smaller than the size of the content that follows them */ + if (dictPtr+12 > dictEnd) return ERROR(dictionary_corrupted); + { int i; + size_t const dictContentSize = (size_t)(dictEnd - (dictPtr+12)); + for (i=0; i<3; i++) { + U32 const rep = MEM_readLE32(dictPtr); dictPtr += 4; + if (rep==0 || rep >= dictContentSize) return ERROR(dictionary_corrupted); + entropy->rep[i] = rep; + } } + + return dictPtr - (const BYTE*)dict; +} + /* ZSTD_decompress_insertDictionary() : a buffer shorter than 8 bytes, or not starting with ZSTD_MAGIC_DICTIONARY, is referenced as raw content ; otherwise dictID and entropy tables are loaded, and only the bytes located after the entropy section are referenced as content. @return : result of ZSTD_refDictContent(), or dictionary_corrupted when ZSTD_loadEntropy() fails */ +static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) +{ + if (dictSize < 8) return ZSTD_refDictContent(dctx, dict, dictSize); + { U32 const magic = MEM_readLE32(dict); + if (magic != ZSTD_MAGIC_DICTIONARY) { + return ZSTD_refDictContent(dctx, dict, dictSize); /* pure content mode */ + } } + dctx->dictID = MEM_readLE32((const char*)dict + ZSTD_frameIdSize); + + /* load entropy tables */ + { size_t const eSize = ZSTD_loadEntropy(&dctx->entropy, dict, dictSize); + if (ZSTD_isError(eSize)) return ERROR(dictionary_corrupted); + dict = (const char*)dict + eSize; + dictSize -= eSize; + } + dctx->litEntropy = dctx->fseEntropy = 1; + + /* reference dictionary content */ + return ZSTD_refDictContent(dctx, dict, dictSize); +} + +/* Note : this function cannot fail */ +size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx) +{ + assert(dctx != NULL); + dctx->expected = ZSTD_startingInputLength(dctx->format); /* dctx->format must be properly set */ + dctx->stage = ZSTDds_getFrameHeaderSize; + dctx->decodedSize = 0; + dctx->previousDstEnd = NULL; + dctx->prefixStart = NULL; + dctx->virtualStart = NULL; + dctx->dictEnd = NULL; + dctx->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */ + dctx->litEntropy = dctx->fseEntropy 
= 0; + dctx->dictID = 0; + ZSTD_STATIC_ASSERT(sizeof(dctx->entropy.rep) == sizeof(repStartValue)); + memcpy(dctx->entropy.rep, repStartValue, sizeof(repStartValue)); /* initial repcodes */ + dctx->LLTptr = dctx->entropy.LLTable; + dctx->MLTptr = dctx->entropy.MLTable; + dctx->OFTptr = dctx->entropy.OFTable; + dctx->HUFptr = dctx->entropy.hufTable; + return 0; +} + /* ZSTD_decompressBegin_usingDict() : resets `dctx` with ZSTD_decompressBegin(), then loads `dict` when it is non-NULL and non-empty */ +size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) +{ + CHECK_F( ZSTD_decompressBegin(dctx) ); + if (dict && dictSize) + CHECK_E(ZSTD_decompress_insertDictionary(dctx, dict, dictSize), dictionary_corrupted); + return 0; +} + + +/* ====== ZSTD_DDict ====== */ + +struct ZSTD_DDict_s { + void* dictBuffer; /* copy of the dictionary allocated for this DDict, NULL when the content is only referenced (see ZSTD_initDDict_internal()) */ + const void* dictContent; /* start of the dictionary bytes used by this DDict */ + size_t dictSize; /* size of dictContent, in bytes */ + ZSTD_entropyDTables_t entropy; /* tables filled by ZSTD_loadEntropy() ; entropyPresent tells whether they were loaded */ + U32 dictID; /* 0 unless the content starts with ZSTD_MAGIC_DICTIONARY (see ZSTD_loadEntropy_inDDict()) */ + U32 entropyPresent; /* 1 once entropy tables have been loaded, 0 otherwise */ + ZSTD_customMem cMem; /* allocator used for dictBuffer and for the DDict itself */ +}; /* typedef'd to ZSTD_DDict within "zstd.h" */ + +static const void* ZSTD_DDictDictContent(const ZSTD_DDict* ddict) +{ + return ddict->dictContent; +} + +static size_t ZSTD_DDictDictSize(const ZSTD_DDict* ddict) +{ + return ddict->dictSize; +} + /* ZSTD_decompressBegin_usingDDict() : resets `dstDCtx`, then, when `ddict` is not NULL, sets the window pointers on the DDict content ; if the DDict holds entropy tables, the table pointers reference the DDict tables directly (no copy) and the 3 repcodes are copied */ +size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dstDCtx, const ZSTD_DDict* ddict) +{ + CHECK_F( ZSTD_decompressBegin(dstDCtx) ); + if (ddict) { /* support begin on NULL */ + dstDCtx->dictID = ddict->dictID; + dstDCtx->prefixStart = ddict->dictContent; + dstDCtx->virtualStart = ddict->dictContent; + dstDCtx->dictEnd = (const BYTE*)ddict->dictContent + ddict->dictSize; + dstDCtx->previousDstEnd = dstDCtx->dictEnd; + if (ddict->entropyPresent) { + dstDCtx->litEntropy = 1; + dstDCtx->fseEntropy = 1; + dstDCtx->LLTptr = ddict->entropy.LLTable; + dstDCtx->MLTptr = ddict->entropy.MLTable; + dstDCtx->OFTptr = ddict->entropy.OFTable; + dstDCtx->HUFptr = ddict->entropy.hufTable; + dstDCtx->entropy.rep[0] = ddict->entropy.rep[0]; + dstDCtx->entropy.rep[1] = ddict->entropy.rep[1]; + dstDCtx->entropy.rep[2] = ddict->entropy.rep[2]; + } else { + dstDCtx->litEntropy = 0; + dstDCtx->fseEntropy = 0; + 
} + } + return 0; +} + /* ZSTD_loadEntropy_inDDict() : fills ddict->dictID and ddict->entropyPresent. With ZSTD_dct_rawContent nothing is parsed. Content shorter than 8 bytes, or not starting with ZSTD_MAGIC_DICTIONARY, is accepted as raw content, except with ZSTD_dct_fullDict where dictionary_corrupted is returned. Otherwise dictID is read and entropy tables are loaded. */ +static size_t ZSTD_loadEntropy_inDDict(ZSTD_DDict* ddict, ZSTD_dictContentType_e dictContentType) +{ + ddict->dictID = 0; + ddict->entropyPresent = 0; + if (dictContentType == ZSTD_dct_rawContent) return 0; + + if (ddict->dictSize < 8) { + if (dictContentType == ZSTD_dct_fullDict) + return ERROR(dictionary_corrupted); /* only accept specified dictionaries */ + return 0; /* pure content mode */ + } + { U32 const magic = MEM_readLE32(ddict->dictContent); + if (magic != ZSTD_MAGIC_DICTIONARY) { + if (dictContentType == ZSTD_dct_fullDict) + return ERROR(dictionary_corrupted); /* only accept specified dictionaries */ + return 0; /* pure content mode */ + } + } + ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + ZSTD_frameIdSize); + + /* load entropy tables */ + CHECK_E( ZSTD_loadEntropy(&ddict->entropy, ddict->dictContent, ddict->dictSize), dictionary_corrupted ); + ddict->entropyPresent = 1; + return 0; +} + + /* ZSTD_initDDict_internal() : references `dict` (ZSTD_dlm_byRef, NULL dict or empty dict), or copies it into a buffer allocated with ddict->cMem, which is read here and must therefore be set by the caller beforehand ; then parses the content. @return : 0, or an error code (memory_allocation, or the error reported by ZSTD_loadEntropy_inDDict()) */ +static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict, + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType) +{ + if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dict) || (!dictSize)) { + ddict->dictBuffer = NULL; + ddict->dictContent = dict; + } else { + void* const internalBuffer = ZSTD_malloc(dictSize, ddict->cMem); + ddict->dictBuffer = internalBuffer; + ddict->dictContent = internalBuffer; + if (!internalBuffer) return ERROR(memory_allocation); + memcpy(internalBuffer, dict, dictSize); + } + ddict->dictSize = dictSize; + ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */ + + /* parse dictionary content */ + CHECK_F( ZSTD_loadEntropy_inDDict(ddict, dictContentType) ); + + return 0; +} + /* ZSTD_createDDict_advanced() : allocates a DDict with `customMem` and initializes it from `dict`. @return : the new DDict, or NULL when only one of customAlloc / customFree is provided, when allocation fails, or when initialization fails */ +ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_customMem customMem) +{ + if (!customMem.customAlloc ^ 
!customMem.customFree) return NULL; /* customAlloc and customFree must be both set or both NULL */ + + { ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_malloc(sizeof(ZSTD_DDict), customMem); + if (!ddict) return NULL; + ddict->cMem = customMem; /* must be set before ZSTD_initDDict_internal(), which allocates with ddict->cMem */ + + if (ZSTD_isError( ZSTD_initDDict_internal(ddict, dict, dictSize, dictLoadMethod, dictContentType) )) { + ZSTD_freeDDict(ddict); /* also releases the internal dictionary copy, when one was allocated */ + return NULL; + } + + return ddict; + } +} + +/*! ZSTD_createDDict() : +* Create a digested dictionary, to start decompression without startup delay. +* `dict` content is copied inside DDict. +* Consequently, `dict` can be released after `ZSTD_DDict` creation */ +ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize) +{ + ZSTD_customMem const allocator = { NULL, NULL, NULL }; + return ZSTD_createDDict_advanced(dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto, allocator); +} + +/*! ZSTD_createDDict_byReference() : + * Create a digested dictionary, to start decompression without startup delay. + * Dictionary content is simply referenced, it will be accessed during decompression. + * Warning : dictBuffer must outlive DDict (DDict must be freed before dictBuffer) */ +ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize) +{ + ZSTD_customMem const allocator = { NULL, NULL, NULL }; + return ZSTD_createDDict_advanced(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, allocator); +} + + /* ZSTD_initStaticDDict() : builds a DDict inside `workspace`, without any allocation. `workspace` must be aligned on 8 bytes and provide sizeof(ZSTD_DDict) bytes, plus dictSize bytes unless ZSTD_dlm_byRef is selected. @return : the DDict, or NULL on misalignment, insufficient space, or initialization error */ +const ZSTD_DDict* ZSTD_initStaticDDict( + void* workspace, size_t workspaceSize, + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType) +{ + size_t const neededSpace = + sizeof(ZSTD_DDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 
0 : dictSize); + ZSTD_DDict* const ddict = (ZSTD_DDict*)workspace; + assert(workspace != NULL); + assert(dict != NULL); + if ((size_t)workspace & 7) return NULL; /* 8-aligned */ + if (workspaceSize < neededSpace) return NULL; + if (dictLoadMethod == ZSTD_dlm_byCopy) { + memcpy(ddict+1, dict, dictSize); /* local copy */ + dict = ddict+1; /* the copy sits right after the ZSTD_DDict structure, which is why ZSTD_dlm_byRef is passed below */ + } + if (ZSTD_isError( ZSTD_initDDict_internal(ddict, dict, dictSize, ZSTD_dlm_byRef, dictContentType) )) + return NULL; + return ddict; +} + + /* ZSTD_freeDDict() : releases the internal dictionary copy (dictBuffer) and the DDict itself, using the allocator stored in the DDict. @return : always 0 */ +size_t ZSTD_freeDDict(ZSTD_DDict* ddict) +{ + if (ddict==NULL) return 0; /* support free on NULL */ + { ZSTD_customMem const cMem = ddict->cMem; + ZSTD_free(ddict->dictBuffer, cMem); + ZSTD_free(ddict, cMem); + return 0; + } +} + +/*! ZSTD_estimateDDictSize() : + * Estimate amount of memory that will be needed to create a dictionary for decompression. + * Note : dictionaries created by reference using ZSTD_dlm_byRef are smaller */ +size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod) +{ + return sizeof(ZSTD_DDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize); +} + /* ZSTD_sizeof_DDict() : size of the structure, plus dictSize when the DDict holds its own copy of the dictionary (dictBuffer != NULL) */ +size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict) +{ + if (ddict==NULL) return 0; /* support sizeof on NULL */ + return sizeof(*ddict) + (ddict->dictBuffer ? ddict->dictSize : 0) ; +} + +/*! ZSTD_getDictID_fromDict() : + * Provides the dictID stored within dictionary. + * if @return == 0, the dictionary is not conformant with Zstandard specification. + * It can still be loaded, but as a content-only dictionary. */ +unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize) +{ + if (dictSize < 8) return 0; + if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) return 0; + return MEM_readLE32((const char*)dict + ZSTD_frameIdSize); +} + +/*! ZSTD_getDictID_fromDDict() : + * Provides the dictID of the dictionary loaded into `ddict`. + * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. 
+ * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ +unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict) +{ + if (ddict==NULL) return 0; + return ZSTD_getDictID_fromDict(ddict->dictContent, ddict->dictSize); +} + +/*! ZSTD_getDictID_fromFrame() : + * Provides the dictID required to decompress the frame stored within `src`. + * If @return == 0, the dictID could not be decoded. + * This could be for one of the following reasons : + * - The frame does not require a dictionary (most common case). + * - The frame was built with dictID intentionally removed. + * Needed dictionary is a hidden information. + * Note : this use case also happens when using a non-conformant dictionary. + * - `srcSize` is too small, and as a result, frame header could not be decoded. + * Note : possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`. + * - This is not a Zstandard frame. + * When identifying the exact failure cause, it's possible to use + * ZSTD_getFrameHeader(), which will provide a more precise error code. */ +unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize) +{ + ZSTD_frameHeader zfp = { 0, 0, 0, ZSTD_frame, 0, 0, 0 }; + size_t const hError = ZSTD_getFrameHeader(&zfp, src, srcSize); + if (ZSTD_isError(hError)) return 0; /* header could not be decoded : reported as "no dictID" */ + return zfp.dictID; +} + + +/*! ZSTD_decompress_usingDDict() : +* Decompression using a pre-digested Dictionary +* Use dictionary without significant overhead. 
*/ +size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_DDict* ddict) +{ + /* pass content and size in case legacy frames are encountered */ + return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize, + NULL, 0, + ddict); +} + + +/*===================================== +* Streaming decompression +*====================================*/ + +ZSTD_DStream* ZSTD_createDStream(void) +{ + DEBUGLOG(3, "ZSTD_createDStream"); + return ZSTD_createDStream_advanced(ZSTD_defaultCMem); +} + +ZSTD_DStream* ZSTD_initStaticDStream(void *workspace, size_t workspaceSize) +{ + return ZSTD_initStaticDCtx(workspace, workspaceSize); +} + +ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem) +{ + return ZSTD_createDCtx_advanced(customMem); +} + +size_t ZSTD_freeDStream(ZSTD_DStream* zds) +{ + return ZSTD_freeDCtx(zds); +} + + +/* *** Initialization *** */ + +size_t ZSTD_DStreamInSize(void) { return ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize; } +size_t ZSTD_DStreamOutSize(void) { return ZSTD_BLOCKSIZE_MAX; } + +size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType) +{ + if (dctx->streamStage != zdss_init) return ERROR(stage_wrong); + ZSTD_freeDDict(dctx->ddictLocal); + if (dict && dictSize >= 8) { + dctx->ddictLocal = ZSTD_createDDict_advanced(dict, dictSize, dictLoadMethod, dictContentType, dctx->customMem); + if (dctx->ddictLocal == NULL) return ERROR(memory_allocation); + } else { + dctx->ddictLocal = NULL; + } + dctx->ddict = dctx->ddictLocal; + return 0; +} + +size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) +{ + return ZSTD_DCtx_loadDictionary_advanced(dctx, dict, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto); +} + +size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) +{ + return 
ZSTD_DCtx_loadDictionary_advanced(dctx, dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto); +} + /* ZSTD_DCtx_refPrefix_advanced() : implemented as a by-reference dictionary load. NOTE(review): it therefore inherits the `dictSize >= 8` condition of ZSTD_DCtx_loadDictionary_advanced(), so a shorter prefix is ignored - confirm this is intended. */ +size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType) +{ + return ZSTD_DCtx_loadDictionary_advanced(dctx, prefix, prefixSize, ZSTD_dlm_byRef, dictContentType); +} + /* ZSTD_DCtx_refPrefix() : same as ZSTD_DCtx_refPrefix_advanced(), with content type ZSTD_dct_rawContent */ +size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize) +{ + return ZSTD_DCtx_refPrefix_advanced(dctx, prefix, prefixSize, ZSTD_dct_rawContent); +} + + +/* ZSTD_initDStream_usingDict() : + * return : expected size, aka ZSTD_frameHeaderSize_prefix, + * or the error code reported by ZSTD_DCtx_loadDictionary() + * (presumably forwarded by CHECK_F, which is defined outside this section) */ +size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize) +{ + DEBUGLOG(4, "ZSTD_initDStream_usingDict"); + zds->streamStage = zdss_init; + zds->noForwardProgress = 0; + CHECK_F( ZSTD_DCtx_loadDictionary(zds, dict, dictSize) ); + return ZSTD_frameHeaderSize_prefix; +} + +/* note : this variant can't fail */ +size_t ZSTD_initDStream(ZSTD_DStream* zds) +{ + DEBUGLOG(4, "ZSTD_initDStream"); + return ZSTD_initDStream_usingDict(zds, NULL, 0); +} + /* ZSTD_DCtx_refDDict() : makes `dctx` use `ddict`, which is only referenced (no copy, dctx->ddictLocal is untouched). Only allowed in zdss_init stage, otherwise stage_wrong is returned. */ +size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict) +{ + if (dctx->streamStage != zdss_init) return ERROR(stage_wrong); + dctx->ddict = ddict; + return 0; +} + +/* ZSTD_initDStream_usingDDict() : + * ddict will just be referenced, and must outlive decompression session + * this function cannot fail */ +size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* dctx, const ZSTD_DDict* ddict) +{ + size_t const initResult = ZSTD_initDStream(dctx); + dctx->ddict = ddict; + return initResult; +} + +/* ZSTD_resetDStream() : + * return : expected size, aka ZSTD_frameHeaderSize_prefix. 
+ * this function cannot fail */ +size_t ZSTD_resetDStream(ZSTD_DStream* dctx) +{ + DEBUGLOG(4, "ZSTD_resetDStream"); + dctx->streamStage = zdss_loadHeader; + dctx->lhSize = dctx->inPos = dctx->outStart = dctx->outEnd = 0; + dctx->legacyVersion = 0; + dctx->hostageByte = 0; + return ZSTD_frameHeaderSize_prefix; +} + /* ZSTD_setDStreamParameter() : only DStream_p_maxWindowSize is supported, any other parameter returns parameter_unsupported. A value of 0 selects (U32)(-1). Only allowed in zdss_init stage, otherwise stage_wrong is returned. */ +size_t ZSTD_setDStreamParameter(ZSTD_DStream* dctx, + ZSTD_DStreamParameter_e paramType, unsigned paramValue) +{ + if (dctx->streamStage != zdss_init) return ERROR(stage_wrong); + switch(paramType) + { + default : return ERROR(parameter_unsupported); + case DStream_p_maxWindowSize : + DEBUGLOG(4, "setting maxWindowSize = %u KB", paramValue >> 10); + dctx->maxWindowSize = paramValue ? paramValue : (U32)(-1); + break; + } + return 0; +} + /* ZSTD_DCtx_setMaxWindowSize() : stores `maxWindowSize` as is (no special meaning for 0 here, unlike ZSTD_setDStreamParameter()). Only allowed in zdss_init stage. */ +size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize) +{ + if (dctx->streamStage != zdss_init) return ERROR(stage_wrong); + dctx->maxWindowSize = maxWindowSize; + return 0; +} + /* ZSTD_DCtx_setFormat() : selects the frame format expected by `dctx`. Only allowed in zdss_init stage. */ +size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format) +{ + DEBUGLOG(4, "ZSTD_DCtx_setFormat : %u", (unsigned)format); + if (dctx->streamStage != zdss_init) return ERROR(stage_wrong); + dctx->format = format; + return 0; +} + + +size_t ZSTD_sizeof_DStream(const ZSTD_DStream* dctx) +{ + return ZSTD_sizeof_DCtx(dctx); +} + /* ZSTD_decodingBufferSize_min() : computes min(frameContentSize, windowSize + blockSize + 2*WILDCOPY_OVERLENGTH), with blockSize = min(windowSize, ZSTD_BLOCKSIZE_MAX). @return : that size, or frameParameter_windowTooLarge when it does not fit in a size_t */ +size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize) +{ + size_t const blockSize = (size_t) MIN(windowSize, ZSTD_BLOCKSIZE_MAX); + unsigned long long const neededRBSize = windowSize + blockSize + (WILDCOPY_OVERLENGTH * 2); + unsigned long long const neededSize = MIN(frameContentSize, neededRBSize); + size_t const minRBSize = (size_t) neededSize; + if ((unsigned long long)minRBSize != neededSize) return ERROR(frameParameter_windowTooLarge); + return minRBSize; +} + +size_t ZSTD_estimateDStreamSize(size_t windowSize) +{ + size_t const blockSize = MIN(windowSize, ZSTD_BLOCKSIZE_MAX); + size_t const inBuffSize = blockSize; /* no block can be larger */ 
+ size_t const outBuffSize = ZSTD_decodingBufferSize_min(windowSize, ZSTD_CONTENTSIZE_UNKNOWN); + return ZSTD_estimateDCtxSize() + inBuffSize + outBuffSize; +} + +size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize) +{ + U32 const windowSizeMax = 1U << ZSTD_WINDOWLOG_MAX; /* note : should be user-selectable */ + ZSTD_frameHeader zfh; + size_t const err = ZSTD_getFrameHeader(&zfh, src, srcSize); + if (ZSTD_isError(err)) return err; + if (err>0) return ERROR(srcSize_wrong); + if (zfh.windowSize > windowSizeMax) + return ERROR(frameParameter_windowTooLarge); + return ZSTD_estimateDStreamSize((size_t)zfh.windowSize); +} + + +/* ***** Decompression ***** */ + +MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + size_t const length = MIN(dstCapacity, srcSize); + memcpy(dst, src, length); + return length; +} + + +size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input) +{ + const char* const istart = (const char*)(input->src) + input->pos; + const char* const iend = (const char*)(input->src) + input->size; + const char* ip = istart; + char* const ostart = (char*)(output->dst) + output->pos; + char* const oend = (char*)(output->dst) + output->size; + char* op = ostart; + U32 someMoreWork = 1; + + DEBUGLOG(5, "ZSTD_decompressStream"); + if (input->pos > input->size) { /* forbidden */ + DEBUGLOG(5, "in: pos: %u vs size: %u", + (U32)input->pos, (U32)input->size); + return ERROR(srcSize_wrong); + } + if (output->pos > output->size) { /* forbidden */ + DEBUGLOG(5, "out: pos: %u vs size: %u", + (U32)output->pos, (U32)output->size); + return ERROR(dstSize_tooSmall); + } + DEBUGLOG(5, "input size : %u", (U32)(input->size - input->pos)); + + while (someMoreWork) { + switch(zds->streamStage) + { + case zdss_init : + DEBUGLOG(5, "stage zdss_init => transparent reset "); + ZSTD_resetDStream(zds); /* transparent reset on starting decoding a new frame */ + /* fall-through */ + + 
case zdss_loadHeader : + DEBUGLOG(5, "stage zdss_loadHeader (srcSize : %u)", (U32)(iend - ip)); +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) + if (zds->legacyVersion) { + /* legacy support is incompatible with static dctx */ + if (zds->staticSize) return ERROR(memory_allocation); + { size_t const hint = ZSTD_decompressLegacyStream(zds->legacyContext, zds->legacyVersion, output, input); + if (hint==0) zds->streamStage = zdss_init; + return hint; + } } +#endif + { size_t const hSize = ZSTD_getFrameHeader_advanced(&zds->fParams, zds->headerBuffer, zds->lhSize, zds->format); + DEBUGLOG(5, "header size : %u", (U32)hSize); + if (ZSTD_isError(hSize)) { +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) + U32 const legacyVersion = ZSTD_isLegacy(istart, iend-istart); + if (legacyVersion) { + const void* const dict = zds->ddict ? zds->ddict->dictContent : NULL; + size_t const dictSize = zds->ddict ? zds->ddict->dictSize : 0; + DEBUGLOG(5, "ZSTD_decompressStream: detected legacy version v0.%u", legacyVersion); + /* legacy support is incompatible with static dctx */ + if (zds->staticSize) return ERROR(memory_allocation); + CHECK_F(ZSTD_initLegacyStream(&zds->legacyContext, + zds->previousLegacyVersion, legacyVersion, + dict, dictSize)); + zds->legacyVersion = zds->previousLegacyVersion = legacyVersion; + { size_t const hint = ZSTD_decompressLegacyStream(zds->legacyContext, legacyVersion, output, input); + if (hint==0) zds->streamStage = zdss_init; /* or stay in stage zdss_loadHeader */ + return hint; + } } +#endif + return hSize; /* error */ + } + if (hSize != 0) { /* need more input */ + size_t const toLoad = hSize - zds->lhSize; /* if hSize!=0, hSize > zds->lhSize */ + size_t const remainingInput = (size_t)(iend-ip); + assert(iend >= ip); + if (toLoad > remainingInput) { /* not enough input to load full header */ + if (remainingInput > 0) { + memcpy(zds->headerBuffer + zds->lhSize, ip, remainingInput); + zds->lhSize += remainingInput; + } + 
input->pos = input->size; + return (MAX(ZSTD_frameHeaderSize_min, hSize) - zds->lhSize) + ZSTD_blockHeaderSize; /* remaining header bytes + next block header */ + } + assert(ip != NULL); + memcpy(zds->headerBuffer + zds->lhSize, ip, toLoad); zds->lhSize = hSize; ip += toLoad; + break; + } } + + /* check for single-pass mode opportunity */ + if (zds->fParams.frameContentSize && zds->fParams.windowSize /* skippable frame if == 0 */ + && (U64)(size_t)(oend-op) >= zds->fParams.frameContentSize) { + size_t const cSize = ZSTD_findFrameCompressedSize(istart, iend-istart); + if (cSize <= (size_t)(iend-istart)) { + /* shortcut : using single-pass mode */ + size_t const decompressedSize = ZSTD_decompress_usingDDict(zds, op, oend-op, istart, cSize, zds->ddict); + if (ZSTD_isError(decompressedSize)) return decompressedSize; + DEBUGLOG(4, "shortcut to single-pass ZSTD_decompress_usingDDict()") + ip = istart + cSize; + op += decompressedSize; + zds->expected = 0; + zds->streamStage = zdss_init; + someMoreWork = 0; + break; + } } + + /* Consume header (see ZSTDds_decodeFrameHeader) */ + DEBUGLOG(4, "Consume header"); + CHECK_F(ZSTD_decompressBegin_usingDDict(zds, zds->ddict)); + + if ((MEM_readLE32(zds->headerBuffer) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */ + zds->expected = MEM_readLE32(zds->headerBuffer + ZSTD_frameIdSize); + zds->stage = ZSTDds_skipFrame; + } else { + CHECK_F(ZSTD_decodeFrameHeader(zds, zds->headerBuffer, zds->lhSize)); + zds->expected = ZSTD_blockHeaderSize; + zds->stage = ZSTDds_decodeBlockHeader; + } + + /* control buffer memory usage */ + DEBUGLOG(4, "Control max memory usage (%u KB <= max %u KB)", + (U32)(zds->fParams.windowSize >>10), + (U32)(zds->maxWindowSize >> 10) ); + zds->fParams.windowSize = MAX(zds->fParams.windowSize, 1U << ZSTD_WINDOWLOG_ABSOLUTEMIN); + if (zds->fParams.windowSize > zds->maxWindowSize) return ERROR(frameParameter_windowTooLarge); + + /* Adapt buffer sizes to frame header instructions */ + { size_t 
const neededInBuffSize = MAX(zds->fParams.blockSizeMax, 4 /* frame checksum */); + size_t const neededOutBuffSize = ZSTD_decodingBufferSize_min(zds->fParams.windowSize, zds->fParams.frameContentSize); + if ((zds->inBuffSize < neededInBuffSize) || (zds->outBuffSize < neededOutBuffSize)) { + size_t const bufferSize = neededInBuffSize + neededOutBuffSize; + DEBUGLOG(4, "inBuff : from %u to %u", + (U32)zds->inBuffSize, (U32)neededInBuffSize); + DEBUGLOG(4, "outBuff : from %u to %u", + (U32)zds->outBuffSize, (U32)neededOutBuffSize); + if (zds->staticSize) { /* static DCtx */ + DEBUGLOG(4, "staticSize : %u", (U32)zds->staticSize); + assert(zds->staticSize >= sizeof(ZSTD_DCtx)); /* controlled at init */ + if (bufferSize > zds->staticSize - sizeof(ZSTD_DCtx)) + return ERROR(memory_allocation); + } else { + ZSTD_free(zds->inBuff, zds->customMem); + zds->inBuffSize = 0; + zds->outBuffSize = 0; + zds->inBuff = (char*)ZSTD_malloc(bufferSize, zds->customMem); + if (zds->inBuff == NULL) return ERROR(memory_allocation); + } + zds->inBuffSize = neededInBuffSize; + zds->outBuff = zds->inBuff + zds->inBuffSize; + zds->outBuffSize = neededOutBuffSize; + } } + zds->streamStage = zdss_read; + /* fall-through */ + + case zdss_read: + DEBUGLOG(5, "stage zdss_read"); + { size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds); + DEBUGLOG(5, "neededInSize = %u", (U32)neededInSize); + if (neededInSize==0) { /* end of frame */ + zds->streamStage = zdss_init; + someMoreWork = 0; + break; + } + if ((size_t)(iend-ip) >= neededInSize) { /* decode directly from src */ + int const isSkipFrame = ZSTD_isSkipFrame(zds); + size_t const decodedSize = ZSTD_decompressContinue(zds, + zds->outBuff + zds->outStart, (isSkipFrame ? 
0 : zds->outBuffSize - zds->outStart), + ip, neededInSize); + if (ZSTD_isError(decodedSize)) return decodedSize; + ip += neededInSize; + if (!decodedSize && !isSkipFrame) break; /* this was just a header */ + zds->outEnd = zds->outStart + decodedSize; + zds->streamStage = zdss_flush; + break; + } } + if (ip==iend) { someMoreWork = 0; break; } /* no more input */ + zds->streamStage = zdss_load; + /* fall-through */ + + case zdss_load: + { size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds); + size_t const toLoad = neededInSize - zds->inPos; + int const isSkipFrame = ZSTD_isSkipFrame(zds); + size_t loadedSize; + if (isSkipFrame) { + loadedSize = MIN(toLoad, (size_t)(iend-ip)); + } else { + if (toLoad > zds->inBuffSize - zds->inPos) return ERROR(corruption_detected); /* should never happen */ + loadedSize = ZSTD_limitCopy(zds->inBuff + zds->inPos, toLoad, ip, iend-ip); + } + ip += loadedSize; + zds->inPos += loadedSize; + if (loadedSize < toLoad) { someMoreWork = 0; break; } /* not enough input, wait for more */ + + /* decode loaded input */ + { size_t const decodedSize = ZSTD_decompressContinue(zds, + zds->outBuff + zds->outStart, zds->outBuffSize - zds->outStart, + zds->inBuff, neededInSize); + if (ZSTD_isError(decodedSize)) return decodedSize; + zds->inPos = 0; /* input is consumed */ + if (!decodedSize && !isSkipFrame) { zds->streamStage = zdss_read; break; } /* this was just a header */ + zds->outEnd = zds->outStart + decodedSize; + } } + zds->streamStage = zdss_flush; + /* fall-through */ + + case zdss_flush: + { size_t const toFlushSize = zds->outEnd - zds->outStart; + size_t const flushedSize = ZSTD_limitCopy(op, oend-op, zds->outBuff + zds->outStart, toFlushSize); + op += flushedSize; + zds->outStart += flushedSize; + if (flushedSize == toFlushSize) { /* flush completed */ + zds->streamStage = zdss_read; + if ( (zds->outBuffSize < zds->fParams.frameContentSize) + && (zds->outStart + zds->fParams.blockSizeMax > zds->outBuffSize) ) { + DEBUGLOG(5, 
"restart filling outBuff from beginning (left:%i, needed:%u)", + (int)(zds->outBuffSize - zds->outStart), + (U32)zds->fParams.blockSizeMax); + zds->outStart = zds->outEnd = 0; + } + break; + } } + /* cannot complete flush */ + someMoreWork = 0; + break; + + default: return ERROR(GENERIC); /* impossible */ + } } + + /* result */ + input->pos = (size_t)(ip - (const char*)(input->src)); + output->pos = (size_t)(op - (char*)(output->dst)); + if ((ip==istart) && (op==ostart)) { /* no forward progress */ + zds->noForwardProgress ++; + if (zds->noForwardProgress >= ZSTD_NO_FORWARD_PROGRESS_MAX) { + if (op==oend) return ERROR(dstSize_tooSmall); + if (ip==iend) return ERROR(srcSize_wrong); + assert(0); + } + } else { + zds->noForwardProgress = 0; + } + { size_t nextSrcSizeHint = ZSTD_nextSrcSizeToDecompress(zds); + if (!nextSrcSizeHint) { /* frame fully decoded */ + if (zds->outEnd == zds->outStart) { /* output fully flushed */ + if (zds->hostageByte) { + if (input->pos >= input->size) { + /* can't release hostage (not present) */ + zds->streamStage = zdss_read; + return 1; + } + input->pos++; /* release hostage */ + } /* zds->hostageByte */ + return 0; + } /* zds->outEnd == zds->outStart */ + if (!zds->hostageByte) { /* output not fully flushed; keep last byte as hostage; will be released when all output is flushed */ + input->pos--; /* note : pos > 0, otherwise, impossible to finish reading last block */ + zds->hostageByte=1; + } + return 1; + } /* nextSrcSizeHint==0 */ + nextSrcSizeHint += ZSTD_blockHeaderSize * (ZSTD_nextInputType(zds) == ZSTDnit_block); /* preload header of next block */ + assert(zds->inPos <= nextSrcSizeHint); + nextSrcSizeHint -= zds->inPos; /* part already loaded*/ + return nextSrcSizeHint; + } +} + + +size_t ZSTD_decompress_generic(ZSTD_DCtx* dctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input) +{ + return ZSTD_decompressStream(dctx, output, input); +} + +size_t ZSTD_decompress_generic_simpleArgs ( + ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, 
size_t* dstPos, + const void* src, size_t srcSize, size_t* srcPos) +{ + ZSTD_outBuffer output = { dst, dstCapacity, *dstPos }; + ZSTD_inBuffer input = { src, srcSize, *srcPos }; + /* ZSTD_decompress_generic() will check validity of dstPos and srcPos */ + size_t const cErr = ZSTD_decompress_generic(dctx, &output, &input); + *dstPos = output.pos; + *srcPos = input.pos; + return cErr; +} + +void ZSTD_DCtx_reset(ZSTD_DCtx* dctx) +{ + (void)ZSTD_initDStream(dctx); + dctx->format = ZSTD_f_zstd1; + dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT; +} diff --git a/deps/SZ/zstd/deprecated/zbuff.h b/deps/SZ/zstd/deprecated/zbuff.h new file mode 100644 index 0000000000000000000000000000000000000000..a93115da4a1c6d134d5eca36429bf4b211745951 --- /dev/null +++ b/deps/SZ/zstd/deprecated/zbuff.h @@ -0,0 +1,213 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/* *************************************************************** +* NOTES/WARNINGS +******************************************************************/ +/* The streaming API defined here is deprecated. + * Consider migrating towards ZSTD_compressStream() API in `zstd.h` + * See 'lib/README.md'.
+ *****************************************************************/ + + +#if defined (__cplusplus) +extern "C" { +#endif + +#ifndef ZSTD_BUFFERED_H_23987 +#define ZSTD_BUFFERED_H_23987 + +/* ************************************* +* Dependencies +***************************************/ +#include <stddef.h> /* size_t */ +#include "zstd.h" /* ZSTD_CStream, ZSTD_DStream, ZSTDLIB_API */ + + +/* *************************************************************** +* Compiler specifics +*****************************************************************/ +/* Deprecation warnings */ +/* Should these warnings be a problem, + it is generally possible to disable them, + typically with -Wno-deprecated-declarations for gcc + or _CRT_SECURE_NO_WARNINGS in Visual. + Otherwise, it's also possible to define ZBUFF_DISABLE_DEPRECATE_WARNINGS */ +#ifdef ZBUFF_DISABLE_DEPRECATE_WARNINGS +# define ZBUFF_DEPRECATED(message) ZSTDLIB_API /* disable deprecation warnings */ +#else +# if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */ +# define ZBUFF_DEPRECATED(message) [[deprecated(message)]] ZSTDLIB_API +# elif (defined(__GNUC__) && (__GNUC__ >= 5)) || defined(__clang__) +# define ZBUFF_DEPRECATED(message) ZSTDLIB_API __attribute__((deprecated(message))) +# elif defined(__GNUC__) && (__GNUC__ >= 3) +# define ZBUFF_DEPRECATED(message) ZSTDLIB_API __attribute__((deprecated)) +# elif defined(_MSC_VER) +# define ZBUFF_DEPRECATED(message) ZSTDLIB_API __declspec(deprecated(message)) +# else +# pragma message("WARNING: You need to implement ZBUFF_DEPRECATED for this compiler") +# define ZBUFF_DEPRECATED(message) ZSTDLIB_API +# endif +#endif /* ZBUFF_DISABLE_DEPRECATE_WARNINGS */ + + +/* ************************************* +* Streaming functions +***************************************/ +/* This is the easier "buffered" streaming API, +* using an internal buffer to lift all restrictions on user-provided buffers +* which can be any size, any place, for both input and output.
+* ZBUFF and ZSTD are 100% interoperable, +* frames created by one can be decoded by the other one */ + +typedef ZSTD_CStream ZBUFF_CCtx; +ZBUFF_DEPRECATED("use ZSTD_createCStream") ZBUFF_CCtx* ZBUFF_createCCtx(void); +ZBUFF_DEPRECATED("use ZSTD_freeCStream") size_t ZBUFF_freeCCtx(ZBUFF_CCtx* cctx); + +ZBUFF_DEPRECATED("use ZSTD_initCStream") size_t ZBUFF_compressInit(ZBUFF_CCtx* cctx, int compressionLevel); +ZBUFF_DEPRECATED("use ZSTD_initCStream_usingDict") size_t ZBUFF_compressInitDictionary(ZBUFF_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel); + +ZBUFF_DEPRECATED("use ZSTD_compressStream") size_t ZBUFF_compressContinue(ZBUFF_CCtx* cctx, void* dst, size_t* dstCapacityPtr, const void* src, size_t* srcSizePtr); +ZBUFF_DEPRECATED("use ZSTD_flushStream") size_t ZBUFF_compressFlush(ZBUFF_CCtx* cctx, void* dst, size_t* dstCapacityPtr); +ZBUFF_DEPRECATED("use ZSTD_endStream") size_t ZBUFF_compressEnd(ZBUFF_CCtx* cctx, void* dst, size_t* dstCapacityPtr); + +/*-************************************************* +* Streaming compression - howto +* +* A ZBUFF_CCtx object is required to track streaming operation. +* Use ZBUFF_createCCtx() and ZBUFF_freeCCtx() to create/release resources. +* ZBUFF_CCtx objects can be reused multiple times. +* +* Start by initializing ZBUFF_CCtx. +* Use ZBUFF_compressInit() to start a new compression operation. +* Use ZBUFF_compressInitDictionary() for a compression which requires a dictionary. +* +* Use ZBUFF_compressContinue() repetitively to consume input stream. +* *srcSizePtr and *dstCapacityPtr can be any size. +* The function will report how many bytes were read or written within *srcSizePtr and *dstCapacityPtr. +* Note that it may not consume the entire input, in which case it's up to the caller to present again remaining data. +* The content of `dst` will be overwritten (up to *dstCapacityPtr) at each call, so save its content if it matters or change @dst .
+* @return : a hint to preferred nb of bytes to use as input for next function call (it's just a hint, to improve latency) +* or an error code, which can be tested using ZBUFF_isError(). +* +* At any moment, it's possible to flush whatever data remains within buffer, using ZBUFF_compressFlush(). +* The nb of bytes written into `dst` will be reported into *dstCapacityPtr. +* Note that the function cannot output more than *dstCapacityPtr, +* therefore, some content might still be left into internal buffer if *dstCapacityPtr is too small. +* @return : nb of bytes still present into internal buffer (0 if it's empty) +* or an error code, which can be tested using ZBUFF_isError(). +* +* ZBUFF_compressEnd() instructs to finish a frame. +* It will perform a flush and write frame epilogue. +* The epilogue is required for decoders to consider a frame completed. +* Similar to ZBUFF_compressFlush(), it may not be able to output the entire internal buffer content if *dstCapacityPtr is too small. +* In which case, call again ZBUFF_compressFlush() to complete the flush. +* @return : nb of bytes still present into internal buffer (0 if it's empty) +* or an error code, which can be tested using ZBUFF_isError(). +* +* Hint : _recommended buffer_ sizes (not compulsory) : ZBUFF_recommendedCInSize() / ZBUFF_recommendedCOutSize() +* input : ZBUFF_recommendedCInSize==128 KB block size is the internal unit, use this value to reduce intermediate stages (better latency) +* output : ZBUFF_recommendedCOutSize==ZSTD_compressBound(128 KB) + 3 + 3 : ensures it's always possible to write/flush/end a full block. Skip some buffering. +* By using both, it ensures that input will be entirely consumed, and output will always contain the result, reducing intermediate buffering. 
+* **************************************************/ + + +typedef ZSTD_DStream ZBUFF_DCtx; +ZBUFF_DEPRECATED("use ZSTD_createDStream") ZBUFF_DCtx* ZBUFF_createDCtx(void); +ZBUFF_DEPRECATED("use ZSTD_freeDStream") size_t ZBUFF_freeDCtx(ZBUFF_DCtx* dctx); + +ZBUFF_DEPRECATED("use ZSTD_initDStream") size_t ZBUFF_decompressInit(ZBUFF_DCtx* dctx); +ZBUFF_DEPRECATED("use ZSTD_initDStream_usingDict") size_t ZBUFF_decompressInitDictionary(ZBUFF_DCtx* dctx, const void* dict, size_t dictSize); + +ZBUFF_DEPRECATED("use ZSTD_decompressStream") size_t ZBUFF_decompressContinue(ZBUFF_DCtx* dctx, + void* dst, size_t* dstCapacityPtr, + const void* src, size_t* srcSizePtr); + +/*-*************************************************************************** +* Streaming decompression howto +* +* A ZBUFF_DCtx object is required to track streaming operations. +* Use ZBUFF_createDCtx() and ZBUFF_freeDCtx() to create/release resources. +* Use ZBUFF_decompressInit() to start a new decompression operation, +* or ZBUFF_decompressInitDictionary() if decompression requires a dictionary. +* Note that ZBUFF_DCtx objects can be re-init multiple times. +* +* Use ZBUFF_decompressContinue() repetitively to consume your input. +* *srcSizePtr and *dstCapacityPtr can be any size. +* The function will report how many bytes were read or written by modifying *srcSizePtr and *dstCapacityPtr. +* Note that it may not consume the entire input, in which case it's up to the caller to present remaining input again. +* The content of `dst` will be overwritten (up to *dstCapacityPtr) at each function call, so save its content if it matters, or change `dst`. +* @return : 0 when a frame is completely decoded and fully flushed, +* 1 when there is still some data left within internal buffer to flush, +* >1 when more data is expected, with value being a suggested next input size (it's just a hint, which helps latency), +* or an error code, which can be tested using ZBUFF_isError(). 
+* +* Hint : recommended buffer sizes (not compulsory) : ZBUFF_recommendedDInSize() and ZBUFF_recommendedDOutSize() +* output : ZBUFF_recommendedDOutSize== 128 KB block size is the internal unit, it ensures it's always possible to write a full block when decoded. +* input : ZBUFF_recommendedDInSize == 128KB + 3; +* just follow indications from ZBUFF_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 . +* *******************************************************************************/ + + +/* ************************************* +* Tool functions +***************************************/ +ZBUFF_DEPRECATED("use ZSTD_isError") unsigned ZBUFF_isError(size_t errorCode); +ZBUFF_DEPRECATED("use ZSTD_getErrorName") const char* ZBUFF_getErrorName(size_t errorCode); + +/** Functions below provide recommended buffer sizes for Compression or Decompression operations. +* These sizes are just hints, they tend to offer better latency */ +ZBUFF_DEPRECATED("use ZSTD_CStreamInSize") size_t ZBUFF_recommendedCInSize(void); +ZBUFF_DEPRECATED("use ZSTD_CStreamOutSize") size_t ZBUFF_recommendedCOutSize(void); +ZBUFF_DEPRECATED("use ZSTD_DStreamInSize") size_t ZBUFF_recommendedDInSize(void); +ZBUFF_DEPRECATED("use ZSTD_DStreamOutSize") size_t ZBUFF_recommendedDOutSize(void); + +#endif /* ZSTD_BUFFERED_H_23987 */ + + +#ifdef ZBUFF_STATIC_LINKING_ONLY +#ifndef ZBUFF_STATIC_H_30298098432 +#define ZBUFF_STATIC_H_30298098432 + +/* ==================================================================================== + * The definitions in this section are considered experimental. + * They should never be used in association with a dynamic library, as they may change in the future. + * They are provided for advanced usages. + * Use them only in association with static linking. 
+ * ==================================================================================== */ + +/*--- Dependency ---*/ +#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_parameters, ZSTD_customMem */ +#include "zstd.h" + + +/*--- Custom memory allocator ---*/ +/*! ZBUFF_createCCtx_advanced() : + * Create a ZBUFF compression context using external alloc and free functions */ +ZBUFF_DEPRECATED("use ZSTD_createCStream_advanced") ZBUFF_CCtx* ZBUFF_createCCtx_advanced(ZSTD_customMem customMem); + +/*! ZBUFF_createDCtx_advanced() : + * Create a ZBUFF decompression context using external alloc and free functions */ +ZBUFF_DEPRECATED("use ZSTD_createDStream_advanced") ZBUFF_DCtx* ZBUFF_createDCtx_advanced(ZSTD_customMem customMem); + + +/*--- Advanced Streaming Initialization ---*/ +ZBUFF_DEPRECATED("use ZSTD_initDStream_usingDict") size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* zbc, + const void* dict, size_t dictSize, + ZSTD_parameters params, unsigned long long pledgedSrcSize); + + +#endif /* ZBUFF_STATIC_H_30298098432 */ +#endif /* ZBUFF_STATIC_LINKING_ONLY */ + + +#if defined (__cplusplus) +} +#endif diff --git a/deps/SZ/zstd/deprecated/zbuff_common.c b/deps/SZ/zstd/deprecated/zbuff_common.c new file mode 100644 index 0000000000000000000000000000000000000000..661b9b0e18c5541fbd0f172d3da11c52bdc4ece8 --- /dev/null +++ b/deps/SZ/zstd/deprecated/zbuff_common.c @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +/*-************************************* +* Dependencies +***************************************/ +#include "error_private.h" +#include "zbuff.h" + +/*-**************************************** +* ZBUFF Error Management (deprecated) +******************************************/ + +/*! ZBUFF_isError() : +* tells if a return value is an error code */ +unsigned ZBUFF_isError(size_t errorCode) { return ERR_isError(errorCode); } +/*! ZBUFF_getErrorName() : +* provides error code string from function result (useful for debugging) */ +const char* ZBUFF_getErrorName(size_t errorCode) { return ERR_getErrorName(errorCode); } diff --git a/deps/SZ/zstd/deprecated/zbuff_compress.c b/deps/SZ/zstd/deprecated/zbuff_compress.c new file mode 100644 index 0000000000000000000000000000000000000000..f39c60d89f60412ed959020b6c4d7cdb1c469698 --- /dev/null +++ b/deps/SZ/zstd/deprecated/zbuff_compress.c @@ -0,0 +1,147 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + + +/* ************************************* +* Dependencies +***************************************/ +#define ZBUFF_STATIC_LINKING_ONLY +#include "zbuff.h" + + +/*-*********************************************************** +* Streaming compression +* +* A ZBUFF_CCtx object is required to track streaming operation. +* Use ZBUFF_createCCtx() and ZBUFF_freeCCtx() to create/release resources. +* Use ZBUFF_compressInit() to start a new compression operation. +* ZBUFF_CCtx objects can be reused multiple times. +* +* Use ZBUFF_compressContinue() repetitively to consume your input. +* *srcSizePtr and *dstCapacityPtr can be any size. 
+* The function will report how many bytes were read or written by modifying *srcSizePtr and *dstCapacityPtr. +* Note that it may not consume the entire input, in which case it's up to the caller to call again the function with remaining input. +* The content of dst will be overwritten (up to *dstCapacityPtr) at each function call, so save its content if it matters or change dst . +* @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to improve latency) +* or an error code, which can be tested using ZBUFF_isError(). +* +* ZBUFF_compressFlush() can be used to instruct ZBUFF to compress and output whatever remains within its buffer. +* Note that it will not output more than *dstCapacityPtr. +* Therefore, some content might still be left into its internal buffer if dst buffer is too small. +* @return : nb of bytes still present into internal buffer (0 if it's empty) +* or an error code, which can be tested using ZBUFF_isError(). +* +* ZBUFF_compressEnd() instructs to finish a frame. +* It will perform a flush and write frame epilogue. +* Similar to ZBUFF_compressFlush(), it may not be able to output the entire internal buffer content if *dstCapacityPtr is too small. +* @return : nb of bytes still present into internal buffer (0 if it's empty) +* or an error code, which can be tested using ZBUFF_isError(). +* +* Hint : recommended buffer sizes (not compulsory) +* input : ZSTD_BLOCKSIZE_MAX (128 KB), internal unit size, it improves latency to use this value. +* output : ZSTD_compressBound(ZSTD_BLOCKSIZE_MAX) + ZSTD_blockHeaderSize + ZBUFF_endFrameSize : ensures it's always possible to write/flush/end a full block at best speed. 
+* ***********************************************************/ + +ZBUFF_CCtx* ZBUFF_createCCtx(void) +{ + return ZSTD_createCStream(); +} + +ZBUFF_CCtx* ZBUFF_createCCtx_advanced(ZSTD_customMem customMem) +{ + return ZSTD_createCStream_advanced(customMem); +} + +size_t ZBUFF_freeCCtx(ZBUFF_CCtx* zbc) +{ + return ZSTD_freeCStream(zbc); +} + + +/* ====== Initialization ====== */ + +size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* zbc, + const void* dict, size_t dictSize, + ZSTD_parameters params, unsigned long long pledgedSrcSize) +{ + if (pledgedSrcSize==0) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN; /* preserve "0 == unknown" behavior */ + return ZSTD_initCStream_advanced(zbc, dict, dictSize, params, pledgedSrcSize); +} + + +size_t ZBUFF_compressInitDictionary(ZBUFF_CCtx* zbc, const void* dict, size_t dictSize, int compressionLevel) +{ + return ZSTD_initCStream_usingDict(zbc, dict, dictSize, compressionLevel); +} + +size_t ZBUFF_compressInit(ZBUFF_CCtx* zbc, int compressionLevel) +{ + return ZSTD_initCStream(zbc, compressionLevel); +} + +/* ====== Compression ====== */ + + +size_t ZBUFF_compressContinue(ZBUFF_CCtx* zbc, + void* dst, size_t* dstCapacityPtr, + const void* src, size_t* srcSizePtr) +{ + size_t result; + ZSTD_outBuffer outBuff; + ZSTD_inBuffer inBuff; + outBuff.dst = dst; + outBuff.pos = 0; + outBuff.size = *dstCapacityPtr; + inBuff.src = src; + inBuff.pos = 0; + inBuff.size = *srcSizePtr; + result = ZSTD_compressStream(zbc, &outBuff, &inBuff); + *dstCapacityPtr = outBuff.pos; + *srcSizePtr = inBuff.pos; + return result; +} + + + +/* ====== Finalize ====== */ + +size_t ZBUFF_compressFlush(ZBUFF_CCtx* zbc, void* dst, size_t* dstCapacityPtr) +{ + size_t result; + ZSTD_outBuffer outBuff; + outBuff.dst = dst; + outBuff.pos = 0; + outBuff.size = *dstCapacityPtr; + result = ZSTD_flushStream(zbc, &outBuff); + *dstCapacityPtr = outBuff.pos; + return result; +} + + +size_t ZBUFF_compressEnd(ZBUFF_CCtx* zbc, void* dst, size_t* dstCapacityPtr) +{ + size_t result; + 
ZSTD_outBuffer outBuff; + outBuff.dst = dst; + outBuff.pos = 0; + outBuff.size = *dstCapacityPtr; + result = ZSTD_endStream(zbc, &outBuff); + *dstCapacityPtr = outBuff.pos; + return result; +} + + + +/* ************************************* +* Tool functions +***************************************/ +size_t ZBUFF_recommendedCInSize(void) { return ZSTD_CStreamInSize(); } +size_t ZBUFF_recommendedCOutSize(void) { return ZSTD_CStreamOutSize(); } diff --git a/deps/SZ/zstd/deprecated/zbuff_decompress.c b/deps/SZ/zstd/deprecated/zbuff_decompress.c new file mode 100644 index 0000000000000000000000000000000000000000..923c22b73c5767cea42514d15c4c1a41775a6c40 --- /dev/null +++ b/deps/SZ/zstd/deprecated/zbuff_decompress.c @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + + + +/* ************************************* +* Dependencies +***************************************/ +#define ZBUFF_STATIC_LINKING_ONLY +#include "zbuff.h" + + +ZBUFF_DCtx* ZBUFF_createDCtx(void) +{ + return ZSTD_createDStream(); +} + +ZBUFF_DCtx* ZBUFF_createDCtx_advanced(ZSTD_customMem customMem) +{ + return ZSTD_createDStream_advanced(customMem); +} + +size_t ZBUFF_freeDCtx(ZBUFF_DCtx* zbd) +{ + return ZSTD_freeDStream(zbd); +} + + +/* *** Initialization *** */ + +size_t ZBUFF_decompressInitDictionary(ZBUFF_DCtx* zbd, const void* dict, size_t dictSize) +{ + return ZSTD_initDStream_usingDict(zbd, dict, dictSize); +} + +size_t ZBUFF_decompressInit(ZBUFF_DCtx* zbd) +{ + return ZSTD_initDStream(zbd); +} + + +/* *** Decompression *** */ + +size_t ZBUFF_decompressContinue(ZBUFF_DCtx* zbd, + void* dst, size_t* dstCapacityPtr, + const void* src, size_t* srcSizePtr) +{ + ZSTD_outBuffer outBuff; + ZSTD_inBuffer inBuff; + size_t result; + outBuff.dst = dst; + outBuff.pos = 0; + outBuff.size = *dstCapacityPtr; + inBuff.src = src; + inBuff.pos = 0; + inBuff.size = *srcSizePtr; + result = ZSTD_decompressStream(zbd, &outBuff, &inBuff); + *dstCapacityPtr = outBuff.pos; + *srcSizePtr = inBuff.pos; + return result; +} + + +/* ************************************* +* Tool functions +***************************************/ +size_t ZBUFF_recommendedDInSize(void) { return ZSTD_DStreamInSize(); } +size_t ZBUFF_recommendedDOutSize(void) { return ZSTD_DStreamOutSize(); } diff --git a/deps/SZ/zstd/dictBuilder/cover.c b/deps/SZ/zstd/dictBuilder/cover.c new file mode 100644 index 0000000000000000000000000000000000000000..448f713720fa04fa8c0454759b5f9196b5727843 --- /dev/null +++ b/deps/SZ/zstd/dictBuilder/cover.c @@ -0,0 +1,1055 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. 
+ * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/* ***************************************************************************** + * Constructs a dictionary using a heuristic based on the following paper: + * + * Liao, Petri, Moffat, Wirth + * Effective Construction of Relative Lempel-Ziv Dictionaries + * Published in WWW 2016. + * + * Adapted from code originally written by @ot (Giuseppe Ottaviano). + ******************************************************************************/ + +/*-************************************* +* Dependencies +***************************************/ +#include /* fprintf */ +#include /* malloc, free, qsort */ +#include /* memset */ +#include /* clock */ + +#include "mem.h" /* read */ +#include "pool.h" +#include "threading.h" +#include "zstd_internal.h" /* includes zstd.h */ +#ifndef ZDICT_STATIC_LINKING_ONLY +#define ZDICT_STATIC_LINKING_ONLY +#endif +#include "zdict.h" + +/*-************************************* +* Constants +***************************************/ +#define COVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((U32)-1) : ((U32)1 GB)) + +/*-************************************* +* Console display +***************************************/ +static int g_displayLevel = 2; +#define DISPLAY(...) \ + { \ + fprintf(stderr, __VA_ARGS__); \ + fflush(stderr); \ + } +#define LOCALDISPLAYLEVEL(displayLevel, l, ...) \ + if (displayLevel >= l) { \ + DISPLAY(__VA_ARGS__); \ + } /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */ +#define DISPLAYLEVEL(l, ...) LOCALDISPLAYLEVEL(g_displayLevel, l, __VA_ARGS__) + +#define LOCALDISPLAYUPDATE(displayLevel, l, ...) 
\ + if (displayLevel >= l) { \ + if ((clock() - g_time > refreshRate) || (displayLevel >= 4)) { \ + g_time = clock(); \ + DISPLAY(__VA_ARGS__); \ + } \ + } +#define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__) +static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100; +static clock_t g_time = 0; + +/*-************************************* +* Hash table +*************************************** +* A small specialized hash map for storing activeDmers. +* The map does not resize, so if it becomes full it will loop forever. +* Thus, the map must be large enough to store every value. +* The map implements linear probing and keeps its load less than 0.5. +*/ + +#define MAP_EMPTY_VALUE ((U32)-1) +typedef struct COVER_map_pair_t_s { + U32 key; + U32 value; +} COVER_map_pair_t; + +typedef struct COVER_map_s { + COVER_map_pair_t *data; + U32 sizeLog; + U32 size; + U32 sizeMask; +} COVER_map_t; + +/** + * Clear the map. + */ +static void COVER_map_clear(COVER_map_t *map) { + memset(map->data, MAP_EMPTY_VALUE, map->size * sizeof(COVER_map_pair_t)); +} + +/** + * Initializes a map of the given size. + * Returns 1 on success and 0 on failure. + * The map must be destroyed with COVER_map_destroy(). + * The map is only guaranteed to be large enough to hold size elements. + */ +static int COVER_map_init(COVER_map_t *map, U32 size) { + map->sizeLog = ZSTD_highbit32(size) + 2; + map->size = (U32)1 << map->sizeLog; + map->sizeMask = map->size - 1; + map->data = (COVER_map_pair_t *)malloc(map->size * sizeof(COVER_map_pair_t)); + if (!map->data) { + map->sizeLog = 0; + map->size = 0; + return 0; + } + COVER_map_clear(map); + return 1; +} + +/** + * Internal hash function + */ +static const U32 prime4bytes = 2654435761U; +static U32 COVER_map_hash(COVER_map_t *map, U32 key) { + return (key * prime4bytes) >> (32 - map->sizeLog); +} + +/** + * Helper function that returns the index that a key should be placed into. 
+ */ +static U32 COVER_map_index(COVER_map_t *map, U32 key) { + const U32 hash = COVER_map_hash(map, key); + U32 i; + for (i = hash;; i = (i + 1) & map->sizeMask) { + COVER_map_pair_t *pos = &map->data[i]; + if (pos->value == MAP_EMPTY_VALUE) { + return i; + } + if (pos->key == key) { + return i; + } + } +} + +/** + * Returns the pointer to the value for key. + * If key is not in the map, it is inserted and the value is set to 0. + * The map must not be full. + */ +static U32 *COVER_map_at(COVER_map_t *map, U32 key) { + COVER_map_pair_t *pos = &map->data[COVER_map_index(map, key)]; + if (pos->value == MAP_EMPTY_VALUE) { + pos->key = key; + pos->value = 0; + } + return &pos->value; +} + +/** + * Deletes key from the map if present. + */ +static void COVER_map_remove(COVER_map_t *map, U32 key) { + U32 i = COVER_map_index(map, key); + COVER_map_pair_t *del = &map->data[i]; + U32 shift = 1; + if (del->value == MAP_EMPTY_VALUE) { + return; + } + for (i = (i + 1) & map->sizeMask;; i = (i + 1) & map->sizeMask) { + COVER_map_pair_t *const pos = &map->data[i]; + /* If the position is empty we are done */ + if (pos->value == MAP_EMPTY_VALUE) { + del->value = MAP_EMPTY_VALUE; + return; + } + /* If pos can be moved to del do so */ + if (((i - COVER_map_hash(map, pos->key)) & map->sizeMask) >= shift) { + del->key = pos->key; + del->value = pos->value; + del = pos; + shift = 1; + } else { + ++shift; + } + } +} + +/** + * Destroyes a map that is inited with COVER_map_init(). + */ +static void COVER_map_destroy(COVER_map_t *map) { + if (map->data) { + free(map->data); + } + map->data = NULL; + map->size = 0; +} + +/*-************************************* +* Context +***************************************/ + +typedef struct { + const BYTE *samples; + size_t *offsets; + const size_t *samplesSizes; + size_t nbSamples; + U32 *suffix; + size_t suffixSize; + U32 *freqs; + U32 *dmerAt; + unsigned d; +} COVER_ctx_t; + +/* We need a global context for qsort... 
*/ +static COVER_ctx_t *g_ctx = NULL; + +/*-************************************* +* Helper functions +***************************************/ + +/** + * Returns the sum of the sample sizes. + */ +static size_t COVER_sum(const size_t *samplesSizes, unsigned nbSamples) { + size_t sum = 0; + size_t i; + for (i = 0; i < nbSamples; ++i) { + sum += samplesSizes[i]; + } + return sum; +} + +/** + * Returns -1 if the dmer at lp is less than the dmer at rp. + * Return 0 if the dmers at lp and rp are equal. + * Returns 1 if the dmer at lp is greater than the dmer at rp. + */ +static int COVER_cmp(COVER_ctx_t *ctx, const void *lp, const void *rp) { + U32 const lhs = *(U32 const *)lp; + U32 const rhs = *(U32 const *)rp; + return memcmp(ctx->samples + lhs, ctx->samples + rhs, ctx->d); +} +/** + * Faster version for d <= 8. + */ +static int COVER_cmp8(COVER_ctx_t *ctx, const void *lp, const void *rp) { + U64 const mask = (ctx->d == 8) ? (U64)-1 : (((U64)1 << (8 * ctx->d)) - 1); + U64 const lhs = MEM_readLE64(ctx->samples + *(U32 const *)lp) & mask; + U64 const rhs = MEM_readLE64(ctx->samples + *(U32 const *)rp) & mask; + if (lhs < rhs) { + return -1; + } + return (lhs > rhs); +} + +/** + * Same as COVER_cmp() except ties are broken by pointer value + * NOTE: g_ctx must be set to call this function. A global is required because + * qsort doesn't take an opaque pointer. + */ +static int COVER_strict_cmp(const void *lp, const void *rp) { + int result = COVER_cmp(g_ctx, lp, rp); + if (result == 0) { + result = lp < rp ? -1 : 1; + } + return result; +} +/** + * Faster version for d <= 8. + */ +static int COVER_strict_cmp8(const void *lp, const void *rp) { + int result = COVER_cmp8(g_ctx, lp, rp); + if (result == 0) { + result = lp < rp ? -1 : 1; + } + return result; +} + +/** + * Returns the first pointer in [first, last) whose element does not compare + * less than value. If no such element exists it returns last. 
+ */
+static const size_t *COVER_lower_bound(const size_t *first, const size_t *last,
+                                       size_t value) {
+  /* Binary search; `count` is the number of candidates still in play. */
+  size_t count = last - first;
+  while (count != 0) {
+    size_t step = count / 2;
+    const size_t *ptr = first;
+    ptr += step;
+    if (*ptr < value) {
+      /* Everything up to and including ptr is too small: discard it. */
+      first = ++ptr;
+      count -= step + 1;
+    } else {
+      count = step;
+    }
+  }
+  return first;
+}
+
+/**
+ * Generic groupBy function.
+ * Groups an array sorted by cmp into groups with equivalent values.
+ * Calls grp for each group.
+ *
+ * data  : array of `count` elements, each `size` bytes wide.
+ * ctx   : passed through unchanged to cmp and grp.
+ * cmp   : returns 0 when two elements belong to the same group.
+ * grp   : called with the half-open range [groupBegin, groupEnd).
+ */
+static void
+COVER_groupBy(const void *data, size_t count, size_t size, COVER_ctx_t *ctx,
+              int (*cmp)(COVER_ctx_t *, const void *, const void *),
+              void (*grp)(COVER_ctx_t *, const void *, const void *)) {
+  const BYTE *ptr = (const BYTE *)data;
+  size_t num = 0;
+  while (num < count) {
+    const BYTE *grpEnd = ptr + size;
+    ++num;
+    /* Extend the group while the next element equals the group's first. */
+    while (num < count && cmp(ctx, ptr, grpEnd) == 0) {
+      grpEnd += size;
+      ++num;
+    }
+    grp(ctx, ptr, grpEnd);
+    ptr = grpEnd;
+  }
+}
+
+/*-*************************************
+*  Cover functions
+***************************************/
+
+/**
+ * Called on each group of positions with the same dmer.
+ * Counts the frequency of each dmer and saves it in the suffix array.
+ * Fills `ctx->dmerAt`.
+ */
+static void COVER_group(COVER_ctx_t *ctx, const void *group,
+                        const void *groupEnd) {
+  /* The group consists of all the positions with the same first d bytes. */
+  const U32 *grpPtr = (const U32 *)group;
+  const U32 *grpEnd = (const U32 *)groupEnd;
+  /* The dmerId is how we will reference this dmer.
+   * This allows us to map the whole dmer space to a much smaller space, the
+   * size of the suffix array.
+   */
+  const U32 dmerId = (U32)(grpPtr - ctx->suffix);
+  /* Count the number of samples this dmer shows up in */
+  U32 freq = 0;
+  /* Details */
+  const size_t *curOffsetPtr = ctx->offsets;
+  const size_t *offsetsEnd = ctx->offsets + ctx->nbSamples;
+  /* Once *grpPtr >= curSampleEnd this occurrence of the dmer is in a
+   * different sample than the last.
+   */
+  size_t curSampleEnd = ctx->offsets[0];
+  for (; grpPtr != grpEnd; ++grpPtr) {
+    /* Save the dmerId for this position so we can get back to it. */
+    ctx->dmerAt[*grpPtr] = dmerId;
+    /* Dictionaries only help for the first reference to the dmer.
+     * After that zstd can reference the match from the previous reference.
+     * So only count each dmer once for each sample it is in.
+     */
+    if (*grpPtr < curSampleEnd) {
+      continue;
+    }
+    freq += 1;
+    /* Binary search to find the end of the sample *grpPtr is in.
+     * In the common case that grpPtr + 1 == grpEnd we can skip the binary
+     * search because the loop is over.
+     */
+    if (grpPtr + 1 != grpEnd) {
+      /* sampleEndPtr is the first remaining offset not less than *grpPtr;
+       * later searches resume just past it. */
+      const size_t *sampleEndPtr =
+          COVER_lower_bound(curOffsetPtr, offsetsEnd, *grpPtr);
+      curSampleEnd = *sampleEndPtr;
+      curOffsetPtr = sampleEndPtr + 1;
+    }
+  }
+  /* At this point we are never going to look at this segment of the suffix
+   * array again.  We take advantage of this fact to save memory.
+   * We store the frequency of the dmer in the first position of the group,
+   * which is dmerId.
+   */
+  ctx->suffix[dmerId] = freq;
+}
+
+/**
+ * A segment is a range in the source as well as the score of the segment.
+ */
+typedef struct {
+  U32 begin;
+  U32 end;
+  U32 score;
+} COVER_segment_t;
+
+/**
+ * Selects the best segment in an epoch.
+ * Segments are scored according to the function:
+ *
+ * Let F(d) be the frequency of dmer d.
+ * Let S_i be the dmer at position i of segment S which has length k.
+ *
+ *     Score(S) = F(S_1) + F(S_2) + ... + F(S_{k-d+1})
+ *
+ * Once the dmer d is in the dictionary we set F(d) = 0.
+ */
+static COVER_segment_t COVER_selectSegment(const COVER_ctx_t *ctx, U32 *freqs,
+                                           COVER_map_t *activeDmers, U32 begin,
+                                           U32 end,
+                                           ZDICT_cover_params_t parameters) {
+  /* Constants */
+  const U32 k = parameters.k;
+  const U32 d = parameters.d;
+  const U32 dmersInK = k - d + 1;
+  /* Try each segment (activeSegment) and save the best (bestSegment) */
+  COVER_segment_t bestSegment = {0, 0, 0};
+  COVER_segment_t activeSegment;
+  /* Reset the activeDmers in the segment */
+  COVER_map_clear(activeDmers);
+  /* The activeSegment starts at the beginning of the epoch. */
+  activeSegment.begin = begin;
+  activeSegment.end = begin;
+  activeSegment.score = 0;
+  /* Slide the activeSegment through the whole epoch.
+   * Save the best segment in bestSegment.
+   */
+  while (activeSegment.end < end) {
+    /* The dmerId for the dmer at the next position */
+    U32 newDmer = ctx->dmerAt[activeSegment.end];
+    /* The entry in activeDmers for this dmerId */
+    U32 *newDmerOcc = COVER_map_at(activeDmers, newDmer);
+    /* If the dmer isn't already present in the segment add its score. */
+    if (*newDmerOcc == 0) {
+      /* The paper suggests using the L-0.5 norm, but experiments show that it
+       * doesn't help.
+       */
+      activeSegment.score += freqs[newDmer];
+    }
+    /* Add the dmer to the segment */
+    activeSegment.end += 1;
+    *newDmerOcc += 1;
+
+    /* If the window is now too large, drop the first position */
+    if (activeSegment.end - activeSegment.begin == dmersInK + 1) {
+      U32 delDmer = ctx->dmerAt[activeSegment.begin];
+      U32 *delDmerOcc = COVER_map_at(activeDmers, delDmer);
+      activeSegment.begin += 1;
+      *delDmerOcc -= 1;
+      /* If this is the last occurrence of the dmer, subtract its score */
+      if (*delDmerOcc == 0) {
+        COVER_map_remove(activeDmers, delDmer);
+        activeSegment.score -= freqs[delDmer];
+      }
+    }
+
+    /* If this segment is the best so far save it */
+    if (activeSegment.score > bestSegment.score) {
+      bestSegment = activeSegment;
+    }
+  }
+  {
+    /* Trim off the zero frequency head and tail from the segment. */
+    /* newBegin/newEnd start inverted so that a segment with no non-zero
+     * frequency leaves them untouched. */
+    U32 newBegin = bestSegment.end;
+    U32 newEnd = bestSegment.begin;
+    U32 pos;
+    for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) {
+      U32 freq = freqs[ctx->dmerAt[pos]];
+      if (freq != 0) {
+        newBegin = MIN(newBegin, pos);
+        newEnd = pos + 1;
+      }
+    }
+    bestSegment.begin = newBegin;
+    bestSegment.end = newEnd;
+  }
+  {
+    /* Zero out the frequency of each dmer covered by the chosen segment. */
+    U32 pos;
+    for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) {
+      freqs[ctx->dmerAt[pos]] = 0;
+    }
+  }
+  return bestSegment;
+}
+
+/**
+ * Check the validity of the parameters.
+ * Returns non-zero if the parameters are valid and 0 otherwise.
+ * Valid means: d and k are both non-zero, k <= maxDictSize, and d <= k.
+ */
+static int COVER_checkParameters(ZDICT_cover_params_t parameters,
+                                 size_t maxDictSize) {
+  /* k and d are required parameters */
+  if (parameters.d == 0 || parameters.k == 0) {
+    return 0;
+  }
+  /* k <= maxDictSize */
+  if (parameters.k > maxDictSize) {
+    return 0;
+  }
+  /* d <= k */
+  if (parameters.d > parameters.k) {
+    return 0;
+  }
+  return 1;
+}
+
+/**
+ * Clean up a context initialized with `COVER_ctx_init()`.
+ */ +static void COVER_ctx_destroy(COVER_ctx_t *ctx) { + if (!ctx) { + return; + } + if (ctx->suffix) { + free(ctx->suffix); + ctx->suffix = NULL; + } + if (ctx->freqs) { + free(ctx->freqs); + ctx->freqs = NULL; + } + if (ctx->dmerAt) { + free(ctx->dmerAt); + ctx->dmerAt = NULL; + } + if (ctx->offsets) { + free(ctx->offsets); + ctx->offsets = NULL; + } +} + +/** + * Prepare a context for dictionary building. + * The context is only dependent on the parameter `d` and can used multiple + * times. + * Returns 1 on success or zero on error. + * The context must be destroyed with `COVER_ctx_destroy()`. + */ +static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer, + const size_t *samplesSizes, unsigned nbSamples, + unsigned d) { + const BYTE *const samples = (const BYTE *)samplesBuffer; + const size_t totalSamplesSize = COVER_sum(samplesSizes, nbSamples); + /* Checks */ + if (totalSamplesSize < MAX(d, sizeof(U64)) || + totalSamplesSize >= (size_t)COVER_MAX_SAMPLES_SIZE) { + DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n", + (U32)(totalSamplesSize>>20), (COVER_MAX_SAMPLES_SIZE >> 20)); + return 0; + } + /* Zero the context */ + memset(ctx, 0, sizeof(*ctx)); + DISPLAYLEVEL(2, "Training on %u samples of total size %u\n", nbSamples, + (U32)totalSamplesSize); + ctx->samples = samples; + ctx->samplesSizes = samplesSizes; + ctx->nbSamples = nbSamples; + /* Partial suffix array */ + ctx->suffixSize = totalSamplesSize - MAX(d, sizeof(U64)) + 1; + ctx->suffix = (U32 *)malloc(ctx->suffixSize * sizeof(U32)); + /* Maps index to the dmerID */ + ctx->dmerAt = (U32 *)malloc(ctx->suffixSize * sizeof(U32)); + /* The offsets of each file */ + ctx->offsets = (size_t *)malloc((nbSamples + 1) * sizeof(size_t)); + if (!ctx->suffix || !ctx->dmerAt || !ctx->offsets) { + DISPLAYLEVEL(1, "Failed to allocate scratch buffers\n"); + COVER_ctx_destroy(ctx); + return 0; + } + ctx->freqs = NULL; + ctx->d = d; + + /* Fill offsets from the samlesSizes */ 
+ { + U32 i; + ctx->offsets[0] = 0; + for (i = 1; i <= nbSamples; ++i) { + ctx->offsets[i] = ctx->offsets[i - 1] + samplesSizes[i - 1]; + } + } + DISPLAYLEVEL(2, "Constructing partial suffix array\n"); + { + /* suffix is a partial suffix array. + * It only sorts suffixes by their first parameters.d bytes. + * The sort is stable, so each dmer group is sorted by position in input. + */ + U32 i; + for (i = 0; i < ctx->suffixSize; ++i) { + ctx->suffix[i] = i; + } + /* qsort doesn't take an opaque pointer, so pass as a global. + * On OpenBSD qsort() is not guaranteed to be stable, their mergesort() is. + */ + g_ctx = ctx; +#if defined(__OpenBSD__) + mergesort(ctx->suffix, ctx->suffixSize, sizeof(U32), + (ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp)); +#else + qsort(ctx->suffix, ctx->suffixSize, sizeof(U32), + (ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp)); +#endif + } + DISPLAYLEVEL(2, "Computing frequencies\n"); + /* For each dmer group (group of positions with the same first d bytes): + * 1. For each position we set dmerAt[position] = dmerID. The dmerID is + * (groupBeginPtr - suffix). This allows us to go from position to + * dmerID so we can look up values in freq. + * 2. We calculate how many samples the dmer occurs in and save it in + * freqs[dmerId]. + */ + COVER_groupBy(ctx->suffix, ctx->suffixSize, sizeof(U32), ctx, + (ctx->d <= 8 ? &COVER_cmp8 : &COVER_cmp), &COVER_group); + ctx->freqs = ctx->suffix; + ctx->suffix = NULL; + return 1; +} + +/** + * Given the prepared context build the dictionary. + */ +static size_t COVER_buildDictionary(const COVER_ctx_t *ctx, U32 *freqs, + COVER_map_t *activeDmers, void *dictBuffer, + size_t dictBufferCapacity, + ZDICT_cover_params_t parameters) { + BYTE *const dict = (BYTE *)dictBuffer; + size_t tail = dictBufferCapacity; + /* Divide the data up into epochs of equal size. + * We will select at least one segment from each epoch. 
+ */ + const U32 epochs = MAX(1, (U32)(dictBufferCapacity / parameters.k / 4)); + const U32 epochSize = (U32)(ctx->suffixSize / epochs); + size_t epoch; + DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n", epochs, + epochSize); + /* Loop through the epochs until there are no more segments or the dictionary + * is full. + */ + for (epoch = 0; tail > 0; epoch = (epoch + 1) % epochs) { + const U32 epochBegin = (U32)(epoch * epochSize); + const U32 epochEnd = epochBegin + epochSize; + size_t segmentSize; + /* Select a segment */ + COVER_segment_t segment = COVER_selectSegment( + ctx, freqs, activeDmers, epochBegin, epochEnd, parameters); + /* If the segment covers no dmers, then we are out of content */ + if (segment.score == 0) { + break; + } + /* Trim the segment if necessary and if it is too small then we are done */ + segmentSize = MIN(segment.end - segment.begin + parameters.d - 1, tail); + if (segmentSize < parameters.d) { + break; + } + /* We fill the dictionary from the back to allow the best segments to be + * referenced with the smallest offsets. 
+ */ + tail -= segmentSize; + memcpy(dict + tail, ctx->samples + segment.begin, segmentSize); + DISPLAYUPDATE( + 2, "\r%u%% ", + (U32)(((dictBufferCapacity - tail) * 100) / dictBufferCapacity)); + } + DISPLAYLEVEL(2, "\r%79s\r", ""); + return tail; +} + +ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover( + void *dictBuffer, size_t dictBufferCapacity, + const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples, + ZDICT_cover_params_t parameters) +{ + BYTE* const dict = (BYTE*)dictBuffer; + COVER_ctx_t ctx; + COVER_map_t activeDmers; + + /* Initialize global data */ + g_displayLevel = parameters.zParams.notificationLevel; + /* Checks */ + if (!COVER_checkParameters(parameters, dictBufferCapacity)) { + DISPLAYLEVEL(1, "Cover parameters incorrect\n"); + return ERROR(GENERIC); + } + if (nbSamples == 0) { + DISPLAYLEVEL(1, "Cover must have at least one input file\n"); + return ERROR(GENERIC); + } + if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) { + DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n", + ZDICT_DICTSIZE_MIN); + return ERROR(dstSize_tooSmall); + } + /* Initialize context and activeDmers */ + if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, + parameters.d)) { + return ERROR(GENERIC); + } + if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) { + DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n"); + COVER_ctx_destroy(&ctx); + return ERROR(GENERIC); + } + + DISPLAYLEVEL(2, "Building dictionary\n"); + { + const size_t tail = + COVER_buildDictionary(&ctx, ctx.freqs, &activeDmers, dictBuffer, + dictBufferCapacity, parameters); + const size_t dictionarySize = ZDICT_finalizeDictionary( + dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail, + samplesBuffer, samplesSizes, nbSamples, parameters.zParams); + if (!ZSTD_isError(dictionarySize)) { + DISPLAYLEVEL(2, "Constructed dictionary of size %u\n", + (U32)dictionarySize); + } + COVER_ctx_destroy(&ctx); + COVER_map_destroy(&activeDmers); + 
return dictionarySize; + } +} + +/** + * COVER_best_t is used for two purposes: + * 1. Synchronizing threads. + * 2. Saving the best parameters and dictionary. + * + * All of the methods except COVER_best_init() are thread safe if zstd is + * compiled with multithreaded support. + */ +typedef struct COVER_best_s { + ZSTD_pthread_mutex_t mutex; + ZSTD_pthread_cond_t cond; + size_t liveJobs; + void *dict; + size_t dictSize; + ZDICT_cover_params_t parameters; + size_t compressedSize; +} COVER_best_t; + +/** + * Initialize the `COVER_best_t`. + */ +static void COVER_best_init(COVER_best_t *best) { + if (best==NULL) return; /* compatible with init on NULL */ + (void)ZSTD_pthread_mutex_init(&best->mutex, NULL); + (void)ZSTD_pthread_cond_init(&best->cond, NULL); + best->liveJobs = 0; + best->dict = NULL; + best->dictSize = 0; + best->compressedSize = (size_t)-1; + memset(&best->parameters, 0, sizeof(best->parameters)); +} + +/** + * Wait until liveJobs == 0. + */ +static void COVER_best_wait(COVER_best_t *best) { + if (!best) { + return; + } + ZSTD_pthread_mutex_lock(&best->mutex); + while (best->liveJobs != 0) { + ZSTD_pthread_cond_wait(&best->cond, &best->mutex); + } + ZSTD_pthread_mutex_unlock(&best->mutex); +} + +/** + * Call COVER_best_wait() and then destroy the COVER_best_t. + */ +static void COVER_best_destroy(COVER_best_t *best) { + if (!best) { + return; + } + COVER_best_wait(best); + if (best->dict) { + free(best->dict); + } + ZSTD_pthread_mutex_destroy(&best->mutex); + ZSTD_pthread_cond_destroy(&best->cond); +} + +/** + * Called when a thread is about to be launched. + * Increments liveJobs. + */ +static void COVER_best_start(COVER_best_t *best) { + if (!best) { + return; + } + ZSTD_pthread_mutex_lock(&best->mutex); + ++best->liveJobs; + ZSTD_pthread_mutex_unlock(&best->mutex); +} + +/** + * Called when a thread finishes executing, both on error or success. + * Decrements liveJobs and signals any waiting threads if liveJobs == 0. 
+ * If this dictionary is the best so far save it and its parameters. + */ +static void COVER_best_finish(COVER_best_t *best, size_t compressedSize, + ZDICT_cover_params_t parameters, void *dict, + size_t dictSize) { + if (!best) { + return; + } + { + size_t liveJobs; + ZSTD_pthread_mutex_lock(&best->mutex); + --best->liveJobs; + liveJobs = best->liveJobs; + /* If the new dictionary is better */ + if (compressedSize < best->compressedSize) { + /* Allocate space if necessary */ + if (!best->dict || best->dictSize < dictSize) { + if (best->dict) { + free(best->dict); + } + best->dict = malloc(dictSize); + if (!best->dict) { + best->compressedSize = ERROR(GENERIC); + best->dictSize = 0; + return; + } + } + /* Save the dictionary, parameters, and size */ + memcpy(best->dict, dict, dictSize); + best->dictSize = dictSize; + best->parameters = parameters; + best->compressedSize = compressedSize; + } + ZSTD_pthread_mutex_unlock(&best->mutex); + if (liveJobs == 0) { + ZSTD_pthread_cond_broadcast(&best->cond); + } + } +} + +/** + * Parameters for COVER_tryParameters(). + */ +typedef struct COVER_tryParameters_data_s { + const COVER_ctx_t *ctx; + COVER_best_t *best; + size_t dictBufferCapacity; + ZDICT_cover_params_t parameters; +} COVER_tryParameters_data_t; + +/** + * Tries a set of parameters and upates the COVER_best_t with the results. + * This function is thread safe if zstd is compiled with multithreaded support. + * It takes its parameters as an *OWNING* opaque pointer to support threading. 
+ */
+static void COVER_tryParameters(void *opaque) {
+  /* Save parameters as local variables */
+  COVER_tryParameters_data_t *const data = (COVER_tryParameters_data_t *)opaque;
+  const COVER_ctx_t *const ctx = data->ctx;
+  const ZDICT_cover_params_t parameters = data->parameters;
+  size_t dictBufferCapacity = data->dictBufferCapacity;
+  /* Stays an error code unless every sample compresses successfully. */
+  size_t totalCompressedSize = ERROR(GENERIC);
+  /* Allocate space for hash table, dict, and freqs */
+  COVER_map_t activeDmers;
+  BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity);
+  U32 *freqs = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
+  if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
+    DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
+    goto _cleanup;
+  }
+  if (!dict || !freqs) {
+    DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n");
+    goto _cleanup;
+  }
+  /* Copy the frequencies because we need to modify them */
+  memcpy(freqs, ctx->freqs, ctx->suffixSize * sizeof(U32));
+  /* Build the dictionary */
+  {
+    const size_t tail = COVER_buildDictionary(ctx, freqs, &activeDmers, dict,
+                                              dictBufferCapacity, parameters);
+    /* From here on dictBufferCapacity holds the finalized dictionary size
+     * (or an error code), not the capacity of `dict`. */
+    dictBufferCapacity = ZDICT_finalizeDictionary(
+        dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
+        ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbSamples,
+        parameters.zParams);
+    if (ZDICT_isError(dictBufferCapacity)) {
+      DISPLAYLEVEL(1, "Failed to finalize dictionary\n");
+      goto _cleanup;
+    }
+  }
+  /* Check total compressed size */
+  {
+    /* Pointers */
+    ZSTD_CCtx *cctx;
+    ZSTD_CDict *cdict;
+    void *dst;
+    /* Local variables */
+    size_t dstCapacity;
+    size_t i;
+    /* Allocate dst with enough space to compress the maximum sized sample */
+    {
+      size_t maxSampleSize = 0;
+      for (i = 0; i < ctx->nbSamples; ++i) {
+        maxSampleSize = MAX(ctx->samplesSizes[i], maxSampleSize);
+      }
+      dstCapacity = ZSTD_compressBound(maxSampleSize);
+      dst = malloc(dstCapacity);
+    }
+    /* Create the cctx and cdict */
+    cctx = ZSTD_createCCtx();
+    cdict = ZSTD_createCDict(dict, dictBufferCapacity,
+                             parameters.zParams.compressionLevel);
+    if (!dst || !cctx || !cdict) {
+      goto _compressCleanup;
+    }
+    /* Compress each sample and sum their sizes (or error) */
+    /* The dictionary's own size is counted as part of the total. */
+    totalCompressedSize = dictBufferCapacity;
+    for (i = 0; i < ctx->nbSamples; ++i) {
+      const size_t size = ZSTD_compress_usingCDict(
+          cctx, dst, dstCapacity, ctx->samples + ctx->offsets[i],
+          ctx->samplesSizes[i], cdict);
+      if (ZSTD_isError(size)) {
+        totalCompressedSize = ERROR(GENERIC);
+        goto _compressCleanup;
+      }
+      totalCompressedSize += size;
+    }
+  _compressCleanup:
+    ZSTD_freeCCtx(cctx);
+    ZSTD_freeCDict(cdict);
+    if (dst) {
+      free(dst);
+    }
+  }
+
+_cleanup:
+  /* Reached on every path, so COVER_best_finish() is always called exactly
+   * once for the matching COVER_best_start(). */
+  COVER_best_finish(data->best, totalCompressedSize, parameters, dict,
+                    dictBufferCapacity);
+  free(data);
+  COVER_map_destroy(&activeDmers);
+  if (dict) {
+    free(dict);
+  }
+  if (freqs) {
+    free(freqs);
+  }
+}
+
+/**
+ * Tries a grid of (d, k) values and keeps the dictionary whose total
+ * compressed size, as computed by COVER_tryParameters(), is smallest.
+ *
+ * A zero d, k or steps in *parameters selects the default ranges set up in
+ * the constants below (d in 6..8 stepping by 2, k in 50..2000, 40 steps).
+ * When nbThreads > 1 the trials are submitted to a thread pool, otherwise
+ * they run inline.
+ *
+ * On success the winning dictionary is copied to dictBuffer, *parameters is
+ * overwritten with the winning parameters, and the dictionary size is
+ * returned. Otherwise an error code is returned.
+ */
+ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
+    void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
+    const size_t *samplesSizes, unsigned nbSamples,
+    ZDICT_cover_params_t *parameters) {
+  /* constants */
+  const unsigned nbThreads = parameters->nbThreads;
+  const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d;
+  const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d;
+  const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k;
+  const unsigned kMaxK = parameters->k == 0 ? 2000 : parameters->k;
+  const unsigned kSteps = parameters->steps == 0 ? 40 : parameters->steps;
+  const unsigned kStepSize = MAX((kMaxK - kMinK) / kSteps, 1);
+  const unsigned kIterations =
+      (1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize);
+  /* Local variables */
+  const int displayLevel = parameters->zParams.notificationLevel;
+  unsigned iteration = 1;
+  unsigned d;
+  unsigned k;
+  COVER_best_t best;
+  POOL_ctx *pool = NULL;
+
+  /* Checks */
+  if (kMinK < kMaxD || kMaxK < kMinK) {
+    LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n");
+    return ERROR(GENERIC);
+  }
+  if (nbSamples == 0) {
+    DISPLAYLEVEL(1, "Cover must have at least one input file\n");
+    return ERROR(GENERIC);
+  }
+  if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
+    DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
+                 ZDICT_DICTSIZE_MIN);
+    return ERROR(dstSize_tooSmall);
+  }
+  if (nbThreads > 1) {
+    pool = POOL_create(nbThreads, 1);
+    if (!pool) {
+      return ERROR(memory_allocation);
+    }
+  }
+  /* Initialization */
+  COVER_best_init(&best);
+  /* Turn down global display level to clean up display at level 2 and below */
+  g_displayLevel = displayLevel == 0 ? 0 : displayLevel - 1;
+  /* Loop through d first because each new value needs a new context */
+  LOCALDISPLAYLEVEL(displayLevel, 2, "Trying %u different sets of parameters\n",
+                    kIterations);
+  for (d = kMinD; d <= kMaxD; d += 2) {
+    /* Initialize the context for this value of d */
+    COVER_ctx_t ctx;
+    LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d);
+    if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d)) {
+      LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n");
+      COVER_best_destroy(&best);
+      POOL_free(pool);
+      return ERROR(GENERIC);
+    }
+    /* Loop through k reusing the same context */
+    for (k = kMinK; k <= kMaxK; k += kStepSize) {
+      /* Prepare the arguments */
+      COVER_tryParameters_data_t *data = (COVER_tryParameters_data_t *)malloc(
+          sizeof(COVER_tryParameters_data_t));
+      LOCALDISPLAYLEVEL(displayLevel, 3, "k=%u\n", k);
+      if (!data) {
+        LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to allocate parameters\n");
+        /* COVER_best_destroy() waits for liveJobs == 0 before ctx is freed. */
+        COVER_best_destroy(&best);
+        COVER_ctx_destroy(&ctx);
+        POOL_free(pool);
+        return ERROR(GENERIC);
+      }
+      data->ctx = &ctx;
+      data->best = &best;
+      data->dictBufferCapacity = dictBufferCapacity;
+      data->parameters = *parameters;
+      data->parameters.k = k;
+      data->parameters.d = d;
+      data->parameters.steps = kSteps;
+      data->parameters.zParams.notificationLevel = g_displayLevel;
+      /* Check the parameters */
+      if (!COVER_checkParameters(data->parameters, dictBufferCapacity)) {
+        DISPLAYLEVEL(1, "Cover parameters incorrect\n");
+        free(data);
+        continue;
+      }
+      /* Call the function and pass ownership of data to it */
+      COVER_best_start(&best);
+      if (pool) {
+        POOL_add(pool, &COVER_tryParameters, data);
+      } else {
+        COVER_tryParameters(data);
+      }
+      /* Print status */
+      LOCALDISPLAYUPDATE(displayLevel, 2, "\r%u%%       ",
+                         (U32)((iteration * 100) / kIterations));
+      ++iteration;
+    }
+    /* Wait for all trials that reference ctx before destroying it. */
+    COVER_best_wait(&best);
+    COVER_ctx_destroy(&ctx);
+  }
+  LOCALDISPLAYLEVEL(displayLevel, 2, "\r%79s\r", "");
+  /* Fill the output buffer and parameters with output of the best parameters */
+  {
+    const size_t dictSize = best.dictSize;
+    if (ZSTD_isError(best.compressedSize)) {
+      const size_t compressedSize = best.compressedSize;
+      COVER_best_destroy(&best);
+      POOL_free(pool);
+      return compressedSize;
+    }
+    *parameters = best.parameters;
+    memcpy(dictBuffer, best.dict, dictSize);
+    COVER_best_destroy(&best);
+    POOL_free(pool);
+    return dictSize;
+  }
+}
diff --git a/deps/SZ/zstd/dictBuilder/divsufsort.c b/deps/SZ/zstd/dictBuilder/divsufsort.c
new file mode 100644
index 0000000000000000000000000000000000000000..60cceb088321ce5b62c297a6e714bafff5ae8e75
--- /dev/null
+++ b/deps/SZ/zstd/dictBuilder/divsufsort.c
@@ -0,0 +1,1913 @@
+/*
+ * divsufsort.c for libdivsufsort-lite
+ * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */ + +/*- Compiler specifics -*/ +#ifdef __clang__ +#pragma clang diagnostic ignored "-Wshorten-64-to-32" +#endif + +#if defined(_MSC_VER) +# pragma warning(disable : 4244) +# pragma warning(disable : 4127) /* C4127 : Condition expression is constant */ +#endif + + +/*- Dependencies -*/ +#include <assert.h> +#include <stdio.h> +#include <stdlib.h> + +#include "divsufsort.h" + +/*- Constants -*/ +#if defined(INLINE) +# undef INLINE +#endif +#if !defined(INLINE) +# define INLINE __inline +#endif +#if defined(ALPHABET_SIZE) && (ALPHABET_SIZE < 1) +# undef ALPHABET_SIZE +#endif +#if !defined(ALPHABET_SIZE) +# define ALPHABET_SIZE (256) +#endif +#define BUCKET_A_SIZE (ALPHABET_SIZE) +#define BUCKET_B_SIZE (ALPHABET_SIZE * ALPHABET_SIZE) +#if defined(SS_INSERTIONSORT_THRESHOLD) +# if SS_INSERTIONSORT_THRESHOLD < 1 +# undef SS_INSERTIONSORT_THRESHOLD +# define SS_INSERTIONSORT_THRESHOLD (1) +# endif +#else +# define SS_INSERTIONSORT_THRESHOLD (8) +#endif +#if defined(SS_BLOCKSIZE) +# if SS_BLOCKSIZE < 0 +# undef SS_BLOCKSIZE +# define SS_BLOCKSIZE (0) +# elif 32768 <= SS_BLOCKSIZE +# undef SS_BLOCKSIZE +# define SS_BLOCKSIZE (32767) +# endif +#else +# define SS_BLOCKSIZE (1024) +#endif +/* minstacksize = log(SS_BLOCKSIZE) / log(3) * 2 */ +#if SS_BLOCKSIZE == 0 +# define SS_MISORT_STACKSIZE (96) +#elif SS_BLOCKSIZE <= 4096 +# define SS_MISORT_STACKSIZE (16) +#else +# define SS_MISORT_STACKSIZE (24) +#endif +#define SS_SMERGE_STACKSIZE (32) +#define TR_INSERTIONSORT_THRESHOLD (8) +#define TR_STACKSIZE (64) + + +/*- Macros -*/ +#ifndef SWAP +# define SWAP(_a, _b) do { t = (_a); (_a) = (_b); (_b) = t; } while(0) +#endif /* SWAP */ +#ifndef MIN +# define MIN(_a, _b) (((_a) < (_b)) ? (_a) : (_b)) +#endif /* MIN */ +#ifndef MAX +# define MAX(_a, _b) (((_a) > (_b)) ? 
(_a) : (_b)) +#endif /* MAX */ +#define STACK_PUSH(_a, _b, _c, _d)\ + do {\ + assert(ssize < STACK_SIZE);\ + stack[ssize].a = (_a), stack[ssize].b = (_b),\ + stack[ssize].c = (_c), stack[ssize++].d = (_d);\ + } while(0) +#define STACK_PUSH5(_a, _b, _c, _d, _e)\ + do {\ + assert(ssize < STACK_SIZE);\ + stack[ssize].a = (_a), stack[ssize].b = (_b),\ + stack[ssize].c = (_c), stack[ssize].d = (_d), stack[ssize++].e = (_e);\ + } while(0) +#define STACK_POP(_a, _b, _c, _d)\ + do {\ + assert(0 <= ssize);\ + if(ssize == 0) { return; }\ + (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\ + (_c) = stack[ssize].c, (_d) = stack[ssize].d;\ + } while(0) +#define STACK_POP5(_a, _b, _c, _d, _e)\ + do {\ + assert(0 <= ssize);\ + if(ssize == 0) { return; }\ + (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\ + (_c) = stack[ssize].c, (_d) = stack[ssize].d, (_e) = stack[ssize].e;\ + } while(0) +#define BUCKET_A(_c0) bucket_A[(_c0)] +#if ALPHABET_SIZE == 256 +#define BUCKET_B(_c0, _c1) (bucket_B[((_c1) << 8) | (_c0)]) +#define BUCKET_BSTAR(_c0, _c1) (bucket_B[((_c0) << 8) | (_c1)]) +#else +#define BUCKET_B(_c0, _c1) (bucket_B[(_c1) * ALPHABET_SIZE + (_c0)]) +#define BUCKET_BSTAR(_c0, _c1) (bucket_B[(_c0) * ALPHABET_SIZE + (_c1)]) +#endif + + +/*- Private Functions -*/ + +static const int lg_table[256]= { + -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, + 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7 +}; + +#if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) + +static INLINE +int +ss_ilg(int n) { +#if SS_BLOCKSIZE == 0 + return (n & 
0xffff0000) ? + ((n & 0xff000000) ? + 24 + lg_table[(n >> 24) & 0xff] : + 16 + lg_table[(n >> 16) & 0xff]) : + ((n & 0x0000ff00) ? + 8 + lg_table[(n >> 8) & 0xff] : + 0 + lg_table[(n >> 0) & 0xff]); +#elif SS_BLOCKSIZE < 256 + return lg_table[n]; +#else + return (n & 0xff00) ? + 8 + lg_table[(n >> 8) & 0xff] : + 0 + lg_table[(n >> 0) & 0xff]; +#endif +} + +#endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */ + +#if SS_BLOCKSIZE != 0 + +static const int sqq_table[256] = { + 0, 16, 22, 27, 32, 35, 39, 42, 45, 48, 50, 53, 55, 57, 59, 61, + 64, 65, 67, 69, 71, 73, 75, 76, 78, 80, 81, 83, 84, 86, 87, 89, + 90, 91, 93, 94, 96, 97, 98, 99, 101, 102, 103, 104, 106, 107, 108, 109, +110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, +128, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, +143, 144, 144, 145, 146, 147, 148, 149, 150, 150, 151, 152, 153, 154, 155, 155, +156, 157, 158, 159, 160, 160, 161, 162, 163, 163, 164, 165, 166, 167, 167, 168, +169, 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178, 178, 179, 180, +181, 181, 182, 183, 183, 184, 185, 185, 186, 187, 187, 188, 189, 189, 190, 191, +192, 192, 193, 193, 194, 195, 195, 196, 197, 197, 198, 199, 199, 200, 201, 201, +202, 203, 203, 204, 204, 205, 206, 206, 207, 208, 208, 209, 209, 210, 211, 211, +212, 212, 213, 214, 214, 215, 215, 216, 217, 217, 218, 218, 219, 219, 220, 221, +221, 222, 222, 223, 224, 224, 225, 225, 226, 226, 227, 227, 228, 229, 229, 230, +230, 231, 231, 232, 232, 233, 234, 234, 235, 235, 236, 236, 237, 237, 238, 238, +239, 240, 240, 241, 241, 242, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247, +247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255 +}; + +static INLINE +int +ss_isqrt(int x) { + int y, e; + + if(x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) { return SS_BLOCKSIZE; } + e = (x & 0xffff0000) ? + ((x & 0xff000000) ? 
+ 24 + lg_table[(x >> 24) & 0xff] : + 16 + lg_table[(x >> 16) & 0xff]) : + ((x & 0x0000ff00) ? + 8 + lg_table[(x >> 8) & 0xff] : + 0 + lg_table[(x >> 0) & 0xff]); + + if(e >= 16) { + y = sqq_table[x >> ((e - 6) - (e & 1))] << ((e >> 1) - 7); + if(e >= 24) { y = (y + 1 + x / y) >> 1; } + y = (y + 1 + x / y) >> 1; + } else if(e >= 8) { + y = (sqq_table[x >> ((e - 6) - (e & 1))] >> (7 - (e >> 1))) + 1; + } else { + return sqq_table[x] >> 4; + } + + return (x < (y * y)) ? y - 1 : y; +} + +#endif /* SS_BLOCKSIZE != 0 */ + + +/*---------------------------------------------------------------------------*/ + +/* Compares two suffixes. */ +static INLINE +int +ss_compare(const unsigned char *T, + const int *p1, const int *p2, + int depth) { + const unsigned char *U1, *U2, *U1n, *U2n; + + for(U1 = T + depth + *p1, + U2 = T + depth + *p2, + U1n = T + *(p1 + 1) + 2, + U2n = T + *(p2 + 1) + 2; + (U1 < U1n) && (U2 < U2n) && (*U1 == *U2); + ++U1, ++U2) { + } + + return U1 < U1n ? + (U2 < U2n ? *U1 - *U2 : 1) : + (U2 < U2n ? 
-1 : 0); +} + + +/*---------------------------------------------------------------------------*/ + +#if (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1) + +/* Insertionsort for small size groups */ +static +void +ss_insertionsort(const unsigned char *T, const int *PA, + int *first, int *last, int depth) { + int *i, *j; + int t; + int r; + + for(i = last - 2; first <= i; --i) { + for(t = *i, j = i + 1; 0 < (r = ss_compare(T, PA + t, PA + *j, depth));) { + do { *(j - 1) = *j; } while((++j < last) && (*j < 0)); + if(last <= j) { break; } + } + if(r == 0) { *j = ~*j; } + *(j - 1) = t; + } +} + +#endif /* (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1) */ + + +/*---------------------------------------------------------------------------*/ + +#if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) + +static INLINE +void +ss_fixdown(const unsigned char *Td, const int *PA, + int *SA, int i, int size) { + int j, k; + int v; + int c, d, e; + + for(v = SA[i], c = Td[PA[v]]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) { + d = Td[PA[SA[k = j++]]]; + if(d < (e = Td[PA[SA[j]]])) { k = j; d = e; } + if(d <= c) { break; } + } + SA[i] = v; +} + +/* Simple top-down heapsort. */ +static +void +ss_heapsort(const unsigned char *Td, const int *PA, int *SA, int size) { + int i, m; + int t; + + m = size; + if((size % 2) == 0) { + m--; + if(Td[PA[SA[m / 2]]] < Td[PA[SA[m]]]) { SWAP(SA[m], SA[m / 2]); } + } + + for(i = m / 2 - 1; 0 <= i; --i) { ss_fixdown(Td, PA, SA, i, m); } + if((size % 2) == 0) { SWAP(SA[0], SA[m]); ss_fixdown(Td, PA, SA, 0, m); } + for(i = m - 1; 0 < i; --i) { + t = SA[0], SA[0] = SA[i]; + ss_fixdown(Td, PA, SA, 0, i); + SA[i] = t; + } +} + + +/*---------------------------------------------------------------------------*/ + +/* Returns the median of three elements. 
*/ +static INLINE +int * +ss_median3(const unsigned char *Td, const int *PA, + int *v1, int *v2, int *v3) { + int *t; + if(Td[PA[*v1]] > Td[PA[*v2]]) { SWAP(v1, v2); } + if(Td[PA[*v2]] > Td[PA[*v3]]) { + if(Td[PA[*v1]] > Td[PA[*v3]]) { return v1; } + else { return v3; } + } + return v2; +} + +/* Returns the median of five elements. */ +static INLINE +int * +ss_median5(const unsigned char *Td, const int *PA, + int *v1, int *v2, int *v3, int *v4, int *v5) { + int *t; + if(Td[PA[*v2]] > Td[PA[*v3]]) { SWAP(v2, v3); } + if(Td[PA[*v4]] > Td[PA[*v5]]) { SWAP(v4, v5); } + if(Td[PA[*v2]] > Td[PA[*v4]]) { SWAP(v2, v4); SWAP(v3, v5); } + if(Td[PA[*v1]] > Td[PA[*v3]]) { SWAP(v1, v3); } + if(Td[PA[*v1]] > Td[PA[*v4]]) { SWAP(v1, v4); SWAP(v3, v5); } + if(Td[PA[*v3]] > Td[PA[*v4]]) { return v4; } + return v3; +} + +/* Returns the pivot element. */ +static INLINE +int * +ss_pivot(const unsigned char *Td, const int *PA, int *first, int *last) { + int *middle; + int t; + + t = last - first; + middle = first + t / 2; + + if(t <= 512) { + if(t <= 32) { + return ss_median3(Td, PA, first, middle, last - 1); + } else { + t >>= 2; + return ss_median5(Td, PA, first, first + t, middle, last - 1 - t, last - 1); + } + } + t >>= 3; + first = ss_median3(Td, PA, first, first + t, first + (t << 1)); + middle = ss_median3(Td, PA, middle - t, middle, middle + t); + last = ss_median3(Td, PA, last - 1 - (t << 1), last - 1 - t, last - 1); + return ss_median3(Td, PA, first, middle, last); +} + + +/*---------------------------------------------------------------------------*/ + +/* Binary partition for substrings. 
*/ +static INLINE +int * +ss_partition(const int *PA, + int *first, int *last, int depth) { + int *a, *b; + int t; + for(a = first - 1, b = last;;) { + for(; (++a < b) && ((PA[*a] + depth) >= (PA[*a + 1] + 1));) { *a = ~*a; } + for(; (a < --b) && ((PA[*b] + depth) < (PA[*b + 1] + 1));) { } + if(b <= a) { break; } + t = ~*b; + *b = *a; + *a = t; + } + if(first < a) { *first = ~*first; } + return a; +} + +/* Multikey introsort for medium size groups. */ +static +void +ss_mintrosort(const unsigned char *T, const int *PA, + int *first, int *last, + int depth) { +#define STACK_SIZE SS_MISORT_STACKSIZE + struct { int *a, *b, c; int d; } stack[STACK_SIZE]; + const unsigned char *Td; + int *a, *b, *c, *d, *e, *f; + int s, t; + int ssize; + int limit; + int v, x = 0; + + for(ssize = 0, limit = ss_ilg(last - first);;) { + + if((last - first) <= SS_INSERTIONSORT_THRESHOLD) { +#if 1 < SS_INSERTIONSORT_THRESHOLD + if(1 < (last - first)) { ss_insertionsort(T, PA, first, last, depth); } +#endif + STACK_POP(first, last, depth, limit); + continue; + } + + Td = T + depth; + if(limit-- == 0) { ss_heapsort(Td, PA, first, last - first); } + if(limit < 0) { + for(a = first + 1, v = Td[PA[*first]]; a < last; ++a) { + if((x = Td[PA[*a]]) != v) { + if(1 < (a - first)) { break; } + v = x; + first = a; + } + } + if(Td[PA[*first] - 1] < v) { + first = ss_partition(PA, first, a, depth); + } + if((a - first) <= (last - a)) { + if(1 < (a - first)) { + STACK_PUSH(a, last, depth, -1); + last = a, depth += 1, limit = ss_ilg(a - first); + } else { + first = a, limit = -1; + } + } else { + if(1 < (last - a)) { + STACK_PUSH(first, a, depth + 1, ss_ilg(a - first)); + first = a, limit = -1; + } else { + last = a, depth += 1, limit = ss_ilg(a - first); + } + } + continue; + } + + /* choose pivot */ + a = ss_pivot(Td, PA, first, last); + v = Td[PA[*a]]; + SWAP(*first, *a); + + /* partition */ + for(b = first; (++b < last) && ((x = Td[PA[*b]]) == v);) { } + if(((a = b) < last) && (x < v)) { + for(; (++b < 
last) && ((x = Td[PA[*b]]) <= v);) { + if(x == v) { SWAP(*b, *a); ++a; } + } + } + for(c = last; (b < --c) && ((x = Td[PA[*c]]) == v);) { } + if((b < (d = c)) && (x > v)) { + for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) { + if(x == v) { SWAP(*c, *d); --d; } + } + } + for(; b < c;) { + SWAP(*b, *c); + for(; (++b < c) && ((x = Td[PA[*b]]) <= v);) { + if(x == v) { SWAP(*b, *a); ++a; } + } + for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) { + if(x == v) { SWAP(*c, *d); --d; } + } + } + + if(a <= d) { + c = b - 1; + + if((s = a - first) > (t = b - a)) { s = t; } + for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } + if((s = d - c) > (t = last - d - 1)) { s = t; } + for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } + + a = first + (b - a), c = last - (d - c); + b = (v <= Td[PA[*a] - 1]) ? a : ss_partition(PA, a, c, depth); + + if((a - first) <= (last - c)) { + if((last - c) <= (c - b)) { + STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); + STACK_PUSH(c, last, depth, limit); + last = a; + } else if((a - first) <= (c - b)) { + STACK_PUSH(c, last, depth, limit); + STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); + last = a; + } else { + STACK_PUSH(c, last, depth, limit); + STACK_PUSH(first, a, depth, limit); + first = b, last = c, depth += 1, limit = ss_ilg(c - b); + } + } else { + if((a - first) <= (c - b)) { + STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); + STACK_PUSH(first, a, depth, limit); + first = c; + } else if((last - c) <= (c - b)) { + STACK_PUSH(first, a, depth, limit); + STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); + first = c; + } else { + STACK_PUSH(first, a, depth, limit); + STACK_PUSH(c, last, depth, limit); + first = b, last = c, depth += 1, limit = ss_ilg(c - b); + } + } + } else { + limit += 1; + if(Td[PA[*first] - 1] < v) { + first = ss_partition(PA, first, last, depth); + limit = ss_ilg(last - first); + } + depth += 1; + } + } +#undef STACK_SIZE +} + +#endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */ + + 
+/*---------------------------------------------------------------------------*/ + +#if SS_BLOCKSIZE != 0 + +static INLINE +void +ss_blockswap(int *a, int *b, int n) { + int t; + for(; 0 < n; --n, ++a, ++b) { + t = *a, *a = *b, *b = t; + } +} + +static INLINE +void +ss_rotate(int *first, int *middle, int *last) { + int *a, *b, t; + int l, r; + l = middle - first, r = last - middle; + for(; (0 < l) && (0 < r);) { + if(l == r) { ss_blockswap(first, middle, l); break; } + if(l < r) { + a = last - 1, b = middle - 1; + t = *a; + do { + *a-- = *b, *b-- = *a; + if(b < first) { + *a = t; + last = a; + if((r -= l + 1) <= l) { break; } + a -= 1, b = middle - 1; + t = *a; + } + } while(1); + } else { + a = first, b = middle; + t = *a; + do { + *a++ = *b, *b++ = *a; + if(last <= b) { + *a = t; + first = a + 1; + if((l -= r + 1) <= r) { break; } + a += 1, b = middle; + t = *a; + } + } while(1); + } + } +} + + +/*---------------------------------------------------------------------------*/ + +static +void +ss_inplacemerge(const unsigned char *T, const int *PA, + int *first, int *middle, int *last, + int depth) { + const int *p; + int *a, *b; + int len, half; + int q, r; + int x; + + for(;;) { + if(*(last - 1) < 0) { x = 1; p = PA + ~*(last - 1); } + else { x = 0; p = PA + *(last - 1); } + for(a = first, len = middle - first, half = len >> 1, r = -1; + 0 < len; + len = half, half >>= 1) { + b = a + half; + q = ss_compare(T, PA + ((0 <= *b) ? *b : ~*b), p, depth); + if(q < 0) { + a = b + 1; + half -= (len & 1) ^ 1; + } else { + r = q; + } + } + if(a < middle) { + if(r == 0) { *a = ~*a; } + ss_rotate(a, middle, last); + last -= middle - a; + middle = a; + if(first == middle) { break; } + } + --last; + if(x != 0) { while(*--last < 0) { } } + if(middle == last) { break; } + } +} + + +/*---------------------------------------------------------------------------*/ + +/* Merge-forward with internal buffer. 
*/ +static +void +ss_mergeforward(const unsigned char *T, const int *PA, + int *first, int *middle, int *last, + int *buf, int depth) { + int *a, *b, *c, *bufend; + int t; + int r; + + bufend = buf + (middle - first) - 1; + ss_blockswap(buf, first, middle - first); + + for(t = *(a = first), b = buf, c = middle;;) { + r = ss_compare(T, PA + *b, PA + *c, depth); + if(r < 0) { + do { + *a++ = *b; + if(bufend <= b) { *bufend = t; return; } + *b++ = *a; + } while(*b < 0); + } else if(r > 0) { + do { + *a++ = *c, *c++ = *a; + if(last <= c) { + while(b < bufend) { *a++ = *b, *b++ = *a; } + *a = *b, *b = t; + return; + } + } while(*c < 0); + } else { + *c = ~*c; + do { + *a++ = *b; + if(bufend <= b) { *bufend = t; return; } + *b++ = *a; + } while(*b < 0); + + do { + *a++ = *c, *c++ = *a; + if(last <= c) { + while(b < bufend) { *a++ = *b, *b++ = *a; } + *a = *b, *b = t; + return; + } + } while(*c < 0); + } + } +} + +/* Merge-backward with internal buffer. */ +static +void +ss_mergebackward(const unsigned char *T, const int *PA, + int *first, int *middle, int *last, + int *buf, int depth) { + const int *p1, *p2; + int *a, *b, *c, *bufend; + int t; + int r; + int x; + + bufend = buf + (last - middle) - 1; + ss_blockswap(buf, middle, last - middle); + + x = 0; + if(*bufend < 0) { p1 = PA + ~*bufend; x |= 1; } + else { p1 = PA + *bufend; } + if(*(middle - 1) < 0) { p2 = PA + ~*(middle - 1); x |= 2; } + else { p2 = PA + *(middle - 1); } + for(t = *(a = last - 1), b = bufend, c = middle - 1;;) { + r = ss_compare(T, p1, p2, depth); + if(0 < r) { + if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; } + *a-- = *b; + if(b <= buf) { *buf = t; break; } + *b-- = *a; + if(*b < 0) { p1 = PA + ~*b; x |= 1; } + else { p1 = PA + *b; } + } else if(r < 0) { + if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; } + *a-- = *c, *c-- = *a; + if(c < first) { + while(buf < b) { *a-- = *b, *b-- = *a; } + *a = *b, *b = t; + break; + } + if(*c < 0) { p2 = PA + ~*c; x |= 2; } + 
else { p2 = PA + *c; } + } else { + if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; } + *a-- = ~*b; + if(b <= buf) { *buf = t; break; } + *b-- = *a; + if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; } + *a-- = *c, *c-- = *a; + if(c < first) { + while(buf < b) { *a-- = *b, *b-- = *a; } + *a = *b, *b = t; + break; + } + if(*b < 0) { p1 = PA + ~*b; x |= 1; } + else { p1 = PA + *b; } + if(*c < 0) { p2 = PA + ~*c; x |= 2; } + else { p2 = PA + *c; } + } + } +} + +/* D&C based merge. */ +static +void +ss_swapmerge(const unsigned char *T, const int *PA, + int *first, int *middle, int *last, + int *buf, int bufsize, int depth) { +#define STACK_SIZE SS_SMERGE_STACKSIZE +#define GETIDX(a) ((0 <= (a)) ? (a) : (~(a))) +#define MERGE_CHECK(a, b, c)\ + do {\ + if(((c) & 1) ||\ + (((c) & 2) && (ss_compare(T, PA + GETIDX(*((a) - 1)), PA + *(a), depth) == 0))) {\ + *(a) = ~*(a);\ + }\ + if(((c) & 4) && ((ss_compare(T, PA + GETIDX(*((b) - 1)), PA + *(b), depth) == 0))) {\ + *(b) = ~*(b);\ + }\ + } while(0) + struct { int *a, *b, *c; int d; } stack[STACK_SIZE]; + int *l, *r, *lm, *rm; + int m, len, half; + int ssize; + int check, next; + + for(check = 0, ssize = 0;;) { + if((last - middle) <= bufsize) { + if((first < middle) && (middle < last)) { + ss_mergebackward(T, PA, first, middle, last, buf, depth); + } + MERGE_CHECK(first, last, check); + STACK_POP(first, middle, last, check); + continue; + } + + if((middle - first) <= bufsize) { + if(first < middle) { + ss_mergeforward(T, PA, first, middle, last, buf, depth); + } + MERGE_CHECK(first, last, check); + STACK_POP(first, middle, last, check); + continue; + } + + for(m = 0, len = MIN(middle - first, last - middle), half = len >> 1; + 0 < len; + len = half, half >>= 1) { + if(ss_compare(T, PA + GETIDX(*(middle + m + half)), + PA + GETIDX(*(middle - m - half - 1)), depth) < 0) { + m += half + 1; + half -= (len & 1) ^ 1; + } + } + + if(0 < m) { + lm = middle - m, rm = middle + m; + ss_blockswap(lm, 
middle, m); + l = r = middle, next = 0; + if(rm < last) { + if(*rm < 0) { + *rm = ~*rm; + if(first < lm) { for(; *--l < 0;) { } next |= 4; } + next |= 1; + } else if(first < lm) { + for(; *r < 0; ++r) { } + next |= 2; + } + } + + if((l - first) <= (last - r)) { + STACK_PUSH(r, rm, last, (next & 3) | (check & 4)); + middle = lm, last = l, check = (check & 3) | (next & 4); + } else { + if((next & 2) && (r == middle)) { next ^= 6; } + STACK_PUSH(first, lm, l, (check & 3) | (next & 4)); + first = r, middle = rm, check = (next & 3) | (check & 4); + } + } else { + if(ss_compare(T, PA + GETIDX(*(middle - 1)), PA + *middle, depth) == 0) { + *middle = ~*middle; + } + MERGE_CHECK(first, last, check); + STACK_POP(first, middle, last, check); + } + } +#undef STACK_SIZE +} + +#endif /* SS_BLOCKSIZE != 0 */ + + +/*---------------------------------------------------------------------------*/ + +/* Substring sort */ +static +void +sssort(const unsigned char *T, const int *PA, + int *first, int *last, + int *buf, int bufsize, + int depth, int n, int lastsuffix) { + int *a; +#if SS_BLOCKSIZE != 0 + int *b, *middle, *curbuf; + int j, k, curbufsize, limit; +#endif + int i; + + if(lastsuffix != 0) { ++first; } + +#if SS_BLOCKSIZE == 0 + ss_mintrosort(T, PA, first, last, depth); +#else + if((bufsize < SS_BLOCKSIZE) && + (bufsize < (last - first)) && + (bufsize < (limit = ss_isqrt(last - first)))) { + if(SS_BLOCKSIZE < limit) { limit = SS_BLOCKSIZE; } + buf = middle = last - limit, bufsize = limit; + } else { + middle = last, limit = 0; + } + for(a = first, i = 0; SS_BLOCKSIZE < (middle - a); a += SS_BLOCKSIZE, ++i) { +#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE + ss_mintrosort(T, PA, a, a + SS_BLOCKSIZE, depth); +#elif 1 < SS_BLOCKSIZE + ss_insertionsort(T, PA, a, a + SS_BLOCKSIZE, depth); +#endif + curbufsize = last - (a + SS_BLOCKSIZE); + curbuf = a + SS_BLOCKSIZE; + if(curbufsize <= bufsize) { curbufsize = bufsize, curbuf = buf; } + for(b = a, k = SS_BLOCKSIZE, j = i; j & 1; b -= 
k, k <<= 1, j >>= 1) { + ss_swapmerge(T, PA, b - k, b, b + k, curbuf, curbufsize, depth); + } + } +#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE + ss_mintrosort(T, PA, a, middle, depth); +#elif 1 < SS_BLOCKSIZE + ss_insertionsort(T, PA, a, middle, depth); +#endif + for(k = SS_BLOCKSIZE; i != 0; k <<= 1, i >>= 1) { + if(i & 1) { + ss_swapmerge(T, PA, a - k, a, middle, buf, bufsize, depth); + a -= k; + } + } + if(limit != 0) { +#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE + ss_mintrosort(T, PA, middle, last, depth); +#elif 1 < SS_BLOCKSIZE + ss_insertionsort(T, PA, middle, last, depth); +#endif + ss_inplacemerge(T, PA, first, middle, last, depth); + } +#endif + + if(lastsuffix != 0) { + /* Insert last type B* suffix. */ + int PAi[2]; PAi[0] = PA[*(first - 1)], PAi[1] = n - 2; + for(a = first, i = *(first - 1); + (a < last) && ((*a < 0) || (0 < ss_compare(T, &(PAi[0]), PA + *a, depth))); + ++a) { + *(a - 1) = *a; + } + *(a - 1) = i; + } +} + + +/*---------------------------------------------------------------------------*/ + +static INLINE +int +tr_ilg(int n) { + return (n & 0xffff0000) ? + ((n & 0xff000000) ? + 24 + lg_table[(n >> 24) & 0xff] : + 16 + lg_table[(n >> 16) & 0xff]) : + ((n & 0x0000ff00) ? + 8 + lg_table[(n >> 8) & 0xff] : + 0 + lg_table[(n >> 0) & 0xff]); +} + + +/*---------------------------------------------------------------------------*/ + +/* Simple insertionsort for small size groups. 
*/ +static +void +tr_insertionsort(const int *ISAd, int *first, int *last) { + int *a, *b; + int t, r; + + for(a = first + 1; a < last; ++a) { + for(t = *a, b = a - 1; 0 > (r = ISAd[t] - ISAd[*b]);) { + do { *(b + 1) = *b; } while((first <= --b) && (*b < 0)); + if(b < first) { break; } + } + if(r == 0) { *b = ~*b; } + *(b + 1) = t; + } +} + + +/*---------------------------------------------------------------------------*/ + +static INLINE +void +tr_fixdown(const int *ISAd, int *SA, int i, int size) { + int j, k; + int v; + int c, d, e; + + for(v = SA[i], c = ISAd[v]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) { + d = ISAd[SA[k = j++]]; + if(d < (e = ISAd[SA[j]])) { k = j; d = e; } + if(d <= c) { break; } + } + SA[i] = v; +} + +/* Simple top-down heapsort. */ +static +void +tr_heapsort(const int *ISAd, int *SA, int size) { + int i, m; + int t; + + m = size; + if((size % 2) == 0) { + m--; + if(ISAd[SA[m / 2]] < ISAd[SA[m]]) { SWAP(SA[m], SA[m / 2]); } + } + + for(i = m / 2 - 1; 0 <= i; --i) { tr_fixdown(ISAd, SA, i, m); } + if((size % 2) == 0) { SWAP(SA[0], SA[m]); tr_fixdown(ISAd, SA, 0, m); } + for(i = m - 1; 0 < i; --i) { + t = SA[0], SA[0] = SA[i]; + tr_fixdown(ISAd, SA, 0, i); + SA[i] = t; + } +} + + +/*---------------------------------------------------------------------------*/ + +/* Returns the median of three elements. */ +static INLINE +int * +tr_median3(const int *ISAd, int *v1, int *v2, int *v3) { + int *t; + if(ISAd[*v1] > ISAd[*v2]) { SWAP(v1, v2); } + if(ISAd[*v2] > ISAd[*v3]) { + if(ISAd[*v1] > ISAd[*v3]) { return v1; } + else { return v3; } + } + return v2; +} + +/* Returns the median of five elements. 
*/ +static INLINE +int * +tr_median5(const int *ISAd, + int *v1, int *v2, int *v3, int *v4, int *v5) { + int *t; + if(ISAd[*v2] > ISAd[*v3]) { SWAP(v2, v3); } + if(ISAd[*v4] > ISAd[*v5]) { SWAP(v4, v5); } + if(ISAd[*v2] > ISAd[*v4]) { SWAP(v2, v4); SWAP(v3, v5); } + if(ISAd[*v1] > ISAd[*v3]) { SWAP(v1, v3); } + if(ISAd[*v1] > ISAd[*v4]) { SWAP(v1, v4); SWAP(v3, v5); } + if(ISAd[*v3] > ISAd[*v4]) { return v4; } + return v3; +} + +/* Returns the pivot element. */ +static INLINE +int * +tr_pivot(const int *ISAd, int *first, int *last) { + int *middle; + int t; + + t = last - first; + middle = first + t / 2; + + if(t <= 512) { + if(t <= 32) { + return tr_median3(ISAd, first, middle, last - 1); + } else { + t >>= 2; + return tr_median5(ISAd, first, first + t, middle, last - 1 - t, last - 1); + } + } + t >>= 3; + first = tr_median3(ISAd, first, first + t, first + (t << 1)); + middle = tr_median3(ISAd, middle - t, middle, middle + t); + last = tr_median3(ISAd, last - 1 - (t << 1), last - 1 - t, last - 1); + return tr_median3(ISAd, first, middle, last); +} + + +/*---------------------------------------------------------------------------*/ + +typedef struct _trbudget_t trbudget_t; +struct _trbudget_t { + int chance; + int remain; + int incval; + int count; +}; + +static INLINE +void +trbudget_init(trbudget_t *budget, int chance, int incval) { + budget->chance = chance; + budget->remain = budget->incval = incval; +} + +static INLINE +int +trbudget_check(trbudget_t *budget, int size) { + if(size <= budget->remain) { budget->remain -= size; return 1; } + if(budget->chance == 0) { budget->count += size; return 0; } + budget->remain += budget->incval - size; + budget->chance -= 1; + return 1; +} + + +/*---------------------------------------------------------------------------*/ + +static INLINE +void +tr_partition(const int *ISAd, + int *first, int *middle, int *last, + int **pa, int **pb, int v) { + int *a, *b, *c, *d, *e, *f; + int t, s; + int x = 0; + + for(b = middle - 1; 
(++b < last) && ((x = ISAd[*b]) == v);) { } + if(((a = b) < last) && (x < v)) { + for(; (++b < last) && ((x = ISAd[*b]) <= v);) { + if(x == v) { SWAP(*b, *a); ++a; } + } + } + for(c = last; (b < --c) && ((x = ISAd[*c]) == v);) { } + if((b < (d = c)) && (x > v)) { + for(; (b < --c) && ((x = ISAd[*c]) >= v);) { + if(x == v) { SWAP(*c, *d); --d; } + } + } + for(; b < c;) { + SWAP(*b, *c); + for(; (++b < c) && ((x = ISAd[*b]) <= v);) { + if(x == v) { SWAP(*b, *a); ++a; } + } + for(; (b < --c) && ((x = ISAd[*c]) >= v);) { + if(x == v) { SWAP(*c, *d); --d; } + } + } + + if(a <= d) { + c = b - 1; + if((s = a - first) > (t = b - a)) { s = t; } + for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } + if((s = d - c) > (t = last - d - 1)) { s = t; } + for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } + first += (b - a), last -= (d - c); + } + *pa = first, *pb = last; +} + +static +void +tr_copy(int *ISA, const int *SA, + int *first, int *a, int *b, int *last, + int depth) { + /* sort suffixes of middle partition + by using sorted order of suffixes of left and right partition. 
*/ + int *c, *d, *e; + int s, v; + + v = b - SA - 1; + for(c = first, d = a - 1; c <= d; ++c) { + if((0 <= (s = *c - depth)) && (ISA[s] == v)) { + *++d = s; + ISA[s] = d - SA; + } + } + for(c = last - 1, e = d + 1, d = b; e < d; --c) { + if((0 <= (s = *c - depth)) && (ISA[s] == v)) { + *--d = s; + ISA[s] = d - SA; + } + } +} + +static +void +tr_partialcopy(int *ISA, const int *SA, + int *first, int *a, int *b, int *last, + int depth) { + int *c, *d, *e; + int s, v; + int rank, lastrank, newrank = -1; + + v = b - SA - 1; + lastrank = -1; + for(c = first, d = a - 1; c <= d; ++c) { + if((0 <= (s = *c - depth)) && (ISA[s] == v)) { + *++d = s; + rank = ISA[s + depth]; + if(lastrank != rank) { lastrank = rank; newrank = d - SA; } + ISA[s] = newrank; + } + } + + lastrank = -1; + for(e = d; first <= e; --e) { + rank = ISA[*e]; + if(lastrank != rank) { lastrank = rank; newrank = e - SA; } + if(newrank != rank) { ISA[*e] = newrank; } + } + + lastrank = -1; + for(c = last - 1, e = d + 1, d = b; e < d; --c) { + if((0 <= (s = *c - depth)) && (ISA[s] == v)) { + *--d = s; + rank = ISA[s + depth]; + if(lastrank != rank) { lastrank = rank; newrank = d - SA; } + ISA[s] = newrank; + } + } +} + +static +void +tr_introsort(int *ISA, const int *ISAd, + int *SA, int *first, int *last, + trbudget_t *budget) { +#define STACK_SIZE TR_STACKSIZE + struct { const int *a; int *b, *c; int d, e; }stack[STACK_SIZE]; + int *a, *b, *c; + int t; + int v, x = 0; + int incr = ISAd - ISA; + int limit, next; + int ssize, trlink = -1; + + for(ssize = 0, limit = tr_ilg(last - first);;) { + + if(limit < 0) { + if(limit == -1) { + /* tandem repeat partition */ + tr_partition(ISAd - incr, first, first, last, &a, &b, last - SA - 1); + + /* update ranks */ + if(a < last) { + for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; } + } + if(b < last) { + for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; } + } + + /* push */ + if(1 < (b - a)) { + STACK_PUSH5(NULL, a, b, 0, 0); + STACK_PUSH5(ISAd - incr, 
first, last, -2, trlink); + trlink = ssize - 2; + } + if((a - first) <= (last - b)) { + if(1 < (a - first)) { + STACK_PUSH5(ISAd, b, last, tr_ilg(last - b), trlink); + last = a, limit = tr_ilg(a - first); + } else if(1 < (last - b)) { + first = b, limit = tr_ilg(last - b); + } else { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } else { + if(1 < (last - b)) { + STACK_PUSH5(ISAd, first, a, tr_ilg(a - first), trlink); + first = b, limit = tr_ilg(last - b); + } else if(1 < (a - first)) { + last = a, limit = tr_ilg(a - first); + } else { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } + } else if(limit == -2) { + /* tandem repeat copy */ + a = stack[--ssize].b, b = stack[ssize].c; + if(stack[ssize].d == 0) { + tr_copy(ISA, SA, first, a, b, last, ISAd - ISA); + } else { + if(0 <= trlink) { stack[trlink].d = -1; } + tr_partialcopy(ISA, SA, first, a, b, last, ISAd - ISA); + } + STACK_POP5(ISAd, first, last, limit, trlink); + } else { + /* sorted partition */ + if(0 <= *first) { + a = first; + do { ISA[*a] = a - SA; } while((++a < last) && (0 <= *a)); + first = a; + } + if(first < last) { + a = first; do { *a = ~*a; } while(*++a < 0); + next = (ISA[*a] != ISAd[*a]) ? 
tr_ilg(a - first + 1) : -1; + if(++a < last) { for(b = first, v = a - SA - 1; b < a; ++b) { ISA[*b] = v; } } + + /* push */ + if(trbudget_check(budget, a - first)) { + if((a - first) <= (last - a)) { + STACK_PUSH5(ISAd, a, last, -3, trlink); + ISAd += incr, last = a, limit = next; + } else { + if(1 < (last - a)) { + STACK_PUSH5(ISAd + incr, first, a, next, trlink); + first = a, limit = -3; + } else { + ISAd += incr, last = a, limit = next; + } + } + } else { + if(0 <= trlink) { stack[trlink].d = -1; } + if(1 < (last - a)) { + first = a, limit = -3; + } else { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } + } else { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } + continue; + } + + if((last - first) <= TR_INSERTIONSORT_THRESHOLD) { + tr_insertionsort(ISAd, first, last); + limit = -3; + continue; + } + + if(limit-- == 0) { + tr_heapsort(ISAd, first, last - first); + for(a = last - 1; first < a; a = b) { + for(x = ISAd[*a], b = a - 1; (first <= b) && (ISAd[*b] == x); --b) { *b = ~*b; } + } + limit = -3; + continue; + } + + /* choose pivot */ + a = tr_pivot(ISAd, first, last); + SWAP(*first, *a); + v = ISAd[*first]; + + /* partition */ + tr_partition(ISAd, first, first + 1, last, &a, &b, v); + if((last - first) != (b - a)) { + next = (ISA[*a] != v) ? 
tr_ilg(b - a) : -1; + + /* update ranks */ + for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; } + if(b < last) { for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; } } + + /* push */ + if((1 < (b - a)) && (trbudget_check(budget, b - a))) { + if((a - first) <= (last - b)) { + if((last - b) <= (b - a)) { + if(1 < (a - first)) { + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + STACK_PUSH5(ISAd, b, last, limit, trlink); + last = a; + } else if(1 < (last - b)) { + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + first = b; + } else { + ISAd += incr, first = a, last = b, limit = next; + } + } else if((a - first) <= (b - a)) { + if(1 < (a - first)) { + STACK_PUSH5(ISAd, b, last, limit, trlink); + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + last = a; + } else { + STACK_PUSH5(ISAd, b, last, limit, trlink); + ISAd += incr, first = a, last = b, limit = next; + } + } else { + STACK_PUSH5(ISAd, b, last, limit, trlink); + STACK_PUSH5(ISAd, first, a, limit, trlink); + ISAd += incr, first = a, last = b, limit = next; + } + } else { + if((a - first) <= (b - a)) { + if(1 < (last - b)) { + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + STACK_PUSH5(ISAd, first, a, limit, trlink); + first = b; + } else if(1 < (a - first)) { + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + last = a; + } else { + ISAd += incr, first = a, last = b, limit = next; + } + } else if((last - b) <= (b - a)) { + if(1 < (last - b)) { + STACK_PUSH5(ISAd, first, a, limit, trlink); + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + first = b; + } else { + STACK_PUSH5(ISAd, first, a, limit, trlink); + ISAd += incr, first = a, last = b, limit = next; + } + } else { + STACK_PUSH5(ISAd, first, a, limit, trlink); + STACK_PUSH5(ISAd, b, last, limit, trlink); + ISAd += incr, first = a, last = b, limit = next; + } + } + } else { + if((1 < (b - a)) && (0 <= trlink)) { stack[trlink].d = -1; } + if((a - first) <= (last - b)) { + if(1 < (a - first)) { + STACK_PUSH5(ISAd, b, last, limit, trlink); + last = a; + } 
else if(1 < (last - b)) { + first = b; + } else { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } else { + if(1 < (last - b)) { + STACK_PUSH5(ISAd, first, a, limit, trlink); + first = b; + } else if(1 < (a - first)) { + last = a; + } else { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } + } + } else { + if(trbudget_check(budget, last - first)) { + limit = tr_ilg(last - first), ISAd += incr; + } else { + if(0 <= trlink) { stack[trlink].d = -1; } + STACK_POP5(ISAd, first, last, limit, trlink); + } + } + } +#undef STACK_SIZE +} + + + +/*---------------------------------------------------------------------------*/ + +/* Tandem repeat sort: each pass walks SA, jumps over entries that are negative (a negative value t moves the scan ahead by -t elements) and hands every group [first, last) longer than one element to tr_introsort. The ISAd offset from ISA starts at depth and doubles after each pass; passes stop once one finishes with unsorted == 0 or the loop condition -n < *SA fails. */ +static +void +trsort(int *ISA, int *SA, int n, int depth) { + int *ISAd; + int *first, *last; + trbudget_t budget; + int t, skip, unsorted; + + trbudget_init(&budget, tr_ilg(n) * 2 / 3, n); +/* trbudget_init(&budget, tr_ilg(n) * 3 / 4, n); */ + for(ISAd = ISA + depth; -n < *SA; ISAd += ISAd - ISA) { + first = SA; + skip = 0; + unsorted = 0; + do { + if((t = *first) < 0) { first -= t; skip += t; } + else { + if(skip != 0) { *(first + skip) = skip; skip = 0; } + last = SA + ISA[t] + 1; + if(1 < (last - first)) { + budget.count = 0; + tr_introsort(ISA, ISAd, SA, first, last, &budget); + if(budget.count != 0) { unsorted += budget.count; } + else { skip = first - last; } + } else if((last - first) == 1) { + skip = -1; + } + first = last; + } + } while(first < (SA + n)); + if(skip != 0) { *(first + skip) = skip; } + if(unsorted == 0) { break; } + } +} + + +/*---------------------------------------------------------------------------*/ + +/* Sorts suffixes of type B*. */ +static +int +sort_typeBstar(const unsigned char *T, int *SA, + int *bucket_A, int *bucket_B, + int n, int openMP) { + int *PAb, *ISAb, *buf; +#ifdef LIBBSC_OPENMP + int *curbuf; + int l; +#endif + int i, j, k, t, m, bufsize; + int c0, c1; +#ifdef LIBBSC_OPENMP + int d0, d1; +#endif + (void)openMP; + + /* Initialize bucket arrays. 
*/ + for(i = 0; i < BUCKET_A_SIZE; ++i) { bucket_A[i] = 0; } + for(i = 0; i < BUCKET_B_SIZE; ++i) { bucket_B[i] = 0; } + + /* Count the number of occurrences of the first one or two characters of each + type A, B and B* suffix. Moreover, store the beginning position of all + type B* suffixes into the array SA. */ + for(i = n - 1, m = n, c0 = T[n - 1]; 0 <= i;) { + /* type A suffix. */ + do { ++BUCKET_A(c1 = c0); } while((0 <= --i) && ((c0 = T[i]) >= c1)); + if(0 <= i) { + /* type B* suffix. */ + ++BUCKET_BSTAR(c0, c1); + SA[--m] = i; + /* type B suffix. */ + for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { + ++BUCKET_B(c0, c1); + } + } + } + m = n - m; +/* +note: + A type B* suffix is lexicographically smaller than a type B suffix that + begins with the same first two characters. +*/ + + /* Calculate the index of start/end point of each bucket. */ + for(c0 = 0, i = 0, j = 0; c0 < ALPHABET_SIZE; ++c0) { + t = i + BUCKET_A(c0); + BUCKET_A(c0) = i + j; /* start point */ + i = t + BUCKET_B(c0, c0); + for(c1 = c0 + 1; c1 < ALPHABET_SIZE; ++c1) { + j += BUCKET_BSTAR(c0, c1); + BUCKET_BSTAR(c0, c1) = j; /* end point */ + i += BUCKET_B(c0, c1); + } + } + + if(0 < m) { + /* Sort the type B* suffixes by their first two characters. */ + PAb = SA + n - m; ISAb = SA + m; + for(i = m - 2; 0 <= i; --i) { + t = PAb[i], c0 = T[t], c1 = T[t + 1]; + SA[--BUCKET_BSTAR(c0, c1)] = i; + } + t = PAb[m - 1], c0 = T[t], c1 = T[t + 1]; + SA[--BUCKET_BSTAR(c0, c1)] = m - 1; + + /* Sort the type B* substrings using sssort. 
*/ +#ifdef LIBBSC_OPENMP + if (openMP) + { + buf = SA + m; + c0 = ALPHABET_SIZE - 2, c1 = ALPHABET_SIZE - 1, j = m; +#pragma omp parallel default(shared) private(bufsize, curbuf, k, l, d0, d1) + { + bufsize = (n - (2 * m)) / omp_get_num_threads(); + curbuf = buf + omp_get_thread_num() * bufsize; + k = 0; + for(;;) { + #pragma omp critical(sssort_lock) + { + if(0 < (l = j)) { + d0 = c0, d1 = c1; + do { + k = BUCKET_BSTAR(d0, d1); + if(--d1 <= d0) { + d1 = ALPHABET_SIZE - 1; + if(--d0 < 0) { break; } + } + } while(((l - k) <= 1) && (0 < (l = k))); + c0 = d0, c1 = d1, j = k; + } + } + if(l == 0) { break; } + sssort(T, PAb, SA + k, SA + l, + curbuf, bufsize, 2, n, *(SA + k) == (m - 1)); + } + } + } + else + { + buf = SA + m, bufsize = n - (2 * m); + for(c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) { + for(c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) { + i = BUCKET_BSTAR(c0, c1); + if(1 < (j - i)) { + sssort(T, PAb, SA + i, SA + j, + buf, bufsize, 2, n, *(SA + i) == (m - 1)); + } + } + } + } +#else + buf = SA + m, bufsize = n - (2 * m); + for(c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) { + for(c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) { + i = BUCKET_BSTAR(c0, c1); + if(1 < (j - i)) { + sssort(T, PAb, SA + i, SA + j, + buf, bufsize, 2, n, *(SA + i) == (m - 1)); + } + } + } +#endif + + /* Compute ranks of type B* substrings. */ + for(i = m - 1; 0 <= i; --i) { + if(0 <= SA[i]) { + j = i; + do { ISAb[SA[i]] = i; } while((0 <= --i) && (0 <= SA[i])); + SA[i + 1] = i - j; + if(i <= 0) { break; } + } + j = i; + do { ISAb[SA[i] = ~SA[i]] = j; } while(SA[--i] < 0); + ISAb[SA[i]] = j; + } + + /* Construct the inverse suffix array of type B* suffixes using trsort. */ + trsort(ISAb, SA, m, 1); + + /* Set the sorted order of type B* suffixes. 
*/ + for(i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) { + for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) { } + if(0 <= i) { + t = i; + for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { } + SA[ISAb[--j]] = ((t == 0) || (1 < (t - i))) ? t : ~t; + } + } + + /* Calculate the index of start/end point of each bucket. */ + BUCKET_B(ALPHABET_SIZE - 1, ALPHABET_SIZE - 1) = n; /* end point */ + for(c0 = ALPHABET_SIZE - 2, k = m - 1; 0 <= c0; --c0) { + i = BUCKET_A(c0 + 1) - 1; + for(c1 = ALPHABET_SIZE - 1; c0 < c1; --c1) { + t = i - BUCKET_B(c0, c1); + BUCKET_B(c0, c1) = i; /* end point */ + + /* Move all type B* suffixes to the correct position. */ + for(i = t, j = BUCKET_BSTAR(c0, c1); + j <= k; + --i, --k) { SA[i] = SA[k]; } + } + BUCKET_BSTAR(c0, c0 + 1) = i - BUCKET_B(c0, c0) + 1; /* start point */ + BUCKET_B(c0, c0) = i; /* end point */ + } + } + + return m; +} + +/* Constructs the suffix array by using the sorted order of type B* suffixes. */ +static +void +construct_SA(const unsigned char *T, int *SA, + int *bucket_A, int *bucket_B, + int n, int m) { + int *i, *j, *k; + int s; + int c0, c1, c2; + + if(0 < m) { + /* Construct the sorted order of type B suffixes by using + the sorted order of type B* suffixes. */ + for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) { + /* Scan the suffix array from right to left. */ + for(i = SA + BUCKET_BSTAR(c1, c1 + 1), + j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1; + i <= j; + --j) { + if(0 < (s = *j)) { + assert(T[s] == c1); + assert(((s + 1) < n) && (T[s] <= T[s + 1])); + assert(T[s - 1] <= T[s]); + *j = ~s; + c0 = T[--s]; + if((0 < s) && (T[s - 1] > c0)) { s = ~s; } + if(c0 != c2) { + if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; } + k = SA + BUCKET_B(c2 = c0, c1); + } + assert(k < j); + *k-- = s; + } else { + assert(((s == 0) && (T[s] == c1)) || (s < 0)); + *j = ~s; + } + } + } + } + + /* Construct the suffix array by using + the sorted order of type B suffixes. 
*/ + k = SA + BUCKET_A(c2 = T[n - 1]); + *k++ = (T[n - 2] < c2) ? ~(n - 1) : (n - 1); + /* Scan the suffix array from left to right. */ + for(i = SA, j = SA + n; i < j; ++i) { + if(0 < (s = *i)) { + assert(T[s - 1] >= T[s]); + c0 = T[--s]; + if((s == 0) || (T[s - 1] < c0)) { s = ~s; } + if(c0 != c2) { + BUCKET_A(c2) = k - SA; + k = SA + BUCKET_A(c2 = c0); + } + assert(i < k); + *k++ = s; + } else { + assert(s < 0); + *i = ~s; + } + } +} + +/* Constructs the burrows-wheeler transformed string directly + by using the sorted order of type B* suffixes. */ +static +int +construct_BWT(const unsigned char *T, int *SA, + int *bucket_A, int *bucket_B, + int n, int m) { + int *i, *j, *k, *orig; + int s; + int c0, c1, c2; + + if(0 < m) { + /* Construct the sorted order of type B suffixes by using + the sorted order of type B* suffixes. */ + for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) { + /* Scan the suffix array from right to left. */ + for(i = SA + BUCKET_BSTAR(c1, c1 + 1), + j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1; + i <= j; + --j) { + if(0 < (s = *j)) { + assert(T[s] == c1); + assert(((s + 1) < n) && (T[s] <= T[s + 1])); + assert(T[s - 1] <= T[s]); + c0 = T[--s]; + *j = ~((int)c0); + if((0 < s) && (T[s - 1] > c0)) { s = ~s; } + if(c0 != c2) { + if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; } + k = SA + BUCKET_B(c2 = c0, c1); + } + assert(k < j); + *k-- = s; + } else if(s != 0) { + *j = ~s; +#ifndef NDEBUG + } else { + assert(T[s] == c1); +#endif + } + } + } + } + + /* Construct the BWTed string by using + the sorted order of type B suffixes. */ + k = SA + BUCKET_A(c2 = T[n - 1]); + *k++ = (T[n - 2] < c2) ? ~((int)T[n - 2]) : (n - 1); + /* Scan the suffix array from left to right. 
*/ + for(i = SA, j = SA + n, orig = SA; i < j; ++i) { + if(0 < (s = *i)) { + assert(T[s - 1] >= T[s]); + c0 = T[--s]; + *i = c0; + if((0 < s) && (T[s - 1] < c0)) { s = ~((int)T[s - 1]); } + if(c0 != c2) { + BUCKET_A(c2) = k - SA; + k = SA + BUCKET_A(c2 = c0); + } + assert(i < k); + *k++ = s; + } else if(s != 0) { + *i = ~s; + } else { + orig = i; + } + } + + return orig - SA; +} + +/* Constructs the burrows-wheeler transformed string directly + by using the sorted order of type B* suffixes. */ +static +int +construct_BWT_indexes(const unsigned char *T, int *SA, + int *bucket_A, int *bucket_B, + int n, int m, + unsigned char * num_indexes, int * indexes) { + int *i, *j, *k, *orig; + int s; + int c0, c1, c2; + + int mod = n / 8; + { + mod |= mod >> 1; mod |= mod >> 2; + mod |= mod >> 4; mod |= mod >> 8; + mod |= mod >> 16; mod >>= 1; + + *num_indexes = (unsigned char)((n - 1) / (mod + 1)); + } + + if(0 < m) { + /* Construct the sorted order of type B suffixes by using + the sorted order of type B* suffixes. */ + for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) { + /* Scan the suffix array from right to left. */ + for(i = SA + BUCKET_BSTAR(c1, c1 + 1), + j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1; + i <= j; + --j) { + if(0 < (s = *j)) { + assert(T[s] == c1); + assert(((s + 1) < n) && (T[s] <= T[s + 1])); + assert(T[s - 1] <= T[s]); + + if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = j - SA; + + c0 = T[--s]; + *j = ~((int)c0); + if((0 < s) && (T[s - 1] > c0)) { s = ~s; } + if(c0 != c2) { + if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; } + k = SA + BUCKET_B(c2 = c0, c1); + } + assert(k < j); + *k-- = s; + } else if(s != 0) { + *j = ~s; +#ifndef NDEBUG + } else { + assert(T[s] == c1); +#endif + } + } + } + } + + /* Construct the BWTed string by using + the sorted order of type B suffixes. 
*/ + k = SA + BUCKET_A(c2 = T[n - 1]); + if (T[n - 2] < c2) { + if (((n - 1) & mod) == 0) indexes[(n - 1) / (mod + 1) - 1] = k - SA; + *k++ = ~((int)T[n - 2]); + } + else { + *k++ = n - 1; + } + + /* Scan the suffix array from left to right. */ + for(i = SA, j = SA + n, orig = SA; i < j; ++i) { + if(0 < (s = *i)) { + assert(T[s - 1] >= T[s]); + + if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = i - SA; + + c0 = T[--s]; + *i = c0; + if(c0 != c2) { + BUCKET_A(c2) = k - SA; + k = SA + BUCKET_A(c2 = c0); + } + assert(i < k); + if((0 < s) && (T[s - 1] < c0)) { + if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = k - SA; + *k++ = ~((int)T[s - 1]); + } else + *k++ = s; + } else if(s != 0) { + *i = ~s; + } else { + orig = i; + } + } + + return orig - SA; +} + + +/*---------------------------------------------------------------------------*/ + +/*- Function -*/ + +int +divsufsort(const unsigned char *T, int *SA, int n, int openMP) { + int *bucket_A, *bucket_B; + int m; + int err = 0; + + /* Check arguments: a NULL T or SA, or a negative n, returns -1; n of 0, 1 or 2 is answered right here, before any bucket array is allocated. */ + if((T == NULL) || (SA == NULL) || (n < 0)) { return -1; } + else if(n == 0) { return 0; } + else if(n == 1) { SA[0] = 0; return 0; } + else if(n == 2) { m = (T[0] < T[1]); SA[m ^ 1] = 0, SA[m] = 1; return 0; } + + bucket_A = (int *)malloc(BUCKET_A_SIZE * sizeof(int)); + bucket_B = (int *)malloc(BUCKET_B_SIZE * sizeof(int)); + + /* Suffixsort: sort the type B* suffixes, then build the full suffix array from them. If either bucket allocation failed, nothing is sorted and -2 is returned. */ + if((bucket_A != NULL) && (bucket_B != NULL)) { + m = sort_typeBstar(T, SA, bucket_A, bucket_B, n, openMP); + construct_SA(T, SA, bucket_A, bucket_B, n, m); + } else { + err = -2; + } + + free(bucket_B); + free(bucket_A); + + return err; +} + +int +divbwt(const unsigned char *T, unsigned char *U, int *A, int n, unsigned char * num_indexes, int * indexes, int openMP) { + int *B; + int *bucket_A, *bucket_B; + int m, pidx, i; + + /* Check arguments. 
*/ + if((T == NULL) || (U == NULL) || (n < 0)) { return -1; } + else if(n <= 1) { if(n == 1) { U[0] = T[0]; } return n; } + + if((B = A) == NULL) { B = (int *)malloc((size_t)(n + 1) * sizeof(int)); } + bucket_A = (int *)malloc(BUCKET_A_SIZE * sizeof(int)); + bucket_B = (int *)malloc(BUCKET_B_SIZE * sizeof(int)); + + /* Burrows-Wheeler Transform. */ + if((B != NULL) && (bucket_A != NULL) && (bucket_B != NULL)) { + m = sort_typeBstar(T, B, bucket_A, bucket_B, n, openMP); + + if (num_indexes == NULL || indexes == NULL) { + pidx = construct_BWT(T, B, bucket_A, bucket_B, n, m); + } else { + pidx = construct_BWT_indexes(T, B, bucket_A, bucket_B, n, m, num_indexes, indexes); + } + + /* Copy to output string. */ + U[0] = T[n - 1]; + for(i = 0; i < pidx; ++i) { U[i + 1] = (unsigned char)B[i]; } + for(i += 1; i < n; ++i) { U[i] = (unsigned char)B[i]; } + pidx += 1; + } else { + pidx = -2; + } + + free(bucket_B); + free(bucket_A); + if(A == NULL) { free(B); } + + return pidx; +} diff --git a/deps/SZ/zstd/dictBuilder/divsufsort.h b/deps/SZ/zstd/dictBuilder/divsufsort.h new file mode 100644 index 0000000000000000000000000000000000000000..5440994af15c1bf054207f0dca90dd285bb95aa6 --- /dev/null +++ b/deps/SZ/zstd/dictBuilder/divsufsort.h @@ -0,0 +1,67 @@ +/* + * divsufsort.h for libdivsufsort-lite + * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _DIVSUFSORT_H +#define _DIVSUFSORT_H 1 + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + + +/*- Prototypes -*/ + +/** + * Constructs the suffix array of a given string. + * @param T [0..n-1] The input string. + * @param SA [0..n-1] The output array of suffixes. + * @param n The length of the given string. + * @param openMP enables OpenMP optimization. + * @return 0 if no error occurred, -1 if an argument is invalid (NULL T or SA, or negative n), -2 if the bucket arrays cannot be allocated. + */ +int +divsufsort(const unsigned char *T, int *SA, int n, int openMP); + +/** + * Constructs the burrows-wheeler transformed string of a given string. + * @param T [0..n-1] The input string. + * @param U [0..n-1] The output string. (can be T) + * @param A [0..n-1] The temporary array. (can be NULL) + * @param n The length of the given string. + * @param num_indexes The length of secondary indexes array. (can be NULL) + * @param indexes The secondary indexes array. (can be NULL) + * @param openMP enables OpenMP optimization. + * @return The primary index if no error occurred, -1 or -2 otherwise. 
+ */ +int +divbwt(const unsigned char *T, unsigned char *U, int *A, int n, unsigned char * num_indexes, int * indexes, int openMP); + + +#ifdef __cplusplus +} /* extern "C" */ +#endif /* __cplusplus */ + +#endif /* _DIVSUFSORT_H */ diff --git a/deps/SZ/zstd/dictBuilder/zdict.c b/deps/SZ/zstd/dictBuilder/zdict.c new file mode 100644 index 0000000000000000000000000000000000000000..2024e0bbbd498b728ceeeb087f761f4f42fed3e7 --- /dev/null +++ b/deps/SZ/zstd/dictBuilder/zdict.c @@ -0,0 +1,1108 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + +/*-************************************** +* Tuning parameters +****************************************/ +#define MINRATIO 4 /* minimum nb of apparition to be selected in dictionary */ +#define ZDICT_MAX_SAMPLES_SIZE (2000U << 20) +#define ZDICT_MIN_SAMPLES_SIZE (ZDICT_CONTENTSIZE_MIN * MINRATIO) + + +/*-************************************** +* Compiler Options +****************************************/ +/* Unix Large Files support (>4GB) */ +#define _FILE_OFFSET_BITS 64 +#if (defined(__sun__) && (!defined(__LP64__))) /* Sun Solaris 32-bits requires specific definitions */ +# define _LARGEFILE_SOURCE +#elif ! 
defined(__LP64__) /* No point defining Large file for 64 bit */ +# define _LARGEFILE64_SOURCE +#endif + + +/*-************************************* +* Dependencies +***************************************/ +#include /* malloc, free */ +#include /* memset */ +#include /* fprintf, fopen, ftello64 */ +#include /* clock */ + +#include "mem.h" /* read */ +#include "fse.h" /* FSE_normalizeCount, FSE_writeNCount */ +#define HUF_STATIC_LINKING_ONLY +#include "huf.h" /* HUF_buildCTable, HUF_writeCTable */ +#include "zstd_internal.h" /* includes zstd.h */ +#include "xxhash.h" /* XXH64 */ +#include "divsufsort.h" +#ifndef ZDICT_STATIC_LINKING_ONLY +# define ZDICT_STATIC_LINKING_ONLY +#endif +#include "zdict.h" + + +/*-************************************* +* Constants +***************************************/ +#define KB *(1 <<10) +#define MB *(1 <<20) +#define GB *(1U<<30) + +#define DICTLISTSIZE_DEFAULT 10000 + +#define NOISELENGTH 32 + +static const int g_compressionLevel_default = 3; +static const U32 g_selectivity_default = 9; + + +/*-************************************* +* Console display +***************************************/ +#define DISPLAY(...) { fprintf(stderr, __VA_ARGS__); fflush( stderr ); } +#define DISPLAYLEVEL(l, ...) 
if (notificationLevel>=l) { DISPLAY(__VA_ARGS__); } /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */ + +static clock_t ZDICT_clockSpan(clock_t nPrevious) { return clock() - nPrevious; } + +static void ZDICT_printHex(const void* ptr, size_t length) +{ + const BYTE* const b = (const BYTE*)ptr; + size_t u; + for (u=0; u126) c = '.'; /* non-printable char */ + DISPLAY("%c", c); + } +} + + +/*-******************************************************** +* Helper functions +**********************************************************/ +unsigned ZDICT_isError(size_t errorCode) { return ERR_isError(errorCode); } + +const char* ZDICT_getErrorName(size_t errorCode) { return ERR_getErrorName(errorCode); } + +unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize) +{ + if (dictSize < 8) return 0; + if (MEM_readLE32(dictBuffer) != ZSTD_MAGIC_DICTIONARY) return 0; + return MEM_readLE32((const char*)dictBuffer + 4); +} + + +/*-******************************************************** +* Dictionary training functions +**********************************************************/ +static unsigned ZDICT_NbCommonBytes (size_t val) +{ + if (MEM_isLittleEndian()) { + if (MEM_64bits()) { +# if defined(_MSC_VER) && defined(_WIN64) + unsigned long r = 0; + _BitScanForward64( &r, (U64)val ); + return (unsigned)(r>>3); +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (__builtin_ctzll((U64)val) >> 3); +# else + static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 }; + return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58]; +# endif + } else { /* 32 bits */ +# if defined(_MSC_VER) + unsigned long r=0; + _BitScanForward( &r, (U32)val ); + return (unsigned)(r>>3); +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (__builtin_ctz((U32)val) >> 3); +# else + 
static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 }; + return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; +# endif + } + } else { /* Big Endian CPU */ + if (MEM_64bits()) { +# if defined(_MSC_VER) && defined(_WIN64) + unsigned long r = 0; + _BitScanReverse64( &r, val ); + return (unsigned)(r>>3); +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (__builtin_clzll(val) >> 3); +# else + unsigned r; + const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits mode */ + if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; } + if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } + r += (!val); + return r; +# endif + } else { /* 32 bits */ +# if defined(_MSC_VER) + unsigned long r = 0; + _BitScanReverse( &r, (unsigned long)val ); + return (unsigned)(r>>3); +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (__builtin_clz((U32)val) >> 3); +# else + unsigned r; + if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } + r += (!val); + return r; +# endif + } } +} + + +/*! ZDICT_count() : + Count the nb of common bytes between 2 pointers. + Note : this function presumes end of buffer followed by noisy guard band. 
+*/ +static size_t ZDICT_count(const void* pIn, const void* pMatch) +{ + const char* const pStart = (const char*)pIn; + for (;;) { + size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn); + if (!diff) { + pIn = (const char*)pIn+sizeof(size_t); + pMatch = (const char*)pMatch+sizeof(size_t); + continue; + } + pIn = (const char*)pIn+ZDICT_NbCommonBytes(diff); + return (size_t)((const char*)pIn - pStart); + } +} + + +typedef struct { + U32 pos; + U32 length; + U32 savings; +} dictItem; + +static void ZDICT_initDictItem(dictItem* d) +{ + d->pos = 1; + d->length = 0; + d->savings = (U32)(-1); +} + + +#define LLIMIT 64 /* heuristic determined experimentally */ +#define MINMATCHLENGTH 7 /* heuristic determined experimentally */ +static dictItem ZDICT_analyzePos( + BYTE* doneMarks, + const int* suffix, U32 start, + const void* buffer, U32 minRatio, U32 notificationLevel) +{ + U32 lengthList[LLIMIT] = {0}; + U32 cumulLength[LLIMIT] = {0}; + U32 savings[LLIMIT] = {0}; + const BYTE* b = (const BYTE*)buffer; + size_t maxLength = LLIMIT; + size_t pos = suffix[start]; + U32 end = start; + dictItem solution; + + /* init */ + memset(&solution, 0, sizeof(solution)); + doneMarks[pos] = 1; + + /* trivial repetition cases */ + if ( (MEM_read16(b+pos+0) == MEM_read16(b+pos+2)) + ||(MEM_read16(b+pos+1) == MEM_read16(b+pos+3)) + ||(MEM_read16(b+pos+2) == MEM_read16(b+pos+4)) ) { + /* skip and mark segment */ + U16 const pattern16 = MEM_read16(b+pos+4); + U32 u, patternEnd = 6; + while (MEM_read16(b+pos+patternEnd) == pattern16) patternEnd+=2 ; + if (b[pos+patternEnd] == b[pos+patternEnd-1]) patternEnd++; + for (u=1; u= MINMATCHLENGTH); + } + + /* look backward */ + { size_t length; + do { + length = ZDICT_count(b + pos, b + *(suffix+start-1)); + if (length >=MINMATCHLENGTH) start--; + } while(length >= MINMATCHLENGTH); + } + + /* exit if not found a minimum nb of repetitions */ + if (end-start < minRatio) { + U32 idx; + for(idx=start; idx= %i at pos %7u ", (U32)(end-start), 
MINMATCHLENGTH, (U32)pos); + DISPLAYLEVEL(4, "\n"); + + for (searchLength = MINMATCHLENGTH ; ; searchLength++) { + BYTE currentChar = 0; + U32 currentCount = 0; + U32 currentID = refinedStart; + U32 id; + U32 selectedCount = 0; + U32 selectedID = currentID; + for (id =refinedStart; id < refinedEnd; id++) { + if (b[suffix[id] + searchLength] != currentChar) { + if (currentCount > selectedCount) { + selectedCount = currentCount; + selectedID = currentID; + } + currentID = id; + currentChar = b[ suffix[id] + searchLength]; + currentCount = 0; + } + currentCount ++; + } + if (currentCount > selectedCount) { /* for last */ + selectedCount = currentCount; + selectedID = currentID; + } + + if (selectedCount < minRatio) + break; + refinedStart = selectedID; + refinedEnd = refinedStart + selectedCount; + } + + /* evaluate gain based on new ref */ + start = refinedStart; + pos = suffix[refinedStart]; + end = start; + memset(lengthList, 0, sizeof(lengthList)); + + /* look forward */ + { size_t length; + do { + end++; + length = ZDICT_count(b + pos, b + suffix[end]); + if (length >= LLIMIT) length = LLIMIT-1; + lengthList[length]++; + } while (length >=MINMATCHLENGTH); + } + + /* look backward */ + { size_t length = MINMATCHLENGTH; + while ((length >= MINMATCHLENGTH) & (start > 0)) { + length = ZDICT_count(b + pos, b + suffix[start - 1]); + if (length >= LLIMIT) length = LLIMIT - 1; + lengthList[length]++; + if (length >= MINMATCHLENGTH) start--; + } + } + + /* largest useful length */ + memset(cumulLength, 0, sizeof(cumulLength)); + cumulLength[maxLength-1] = lengthList[maxLength-1]; + for (i=(int)(maxLength-2); i>=0; i--) + cumulLength[i] = cumulLength[i+1] + lengthList[i]; + + for (i=LLIMIT-1; i>=MINMATCHLENGTH; i--) if (cumulLength[i]>=minRatio) break; + maxLength = i; + + /* reduce maxLength in case of final into repetitive data */ + { U32 l = (U32)maxLength; + BYTE const c = b[pos + maxLength-1]; + while (b[pos+l-2]==c) l--; + maxLength = l; + } + if (maxLength < 
MINMATCHLENGTH) return solution; /* skip : no long-enough solution */ + + /* calculate savings */ + savings[5] = 0; + for (i=MINMATCHLENGTH; i<=(int)maxLength; i++) + savings[i] = savings[i-1] + (lengthList[i] * (i-3)); + + DISPLAYLEVEL(4, "Selected ref at position %u, of length %u : saves %u (ratio: %.2f) \n", + (U32)pos, (U32)maxLength, savings[maxLength], (double)savings[maxLength] / maxLength); + + solution.pos = (U32)pos; + solution.length = (U32)maxLength; + solution.savings = savings[maxLength]; + + /* mark positions done */ + { U32 id; + for (id=start; id solution.length) length = solution.length; + } + pEnd = (U32)(testedPos + length); + for (p=testedPos; ppos; + const U32 eltEnd = elt.pos + elt.length; + const char* const buf = (const char*) buffer; + + /* tail overlap */ + U32 u; for (u=1; u elt.pos) && (table[u].pos <= eltEnd)) { /* overlap, existing > new */ + /* append */ + U32 const addedLength = table[u].pos - elt.pos; + table[u].length += addedLength; + table[u].pos = elt.pos; + table[u].savings += elt.savings * addedLength / elt.length; /* rough approx */ + table[u].savings += elt.length / 8; /* rough approx bonus */ + elt = table[u]; + /* sort : improve rank */ + while ((u>1) && (table[u-1].savings < elt.savings)) + table[u] = table[u-1], u--; + table[u] = elt; + return u; + } } + + /* front overlap */ + for (u=1; u= elt.pos) && (table[u].pos < elt.pos)) { /* overlap, existing < new */ + /* append */ + int const addedLength = (int)eltEnd - (table[u].pos + table[u].length); + table[u].savings += elt.length / 8; /* rough approx bonus */ + if (addedLength > 0) { /* otherwise, elt fully included into existing */ + table[u].length += addedLength; + table[u].savings += elt.savings * addedLength / elt.length; /* rough approx */ + } + /* sort : improve rank */ + elt = table[u]; + while ((u>1) && (table[u-1].savings < elt.savings)) + table[u] = table[u-1], u--; + table[u] = elt; + return u; + } + + if (MEM_read64(buf + table[u].pos) == MEM_read64(buf + 
elt.pos + 1)) { + if (isIncluded(buf + table[u].pos, buf + elt.pos + 1, table[u].length)) { + size_t const addedLength = MAX( (int)elt.length - (int)table[u].length , 1 ); + table[u].pos = elt.pos; + table[u].savings += (U32)(elt.savings * addedLength / elt.length); + table[u].length = MIN(elt.length, table[u].length + 1); + return u; + } + } + } + + return 0; +} + + +static void ZDICT_removeDictItem(dictItem* table, U32 id) +{ + /* convention : table[0].pos stores nb of elts */ + U32 const max = table[0].pos; + U32 u; + if (!id) return; /* protection, should never happen */ + for (u=id; upos--; +} + + +static void ZDICT_insertDictItem(dictItem* table, U32 maxSize, dictItem elt, const void* buffer) +{ + /* merge if possible */ + U32 mergeId = ZDICT_tryMerge(table, elt, 0, buffer); + if (mergeId) { + U32 newMerge = 1; + while (newMerge) { + newMerge = ZDICT_tryMerge(table, table[mergeId], mergeId, buffer); + if (newMerge) ZDICT_removeDictItem(table, mergeId); + mergeId = newMerge; + } + return; + } + + /* insert */ + { U32 current; + U32 nextElt = table->pos; + if (nextElt >= maxSize) nextElt = maxSize-1; + current = nextElt-1; + while (table[current].savings < elt.savings) { + table[current+1] = table[current]; + current--; + } + table[current+1] = elt; + table->pos = nextElt+1; + } +} + + +static U32 ZDICT_dictSize(const dictItem* dictList) +{ + U32 u, dictSize = 0; + for (u=1; u=l) { \ + if (ZDICT_clockSpan(displayClock) > refreshRate) \ + { displayClock = clock(); DISPLAY(__VA_ARGS__); \ + if (notificationLevel>=4) fflush(stderr); } } + + /* init */ + DISPLAYLEVEL(2, "\r%70s\r", ""); /* clean display line */ + if (!suffix0 || !reverseSuffix || !doneMarks || !filePos) { + result = ERROR(memory_allocation); + goto _cleanup; + } + if (minRatio < MINRATIO) minRatio = MINRATIO; + memset(doneMarks, 0, bufferSize+16); + + /* limit sample set size (divsufsort limitation)*/ + if (bufferSize > ZDICT_MAX_SAMPLES_SIZE) DISPLAYLEVEL(3, "sample set too large : reduced to %u 
MB ...\n", (U32)(ZDICT_MAX_SAMPLES_SIZE>>20)); + while (bufferSize > ZDICT_MAX_SAMPLES_SIZE) bufferSize -= fileSizes[--nbFiles]; + + /* sort */ + DISPLAYLEVEL(2, "sorting %u files of total size %u MB ...\n", nbFiles, (U32)(bufferSize>>20)); + { int const divSuftSortResult = divsufsort((const unsigned char*)buffer, suffix, (int)bufferSize, 0); + if (divSuftSortResult != 0) { result = ERROR(GENERIC); goto _cleanup; } + } + suffix[bufferSize] = (int)bufferSize; /* leads into noise */ + suffix0[0] = (int)bufferSize; /* leads into noise */ + /* build reverse suffix sort */ + { size_t pos; + for (pos=0; pos < bufferSize; pos++) + reverseSuffix[suffix[pos]] = (U32)pos; + /* note filePos tracks borders between samples. + It's not used at this stage, but planned to become useful in a later update */ + filePos[0] = 0; + for (pos=1; pos> 21); + } +} + + +typedef struct +{ + ZSTD_CCtx* ref; /* contains reference to dictionary */ + ZSTD_CCtx* zc; /* working context */ + void* workPlace; /* must be ZSTD_BLOCKSIZE_MAX allocated */ +} EStats_ress_t; + +#define MAXREPOFFSET 1024 + +static void ZDICT_countEStats(EStats_ress_t esr, ZSTD_parameters params, + U32* countLit, U32* offsetcodeCount, U32* matchlengthCount, U32* litlengthCount, U32* repOffsets, + const void* src, size_t srcSize, + U32 notificationLevel) +{ + size_t const blockSizeMax = MIN (ZSTD_BLOCKSIZE_MAX, 1 << params.cParams.windowLog); + size_t cSize; + + if (srcSize > blockSizeMax) srcSize = blockSizeMax; /* protection vs large samples */ + { size_t const errorCode = ZSTD_copyCCtx(esr.zc, esr.ref, 0); + if (ZSTD_isError(errorCode)) { DISPLAYLEVEL(1, "warning : ZSTD_copyCCtx failed \n"); return; } + } + cSize = ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_MAX, src, srcSize); + if (ZSTD_isError(cSize)) { DISPLAYLEVEL(3, "warning : could not compress sample size %u \n", (U32)srcSize); return; } + + if (cSize) { /* if == 0; block is not compressible */ + const seqStore_t* const seqStorePtr = 
ZSTD_getSeqStore(esr.zc); + + /* literals stats */ + { const BYTE* bytePtr; + for(bytePtr = seqStorePtr->litStart; bytePtr < seqStorePtr->lit; bytePtr++) + countLit[*bytePtr]++; + } + + /* seqStats */ + { U32 const nbSeq = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); + ZSTD_seqToCodes(seqStorePtr); + + { const BYTE* codePtr = seqStorePtr->ofCode; + U32 u; + for (u=0; umlCode; + U32 u; + for (u=0; ullCode; + U32 u; + for (u=0; u= 2) { /* rep offsets */ + const seqDef* const seq = seqStorePtr->sequencesStart; + U32 offset1 = seq[0].offset - 3; + U32 offset2 = seq[1].offset - 3; + if (offset1 >= MAXREPOFFSET) offset1 = 0; + if (offset2 >= MAXREPOFFSET) offset2 = 0; + repOffsets[offset1] += 3; + repOffsets[offset2] += 1; + } } } +} + +static size_t ZDICT_totalSampleSize(const size_t* fileSizes, unsigned nbFiles) +{ + size_t total=0; + unsigned u; + for (u=0; u0; u--) { + offsetCount_t tmp; + if (table[u-1].count >= table[u].count) break; + tmp = table[u-1]; + table[u-1] = table[u]; + table[u] = tmp; + } +} + +/* ZDICT_flatLit() : + * rewrite `countLit` to contain a mostly flat but still compressible distribution of literals. + * necessary to avoid generating a non-compressible distribution that HUF_writeCTable() cannot encode. 
+ */ +static void ZDICT_flatLit(U32* countLit) +{ + int u; + for (u=1; u<256; u++) countLit[u] = 2; + countLit[0] = 4; + countLit[253] = 1; + countLit[254] = 1; +} + +#define OFFCODE_MAX 30 /* only applicable to first block */ +static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize, + unsigned compressionLevel, + const void* srcBuffer, const size_t* fileSizes, unsigned nbFiles, + const void* dictBuffer, size_t dictBufferSize, + unsigned notificationLevel) +{ + U32 countLit[256]; + HUF_CREATE_STATIC_CTABLE(hufTable, 255); + U32 offcodeCount[OFFCODE_MAX+1]; + short offcodeNCount[OFFCODE_MAX+1]; + U32 offcodeMax = ZSTD_highbit32((U32)(dictBufferSize + 128 KB)); + U32 matchLengthCount[MaxML+1]; + short matchLengthNCount[MaxML+1]; + U32 litLengthCount[MaxLL+1]; + short litLengthNCount[MaxLL+1]; + U32 repOffset[MAXREPOFFSET]; + offsetCount_t bestRepOffset[ZSTD_REP_NUM+1]; + EStats_ress_t esr; + ZSTD_parameters params; + U32 u, huffLog = 11, Offlog = OffFSELog, mlLog = MLFSELog, llLog = LLFSELog, total; + size_t pos = 0, errorCode; + size_t eSize = 0; + size_t const totalSrcSize = ZDICT_totalSampleSize(fileSizes, nbFiles); + size_t const averageSampleSize = totalSrcSize / (nbFiles + !nbFiles); + BYTE* dstPtr = (BYTE*)dstBuffer; + + /* init */ + DEBUGLOG(4, "ZDICT_analyzeEntropy"); + esr.ref = ZSTD_createCCtx(); + esr.zc = ZSTD_createCCtx(); + esr.workPlace = malloc(ZSTD_BLOCKSIZE_MAX); + if (!esr.ref || !esr.zc || !esr.workPlace) { + eSize = ERROR(memory_allocation); + DISPLAYLEVEL(1, "Not enough memory \n"); + goto _cleanup; + } + if (offcodeMax>OFFCODE_MAX) { eSize = ERROR(dictionaryCreation_failed); goto _cleanup; } /* too large dictionary */ + for (u=0; u<256; u++) countLit[u] = 1; /* any character must be described */ + for (u=0; u<=offcodeMax; u++) offcodeCount[u] = 1; + for (u=0; u<=MaxML; u++) matchLengthCount[u] = 1; + for (u=0; u<=MaxLL; u++) litLengthCount[u] = 1; + memset(repOffset, 0, sizeof(repOffset)); + repOffset[1] = repOffset[4] = 
repOffset[8] = 1; + memset(bestRepOffset, 0, sizeof(bestRepOffset)); + if (compressionLevel==0) compressionLevel = g_compressionLevel_default; + params = ZSTD_getParams(compressionLevel, averageSampleSize, dictBufferSize); + { size_t const beginResult = ZSTD_compressBegin_advanced(esr.ref, dictBuffer, dictBufferSize, params, 0); + if (ZSTD_isError(beginResult)) { + DISPLAYLEVEL(1, "error : ZSTD_compressBegin_advanced() failed : %s \n", ZSTD_getErrorName(beginResult)); + eSize = ERROR(GENERIC); + goto _cleanup; + } } + + /* collect stats on all samples */ + for (u=0; u dictBufferCapacity) dictContentSize = dictBufferCapacity - hSize; + { size_t const dictSize = hSize + dictContentSize; + char* dictEnd = (char*)dictBuffer + dictSize; + memmove(dictEnd - dictContentSize, customDictContent, dictContentSize); + memcpy(dictBuffer, header, hSize); + return dictSize; + } +} + + +size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity, + const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, + ZDICT_params_t params) +{ + int const compressionLevel = (params.compressionLevel == 0) ? g_compressionLevel_default : params.compressionLevel; + U32 const notificationLevel = params.notificationLevel; + size_t hSize = 8; + + /* calculate entropy tables */ + DISPLAYLEVEL(2, "\r%70s\r", ""); /* clean display line */ + DISPLAYLEVEL(2, "statistics ... 
\n"); + { size_t const eSize = ZDICT_analyzeEntropy((char*)dictBuffer+hSize, dictBufferCapacity-hSize, + compressionLevel, + samplesBuffer, samplesSizes, nbSamples, + (char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize, + notificationLevel); + if (ZDICT_isError(eSize)) return eSize; + hSize += eSize; + } + + /* add dictionary header (after entropy tables) */ + MEM_writeLE32(dictBuffer, ZSTD_MAGIC_DICTIONARY); + { U64 const randomID = XXH64((char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize, 0); + U32 const compliantID = (randomID % ((1U<<31)-32768)) + 32768; + U32 const dictID = params.dictID ? params.dictID : compliantID; + MEM_writeLE32((char*)dictBuffer+4, dictID); + } + + if (hSize + dictContentSize < dictBufferCapacity) + memmove((char*)dictBuffer + hSize, (char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize); + return MIN(dictBufferCapacity, hSize+dictContentSize); +} + + +/*! ZDICT_trainFromBuffer_unsafe_legacy() : +* Warning : `samplesBuffer` must be followed by noisy guard band. +* @return : size of dictionary, or an error code which can be tested with ZDICT_isError() +*/ +size_t ZDICT_trainFromBuffer_unsafe_legacy( + void* dictBuffer, size_t maxDictSize, + const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, + ZDICT_legacy_params_t params) +{ + U32 const dictListSize = MAX(MAX(DICTLISTSIZE_DEFAULT, nbSamples), (U32)(maxDictSize/16)); + dictItem* const dictList = (dictItem*)malloc(dictListSize * sizeof(*dictList)); + unsigned const selectivity = params.selectivityLevel == 0 ? g_selectivity_default : params.selectivityLevel; + unsigned const minRep = (selectivity > 30) ? 
MINRATIO : nbSamples >> selectivity; + size_t const targetDictSize = maxDictSize; + size_t const samplesBuffSize = ZDICT_totalSampleSize(samplesSizes, nbSamples); + size_t dictSize = 0; + U32 const notificationLevel = params.zParams.notificationLevel; + + /* checks */ + if (!dictList) return ERROR(memory_allocation); + if (maxDictSize < ZDICT_DICTSIZE_MIN) { free(dictList); return ERROR(dstSize_tooSmall); } /* requested dictionary size is too small */ + if (samplesBuffSize < ZDICT_MIN_SAMPLES_SIZE) { free(dictList); return ERROR(dictionaryCreation_failed); } /* not enough source to create dictionary */ + + /* init */ + ZDICT_initDictItem(dictList); + + /* build dictionary */ + ZDICT_trainBuffer_legacy(dictList, dictListSize, + samplesBuffer, samplesBuffSize, + samplesSizes, nbSamples, + minRep, notificationLevel); + + /* display best matches */ + if (params.zParams.notificationLevel>= 3) { + U32 const nb = MIN(25, dictList[0].pos); + U32 const dictContentSize = ZDICT_dictSize(dictList); + U32 u; + DISPLAYLEVEL(3, "\n %u segments found, of total size %u \n", dictList[0].pos-1, dictContentSize); + DISPLAYLEVEL(3, "list %u best segments \n", nb-1); + for (u=1; u samplesBuffSize) || ((pos + length) > samplesBuffSize)) + return ERROR(GENERIC); /* should never happen */ + DISPLAYLEVEL(3, "%3u:%3u bytes at pos %8u, savings %7u bytes |", + u, length, pos, dictList[u].savings); + ZDICT_printHex((const char*)samplesBuffer+pos, printedLength); + DISPLAYLEVEL(3, "| \n"); + } } + + + /* create dictionary */ + { U32 dictContentSize = ZDICT_dictSize(dictList); + if (dictContentSize < ZDICT_CONTENTSIZE_MIN) { free(dictList); return ERROR(dictionaryCreation_failed); } /* dictionary content too small */ + if (dictContentSize < targetDictSize/4) { + DISPLAYLEVEL(2, "! warning : selected content significantly smaller than requested (%u < %u) \n", dictContentSize, (U32)maxDictSize); + if (samplesBuffSize < 10 * targetDictSize) + DISPLAYLEVEL(2, "! 
consider increasing the number of samples (total size : %u MB)\n", (U32)(samplesBuffSize>>20)); + if (minRep > MINRATIO) { + DISPLAYLEVEL(2, "! consider increasing selectivity to produce larger dictionary (-s%u) \n", selectivity+1); + DISPLAYLEVEL(2, "! note : larger dictionaries are not necessarily better, test its efficiency on samples \n"); + } + } + + if ((dictContentSize > targetDictSize*3) && (nbSamples > 2*MINRATIO) && (selectivity>1)) { + U32 proposedSelectivity = selectivity-1; + while ((nbSamples >> proposedSelectivity) <= MINRATIO) { proposedSelectivity--; } + DISPLAYLEVEL(2, "! note : calculated dictionary significantly larger than requested (%u > %u) \n", dictContentSize, (U32)maxDictSize); + DISPLAYLEVEL(2, "! consider increasing dictionary size, or produce denser dictionary (-s%u) \n", proposedSelectivity); + DISPLAYLEVEL(2, "! always test dictionary efficiency on real samples \n"); + } + + /* limit dictionary size */ + { U32 const max = dictList->pos; /* convention : nb of useful elts within dictList */ + U32 currentSize = 0; + U32 n; for (n=1; n targetDictSize) { currentSize -= dictList[n].length; break; } + } + dictList->pos = n; + dictContentSize = currentSize; + } + + /* build dict content */ + { U32 u; + BYTE* ptr = (BYTE*)dictBuffer + maxDictSize; + for (u=1; upos; u++) { + U32 l = dictList[u].length; + ptr -= l; + if (ptr<(BYTE*)dictBuffer) { free(dictList); return ERROR(GENERIC); } /* should not happen */ + memcpy(ptr, (const char*)samplesBuffer+dictList[u].pos, l); + } } + + dictSize = ZDICT_addEntropyTablesFromBuffer_advanced(dictBuffer, dictContentSize, maxDictSize, + samplesBuffer, samplesSizes, nbSamples, + params.zParams); + } + + /* clean up */ + free(dictList); + return dictSize; +} + + +/* ZDICT_trainFromBuffer_legacy() : + * issue : samplesBuffer need to be followed by a noisy guard band. 
+ * work around : duplicate the buffer, and add the noise */ +size_t ZDICT_trainFromBuffer_legacy(void* dictBuffer, size_t dictBufferCapacity, + const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, + ZDICT_legacy_params_t params) +{ + size_t result; + void* newBuff; + size_t const sBuffSize = ZDICT_totalSampleSize(samplesSizes, nbSamples); + if (sBuffSize < ZDICT_MIN_SAMPLES_SIZE) return 0; /* not enough content => no dictionary */ + + newBuff = malloc(sBuffSize + NOISELENGTH); + if (!newBuff) return ERROR(memory_allocation); + + memcpy(newBuff, samplesBuffer, sBuffSize); + ZDICT_fillNoise((char*)newBuff + sBuffSize, NOISELENGTH); /* guard band, for end of buffer condition */ + + result = + ZDICT_trainFromBuffer_unsafe_legacy(dictBuffer, dictBufferCapacity, newBuff, + samplesSizes, nbSamples, params); + free(newBuff); + return result; +} + + +size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity, + const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples) +{ + ZDICT_cover_params_t params; + DEBUGLOG(3, "ZDICT_trainFromBuffer"); + memset(¶ms, 0, sizeof(params)); + params.d = 8; + params.steps = 4; + /* Default to level 6 since no compression level information is available */ + params.zParams.compressionLevel = 6; +#if defined(DEBUGLEVEL) && (DEBUGLEVEL>=1) + params.zParams.notificationLevel = DEBUGLEVEL; +#endif + return ZDICT_optimizeTrainFromBuffer_cover(dictBuffer, dictBufferCapacity, + samplesBuffer, samplesSizes, nbSamples, + ¶ms); +} + +size_t ZDICT_addEntropyTablesFromBuffer(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity, + const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples) +{ + ZDICT_params_t params; + memset(¶ms, 0, sizeof(params)); + return ZDICT_addEntropyTablesFromBuffer_advanced(dictBuffer, dictContentSize, dictBufferCapacity, + samplesBuffer, samplesSizes, nbSamples, + params); +} diff --git a/deps/SZ/zstd/dictBuilder/zdict.h 
b/deps/SZ/zstd/dictBuilder/zdict.h new file mode 100644 index 0000000000000000000000000000000000000000..ad459c2d7d532b6c7bf0dc924a2bd5146a26c344 --- /dev/null +++ b/deps/SZ/zstd/dictBuilder/zdict.h @@ -0,0 +1,212 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef DICTBUILDER_H_001 +#define DICTBUILDER_H_001 + +#if defined (__cplusplus) +extern "C" { +#endif + + +/*====== Dependencies ======*/ +#include /* size_t */ + + +/* ===== ZDICTLIB_API : control library symbols visibility ===== */ +#ifndef ZDICTLIB_VISIBILITY +# if defined(__GNUC__) && (__GNUC__ >= 4) +# define ZDICTLIB_VISIBILITY __attribute__ ((visibility ("default"))) +# else +# define ZDICTLIB_VISIBILITY +# endif +#endif +#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1) +# define ZDICTLIB_API __declspec(dllexport) ZDICTLIB_VISIBILITY +#elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1) +# define ZDICTLIB_API __declspec(dllimport) ZDICTLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/ +#else +# define ZDICTLIB_API ZDICTLIB_VISIBILITY +#endif + + +/*! ZDICT_trainFromBuffer(): + * Train a dictionary from an array of samples. + * Redirect towards ZDICT_optimizeTrainFromBuffer_cover() single-threaded, with d=8 and steps=4. + * Samples must be stored concatenated in a single flat buffer `samplesBuffer`, + * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order. + * The resulting dictionary will be saved into `dictBuffer`. 
+ * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) + * or an error code, which can be tested with ZDICT_isError(). + * Note: ZDICT_trainFromBuffer() requires about 9 bytes of memory for each input byte. + * Tips: In general, a reasonable dictionary has a size of ~ 100 KB. + * It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`. + * In general, it's recommended to provide a few thousands samples, though this can vary a lot. + * It's recommended that total size of all samples be about ~x100 times the target size of dictionary. + */ +ZDICTLIB_API size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity, + const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples); + + +/*====== Helper functions ======*/ +ZDICTLIB_API unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize); /**< extracts dictID; @return zero if error (not a valid dictionary) */ +ZDICTLIB_API unsigned ZDICT_isError(size_t errorCode); +ZDICTLIB_API const char* ZDICT_getErrorName(size_t errorCode); + + + +#ifdef ZDICT_STATIC_LINKING_ONLY + +/* ==================================================================================== + * The definitions in this section are considered experimental. + * They should never be used with a dynamic library, as they may change in the future. + * They are provided for advanced usages. + * Use them only in association with static linking. + * ==================================================================================== */ + +typedef struct { + int compressionLevel; /* optimize for a specific zstd compression level; 0 means default */ + unsigned notificationLevel; /* Write log to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */ + unsigned dictID; /* force dictID value; 0 means auto mode (32-bits random value) */ +} ZDICT_params_t; + +/*! ZDICT_cover_params_t: + * k and d are the only required parameters. 
+ * For others, value 0 means default. + */ +typedef struct { + unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */ + unsigned d; /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */ + unsigned steps; /* Number of steps : Only used for optimization : 0 means default (32) : Higher means more parameters checked */ + unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */ + ZDICT_params_t zParams; +} ZDICT_cover_params_t; + + +/*! ZDICT_trainFromBuffer_cover(): + * Train a dictionary from an array of samples using the COVER algorithm. + * Samples must be stored concatenated in a single flat buffer `samplesBuffer`, + * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order. + * The resulting dictionary will be saved into `dictBuffer`. + * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) + * or an error code, which can be tested with ZDICT_isError(). + * Note: ZDICT_trainFromBuffer_cover() requires about 9 bytes of memory for each input byte. + * Tips: In general, a reasonable dictionary has a size of ~ 100 KB. + * It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`. + * In general, it's recommended to provide a few thousands samples, though this can vary a lot. + * It's recommended that total size of all samples be about ~x100 times the target size of dictionary. + */ +ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover( + void *dictBuffer, size_t dictBufferCapacity, + const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples, + ZDICT_cover_params_t parameters); + +/*! ZDICT_optimizeTrainFromBuffer_cover(): + * The same requirements as above hold for all the parameters except `parameters`. + * This function tries many parameter combinations and picks the best parameters. 
+ * `*parameters` is filled with the best parameters found, + * dictionary constructed with those parameters is stored in `dictBuffer`. + * + * All of the parameters d, k, steps are optional. + * If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8, 10, 12, 14, 16}. + * if steps is zero it defaults to its default value. + * If k is non-zero then we don't check multiple values of k, otherwise we check steps values in [16, 2048]. + * + * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) + * or an error code, which can be tested with ZDICT_isError(). + * On success `*parameters` contains the parameters selected. + * Note: ZDICT_optimizeTrainFromBuffer_cover() requires about 8 bytes of memory for each input byte and additionally another 5 bytes of memory for each byte of memory for each thread. + */ +ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover( + void* dictBuffer, size_t dictBufferCapacity, + const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, + ZDICT_cover_params_t* parameters); + +/*! ZDICT_finalizeDictionary(): + * Given a custom content as a basis for dictionary, and a set of samples, + * finalize dictionary by adding headers and statistics. + * + * Samples must be stored concatenated in a flat buffer `samplesBuffer`, + * supplied with an array of sizes `samplesSizes`, providing the size of each sample in order. + * + * dictContentSize must be >= ZDICT_CONTENTSIZE_MIN bytes. + * maxDictSize must be >= dictContentSize, and must be >= ZDICT_DICTSIZE_MIN bytes. + * + * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`), + * or an error code, which can be tested by ZDICT_isError(). + * Note: ZDICT_finalizeDictionary() will push notifications into stderr if instructed to, using notificationLevel>0. 
+ * Note 2: dictBuffer and dictContent can overlap + */ +#define ZDICT_CONTENTSIZE_MIN 128 +#define ZDICT_DICTSIZE_MIN 256 +ZDICTLIB_API size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity, + const void* dictContent, size_t dictContentSize, + const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, + ZDICT_params_t parameters); + +typedef struct { + unsigned selectivityLevel; /* 0 means default; larger => select more => larger dictionary */ + ZDICT_params_t zParams; +} ZDICT_legacy_params_t; + +/*! ZDICT_trainFromBuffer_legacy(): + * Train a dictionary from an array of samples. + * Samples must be stored concatenated in a single flat buffer `samplesBuffer`, + * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order. + * The resulting dictionary will be saved into `dictBuffer`. + * `parameters` is optional and can be provided with values set to 0 to mean "default". + * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) + * or an error code, which can be tested with ZDICT_isError(). + * Tips: In general, a reasonable dictionary has a size of ~ 100 KB. + * It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`. + * In general, it's recommended to provide a few thousands samples, though this can vary a lot. + * It's recommended that total size of all samples be about ~x100 times the target size of dictionary. + * Note: ZDICT_trainFromBuffer_legacy() will send notifications into stderr if instructed to, using notificationLevel>0. 
+ */ +ZDICTLIB_API size_t ZDICT_trainFromBuffer_legacy( + void *dictBuffer, size_t dictBufferCapacity, + const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples, + ZDICT_legacy_params_t parameters); + +/* Deprecation warnings */ +/* It is generally possible to disable deprecation warnings from compiler, + for example with -Wno-deprecated-declarations for gcc + or _CRT_SECURE_NO_WARNINGS in Visual. + Otherwise, it's also possible to manually define ZDICT_DISABLE_DEPRECATE_WARNINGS */ +#ifdef ZDICT_DISABLE_DEPRECATE_WARNINGS +# define ZDICT_DEPRECATED(message) ZDICTLIB_API /* disable deprecation warnings */ +#else +# define ZDICT_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) +# if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */ +# define ZDICT_DEPRECATED(message) [[deprecated(message)]] ZDICTLIB_API +# elif (ZDICT_GCC_VERSION >= 405) || defined(__clang__) +# define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated(message))) +# elif (ZDICT_GCC_VERSION >= 301) +# define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated)) +# elif defined(_MSC_VER) +# define ZDICT_DEPRECATED(message) ZDICTLIB_API __declspec(deprecated(message)) +# else +# pragma message("WARNING: You need to implement ZDICT_DEPRECATED for this compiler") +# define ZDICT_DEPRECATED(message) ZDICTLIB_API +# endif +#endif /* ZDICT_DISABLE_DEPRECATE_WARNINGS */ + +ZDICT_DEPRECATED("use ZDICT_finalizeDictionary() instead") +size_t ZDICT_addEntropyTablesFromBuffer(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity, + const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples); + + +#endif /* ZDICT_STATIC_LINKING_ONLY */ + +#if defined (__cplusplus) +} +#endif + +#endif /* DICTBUILDER_H_001 */ diff --git a/deps/SZ/zstd/legacy/zstd_legacy.h b/deps/SZ/zstd/legacy/zstd_legacy.h new file mode 100644 index 0000000000000000000000000000000000000000..5893cb9657e62be90e8dcbda5176197dc253159f --- /dev/null +++ 
b/deps/SZ/zstd/legacy/zstd_legacy.h @@ -0,0 +1,381 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_LEGACY_H +#define ZSTD_LEGACY_H + +#if defined (__cplusplus) +extern "C" { +#endif + +/* ************************************* +* Includes +***************************************/ +#include "mem.h" /* MEM_STATIC */ +#include "error_private.h" /* ERROR */ +#include "zstd.h" /* ZSTD_inBuffer, ZSTD_outBuffer */ + +#if !defined (ZSTD_LEGACY_SUPPORT) || (ZSTD_LEGACY_SUPPORT == 0) +# undef ZSTD_LEGACY_SUPPORT +# define ZSTD_LEGACY_SUPPORT 8 +#endif + +#if (ZSTD_LEGACY_SUPPORT <= 1) +# include "zstd_v01.h" +#endif +#if (ZSTD_LEGACY_SUPPORT <= 2) +# include "zstd_v02.h" +#endif +#if (ZSTD_LEGACY_SUPPORT <= 3) +# include "zstd_v03.h" +#endif +#if (ZSTD_LEGACY_SUPPORT <= 4) +# include "zstd_v04.h" +#endif +#if (ZSTD_LEGACY_SUPPORT <= 5) +# include "zstd_v05.h" +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) +# include "zstd_v06.h" +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) +# include "zstd_v07.h" +#endif + +/** ZSTD_isLegacy() : + @return : > 0 if supported by legacy decoder. 0 otherwise. + return value is the version. 
+*/ +MEM_STATIC unsigned ZSTD_isLegacy(const void* src, size_t srcSize) +{ + U32 magicNumberLE; + if (srcSize<4) return 0; + magicNumberLE = MEM_readLE32(src); + switch(magicNumberLE) + { +#if (ZSTD_LEGACY_SUPPORT <= 1) + case ZSTDv01_magicNumberLE:return 1; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 2) + case ZSTDv02_magicNumber : return 2; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 3) + case ZSTDv03_magicNumber : return 3; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 4) + case ZSTDv04_magicNumber : return 4; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 5) + case ZSTDv05_MAGICNUMBER : return 5; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) + case ZSTDv06_MAGICNUMBER : return 6; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) + case ZSTDv07_MAGICNUMBER : return 7; +#endif + default : return 0; + } +} + + +MEM_STATIC unsigned long long ZSTD_getDecompressedSize_legacy(const void* src, size_t srcSize) +{ + U32 const version = ZSTD_isLegacy(src, srcSize); + if (version < 5) return 0; /* no decompressed size in frame header, or not a legacy format */ +#if (ZSTD_LEGACY_SUPPORT <= 5) + if (version==5) { + ZSTDv05_parameters fParams; + size_t const frResult = ZSTDv05_getFrameParams(&fParams, src, srcSize); + if (frResult != 0) return 0; + return fParams.srcSize; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) + if (version==6) { + ZSTDv06_frameParams fParams; + size_t const frResult = ZSTDv06_getFrameParams(&fParams, src, srcSize); + if (frResult != 0) return 0; + return fParams.frameContentSize; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) + if (version==7) { + ZSTDv07_frameParams fParams; + size_t const frResult = ZSTDv07_getFrameParams(&fParams, src, srcSize); + if (frResult != 0) return 0; + return fParams.frameContentSize; + } +#endif + return 0; /* should not be possible */ +} + + +MEM_STATIC size_t ZSTD_decompressLegacy( + void* dst, size_t dstCapacity, + const void* src, size_t compressedSize, + const void* dict,size_t dictSize) +{ + U32 const version = ZSTD_isLegacy(src, compressedSize); + (void)dst; 
(void)dstCapacity; (void)dict; (void)dictSize; /* unused when ZSTD_LEGACY_SUPPORT >= 8 */ + switch(version) + { +#if (ZSTD_LEGACY_SUPPORT <= 1) + case 1 : + return ZSTDv01_decompress(dst, dstCapacity, src, compressedSize); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 2) + case 2 : + return ZSTDv02_decompress(dst, dstCapacity, src, compressedSize); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 3) + case 3 : + return ZSTDv03_decompress(dst, dstCapacity, src, compressedSize); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 4) + case 4 : + return ZSTDv04_decompress(dst, dstCapacity, src, compressedSize); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 5) + case 5 : + { size_t result; + ZSTDv05_DCtx* const zd = ZSTDv05_createDCtx(); + if (zd==NULL) return ERROR(memory_allocation); + result = ZSTDv05_decompress_usingDict(zd, dst, dstCapacity, src, compressedSize, dict, dictSize); + ZSTDv05_freeDCtx(zd); + return result; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) + case 6 : + { size_t result; + ZSTDv06_DCtx* const zd = ZSTDv06_createDCtx(); + if (zd==NULL) return ERROR(memory_allocation); + result = ZSTDv06_decompress_usingDict(zd, dst, dstCapacity, src, compressedSize, dict, dictSize); + ZSTDv06_freeDCtx(zd); + return result; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) + case 7 : + { size_t result; + ZSTDv07_DCtx* const zd = ZSTDv07_createDCtx(); + if (zd==NULL) return ERROR(memory_allocation); + result = ZSTDv07_decompress_usingDict(zd, dst, dstCapacity, src, compressedSize, dict, dictSize); + ZSTDv07_freeDCtx(zd); + return result; + } +#endif + default : + return ERROR(prefix_unknown); + } +} + +MEM_STATIC size_t ZSTD_findFrameCompressedSizeLegacy(const void *src, + size_t compressedSize) +{ + U32 const version = ZSTD_isLegacy(src, compressedSize); + switch(version) + { +#if (ZSTD_LEGACY_SUPPORT <= 1) + case 1 : + return ZSTDv01_findFrameCompressedSize(src, compressedSize); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 2) + case 2 : + return ZSTDv02_findFrameCompressedSize(src, compressedSize); +#endif +#if 
(ZSTD_LEGACY_SUPPORT <= 3) + case 3 : + return ZSTDv03_findFrameCompressedSize(src, compressedSize); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 4) + case 4 : + return ZSTDv04_findFrameCompressedSize(src, compressedSize); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 5) + case 5 : + return ZSTDv05_findFrameCompressedSize(src, compressedSize); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) + case 6 : + return ZSTDv06_findFrameCompressedSize(src, compressedSize); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) + case 7 : + return ZSTDv07_findFrameCompressedSize(src, compressedSize); +#endif + default : + return ERROR(prefix_unknown); + } +} + +MEM_STATIC size_t ZSTD_freeLegacyStreamContext(void* legacyContext, U32 version) +{ + switch(version) + { + default : + case 1 : + case 2 : + case 3 : + (void)legacyContext; + return ERROR(version_unsupported); +#if (ZSTD_LEGACY_SUPPORT <= 4) + case 4 : return ZBUFFv04_freeDCtx((ZBUFFv04_DCtx*)legacyContext); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 5) + case 5 : return ZBUFFv05_freeDCtx((ZBUFFv05_DCtx*)legacyContext); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) + case 6 : return ZBUFFv06_freeDCtx((ZBUFFv06_DCtx*)legacyContext); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) + case 7 : return ZBUFFv07_freeDCtx((ZBUFFv07_DCtx*)legacyContext); +#endif + } +} + + +MEM_STATIC size_t ZSTD_initLegacyStream(void** legacyContext, U32 prevVersion, U32 newVersion, + const void* dict, size_t dictSize) +{ + DEBUGLOG(5, "ZSTD_initLegacyStream for v0.%u", newVersion); + if (prevVersion != newVersion) ZSTD_freeLegacyStreamContext(*legacyContext, prevVersion); + switch(newVersion) + { + default : + case 1 : + case 2 : + case 3 : + (void)dict; (void)dictSize; + return 0; +#if (ZSTD_LEGACY_SUPPORT <= 4) + case 4 : + { + ZBUFFv04_DCtx* dctx = (prevVersion != newVersion) ? 
ZBUFFv04_createDCtx() : (ZBUFFv04_DCtx*)*legacyContext; + if (dctx==NULL) return ERROR(memory_allocation); + ZBUFFv04_decompressInit(dctx); + ZBUFFv04_decompressWithDictionary(dctx, dict, dictSize); + *legacyContext = dctx; + return 0; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 5) + case 5 : + { + ZBUFFv05_DCtx* dctx = (prevVersion != newVersion) ? ZBUFFv05_createDCtx() : (ZBUFFv05_DCtx*)*legacyContext; + if (dctx==NULL) return ERROR(memory_allocation); + ZBUFFv05_decompressInitDictionary(dctx, dict, dictSize); + *legacyContext = dctx; + return 0; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) + case 6 : + { + ZBUFFv06_DCtx* dctx = (prevVersion != newVersion) ? ZBUFFv06_createDCtx() : (ZBUFFv06_DCtx*)*legacyContext; + if (dctx==NULL) return ERROR(memory_allocation); + ZBUFFv06_decompressInitDictionary(dctx, dict, dictSize); + *legacyContext = dctx; + return 0; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) + case 7 : + { + ZBUFFv07_DCtx* dctx = (prevVersion != newVersion) ? ZBUFFv07_createDCtx() : (ZBUFFv07_DCtx*)*legacyContext; + if (dctx==NULL) return ERROR(memory_allocation); + ZBUFFv07_decompressInitDictionary(dctx, dict, dictSize); + *legacyContext = dctx; + return 0; + } +#endif + } +} + + + +MEM_STATIC size_t ZSTD_decompressLegacyStream(void* legacyContext, U32 version, + ZSTD_outBuffer* output, ZSTD_inBuffer* input) +{ + DEBUGLOG(5, "ZSTD_decompressLegacyStream for v0.%u", version); + switch(version) + { + default : + case 1 : + case 2 : + case 3 : + (void)legacyContext; (void)output; (void)input; + return ERROR(version_unsupported); +#if (ZSTD_LEGACY_SUPPORT <= 4) + case 4 : + { + ZBUFFv04_DCtx* dctx = (ZBUFFv04_DCtx*) legacyContext; + const void* src = (const char*)input->src + input->pos; + size_t readSize = input->size - input->pos; + void* dst = (char*)output->dst + output->pos; + size_t decodedSize = output->size - output->pos; + size_t const hintSize = ZBUFFv04_decompressContinue(dctx, dst, &decodedSize, src, &readSize); + output->pos += decodedSize; + 
input->pos += readSize; + return hintSize; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 5) + case 5 : + { + ZBUFFv05_DCtx* dctx = (ZBUFFv05_DCtx*) legacyContext; + const void* src = (const char*)input->src + input->pos; + size_t readSize = input->size - input->pos; + void* dst = (char*)output->dst + output->pos; + size_t decodedSize = output->size - output->pos; + size_t const hintSize = ZBUFFv05_decompressContinue(dctx, dst, &decodedSize, src, &readSize); + output->pos += decodedSize; + input->pos += readSize; + return hintSize; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) + case 6 : + { + ZBUFFv06_DCtx* dctx = (ZBUFFv06_DCtx*) legacyContext; + const void* src = (const char*)input->src + input->pos; + size_t readSize = input->size - input->pos; + void* dst = (char*)output->dst + output->pos; + size_t decodedSize = output->size - output->pos; + size_t const hintSize = ZBUFFv06_decompressContinue(dctx, dst, &decodedSize, src, &readSize); + output->pos += decodedSize; + input->pos += readSize; + return hintSize; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) + case 7 : + { + ZBUFFv07_DCtx* dctx = (ZBUFFv07_DCtx*) legacyContext; + const void* src = (const char*)input->src + input->pos; + size_t readSize = input->size - input->pos; + void* dst = (char*)output->dst + output->pos; + size_t decodedSize = output->size - output->pos; + size_t const hintSize = ZBUFFv07_decompressContinue(dctx, dst, &decodedSize, src, &readSize); + output->pos += decodedSize; + input->pos += readSize; + return hintSize; + } +#endif + } +} + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_LEGACY_H */ diff --git a/deps/SZ/zstd/legacy/zstd_v01.c b/deps/SZ/zstd/legacy/zstd_v01.c new file mode 100644 index 0000000000000000000000000000000000000000..ae1cb2ce5aa068c70db9f0d20a37b761e9716e84 --- /dev/null +++ b/deps/SZ/zstd/legacy/zstd_v01.c @@ -0,0 +1,2127 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. 
+ * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + +/****************************************** +* Includes +******************************************/ +#include /* size_t, ptrdiff_t */ +#include "zstd_v01.h" +#include "error_private.h" + + +/****************************************** +* Static allocation +******************************************/ +/* You can statically allocate FSE CTable/DTable as a table of unsigned using below macro */ +#define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.) +* Increasing memory usage improves compression ratio +* Reduced memory usage can improve speed, due to cache effect +* Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */ +#define FSE_MAX_MEMORY_USAGE 14 +#define FSE_DEFAULT_MEMORY_USAGE 13 + +/* FSE_MAX_SYMBOL_VALUE : +* Maximum symbol value authorized. 
+* Required for proper stack allocation */ +#define FSE_MAX_SYMBOL_VALUE 255 + + +/**************************************************************** +* template functions type & suffix +****************************************************************/ +#define FSE_FUNCTION_TYPE BYTE +#define FSE_FUNCTION_EXTENSION + + +/**************************************************************** +* Byte symbol type +****************************************************************/ +typedef struct +{ + unsigned short newState; + unsigned char symbol; + unsigned char nbBits; +} FSE_decode_t; /* size == U32 */ + + + +/**************************************************************** +* Compiler specifics +****************************************************************/ +#ifdef _MSC_VER /* Visual Studio */ +# define FORCE_INLINE static __forceinline +# include /* For Visual 2005 */ +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +# pragma warning(disable : 4214) /* disable: C4214: non-int bitfields */ +#else +# define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) +# if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ +# ifdef __GNUC__ +# define FORCE_INLINE static inline __attribute__((always_inline)) +# else +# define FORCE_INLINE static inline +# endif +# else +# define FORCE_INLINE static +# endif /* __STDC_VERSION__ */ +#endif + + +/**************************************************************** +* Includes +****************************************************************/ +#include /* malloc, free, qsort */ +#include /* memcpy, memset */ +#include /* printf (debug) */ + + +#ifndef MEM_ACCESS_MODULE +#define MEM_ACCESS_MODULE +/**************************************************************** +* Basic Types +*****************************************************************/ +#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ +# include +typedef uint8_t BYTE; +typedef 
uint16_t U16; +typedef int16_t S16; +typedef uint32_t U32; +typedef int32_t S32; +typedef uint64_t U64; +typedef int64_t S64; +#else +typedef unsigned char BYTE; +typedef unsigned short U16; +typedef signed short S16; +typedef unsigned int U32; +typedef signed int S32; +typedef unsigned long long U64; +typedef signed long long S64; +#endif + +#endif /* MEM_ACCESS_MODULE */ + +/**************************************************************** +* Memory I/O +*****************************************************************/ +/* FSE_FORCE_MEMORY_ACCESS + * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. + * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. + * The below switch allow to select different access method for improved performance. + * Method 0 (default) : use `memcpy()`. Safe and portable. + * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). + * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. + * Method 2 : direct access. This method is portable but violate C standard. + * It can generate buggy code on targets generating assembly depending on alignment. + * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) + * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details. 
+ * Prefer these methods in priority order (0 > 1 > 2) + */ +#ifndef FSE_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ +# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) +# define FSE_FORCE_MEMORY_ACCESS 2 +# elif (defined(__INTEL_COMPILER) && !defined(WIN32)) || \ + (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) )) +# define FSE_FORCE_MEMORY_ACCESS 1 +# endif +#endif + + +static unsigned FSE_32bits(void) +{ + return sizeof(void*)==4; +} + +static unsigned FSE_isLittleEndian(void) +{ + const union { U32 i; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */ + return one.c[0]; +} + +#if defined(FSE_FORCE_MEMORY_ACCESS) && (FSE_FORCE_MEMORY_ACCESS==2) + +static U16 FSE_read16(const void* memPtr) { return *(const U16*) memPtr; } +static U32 FSE_read32(const void* memPtr) { return *(const U32*) memPtr; } +static U64 FSE_read64(const void* memPtr) { return *(const U64*) memPtr; } + +#elif defined(FSE_FORCE_MEMORY_ACCESS) && (FSE_FORCE_MEMORY_ACCESS==1) + +/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ +/* currently only defined for gcc and icc */ +typedef union { U16 u16; U32 u32; U64 u64; } __attribute__((packed)) unalign; + +static U16 FSE_read16(const void* ptr) { return ((const unalign*)ptr)->u16; } +static U32 FSE_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } +static U64 FSE_read64(const void* ptr) { return ((const unalign*)ptr)->u64; } + +#else + +static U16 FSE_read16(const void* memPtr) +{ + U16 val; memcpy(&val, memPtr, sizeof(val)); return val; +} + +static U32 FSE_read32(const void* memPtr) +{ + U32 val; memcpy(&val, memPtr, sizeof(val)); return val; +} + +static U64 
FSE_read64(const void* memPtr) +{ + U64 val; memcpy(&val, memPtr, sizeof(val)); return val; +} + +#endif // FSE_FORCE_MEMORY_ACCESS + +static U16 FSE_readLE16(const void* memPtr) +{ + if (FSE_isLittleEndian()) + return FSE_read16(memPtr); + else + { + const BYTE* p = (const BYTE*)memPtr; + return (U16)(p[0] + (p[1]<<8)); + } +} + +static U32 FSE_readLE32(const void* memPtr) +{ + if (FSE_isLittleEndian()) + return FSE_read32(memPtr); + else + { + const BYTE* p = (const BYTE*)memPtr; + return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24)); + } +} + + +static U64 FSE_readLE64(const void* memPtr) +{ + if (FSE_isLittleEndian()) + return FSE_read64(memPtr); + else + { + const BYTE* p = (const BYTE*)memPtr; + return (U64)((U64)p[0] + ((U64)p[1]<<8) + ((U64)p[2]<<16) + ((U64)p[3]<<24) + + ((U64)p[4]<<32) + ((U64)p[5]<<40) + ((U64)p[6]<<48) + ((U64)p[7]<<56)); + } +} + +static size_t FSE_readLEST(const void* memPtr) +{ + if (FSE_32bits()) + return (size_t)FSE_readLE32(memPtr); + else + return (size_t)FSE_readLE64(memPtr); +} + + + +/**************************************************************** +* Constants +*****************************************************************/ +#define FSE_MAX_TABLELOG (FSE_MAX_MEMORY_USAGE-2) +#define FSE_MAX_TABLESIZE (1U< FSE_TABLELOG_ABSOLUTE_MAX +#error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported" +#endif + + +/**************************************************************** +* Error Management +****************************************************************/ +#define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */ + + +/**************************************************************** +* Complex types +****************************************************************/ +typedef struct +{ + int deltaFindState; + U32 deltaNbBits; +} FSE_symbolCompressionTransform; /* total 8 bytes */ + +typedef U32 
DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)]; + +/**************************************************************** +* Internal functions +****************************************************************/ +FORCE_INLINE unsigned FSE_highbit32 (U32 val) +{ +# if defined(_MSC_VER) /* Visual */ + unsigned long r; + _BitScanReverse ( &r, val ); + return (unsigned) r; +# elif defined(__GNUC__) && (GCC_VERSION >= 304) /* GCC Intrinsic */ + return 31 - __builtin_clz (val); +# else /* Software version */ + static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; + U32 v = val; + unsigned r; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + r = DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27]; + return r; +# endif +} + + +/**************************************************************** +* Templates +****************************************************************/ +/* + designed to be included + for type-specific functions (template emulation in C) + Objective is to write these functions only once, for improved maintenance +*/ + +/* safety checks */ +#ifndef FSE_FUNCTION_EXTENSION +# error "FSE_FUNCTION_EXTENSION must be defined" +#endif +#ifndef FSE_FUNCTION_TYPE +# error "FSE_FUNCTION_TYPE must be defined" +#endif + +/* Function names */ +#define FSE_CAT(X,Y) X##Y +#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y) +#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y) + + + +static U32 FSE_tableStep(U32 tableSize) { return (tableSize>>1) + (tableSize>>3) + 3; } + +#define FSE_DECODE_TYPE FSE_decode_t + + +typedef struct { + U16 tableLog; + U16 fastMode; +} FSE_DTableHeader; /* sizeof U32 */ + +static size_t FSE_buildDTable +(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) +{ + void* ptr = dt; + FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr; + FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*)(ptr) + 
1; /* because dt is unsigned, 32-bits aligned on 32-bits */ + const U32 tableSize = 1 << tableLog; + const U32 tableMask = tableSize-1; + const U32 step = FSE_tableStep(tableSize); + U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1]; + U32 position = 0; + U32 highThreshold = tableSize-1; + const S16 largeLimit= (S16)(1 << (tableLog-1)); + U32 noLarge = 1; + U32 s; + + /* Sanity Checks */ + if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return (size_t)-FSE_ERROR_maxSymbolValue_tooLarge; + if (tableLog > FSE_MAX_TABLELOG) return (size_t)-FSE_ERROR_tableLog_tooLarge; + + /* Init, lay down lowprob symbols */ + DTableH[0].tableLog = (U16)tableLog; + for (s=0; s<=maxSymbolValue; s++) + { + if (normalizedCounter[s]==-1) + { + tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s; + symbolNext[s] = 1; + } + else + { + if (normalizedCounter[s] >= largeLimit) noLarge=0; + symbolNext[s] = normalizedCounter[s]; + } + } + + /* Spread symbols */ + for (s=0; s<=maxSymbolValue; s++) + { + int i; + for (i=0; i highThreshold) position = (position + step) & tableMask; /* lowprob area */ + } + } + + if (position!=0) return (size_t)-FSE_ERROR_GENERIC; /* position must reach all cells once, otherwise normalizedCounter is incorrect */ + + /* Build Decoding table */ + { + U32 i; + for (i=0; ifastMode = (U16)noLarge; + return 0; +} + + +/****************************************** +* FSE byte symbol +******************************************/ +#ifndef FSE_COMMONDEFS_ONLY + +static unsigned FSE_isError(size_t code) { return (code > (size_t)(-FSE_ERROR_maxCode)); } + +static short FSE_abs(short a) +{ + return a<0? 
-a : a; +} + + +/**************************************************************** +* Header bitstream management +****************************************************************/ +static size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, + const void* headerBuffer, size_t hbSize) +{ + const BYTE* const istart = (const BYTE*) headerBuffer; + const BYTE* const iend = istart + hbSize; + const BYTE* ip = istart; + int nbBits; + int remaining; + int threshold; + U32 bitStream; + int bitCount; + unsigned charnum = 0; + int previous0 = 0; + + if (hbSize < 4) return (size_t)-FSE_ERROR_srcSize_wrong; + bitStream = FSE_readLE32(ip); + nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */ + if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return (size_t)-FSE_ERROR_tableLog_tooLarge; + bitStream >>= 4; + bitCount = 4; + *tableLogPtr = nbBits; + remaining = (1<1) && (charnum<=*maxSVPtr)) + { + if (previous0) + { + unsigned n0 = charnum; + while ((bitStream & 0xFFFF) == 0xFFFF) + { + n0+=24; + if (ip < iend-5) + { + ip+=2; + bitStream = FSE_readLE32(ip) >> bitCount; + } + else + { + bitStream >>= 16; + bitCount+=16; + } + } + while ((bitStream & 3) == 3) + { + n0+=3; + bitStream>>=2; + bitCount+=2; + } + n0 += bitStream & 3; + bitCount += 2; + if (n0 > *maxSVPtr) return (size_t)-FSE_ERROR_maxSymbolValue_tooSmall; + while (charnum < n0) normalizedCounter[charnum++] = 0; + if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) + { + ip += bitCount>>3; + bitCount &= 7; + bitStream = FSE_readLE32(ip) >> bitCount; + } + else + bitStream >>= 2; + } + { + const short max = (short)((2*threshold-1)-remaining); + short count; + + if ((bitStream & (threshold-1)) < (U32)max) + { + count = (short)(bitStream & (threshold-1)); + bitCount += nbBits-1; + } + else + { + count = (short)(bitStream & (2*threshold-1)); + if (count >= threshold) count -= max; + bitCount += nbBits; + } + + count--; /* extra accuracy */ + remaining -= FSE_abs(count); + 
normalizedCounter[charnum++] = count; + previous0 = !count; + while (remaining < threshold) + { + nbBits--; + threshold >>= 1; + } + + { + if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) + { + ip += bitCount>>3; + bitCount &= 7; + } + else + { + bitCount -= (int)(8 * (iend - 4 - ip)); + ip = iend - 4; + } + bitStream = FSE_readLE32(ip) >> (bitCount & 31); + } + } + } + if (remaining != 1) return (size_t)-FSE_ERROR_GENERIC; + *maxSVPtr = charnum-1; + + ip += (bitCount+7)>>3; + if ((size_t)(ip-istart) > hbSize) return (size_t)-FSE_ERROR_srcSize_wrong; + return ip-istart; +} + + +/********************************************************* +* Decompression (Byte symbols) +*********************************************************/ +static size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue) +{ + void* ptr = dt; + FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr; + FSE_decode_t* const cell = (FSE_decode_t*)(ptr) + 1; /* because dt is unsigned */ + + DTableH->tableLog = 0; + DTableH->fastMode = 0; + + cell->newState = 0; + cell->symbol = symbolValue; + cell->nbBits = 0; + + return 0; +} + + +static size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits) +{ + void* ptr = dt; + FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr; + FSE_decode_t* const dinfo = (FSE_decode_t*)(ptr) + 1; /* because dt is unsigned */ + const unsigned tableSize = 1 << nbBits; + const unsigned tableMask = tableSize - 1; + const unsigned maxSymbolValue = tableMask; + unsigned s; + + /* Sanity checks */ + if (nbBits < 1) return (size_t)-FSE_ERROR_GENERIC; /* min size */ + + /* Build Decoding Table */ + DTableH->tableLog = (U16)nbBits; + DTableH->fastMode = 1; + for (s=0; s<=maxSymbolValue; s++) + { + dinfo[s].newState = 0; + dinfo[s].symbol = (BYTE)s; + dinfo[s].nbBits = (BYTE)nbBits; + } + + return 0; +} + + +/* FSE_initDStream + * Initialize a FSE_DStream_t. + * srcBuffer must point at the beginning of an FSE block. 
+ * The function result is the size of the FSE_block (== srcSize). + * If srcSize is too small, the function will return an errorCode; + */ +static size_t FSE_initDStream(FSE_DStream_t* bitD, const void* srcBuffer, size_t srcSize) +{ + if (srcSize < 1) return (size_t)-FSE_ERROR_srcSize_wrong; + + if (srcSize >= sizeof(size_t)) + { + U32 contain32; + bitD->start = (const char*)srcBuffer; + bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(size_t); + bitD->bitContainer = FSE_readLEST(bitD->ptr); + contain32 = ((const BYTE*)srcBuffer)[srcSize-1]; + if (contain32 == 0) return (size_t)-FSE_ERROR_GENERIC; /* stop bit not present */ + bitD->bitsConsumed = 8 - FSE_highbit32(contain32); + } + else + { + U32 contain32; + bitD->start = (const char*)srcBuffer; + bitD->ptr = bitD->start; + bitD->bitContainer = *(const BYTE*)(bitD->start); + switch(srcSize) + { + case 7: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[6]) << (sizeof(size_t)*8 - 16); + case 6: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[5]) << (sizeof(size_t)*8 - 24); + case 5: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[4]) << (sizeof(size_t)*8 - 32); + case 4: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[3]) << 24; + case 3: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[2]) << 16; + case 2: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[1]) << 8; + default:; + } + contain32 = ((const BYTE*)srcBuffer)[srcSize-1]; + if (contain32 == 0) return (size_t)-FSE_ERROR_GENERIC; /* stop bit not present */ + bitD->bitsConsumed = 8 - FSE_highbit32(contain32); + bitD->bitsConsumed += (U32)(sizeof(size_t) - srcSize)*8; + } + + return srcSize; +} + + +/*!FSE_lookBits + * Provides next n bits from the bitContainer. + * bitContainer is not modified (bits are still present for next read/look) + * On 32-bits, maxNbBits==25 + * On 64-bits, maxNbBits==57 + * return : value extracted. 
+ */ +static size_t FSE_lookBits(FSE_DStream_t* bitD, U32 nbBits) +{ + const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1; + return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask); +} + +static size_t FSE_lookBitsFast(FSE_DStream_t* bitD, U32 nbBits) /* only if nbBits >= 1 !! */ +{ + const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1; + return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask+1)-nbBits) & bitMask); +} + +static void FSE_skipBits(FSE_DStream_t* bitD, U32 nbBits) +{ + bitD->bitsConsumed += nbBits; +} + + +/*!FSE_readBits + * Read next n bits from the bitContainer. + * On 32-bits, don't read more than maxNbBits==25 + * On 64-bits, don't read more than maxNbBits==57 + * Use the fast variant *only* if n >= 1. + * return : value extracted. + */ +static size_t FSE_readBits(FSE_DStream_t* bitD, U32 nbBits) +{ + size_t value = FSE_lookBits(bitD, nbBits); + FSE_skipBits(bitD, nbBits); + return value; +} + +static size_t FSE_readBitsFast(FSE_DStream_t* bitD, U32 nbBits) /* only if nbBits >= 1 !! 
*/ +{ + size_t value = FSE_lookBitsFast(bitD, nbBits); + FSE_skipBits(bitD, nbBits); + return value; +} + +static unsigned FSE_reloadDStream(FSE_DStream_t* bitD) +{ + if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* should never happen */ + return FSE_DStream_tooFar; + + if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer)) + { + bitD->ptr -= bitD->bitsConsumed >> 3; + bitD->bitsConsumed &= 7; + bitD->bitContainer = FSE_readLEST(bitD->ptr); + return FSE_DStream_unfinished; + } + if (bitD->ptr == bitD->start) + { + if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return FSE_DStream_endOfBuffer; + return FSE_DStream_completed; + } + { + U32 nbBytes = bitD->bitsConsumed >> 3; + U32 result = FSE_DStream_unfinished; + if (bitD->ptr - nbBytes < bitD->start) + { + nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */ + result = FSE_DStream_endOfBuffer; + } + bitD->ptr -= nbBytes; + bitD->bitsConsumed -= nbBytes*8; + bitD->bitContainer = FSE_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD) */ + return result; + } +} + + +static void FSE_initDState(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD, const FSE_DTable* dt) +{ + const void* ptr = dt; + const FSE_DTableHeader* const DTableH = (const FSE_DTableHeader*)ptr; + DStatePtr->state = FSE_readBits(bitD, DTableH->tableLog); + FSE_reloadDStream(bitD); + DStatePtr->table = dt + 1; +} + +static BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD) +{ + const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + const U32 nbBits = DInfo.nbBits; + BYTE symbol = DInfo.symbol; + size_t lowBits = FSE_readBits(bitD, nbBits); + + DStatePtr->state = DInfo.newState + lowBits; + return symbol; +} + +static BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD) +{ + const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + const U32 nbBits = DInfo.nbBits; + BYTE symbol = DInfo.symbol; + size_t lowBits = 
FSE_readBitsFast(bitD, nbBits); + + DStatePtr->state = DInfo.newState + lowBits; + return symbol; +} + +/* FSE_endOfDStream + Tells if bitD has reached end of bitStream or not */ + +static unsigned FSE_endOfDStream(const FSE_DStream_t* bitD) +{ + return ((bitD->ptr == bitD->start) && (bitD->bitsConsumed == sizeof(bitD->bitContainer)*8)); +} + +static unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr) +{ + return DStatePtr->state == 0; +} + + +FORCE_INLINE size_t FSE_decompress_usingDTable_generic( + void* dst, size_t maxDstSize, + const void* cSrc, size_t cSrcSize, + const FSE_DTable* dt, const unsigned fast) +{ + BYTE* const ostart = (BYTE*) dst; + BYTE* op = ostart; + BYTE* const omax = op + maxDstSize; + BYTE* const olimit = omax-3; + + FSE_DStream_t bitD; + FSE_DState_t state1; + FSE_DState_t state2; + size_t errorCode; + + /* Init */ + errorCode = FSE_initDStream(&bitD, cSrc, cSrcSize); /* replaced last arg by maxCompressed Size */ + if (FSE_isError(errorCode)) return errorCode; + + FSE_initDState(&state1, &bitD, dt); + FSE_initDState(&state2, &bitD, dt); + +#define FSE_GETSYMBOL(statePtr) fast ? 
FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD) + + /* 4 symbols per loop */ + for ( ; (FSE_reloadDStream(&bitD)==FSE_DStream_unfinished) && (op sizeof(bitD.bitContainer)*8) /* This test must be static */ + FSE_reloadDStream(&bitD); + + op[1] = FSE_GETSYMBOL(&state2); + + if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */ + { if (FSE_reloadDStream(&bitD) > FSE_DStream_unfinished) { op+=2; break; } } + + op[2] = FSE_GETSYMBOL(&state1); + + if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */ + FSE_reloadDStream(&bitD); + + op[3] = FSE_GETSYMBOL(&state2); + } + + /* tail */ + /* note : FSE_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly FSE_DStream_completed */ + while (1) + { + if ( (FSE_reloadDStream(&bitD)>FSE_DStream_completed) || (op==omax) || (FSE_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state1))) ) + break; + + *op++ = FSE_GETSYMBOL(&state1); + + if ( (FSE_reloadDStream(&bitD)>FSE_DStream_completed) || (op==omax) || (FSE_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state2))) ) + break; + + *op++ = FSE_GETSYMBOL(&state2); + } + + /* end ? 
*/ + if (FSE_endOfDStream(&bitD) && FSE_endOfDState(&state1) && FSE_endOfDState(&state2)) + return op-ostart; + + if (op==omax) return (size_t)-FSE_ERROR_dstSize_tooSmall; /* dst buffer is full, but cSrc unfinished */ + + return (size_t)-FSE_ERROR_corruptionDetected; +} + + +static size_t FSE_decompress_usingDTable(void* dst, size_t originalSize, + const void* cSrc, size_t cSrcSize, + const FSE_DTable* dt) +{ + FSE_DTableHeader DTableH; + memcpy(&DTableH, dt, sizeof(DTableH)); /* memcpy() into local variable, to avoid strict aliasing warning */ + + /* select fast mode (static) */ + if (DTableH.fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1); + return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0); +} + + +static size_t FSE_decompress(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize) +{ + const BYTE* const istart = (const BYTE*)cSrc; + const BYTE* ip = istart; + short counting[FSE_MAX_SYMBOL_VALUE+1]; + DTable_max_t dt; /* Static analyzer seems unable to understand this table will be properly initialized later */ + unsigned tableLog; + unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE; + size_t errorCode; + + if (cSrcSize<2) return (size_t)-FSE_ERROR_srcSize_wrong; /* too small input size */ + + /* normal FSE decoding mode */ + errorCode = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize); + if (FSE_isError(errorCode)) return errorCode; + if (errorCode >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong; /* too small input size */ + ip += errorCode; + cSrcSize -= errorCode; + + errorCode = FSE_buildDTable (dt, counting, maxSymbolValue, tableLog); + if (FSE_isError(errorCode)) return errorCode; + + /* always return, even if it is an error code */ + return FSE_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, dt); +} + + + +/* ******************************************************* +* Huff0 : Huffman block compression 
+*********************************************************/ +#define HUF_MAX_SYMBOL_VALUE 255 +#define HUF_DEFAULT_TABLELOG 12 /* used by default, when not specified */ +#define HUF_MAX_TABLELOG 12 /* max possible tableLog; for allocation purpose; can be modified */ +#define HUF_ABSOLUTEMAX_TABLELOG 16 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */ +#if (HUF_MAX_TABLELOG > HUF_ABSOLUTEMAX_TABLELOG) +# error "HUF_MAX_TABLELOG is too large !" +#endif + +typedef struct HUF_CElt_s { + U16 val; + BYTE nbBits; +} HUF_CElt ; + +typedef struct nodeElt_s { + U32 count; + U16 parent; + BYTE byte; + BYTE nbBits; +} nodeElt; + + +/* ******************************************************* +* Huff0 : Huffman block decompression +*********************************************************/ +typedef struct { + BYTE byte; + BYTE nbBits; +} HUF_DElt; + +static size_t HUF_readDTable (U16* DTable, const void* src, size_t srcSize) +{ + BYTE huffWeight[HUF_MAX_SYMBOL_VALUE + 1]; + U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1]; /* large enough for values from 0 to 16 */ + U32 weightTotal; + U32 maxBits; + const BYTE* ip = (const BYTE*) src; + size_t iSize; + size_t oSize; + U32 n; + U32 nextRankStart; + void* ptr = DTable+1; + HUF_DElt* const dt = (HUF_DElt*)ptr; + + if (!srcSize) return (size_t)-FSE_ERROR_srcSize_wrong; + iSize = ip[0]; + + FSE_STATIC_ASSERT(sizeof(HUF_DElt) == sizeof(U16)); /* if compilation fails here, assertion is false */ + //memset(huffWeight, 0, sizeof(huffWeight)); /* should not be necessary, but some analyzer complain ... 
*/ + if (iSize >= 128) /* special header */ + { + if (iSize >= (242)) /* RLE */ + { + static int l[14] = { 1, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128 }; + oSize = l[iSize-242]; + memset(huffWeight, 1, sizeof(huffWeight)); + iSize = 0; + } + else /* Incompressible */ + { + oSize = iSize - 127; + iSize = ((oSize+1)/2); + if (iSize+1 > srcSize) return (size_t)-FSE_ERROR_srcSize_wrong; + ip += 1; + for (n=0; n> 4; + huffWeight[n+1] = ip[n/2] & 15; + } + } + } + else /* header compressed with FSE (normal case) */ + { + if (iSize+1 > srcSize) return (size_t)-FSE_ERROR_srcSize_wrong; + oSize = FSE_decompress(huffWeight, HUF_MAX_SYMBOL_VALUE, ip+1, iSize); /* max 255 values decoded, last one is implied */ + if (FSE_isError(oSize)) return oSize; + } + + /* collect weight stats */ + memset(rankVal, 0, sizeof(rankVal)); + weightTotal = 0; + for (n=0; n= HUF_ABSOLUTEMAX_TABLELOG) return (size_t)-FSE_ERROR_corruptionDetected; + rankVal[huffWeight[n]]++; + weightTotal += (1 << huffWeight[n]) >> 1; + } + if (weightTotal == 0) return (size_t)-FSE_ERROR_corruptionDetected; + + /* get last non-null symbol weight (implied, total must be 2^n) */ + maxBits = FSE_highbit32(weightTotal) + 1; + if (maxBits > DTable[0]) return (size_t)-FSE_ERROR_tableLog_tooLarge; /* DTable is too small */ + DTable[0] = (U16)maxBits; + { + U32 total = 1 << maxBits; + U32 rest = total - weightTotal; + U32 verif = 1 << FSE_highbit32(rest); + U32 lastWeight = FSE_highbit32(rest) + 1; + if (verif != rest) return (size_t)-FSE_ERROR_corruptionDetected; /* last value must be a clean power of 2 */ + huffWeight[oSize] = (BYTE)lastWeight; + rankVal[lastWeight]++; + } + + /* check tree construction validity */ + if ((rankVal[1] < 2) || (rankVal[1] & 1)) return (size_t)-FSE_ERROR_corruptionDetected; /* by construction : at least 2 elts of rank 1, must be even */ + + /* Prepare ranks */ + nextRankStart = 0; + for (n=1; n<=maxBits; n++) + { + U32 current = nextRankStart; + nextRankStart += (rankVal[n] << (n-1)); 
+ rankVal[n] = current; + } + + /* fill DTable */ + for (n=0; n<=oSize; n++) + { + const U32 w = huffWeight[n]; + const U32 length = (1 << w) >> 1; + U32 i; + HUF_DElt D; + D.byte = (BYTE)n; D.nbBits = (BYTE)(maxBits + 1 - w); + for (i = rankVal[w]; i < rankVal[w] + length; i++) + dt[i] = D; + rankVal[w] += length; + } + + return iSize+1; +} + + +static BYTE HUF_decodeSymbol(FSE_DStream_t* Dstream, const HUF_DElt* dt, const U32 dtLog) +{ + const size_t val = FSE_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */ + const BYTE c = dt[val].byte; + FSE_skipBits(Dstream, dt[val].nbBits); + return c; +} + +static size_t HUF_decompress_usingDTable( /* -3% slower when non static */ + void* dst, size_t maxDstSize, + const void* cSrc, size_t cSrcSize, + const U16* DTable) +{ + BYTE* const ostart = (BYTE*) dst; + BYTE* op = ostart; + BYTE* const omax = op + maxDstSize; + BYTE* const olimit = omax-15; + + const void* ptr = DTable; + const HUF_DElt* const dt = (const HUF_DElt*)(ptr)+1; + const U32 dtLog = DTable[0]; + size_t errorCode; + U32 reloadStatus; + + /* Init */ + + const U16* jumpTable = (const U16*)cSrc; + const size_t length1 = FSE_readLE16(jumpTable); + const size_t length2 = FSE_readLE16(jumpTable+1); + const size_t length3 = FSE_readLE16(jumpTable+2); + const size_t length4 = cSrcSize - 6 - length1 - length2 - length3; // check coherency !! 
+ const char* const start1 = (const char*)(cSrc) + 6; + const char* const start2 = start1 + length1; + const char* const start3 = start2 + length2; + const char* const start4 = start3 + length3; + FSE_DStream_t bitD1, bitD2, bitD3, bitD4; + + if (length1+length2+length3+6 >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong; + + errorCode = FSE_initDStream(&bitD1, start1, length1); + if (FSE_isError(errorCode)) return errorCode; + errorCode = FSE_initDStream(&bitD2, start2, length2); + if (FSE_isError(errorCode)) return errorCode; + errorCode = FSE_initDStream(&bitD3, start3, length3); + if (FSE_isError(errorCode)) return errorCode; + errorCode = FSE_initDStream(&bitD4, start4, length4); + if (FSE_isError(errorCode)) return errorCode; + + reloadStatus=FSE_reloadDStream(&bitD2); + + /* 16 symbols per loop */ + for ( ; (reloadStatus12)) FSE_reloadDStream(&Dstream) + +#define HUF_DECODE_SYMBOL_2(n, Dstream) \ + op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \ + if (FSE_32bits()) FSE_reloadDStream(&Dstream) + + HUF_DECODE_SYMBOL_1( 0, bitD1); + HUF_DECODE_SYMBOL_1( 1, bitD2); + HUF_DECODE_SYMBOL_1( 2, bitD3); + HUF_DECODE_SYMBOL_1( 3, bitD4); + HUF_DECODE_SYMBOL_2( 4, bitD1); + HUF_DECODE_SYMBOL_2( 5, bitD2); + HUF_DECODE_SYMBOL_2( 6, bitD3); + HUF_DECODE_SYMBOL_2( 7, bitD4); + HUF_DECODE_SYMBOL_1( 8, bitD1); + HUF_DECODE_SYMBOL_1( 9, bitD2); + HUF_DECODE_SYMBOL_1(10, bitD3); + HUF_DECODE_SYMBOL_1(11, bitD4); + HUF_DECODE_SYMBOL_0(12, bitD1); + HUF_DECODE_SYMBOL_0(13, bitD2); + HUF_DECODE_SYMBOL_0(14, bitD3); + HUF_DECODE_SYMBOL_0(15, bitD4); + } + + if (reloadStatus!=FSE_DStream_completed) /* not complete : some bitStream might be FSE_DStream_unfinished */ + return (size_t)-FSE_ERROR_corruptionDetected; + + /* tail */ + { + // bitTail = bitD1; // *much* slower : -20% !??! 
+ FSE_DStream_t bitTail; + bitTail.ptr = bitD1.ptr; + bitTail.bitsConsumed = bitD1.bitsConsumed; + bitTail.bitContainer = bitD1.bitContainer; // required in case of FSE_DStream_endOfBuffer + bitTail.start = start1; + for ( ; (FSE_reloadDStream(&bitTail) < FSE_DStream_completed) && (op= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong; + ip += errorCode; + cSrcSize -= errorCode; + + return HUF_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, DTable); +} + + +#endif /* FSE_COMMONDEFS_ONLY */ + +/* + zstd - standard compression library + Copyright (C) 2014-2015, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + You can contact the author at : + - zstd source repository : https://github.com/Cyan4973/zstd + - zstd public forum : https://groups.google.com/forum/#!forum/lz4c +*/ + +/**************************************************************** +* Tuning parameters +*****************************************************************/ +/* ZSTD_MEMORY_USAGE : +* Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.) +* Increasing memory usage improves compression ratio +* Reduced memory usage can improve speed, due to cache effects */ +#define ZSTD_MEMORY_USAGE 17 + + +/************************************** + CPU Feature Detection +**************************************/ +/* + * Automated efficient unaligned memory access detection + * Based on known hardware architectures + * This list will be updated based on feedback + */ +#if defined(CPU_HAS_EFFICIENT_UNALIGNED_MEMORY_ACCESS) \ + || defined(__ARM_FEATURE_UNALIGNED) \ + || defined(__i386__) || defined(__x86_64__) \ + || defined(_M_IX86) || defined(_M_X64) \ + || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_8__) \ + || (defined(_M_ARM) && (_M_ARM >= 7)) +# define ZSTD_UNALIGNED_ACCESS 1 +#else +# define ZSTD_UNALIGNED_ACCESS 0 +#endif + + +/******************************************************** +* Includes +*********************************************************/ +#include /* calloc */ +#include /* memcpy, memmove */ +#include /* debug : printf */ + + +/******************************************************** +* Compiler specifics +*********************************************************/ +#ifdef __AVX2__ +# include /* AVX2 intrinsics */ +#endif + +#ifdef _MSC_VER /* Visual Studio */ +# include /* For Visual 2005 */ +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +# pragma warning(disable : 4324) /* disable: C4324: padded structure */ +#endif + + +#ifndef MEM_ACCESS_MODULE +#define MEM_ACCESS_MODULE 
+/******************************************************** +* Basic Types +*********************************************************/ +#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ +# include +typedef uint8_t BYTE; +typedef uint16_t U16; +typedef int16_t S16; +typedef uint32_t U32; +typedef int32_t S32; +typedef uint64_t U64; +#else +typedef unsigned char BYTE; +typedef unsigned short U16; +typedef signed short S16; +typedef unsigned int U32; +typedef signed int S32; +typedef unsigned long long U64; +#endif + +#endif /* MEM_ACCESS_MODULE */ + + +/******************************************************** +* Constants +*********************************************************/ +static const U32 ZSTD_magicNumber = 0xFD2FB51E; /* 3rd version : seqNb header */ + +#define HASH_LOG (ZSTD_MEMORY_USAGE - 2) +#define HASH_TABLESIZE (1 << HASH_LOG) +#define HASH_MASK (HASH_TABLESIZE - 1) + +#define KNUTH 2654435761 + +#define BIT7 128 +#define BIT6 64 +#define BIT5 32 +#define BIT4 16 + +#define KB *(1 <<10) +#define MB *(1 <<20) +#define GB *(1U<<30) + +#define BLOCKSIZE (128 KB) /* define, for static allocation */ + +#define WORKPLACESIZE (BLOCKSIZE*3) +#define MINMATCH 4 +#define MLbits 7 +#define LLbits 6 +#define Offbits 5 +#define MaxML ((1<>3]; +#else + U32 hashTable[HASH_TABLESIZE]; +#endif + BYTE buffer[WORKPLACESIZE]; +} cctxi_t; + + + + +/************************************** +* Error Management +**************************************/ +/* published entry point */ +unsigned ZSTDv01_isError(size_t code) { return ERR_isError(code); } + + +/************************************** +* Tool functions +**************************************/ +#define ZSTD_VERSION_MAJOR 0 /* for breaking interface changes */ +#define ZSTD_VERSION_MINOR 1 /* for new (non-breaking) interface capabilities */ +#define ZSTD_VERSION_RELEASE 3 /* for tweaks, bug-fixes, or development */ +#define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + 
ZSTD_VERSION_RELEASE) + +/************************************************************** +* Decompression code +**************************************************************/ + +size_t ZSTDv01_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr) +{ + const BYTE* const in = (const BYTE* const)src; + BYTE headerFlags; + U32 cSize; + + if (srcSize < 3) return ERROR(srcSize_wrong); + + headerFlags = *in; + cSize = in[2] + (in[1]<<8) + ((in[0] & 7)<<16); + + bpPtr->blockType = (blockType_t)(headerFlags >> 6); + bpPtr->origSize = (bpPtr->blockType == bt_rle) ? cSize : 0; + + if (bpPtr->blockType == bt_end) return 0; + if (bpPtr->blockType == bt_rle) return 1; + return cSize; +} + + +static size_t ZSTD_copyUncompressedBlock(void* dst, size_t maxDstSize, const void* src, size_t srcSize) +{ + if (srcSize > maxDstSize) return ERROR(dstSize_tooSmall); + memcpy(dst, src, srcSize); + return srcSize; +} + + +static size_t ZSTD_decompressLiterals(void* ctx, + void* dst, size_t maxDstSize, + const void* src, size_t srcSize) +{ + BYTE* op = (BYTE*)dst; + BYTE* const oend = op + maxDstSize; + const BYTE* ip = (const BYTE*)src; + size_t errorCode; + size_t litSize; + + /* check : minimum 2, for litSize, +1, for content */ + if (srcSize <= 3) return ERROR(corruption_detected); + + litSize = ip[1] + (ip[0]<<8); + litSize += ((ip[-3] >> 3) & 7) << 16; // mmmmh.... 
+ op = oend - litSize; + + (void)ctx; + if (litSize > maxDstSize) return ERROR(dstSize_tooSmall); + errorCode = HUF_decompress(op, litSize, ip+2, srcSize-2); + if (FSE_isError(errorCode)) return ERROR(GENERIC); + return litSize; +} + + +size_t ZSTDv01_decodeLiteralsBlock(void* ctx, + void* dst, size_t maxDstSize, + const BYTE** litStart, size_t* litSize, + const void* src, size_t srcSize) +{ + const BYTE* const istart = (const BYTE* const)src; + const BYTE* ip = istart; + BYTE* const ostart = (BYTE* const)dst; + BYTE* const oend = ostart + maxDstSize; + blockProperties_t litbp; + + size_t litcSize = ZSTDv01_getcBlockSize(src, srcSize, &litbp); + if (ZSTDv01_isError(litcSize)) return litcSize; + if (litcSize > srcSize - ZSTD_blockHeaderSize) return ERROR(srcSize_wrong); + ip += ZSTD_blockHeaderSize; + + switch(litbp.blockType) + { + case bt_raw: + *litStart = ip; + ip += litcSize; + *litSize = litcSize; + break; + case bt_rle: + { + size_t rleSize = litbp.origSize; + if (rleSize>maxDstSize) return ERROR(dstSize_tooSmall); + if (!srcSize) return ERROR(srcSize_wrong); + memset(oend - rleSize, *ip, rleSize); + *litStart = oend - rleSize; + *litSize = rleSize; + ip++; + break; + } + case bt_compressed: + { + size_t decodedLitSize = ZSTD_decompressLiterals(ctx, dst, maxDstSize, ip, litcSize); + if (ZSTDv01_isError(decodedLitSize)) return decodedLitSize; + *litStart = oend - decodedLitSize; + *litSize = decodedLitSize; + ip += litcSize; + break; + } + case bt_end: + default: + return ERROR(GENERIC); + } + + return ip-istart; +} + + +size_t ZSTDv01_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLengthPtr, + FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb, + const void* src, size_t srcSize) +{ + const BYTE* const istart = (const BYTE* const)src; + const BYTE* ip = istart; + const BYTE* const iend = istart + srcSize; + U32 LLtype, Offtype, MLtype; + U32 LLlog, Offlog, MLlog; + size_t dumpsLength; + + /* check */ + if (srcSize < 5) return 
ERROR(srcSize_wrong); + + /* SeqHead */ + *nbSeq = ZSTD_readLE16(ip); ip+=2; + LLtype = *ip >> 6; + Offtype = (*ip >> 4) & 3; + MLtype = (*ip >> 2) & 3; + if (*ip & 2) + { + dumpsLength = ip[2]; + dumpsLength += ip[1] << 8; + ip += 3; + } + else + { + dumpsLength = ip[1]; + dumpsLength += (ip[0] & 1) << 8; + ip += 2; + } + *dumpsPtr = ip; + ip += dumpsLength; + *dumpsLengthPtr = dumpsLength; + + /* check */ + if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */ + + /* sequences */ + { + S16 norm[MaxML+1]; /* assumption : MaxML >= MaxLL and MaxOff */ + size_t headerSize; + + /* Build DTables */ + switch(LLtype) + { + case bt_rle : + LLlog = 0; + FSE_buildDTable_rle(DTableLL, *ip++); break; + case bt_raw : + LLlog = LLbits; + FSE_buildDTable_raw(DTableLL, LLbits); break; + default : + { U32 max = MaxLL; + headerSize = FSE_readNCount(norm, &max, &LLlog, ip, iend-ip); + if (FSE_isError(headerSize)) return ERROR(GENERIC); + if (LLlog > LLFSELog) return ERROR(corruption_detected); + ip += headerSize; + FSE_buildDTable(DTableLL, norm, max, LLlog); + } } + + switch(Offtype) + { + case bt_rle : + Offlog = 0; + if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */ + FSE_buildDTable_rle(DTableOffb, *ip++); break; + case bt_raw : + Offlog = Offbits; + FSE_buildDTable_raw(DTableOffb, Offbits); break; + default : + { U32 max = MaxOff; + headerSize = FSE_readNCount(norm, &max, &Offlog, ip, iend-ip); + if (FSE_isError(headerSize)) return ERROR(GENERIC); + if (Offlog > OffFSELog) return ERROR(corruption_detected); + ip += headerSize; + FSE_buildDTable(DTableOffb, norm, max, Offlog); + } } + + switch(MLtype) + { + case bt_rle : + MLlog = 0; + if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */ + FSE_buildDTable_rle(DTableML, *ip++); break; + case bt_raw : + MLlog = MLbits; + FSE_buildDTable_raw(DTableML, 
MLbits); break; + default : + { U32 max = MaxML; + headerSize = FSE_readNCount(norm, &max, &MLlog, ip, iend-ip); + if (FSE_isError(headerSize)) return ERROR(GENERIC); + if (MLlog > MLFSELog) return ERROR(corruption_detected); + ip += headerSize; + FSE_buildDTable(DTableML, norm, max, MLlog); + } } } + + return ip-istart; +} + + +typedef struct { + size_t litLength; + size_t offset; + size_t matchLength; +} seq_t; + +typedef struct { + FSE_DStream_t DStream; + FSE_DState_t stateLL; + FSE_DState_t stateOffb; + FSE_DState_t stateML; + size_t prevOffset; + const BYTE* dumps; + const BYTE* dumpsEnd; +} seqState_t; + + +static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState) +{ + size_t litLength; + size_t prevOffset; + size_t offset; + size_t matchLength; + const BYTE* dumps = seqState->dumps; + const BYTE* const de = seqState->dumpsEnd; + + /* Literal length */ + litLength = FSE_decodeSymbol(&(seqState->stateLL), &(seqState->DStream)); + prevOffset = litLength ? seq->offset : seqState->prevOffset; + seqState->prevOffset = seq->offset; + if (litLength == MaxLL) + { + U32 add = dumps 1 byte */ + dumps += 3; + } + } + } + + /* Offset */ + { + U32 offsetCode, nbBits; + offsetCode = FSE_decodeSymbol(&(seqState->stateOffb), &(seqState->DStream)); + if (ZSTD_32bits()) FSE_reloadDStream(&(seqState->DStream)); + nbBits = offsetCode - 1; + if (offsetCode==0) nbBits = 0; /* cmove */ + offset = ((size_t)1 << (nbBits & ((sizeof(offset)*8)-1))) + FSE_readBits(&(seqState->DStream), nbBits); + if (ZSTD_32bits()) FSE_reloadDStream(&(seqState->DStream)); + if (offsetCode==0) offset = prevOffset; + } + + /* MatchLength */ + matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream)); + if (matchLength == MaxML) + { + U32 add = dumps 1 byte */ + dumps += 3; + } + } + } + matchLength += MINMATCH; + + /* save result */ + seq->litLength = litLength; + seq->offset = offset; + seq->matchLength = matchLength; + seqState->dumps = dumps; +} + + +static size_t 
ZSTD_execSequence(BYTE* op, + seq_t sequence, + const BYTE** litPtr, const BYTE* const litLimit, + BYTE* const base, BYTE* const oend) +{ + static const int dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4}; /* added */ + static const int dec64table[] = {8, 8, 8, 7, 8, 9,10,11}; /* substracted */ + const BYTE* const ostart = op; + const size_t litLength = sequence.litLength; + BYTE* const endMatch = op + litLength + sequence.matchLength; /* risk : address space overflow (32-bits) */ + const BYTE* const litEnd = *litPtr + litLength; + + /* check */ + if (endMatch > oend) return ERROR(dstSize_tooSmall); /* overwrite beyond dst buffer */ + if (litEnd > litLimit) return ERROR(corruption_detected); + if (sequence.matchLength > (size_t)(*litPtr-op)) return ERROR(dstSize_tooSmall); /* overwrite literal segment */ + + /* copy Literals */ + if (((size_t)(*litPtr - op) < 8) || ((size_t)(oend-litEnd) < 8) || (op+litLength > oend-8)) + memmove(op, *litPtr, litLength); /* overwrite risk */ + else + ZSTD_wildcopy(op, *litPtr, litLength); + op += litLength; + *litPtr = litEnd; /* update for next sequence */ + + /* check : last match must be at a minimum distance of 8 from end of dest buffer */ + if (oend-op < 8) return ERROR(dstSize_tooSmall); + + /* copy Match */ + { + const U32 overlapRisk = (((size_t)(litEnd - endMatch)) < 12); + const BYTE* match = op - sequence.offset; /* possible underflow at op - offset ? 
*/ + size_t qutt = 12; + U64 saved[2]; + + /* check */ + if (match < base) return ERROR(corruption_detected); + if (sequence.offset > (size_t)base) return ERROR(corruption_detected); + + /* save beginning of literal sequence, in case of write overlap */ + if (overlapRisk) + { + if ((endMatch + qutt) > oend) qutt = oend-endMatch; + memcpy(saved, endMatch, qutt); + } + + if (sequence.offset < 8) + { + const int dec64 = dec64table[sequence.offset]; + op[0] = match[0]; + op[1] = match[1]; + op[2] = match[2]; + op[3] = match[3]; + match += dec32table[sequence.offset]; + ZSTD_copy4(op+4, match); + match -= dec64; + } else { ZSTD_copy8(op, match); } + op += 8; match += 8; + + if (endMatch > oend-(16-MINMATCH)) + { + if (op < oend-8) + { + ZSTD_wildcopy(op, match, (oend-8) - op); + match += (oend-8) - op; + op = oend-8; + } + while (opLLTable; + U32* DTableML = dctx->MLTable; + U32* DTableOffb = dctx->OffTable; + BYTE* const base = (BYTE*) (dctx->base); + + /* Build Decoding Tables */ + errorCode = ZSTDv01_decodeSeqHeaders(&nbSeq, &dumps, &dumpsLength, + DTableLL, DTableML, DTableOffb, + ip, iend-ip); + if (ZSTDv01_isError(errorCode)) return errorCode; + ip += errorCode; + + /* Regen sequences */ + { + seq_t sequence; + seqState_t seqState; + + memset(&sequence, 0, sizeof(sequence)); + seqState.dumps = dumps; + seqState.dumpsEnd = dumps + dumpsLength; + seqState.prevOffset = 1; + errorCode = FSE_initDStream(&(seqState.DStream), ip, iend-ip); + if (FSE_isError(errorCode)) return ERROR(corruption_detected); + FSE_initDState(&(seqState.stateLL), &(seqState.DStream), DTableLL); + FSE_initDState(&(seqState.stateOffb), &(seqState.DStream), DTableOffb); + FSE_initDState(&(seqState.stateML), &(seqState.DStream), DTableML); + + for ( ; (FSE_reloadDStream(&(seqState.DStream)) <= FSE_DStream_completed) && (nbSeq>0) ; ) + { + size_t oneSeqSize; + nbSeq--; + ZSTD_decodeSequence(&sequence, &seqState); + oneSeqSize = ZSTD_execSequence(op, sequence, &litPtr, litEnd, base, oend); + if 
(ZSTDv01_isError(oneSeqSize)) return oneSeqSize; + op += oneSeqSize; + } + + /* check if reached exact end */ + if ( !FSE_endOfDStream(&(seqState.DStream)) ) return ERROR(corruption_detected); /* requested too much : data is corrupted */ + if (nbSeq<0) return ERROR(corruption_detected); /* requested too many sequences : data is corrupted */ + + /* last literal segment */ + { + size_t lastLLSize = litEnd - litPtr; + if (op+lastLLSize > oend) return ERROR(dstSize_tooSmall); + if (op != litPtr) memmove(op, litPtr, lastLLSize); + op += lastLLSize; + } + } + + return op-ostart; +} + + +static size_t ZSTD_decompressBlock( + void* ctx, + void* dst, size_t maxDstSize, + const void* src, size_t srcSize) +{ + /* blockType == blockCompressed, srcSize is trusted */ + const BYTE* ip = (const BYTE*)src; + const BYTE* litPtr = NULL; + size_t litSize = 0; + size_t errorCode; + + /* Decode literals sub-block */ + errorCode = ZSTDv01_decodeLiteralsBlock(ctx, dst, maxDstSize, &litPtr, &litSize, src, srcSize); + if (ZSTDv01_isError(errorCode)) return errorCode; + ip += errorCode; + srcSize -= errorCode; + + return ZSTD_decompressSequences(ctx, dst, maxDstSize, ip, srcSize, litPtr, litSize); +} + + +size_t ZSTDv01_decompressDCtx(void* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize) +{ + const BYTE* ip = (const BYTE*)src; + const BYTE* iend = ip + srcSize; + BYTE* const ostart = (BYTE* const)dst; + BYTE* op = ostart; + BYTE* const oend = ostart + maxDstSize; + size_t remainingSize = srcSize; + U32 magicNumber; + size_t errorCode=0; + blockProperties_t blockProperties; + + /* Frame Header */ + if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong); + magicNumber = ZSTD_readBE32(src); + if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown); + ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize; + + /* Loop on each block */ + while (1) + { + size_t blockSize = ZSTDv01_getcBlockSize(ip, iend-ip, &blockProperties); + 
if (ZSTDv01_isError(blockSize)) return blockSize; + + ip += ZSTD_blockHeaderSize; + remainingSize -= ZSTD_blockHeaderSize; + if (blockSize > remainingSize) return ERROR(srcSize_wrong); + + switch(blockProperties.blockType) + { + case bt_compressed: + errorCode = ZSTD_decompressBlock(ctx, op, oend-op, ip, blockSize); + break; + case bt_raw : + errorCode = ZSTD_copyUncompressedBlock(op, oend-op, ip, blockSize); + break; + case bt_rle : + return ERROR(GENERIC); /* not yet supported */ + break; + case bt_end : + /* end of frame */ + if (remainingSize) return ERROR(srcSize_wrong); + break; + default: + return ERROR(GENERIC); + } + if (blockSize == 0) break; /* bt_end */ + + if (ZSTDv01_isError(errorCode)) return errorCode; + op += errorCode; + ip += blockSize; + remainingSize -= blockSize; + } + + return op-ostart; +} + +size_t ZSTDv01_decompress(void* dst, size_t maxDstSize, const void* src, size_t srcSize) +{ + dctx_t ctx; + ctx.base = dst; + return ZSTDv01_decompressDCtx(&ctx, dst, maxDstSize, src, srcSize); +} + +size_t ZSTDv01_findFrameCompressedSize(const void* src, size_t srcSize) +{ + const BYTE* ip = (const BYTE*)src; + size_t remainingSize = srcSize; + U32 magicNumber; + blockProperties_t blockProperties; + + /* Frame Header */ + if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong); + magicNumber = ZSTD_readBE32(src); + if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown); + ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize; + + /* Loop on each block */ + while (1) + { + size_t blockSize = ZSTDv01_getcBlockSize(ip, remainingSize, &blockProperties); + if (ZSTDv01_isError(blockSize)) return blockSize; + + ip += ZSTD_blockHeaderSize; + remainingSize -= ZSTD_blockHeaderSize; + if (blockSize > remainingSize) return ERROR(srcSize_wrong); + + if (blockSize == 0) break; /* bt_end */ + + ip += blockSize; + remainingSize -= blockSize; + } + + return ip - (const BYTE*)src; +} + +/******************************* 
+* Streaming Decompression API +*******************************/ + +size_t ZSTDv01_resetDCtx(ZSTDv01_Dctx* dctx) +{ + dctx->expected = ZSTD_frameHeaderSize; + dctx->phase = 0; + dctx->previousDstEnd = NULL; + dctx->base = NULL; + return 0; +} + +ZSTDv01_Dctx* ZSTDv01_createDCtx(void) +{ + ZSTDv01_Dctx* dctx = (ZSTDv01_Dctx*)malloc(sizeof(ZSTDv01_Dctx)); + if (dctx==NULL) return NULL; + ZSTDv01_resetDCtx(dctx); + return dctx; +} + +size_t ZSTDv01_freeDCtx(ZSTDv01_Dctx* dctx) +{ + free(dctx); + return 0; +} + +size_t ZSTDv01_nextSrcSizeToDecompress(ZSTDv01_Dctx* dctx) +{ + return ((dctx_t*)dctx)->expected; +} + +size_t ZSTDv01_decompressContinue(ZSTDv01_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize) +{ + dctx_t* ctx = (dctx_t*)dctx; + + /* Sanity check */ + if (srcSize != ctx->expected) return ERROR(srcSize_wrong); + if (dst != ctx->previousDstEnd) /* not contiguous */ + ctx->base = dst; + + /* Decompress : frame header */ + if (ctx->phase == 0) + { + /* Check frame magic header */ + U32 magicNumber = ZSTD_readBE32(src); + if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown); + ctx->phase = 1; + ctx->expected = ZSTD_blockHeaderSize; + return 0; + } + + /* Decompress : block header */ + if (ctx->phase == 1) + { + blockProperties_t bp; + size_t blockSize = ZSTDv01_getcBlockSize(src, ZSTD_blockHeaderSize, &bp); + if (ZSTDv01_isError(blockSize)) return blockSize; + if (bp.blockType == bt_end) + { + ctx->expected = 0; + ctx->phase = 0; + } + else + { + ctx->expected = blockSize; + ctx->bType = bp.blockType; + ctx->phase = 2; + } + + return 0; + } + + /* Decompress : block content */ + { + size_t rSize; + switch(ctx->bType) + { + case bt_compressed: + rSize = ZSTD_decompressBlock(ctx, dst, maxDstSize, src, srcSize); + break; + case bt_raw : + rSize = ZSTD_copyUncompressedBlock(dst, maxDstSize, src, srcSize); + break; + case bt_rle : + return ERROR(GENERIC); /* not yet handled */ + break; + case bt_end : /* should never happen 
(filtered at phase 1) */ + rSize = 0; + break; + default: + return ERROR(GENERIC); + } + ctx->phase = 1; + ctx->expected = ZSTD_blockHeaderSize; + ctx->previousDstEnd = (void*)( ((char*)dst) + rSize); + return rSize; + } + +} diff --git a/deps/SZ/zstd/legacy/zstd_v01.h b/deps/SZ/zstd/legacy/zstd_v01.h new file mode 100644 index 0000000000000000000000000000000000000000..42f0897c7d2337cc8505d761b5b082423fc9d1eb --- /dev/null +++ b/deps/SZ/zstd/legacy/zstd_v01.h @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_V01_H_28739879432 +#define ZSTD_V01_H_28739879432 + +#if defined (__cplusplus) +extern "C" { +#endif + +/* ************************************* +* Includes +***************************************/ +#include /* size_t */ + + +/* ************************************* +* Simple one-step function +***************************************/ +/** +ZSTDv01_decompress() : decompress ZSTD frames compliant with v0.1.x format + compressedSize : is the exact source size + maxOriginalSize : is the size of the 'dst' buffer, which must be already allocated. + It must be equal or larger than originalSize, otherwise decompression will fail. 
+ return : the number of bytes decompressed into destination buffer (originalSize) + or an errorCode if it fails (which can be tested using ZSTDv01_isError()) +*/ +size_t ZSTDv01_decompress( void* dst, size_t maxOriginalSize, + const void* src, size_t compressedSize); + +/** +ZSTDv01_findFrameCompressedSize() : get the source length of a ZSTD frame compliant with v0.1.x format + compressedSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src' + return : the number of bytes that would be read to decompress this frame + or an errorCode if it fails (which can be tested using ZSTDv01_isError()) +*/ +size_t ZSTDv01_findFrameCompressedSize(const void* src, size_t compressedSize); + +/** +ZSTDv01_isError() : tells if the result of ZSTDv01_decompress() is an error +*/ +unsigned ZSTDv01_isError(size_t code); + + +/* ************************************* +* Advanced functions +***************************************/ +typedef struct ZSTDv01_Dctx_s ZSTDv01_Dctx; +ZSTDv01_Dctx* ZSTDv01_createDCtx(void); +size_t ZSTDv01_freeDCtx(ZSTDv01_Dctx* dctx); + +size_t ZSTDv01_decompressDCtx(void* ctx, + void* dst, size_t maxOriginalSize, + const void* src, size_t compressedSize); + +/* ************************************* +* Streaming functions +***************************************/ +size_t ZSTDv01_resetDCtx(ZSTDv01_Dctx* dctx); + +size_t ZSTDv01_nextSrcSizeToDecompress(ZSTDv01_Dctx* dctx); +size_t ZSTDv01_decompressContinue(ZSTDv01_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize); +/** + Use the above functions alternately. + ZSTDv01_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTDv01_decompressContinue(). + ZSTDv01_decompressContinue() will use previous data blocks during decompression if they are located immediately prior to the current block. + Result is the number of bytes regenerated within 'dst'. + It can be zero, which is not an error; it just means ZSTDv01_decompressContinue() has decoded some header. 
+*/ + +/* ************************************* +* Prefix - version detection +***************************************/ +#define ZSTDv01_magicNumber 0xFD2FB51E /* Big Endian version */ +#define ZSTDv01_magicNumberLE 0x1EB52FFD /* Little Endian version */ + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_V01_H_28739879432 */ diff --git a/deps/SZ/zstd/legacy/zstd_v02.c b/deps/SZ/zstd/legacy/zstd_v02.c new file mode 100644 index 0000000000000000000000000000000000000000..8bc0eceeda8fab391f378f51b0005eb3c9d66247 --- /dev/null +++ b/deps/SZ/zstd/legacy/zstd_v02.c @@ -0,0 +1,3483 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + +#include /* size_t, ptrdiff_t */ +#include "zstd_v02.h" +#include "error_private.h" + + +/****************************************** +* Compiler-specific +******************************************/ +#if defined(_MSC_VER) /* Visual Studio */ +# include /* _byteswap_ulong */ +# include /* _byteswap_* */ +#endif + + +/* ****************************************************************** + mem.h + low-level memory access routines + Copyright (C) 2013-2015, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy + - Public forum : https://groups.google.com/forum/#!forum/lz4c +****************************************************************** */ +#ifndef MEM_H_MODULE +#define MEM_H_MODULE + +#if defined (__cplusplus) +extern "C" { +#endif + +/****************************************** +* Includes +******************************************/ +#include /* size_t, ptrdiff_t */ +#include /* memcpy */ + + +/****************************************** +* Compiler-specific +******************************************/ +#if defined(__GNUC__) +# define MEM_STATIC static __attribute__((unused)) +#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +# define MEM_STATIC static inline +#elif defined(_MSC_VER) +# define MEM_STATIC static __inline +#else +# define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the 
relevant warning */ +#endif + + +/**************************************************************** +* Basic Types +*****************************************************************/ +#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +# include + typedef uint8_t BYTE; + typedef uint16_t U16; + typedef int16_t S16; + typedef uint32_t U32; + typedef int32_t S32; + typedef uint64_t U64; + typedef int64_t S64; +#else + typedef unsigned char BYTE; + typedef unsigned short U16; + typedef signed short S16; + typedef unsigned int U32; + typedef signed int S32; + typedef unsigned long long U64; + typedef signed long long S64; +#endif + + +/**************************************************************** +* Memory I/O +*****************************************************************/ +/* MEM_FORCE_MEMORY_ACCESS + * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. + * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. + * The below switch allow to select different access method for improved performance. + * Method 0 (default) : use `memcpy()`. Safe and portable. + * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). + * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. + * Method 2 : direct access. This method is portable but violate C standard. + * It can generate buggy code on targets generating assembly depending on alignment. + * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) + * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details. 
+ * Prefer these methods in priority order (0 > 1 > 2) + */ +#ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ +# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) +# define MEM_FORCE_MEMORY_ACCESS 2 +# elif (defined(__INTEL_COMPILER) && !defined(WIN32)) || \ + (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) )) +# define MEM_FORCE_MEMORY_ACCESS 1 +# endif +#endif + +MEM_STATIC unsigned MEM_32bits(void) { return sizeof(void*)==4; } +MEM_STATIC unsigned MEM_64bits(void) { return sizeof(void*)==8; } + +MEM_STATIC unsigned MEM_isLittleEndian(void) +{ + const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */ + return one.c[0]; +} + +#if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2) + +/* violates C standard on structure alignment. 
+Only use if no other choice to achieve best performance on target platform */ +MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; } +MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; } +MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; } + +MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; } + +#elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1) + +/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ +/* currently only defined for gcc and icc */ +typedef union { U16 u16; U32 u32; U64 u64; } __attribute__((packed)) unalign; + +MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign*)ptr)->u16; } +MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } +MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign*)ptr)->u64; } + +MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; } + +#else + +/* default method, safe and standard. 
+ can sometimes prove slower */ + +MEM_STATIC U16 MEM_read16(const void* memPtr) +{ + U16 val; memcpy(&val, memPtr, sizeof(val)); return val; +} + +MEM_STATIC U32 MEM_read32(const void* memPtr) +{ + U32 val; memcpy(&val, memPtr, sizeof(val)); return val; +} + +MEM_STATIC U64 MEM_read64(const void* memPtr) +{ + U64 val; memcpy(&val, memPtr, sizeof(val)); return val; +} + +MEM_STATIC void MEM_write16(void* memPtr, U16 value) +{ + memcpy(memPtr, &value, sizeof(value)); +} + +#endif // MEM_FORCE_MEMORY_ACCESS + + +MEM_STATIC U16 MEM_readLE16(const void* memPtr) +{ + if (MEM_isLittleEndian()) + return MEM_read16(memPtr); + else + { + const BYTE* p = (const BYTE*)memPtr; + return (U16)(p[0] + (p[1]<<8)); + } +} + +MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val) +{ + if (MEM_isLittleEndian()) + { + MEM_write16(memPtr, val); + } + else + { + BYTE* p = (BYTE*)memPtr; + p[0] = (BYTE)val; + p[1] = (BYTE)(val>>8); + } +} + +MEM_STATIC U32 MEM_readLE32(const void* memPtr) +{ + if (MEM_isLittleEndian()) + return MEM_read32(memPtr); + else + { + const BYTE* p = (const BYTE*)memPtr; + return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24)); + } +} + + +MEM_STATIC U64 MEM_readLE64(const void* memPtr) +{ + if (MEM_isLittleEndian()) + return MEM_read64(memPtr); + else + { + const BYTE* p = (const BYTE*)memPtr; + return (U64)((U64)p[0] + ((U64)p[1]<<8) + ((U64)p[2]<<16) + ((U64)p[3]<<24) + + ((U64)p[4]<<32) + ((U64)p[5]<<40) + ((U64)p[6]<<48) + ((U64)p[7]<<56)); + } +} + + +MEM_STATIC size_t MEM_readLEST(const void* memPtr) +{ + if (MEM_32bits()) + return (size_t)MEM_readLE32(memPtr); + else + return (size_t)MEM_readLE64(memPtr); +} + +#if defined (__cplusplus) +} +#endif + +#endif /* MEM_H_MODULE */ + + +/* ****************************************************************** + bitstream + Part of NewGen Entropy library + header file (to include) + Copyright (C) 2013-2015, Yann Collet. 
+ + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + - Public forum : https://groups.google.com/forum/#!forum/lz4c +****************************************************************** */ +#ifndef BITSTREAM_H_MODULE +#define BITSTREAM_H_MODULE + +#if defined (__cplusplus) +extern "C" { +#endif + + +/* +* This API consists of small unitary functions, which highly benefit from being inlined. +* Since link-time-optimization is not available for all compilers, +* these functions are defined into a .h to be included. 
+*/ + + +/********************************************** +* bitStream decompression API (read backward) +**********************************************/ +typedef struct +{ + size_t bitContainer; + unsigned bitsConsumed; + const char* ptr; + const char* start; +} BIT_DStream_t; + +typedef enum { BIT_DStream_unfinished = 0, + BIT_DStream_endOfBuffer = 1, + BIT_DStream_completed = 2, + BIT_DStream_overflow = 3 } BIT_DStream_status; /* result of BIT_reloadDStream() */ + /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */ + +MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize); +MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits); +MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD); +MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD); + + +/****************************************** +* unsafe API +******************************************/ +MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits); +/* faster, but works only if nbBits >= 1 */ + + + +/**************************************************************** +* Helper functions +****************************************************************/ +MEM_STATIC unsigned BIT_highbit32 (U32 val) +{ +# if defined(_MSC_VER) /* Visual */ + unsigned long r=0; + _BitScanReverse ( &r, val ); + return (unsigned) r; +# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */ + return 31 - __builtin_clz (val); +# else /* Software version */ + static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; + U32 v = val; + unsigned r; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + r = DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27]; + return r; +# endif +} + + + +/********************************************************** +* bitStream decoding 
+**********************************************************/ + +/*!BIT_initDStream +* Initialize a BIT_DStream_t. +* @bitD : a pointer to an already allocated BIT_DStream_t structure +* @srcBuffer must point at the beginning of a bitStream +* @srcSize must be the exact size of the bitStream +* @result : size of stream (== srcSize) or an errorCode if a problem is detected +*/ +MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize) +{ + if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); } + + if (srcSize >= sizeof(size_t)) /* normal case */ + { + U32 contain32; + bitD->start = (const char*)srcBuffer; + bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(size_t); + bitD->bitContainer = MEM_readLEST(bitD->ptr); + contain32 = ((const BYTE*)srcBuffer)[srcSize-1]; + if (contain32 == 0) return ERROR(GENERIC); /* endMark not present */ + bitD->bitsConsumed = 8 - BIT_highbit32(contain32); + } + else + { + U32 contain32; + bitD->start = (const char*)srcBuffer; + bitD->ptr = bitD->start; + bitD->bitContainer = *(const BYTE*)(bitD->start); + switch(srcSize) + { + case 7: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[6]) << (sizeof(size_t)*8 - 16); + case 6: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[5]) << (sizeof(size_t)*8 - 24); + case 5: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[4]) << (sizeof(size_t)*8 - 32); + case 4: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[3]) << 24; + case 3: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[2]) << 16; + case 2: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[1]) << 8; + default:; + } + contain32 = ((const BYTE*)srcBuffer)[srcSize-1]; + if (contain32 == 0) return ERROR(GENERIC); /* endMark not present */ + bitD->bitsConsumed = 8 - BIT_highbit32(contain32); + bitD->bitsConsumed += (U32)(sizeof(size_t) - srcSize)*8; + } + + return srcSize; +} + +MEM_STATIC size_t 
BIT_lookBits(BIT_DStream_t* bitD, U32 nbBits) +{ + const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1; + return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask); +} + +/*! BIT_lookBitsFast : +* unsafe version; only works only if nbBits >= 1 */ +MEM_STATIC size_t BIT_lookBitsFast(BIT_DStream_t* bitD, U32 nbBits) +{ + const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1; + return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask+1)-nbBits) & bitMask); +} + +MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits) +{ + bitD->bitsConsumed += nbBits; +} + +MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, U32 nbBits) +{ + size_t value = BIT_lookBits(bitD, nbBits); + BIT_skipBits(bitD, nbBits); + return value; +} + +/*!BIT_readBitsFast : +* unsafe version; only works only if nbBits >= 1 */ +MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits) +{ + size_t value = BIT_lookBitsFast(bitD, nbBits); + BIT_skipBits(bitD, nbBits); + return value; +} + +MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD) +{ + if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* should never happen */ + return BIT_DStream_overflow; + + if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer)) + { + bitD->ptr -= bitD->bitsConsumed >> 3; + bitD->bitsConsumed &= 7; + bitD->bitContainer = MEM_readLEST(bitD->ptr); + return BIT_DStream_unfinished; + } + if (bitD->ptr == bitD->start) + { + if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer; + return BIT_DStream_completed; + } + { + U32 nbBytes = bitD->bitsConsumed >> 3; + BIT_DStream_status result = BIT_DStream_unfinished; + if (bitD->ptr - nbBytes < bitD->start) + { + nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */ + result = BIT_DStream_endOfBuffer; + } + bitD->ptr -= nbBytes; + bitD->bitsConsumed -= nbBytes*8; + bitD->bitContainer = MEM_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD) 
*/ + return result; + } +} + +/*! BIT_endOfDStream +* @return Tells if DStream has reached its exact end +*/ +MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream) +{ + return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8)); +} + +#if defined (__cplusplus) +} +#endif + +#endif /* BITSTREAM_H_MODULE */ +/* ****************************************************************** + Error codes and messages + Copyright (C) 2013-2015, Yann Collet + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + You can contact the author at : + - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + - Public forum : https://groups.google.com/forum/#!forum/lz4c +****************************************************************** */ +#ifndef ERROR_H_MODULE +#define ERROR_H_MODULE + +#if defined (__cplusplus) +extern "C" { +#endif + + +/****************************************** +* Compiler-specific +******************************************/ +#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +# define ERR_STATIC static inline +#elif defined(_MSC_VER) +# define ERR_STATIC static __inline +#elif defined(__GNUC__) +# define ERR_STATIC static __attribute__((unused)) +#else +# define ERR_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */ +#endif + + +/****************************************** +* Error Management +******************************************/ +#define PREFIX(name) ZSTD_error_##name + +#define ERROR(name) (size_t)-PREFIX(name) + +#define ERROR_LIST(ITEM) \ + ITEM(PREFIX(No_Error)) ITEM(PREFIX(GENERIC)) \ + ITEM(PREFIX(dstSize_tooSmall)) ITEM(PREFIX(srcSize_wrong)) \ + ITEM(PREFIX(prefix_unknown)) ITEM(PREFIX(corruption_detected)) \ + ITEM(PREFIX(tableLog_tooLarge)) ITEM(PREFIX(maxSymbolValue_tooLarge)) ITEM(PREFIX(maxSymbolValue_tooSmall)) \ + ITEM(PREFIX(maxCode)) + +#define ERROR_GENERATE_ENUM(ENUM) ENUM, +typedef enum { ERROR_LIST(ERROR_GENERATE_ENUM) } ERR_codes; /* enum is exposed, to detect & handle specific errors; compare function result to -enum value */ + +#define ERROR_CONVERTTOSTRING(STRING) #STRING, +#define ERROR_GENERATE_STRING(EXPR) ERROR_CONVERTTOSTRING(EXPR) +static const char* ERR_strings[] = { ERROR_LIST(ERROR_GENERATE_STRING) }; + +ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); } + +ERR_STATIC const char* ERR_getErrorName(size_t code) +{ + static const char* codeError = "Unspecified 
error code"; + if (ERR_isError(code)) return ERR_strings[-(int)(code)]; + return codeError; +} + + +#if defined (__cplusplus) +} +#endif + +#endif /* ERROR_H_MODULE */ +/* +Constructor and Destructor of type FSE_CTable + Note that its size depends on 'tableLog' and 'maxSymbolValue' */ +typedef unsigned FSE_CTable; /* don't allocate that. It's just a way to be more restrictive than void* */ +typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */ + + +/* ****************************************************************** + FSE : Finite State Entropy coder + header file for static linking (only) + Copyright (C) 2013-2015, Yann Collet + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + - Public forum : https://groups.google.com/forum/#!forum/lz4c +****************************************************************** */ +#if defined (__cplusplus) +extern "C" { +#endif + + +/****************************************** +* Static allocation +******************************************/ +/* FSE buffer bounds */ +#define FSE_NCOUNTBOUND 512 +#define FSE_BLOCKBOUND(size) (size + (size>>7)) +#define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size)) /* Macro version, useful for static allocation */ + +/* You can statically allocate FSE CTable/DTable as a table of unsigned using below macro */ +#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1<<(maxTableLog-1)) + ((maxSymbolValue+1)*2)) +#define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<= 1 (otherwise, result will be corrupted) */ + + +/****************************************** +* Implementation of inline functions +******************************************/ + +/* decompression */ + +typedef struct { + U16 tableLog; + U16 fastMode; +} FSE_DTableHeader; /* sizeof U32 */ + +typedef struct +{ + unsigned short newState; + unsigned char symbol; + unsigned char nbBits; +} FSE_decode_t; /* size == U32 */ + +MEM_STATIC void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt) +{ + FSE_DTableHeader DTableH; + memcpy(&DTableH, dt, 
sizeof(DTableH)); + DStatePtr->state = BIT_readBits(bitD, DTableH.tableLog); + BIT_reloadDStream(bitD); + DStatePtr->table = dt + 1; +} + +MEM_STATIC BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD) +{ + const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + const U32 nbBits = DInfo.nbBits; + BYTE symbol = DInfo.symbol; + size_t lowBits = BIT_readBits(bitD, nbBits); + + DStatePtr->state = DInfo.newState + lowBits; + return symbol; +} + +MEM_STATIC BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD) +{ + const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + const U32 nbBits = DInfo.nbBits; + BYTE symbol = DInfo.symbol; + size_t lowBits = BIT_readBitsFast(bitD, nbBits); + + DStatePtr->state = DInfo.newState + lowBits; + return symbol; +} + +MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr) +{ + return DStatePtr->state == 0; +} + + +#if defined (__cplusplus) +} +#endif +/* ****************************************************************** + Huff0 : Huffman coder, part of New Generation Entropy library + header file for static linking (only) + Copyright (C) 2013-2015, Yann Collet + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + - Public forum : https://groups.google.com/forum/#!forum/lz4c +****************************************************************** */ + +#if defined (__cplusplus) +extern "C" { +#endif + +/****************************************** +* Static allocation macros +******************************************/ +/* Huff0 buffer bounds */ +#define HUF_CTABLEBOUND 129 +#define HUF_BLOCKBOUND(size) (size + (size>>8) + 8) /* only true if incompressible pre-filtered with fast heuristic */ +#define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size)) /* Macro version, useful for static allocation */ + +/* static allocation of Huff0's DTable */ +#define HUF_DTABLE_SIZE(maxTableLog) (1 + (1< /* size_t */ + + +/* ************************************* +* Version +***************************************/ +#define ZSTD_VERSION_MAJOR 0 /* for breaking interface changes */ +#define ZSTD_VERSION_MINOR 2 /* for new (non-breaking) interface capabilities */ +#define ZSTD_VERSION_RELEASE 2 /* for tweaks, bug-fixes, or development */ +#define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR 
*100 + ZSTD_VERSION_RELEASE) + + +/* ************************************* +* Advanced functions +***************************************/ +typedef struct ZSTD_CCtx_s ZSTD_CCtx; /* incomplete type */ + +#if defined (__cplusplus) +} +#endif +/* + zstd - standard compression library + Header File for static linking only + Copyright (C) 2014-2015, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - zstd source repository : https://github.com/Cyan4973/zstd + - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c +*/ + +/* The objects defined into this file should be considered experimental. 
+ * They are not labelled stable, as their prototype may change in the future. + * You can use them for tests, provide feedback, or if you can endure risk of future changes. + */ + +#if defined (__cplusplus) +extern "C" { +#endif + +/* ************************************* +* Streaming functions +***************************************/ + +typedef struct ZSTD_DCtx_s ZSTD_DCtx; + +/* + Use above functions alternatively. + ZSTD_nextSrcSizeToDecompress() tells how much bytes to provide as 'srcSize' to ZSTD_decompressContinue(). + ZSTD_decompressContinue() will use previous data blocks to improve compression if they are located prior to current block. + Result is the number of bytes regenerated within 'dst'. + It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header. +*/ + +/* ************************************* +* Prefix - version detection +***************************************/ +#define ZSTD_magicNumber 0xFD2FB522 /* v0.2 (current)*/ + + +#if defined (__cplusplus) +} +#endif +/* ****************************************************************** + FSE : Finite State Entropy coder + Copyright (C) 2013-2015, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy + - Public forum : https://groups.google.com/forum/#!forum/lz4c +****************************************************************** */ + +#ifndef FSE_COMMONDEFS_ONLY + +/**************************************************************** +* Tuning parameters +****************************************************************/ +/* MEMORY_USAGE : +* Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.) +* Increasing memory usage improves compression ratio +* Reduced memory usage can improve speed, due to cache effect +* Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */ +#define FSE_MAX_MEMORY_USAGE 14 +#define FSE_DEFAULT_MEMORY_USAGE 13 + +/* FSE_MAX_SYMBOL_VALUE : +* Maximum symbol value authorized. 
+* Required for proper stack allocation */ +#define FSE_MAX_SYMBOL_VALUE 255 + + +/**************************************************************** +* template functions type & suffix +****************************************************************/ +#define FSE_FUNCTION_TYPE BYTE +#define FSE_FUNCTION_EXTENSION + + +/**************************************************************** +* Byte symbol type +****************************************************************/ +#endif /* !FSE_COMMONDEFS_ONLY */ + + +/**************************************************************** +* Compiler specifics +****************************************************************/ +#ifdef _MSC_VER /* Visual Studio */ +# define FORCE_INLINE static __forceinline +# include /* For Visual 2005 */ +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +# pragma warning(disable : 4214) /* disable: C4214: non-int bitfields */ +#else +# if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ +# ifdef __GNUC__ +# define FORCE_INLINE static inline __attribute__((always_inline)) +# else +# define FORCE_INLINE static inline +# endif +# else +# define FORCE_INLINE static +# endif /* __STDC_VERSION__ */ +#endif + + +/**************************************************************** +* Includes +****************************************************************/ +#include /* malloc, free, qsort */ +#include /* memcpy, memset */ +#include /* printf (debug) */ + +/**************************************************************** +* Constants +*****************************************************************/ +#define FSE_MAX_TABLELOG (FSE_MAX_MEMORY_USAGE-2) +#define FSE_MAX_TABLESIZE (1U< FSE_TABLELOG_ABSOLUTE_MAX +#error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported" +#endif + + +/**************************************************************** +* Error Management 
+****************************************************************/ +#define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */ + + +/**************************************************************** +* Complex types +****************************************************************/ +typedef U32 DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)]; + + +/**************************************************************** +* Templates +****************************************************************/ +/* + designed to be included + for type-specific functions (template emulation in C) + Objective is to write these functions only once, for improved maintenance +*/ + +/* safety checks */ +#ifndef FSE_FUNCTION_EXTENSION +# error "FSE_FUNCTION_EXTENSION must be defined" +#endif +#ifndef FSE_FUNCTION_TYPE +# error "FSE_FUNCTION_TYPE must be defined" +#endif + +/* Function names */ +#define FSE_CAT(X,Y) X##Y +#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y) +#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y) + + +/* Function templates */ + +#define FSE_DECODE_TYPE FSE_decode_t + +static U32 FSE_tableStep(U32 tableSize) { return (tableSize>>1) + (tableSize>>3) + 3; } + +static size_t FSE_buildDTable +(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) +{ + void* ptr = dt+1; + FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*)ptr; + FSE_DTableHeader DTableH; + const U32 tableSize = 1 << tableLog; + const U32 tableMask = tableSize-1; + const U32 step = FSE_tableStep(tableSize); + U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1]; + U32 position = 0; + U32 highThreshold = tableSize-1; + const S16 largeLimit= (S16)(1 << (tableLog-1)); + U32 noLarge = 1; + U32 s; + + /* Sanity Checks */ + if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge); + if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); + + /* Init, lay down lowprob symbols */ + DTableH.tableLog = 
(U16)tableLog; + for (s=0; s<=maxSymbolValue; s++) + { + if (normalizedCounter[s]==-1) + { + tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s; + symbolNext[s] = 1; + } + else + { + if (normalizedCounter[s] >= largeLimit) noLarge=0; + symbolNext[s] = normalizedCounter[s]; + } + } + + /* Spread symbols */ + for (s=0; s<=maxSymbolValue; s++) + { + int i; + for (i=0; i highThreshold) position = (position + step) & tableMask; /* lowprob area */ + } + } + + if (position!=0) return ERROR(GENERIC); /* position must reach all cells once, otherwise normalizedCounter is incorrect */ + + /* Build Decoding table */ + { + U32 i; + for (i=0; i FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge); + bitStream >>= 4; + bitCount = 4; + *tableLogPtr = nbBits; + remaining = (1<1) && (charnum<=*maxSVPtr)) + { + if (previous0) + { + unsigned n0 = charnum; + while ((bitStream & 0xFFFF) == 0xFFFF) + { + n0+=24; + if (ip < iend-5) + { + ip+=2; + bitStream = MEM_readLE32(ip) >> bitCount; + } + else + { + bitStream >>= 16; + bitCount+=16; + } + } + while ((bitStream & 3) == 3) + { + n0+=3; + bitStream>>=2; + bitCount+=2; + } + n0 += bitStream & 3; + bitCount += 2; + if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall); + while (charnum < n0) normalizedCounter[charnum++] = 0; + if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) + { + ip += bitCount>>3; + bitCount &= 7; + bitStream = MEM_readLE32(ip) >> bitCount; + } + else + bitStream >>= 2; + } + { + const short max = (short)((2*threshold-1)-remaining); + short count; + + if ((bitStream & (threshold-1)) < (U32)max) + { + count = (short)(bitStream & (threshold-1)); + bitCount += nbBits-1; + } + else + { + count = (short)(bitStream & (2*threshold-1)); + if (count >= threshold) count -= max; + bitCount += nbBits; + } + + count--; /* extra accuracy */ + remaining -= FSE_abs(count); + normalizedCounter[charnum++] = count; + previous0 = !count; + while (remaining < threshold) + { + nbBits--; + threshold >>= 1; + } + + { + 
if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) + { + ip += bitCount>>3; + bitCount &= 7; + } + else + { + bitCount -= (int)(8 * (iend - 4 - ip)); + ip = iend - 4; + } + bitStream = MEM_readLE32(ip) >> (bitCount & 31); + } + } + } + if (remaining != 1) return ERROR(GENERIC); + *maxSVPtr = charnum-1; + + ip += (bitCount+7)>>3; + if ((size_t)(ip-istart) > hbSize) return ERROR(srcSize_wrong); + return ip-istart; +} + + +/********************************************************* +* Decompression (Byte symbols) +*********************************************************/ +static size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue) +{ + void* ptr = dt; + FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr; + FSE_decode_t* const cell = (FSE_decode_t*)(ptr) + 1; /* because dt is unsigned */ + + DTableH->tableLog = 0; + DTableH->fastMode = 0; + + cell->newState = 0; + cell->symbol = symbolValue; + cell->nbBits = 0; + + return 0; +} + + +static size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits) +{ + void* ptr = dt; + FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr; + FSE_decode_t* const dinfo = (FSE_decode_t*)(ptr) + 1; /* because dt is unsigned */ + const unsigned tableSize = 1 << nbBits; + const unsigned tableMask = tableSize - 1; + const unsigned maxSymbolValue = tableMask; + unsigned s; + + /* Sanity checks */ + if (nbBits < 1) return ERROR(GENERIC); /* min size */ + + /* Build Decoding Table */ + DTableH->tableLog = (U16)nbBits; + DTableH->fastMode = 1; + for (s=0; s<=maxSymbolValue; s++) + { + dinfo[s].newState = 0; + dinfo[s].symbol = (BYTE)s; + dinfo[s].nbBits = (BYTE)nbBits; + } + + return 0; +} + +FORCE_INLINE size_t FSE_decompress_usingDTable_generic( + void* dst, size_t maxDstSize, + const void* cSrc, size_t cSrcSize, + const FSE_DTable* dt, const unsigned fast) +{ + BYTE* const ostart = (BYTE*) dst; + BYTE* op = ostart; + BYTE* const omax = op + maxDstSize; + BYTE* const olimit = omax-3; + + BIT_DStream_t bitD; + 
FSE_DState_t state1; + FSE_DState_t state2; + size_t errorCode; + + /* Init */ + errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize); /* replaced last arg by maxCompressed Size */ + if (FSE_isError(errorCode)) return errorCode; + + FSE_initDState(&state1, &bitD, dt); + FSE_initDState(&state2, &bitD, dt); + +#define FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD) + + /* 4 symbols per loop */ + for ( ; (BIT_reloadDStream(&bitD)==BIT_DStream_unfinished) && (op sizeof(bitD.bitContainer)*8) /* This test must be static */ + BIT_reloadDStream(&bitD); + + op[1] = FSE_GETSYMBOL(&state2); + + if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */ + { if (BIT_reloadDStream(&bitD) > BIT_DStream_unfinished) { op+=2; break; } } + + op[2] = FSE_GETSYMBOL(&state1); + + if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */ + BIT_reloadDStream(&bitD); + + op[3] = FSE_GETSYMBOL(&state2); + } + + /* tail */ + /* note : BIT_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly BIT_DStream_completed */ + while (1) + { + if ( (BIT_reloadDStream(&bitD)>BIT_DStream_completed) || (op==omax) || (BIT_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state1))) ) + break; + + *op++ = FSE_GETSYMBOL(&state1); + + if ( (BIT_reloadDStream(&bitD)>BIT_DStream_completed) || (op==omax) || (BIT_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state2))) ) + break; + + *op++ = FSE_GETSYMBOL(&state2); + } + + /* end ? 
*/ + if (BIT_endOfDStream(&bitD) && FSE_endOfDState(&state1) && FSE_endOfDState(&state2)) + return op-ostart; + + if (op==omax) return ERROR(dstSize_tooSmall); /* dst buffer is full, but cSrc unfinished */ + + return ERROR(corruption_detected); +} + + +static size_t FSE_decompress_usingDTable(void* dst, size_t originalSize, + const void* cSrc, size_t cSrcSize, + const FSE_DTable* dt) +{ + FSE_DTableHeader DTableH; + memcpy(&DTableH, dt, sizeof(DTableH)); + + /* select fast mode (static) */ + if (DTableH.fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1); + return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0); +} + + +static size_t FSE_decompress(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize) +{ + const BYTE* const istart = (const BYTE*)cSrc; + const BYTE* ip = istart; + short counting[FSE_MAX_SYMBOL_VALUE+1]; + DTable_max_t dt; /* Static analyzer seems unable to understand this table will be properly initialized later */ + unsigned tableLog; + unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE; + size_t errorCode; + + if (cSrcSize<2) return ERROR(srcSize_wrong); /* too small input size */ + + /* normal FSE decoding mode */ + errorCode = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize); + if (FSE_isError(errorCode)) return errorCode; + if (errorCode >= cSrcSize) return ERROR(srcSize_wrong); /* too small input size */ + ip += errorCode; + cSrcSize -= errorCode; + + errorCode = FSE_buildDTable (dt, counting, maxSymbolValue, tableLog); + if (FSE_isError(errorCode)) return errorCode; + + /* always return, even if it is an error code */ + return FSE_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, dt); +} + + + +#endif /* FSE_COMMONDEFS_ONLY */ +/* ****************************************************************** + Huff0 : Huffman coder, part of New Generation Entropy library + Copyright (C) 2013-2015, Yann Collet. 
+ + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + You can contact the author at : + - FSE+Huff0 source repository : https://github.com/Cyan4973/FiniteStateEntropy + - Public forum : https://groups.google.com/forum/#!forum/lz4c +****************************************************************** */ + +/**************************************************************** +* Compiler specifics +****************************************************************/ +#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +/* inline is defined */ +#elif defined(_MSC_VER) +# define inline __inline +#else +# define inline /* disable inline */ +#endif + + +#ifdef _MSC_VER /* Visual Studio */ +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +#endif + + +/**************************************************************** +* Includes +****************************************************************/ +#include /* malloc, free, qsort */ +#include /* memcpy, memset */ +#include /* printf (debug) */ + +/**************************************************************** +* Error Management +****************************************************************/ +#define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */ + + +/****************************************** +* Helper functions +******************************************/ +static unsigned HUF_isError(size_t code) { return ERR_isError(code); } + +#define HUF_ABSOLUTEMAX_TABLELOG 16 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */ +#define HUF_MAX_TABLELOG 12 /* max configured tableLog (for static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */ +#define HUF_DEFAULT_TABLELOG HUF_MAX_TABLELOG /* tableLog by default, when not specified */ +#define HUF_MAX_SYMBOL_VALUE 255 +#if (HUF_MAX_TABLELOG > HUF_ABSOLUTEMAX_TABLELOG) +# error "HUF_MAX_TABLELOG is too large !" 
+#endif + + + +/********************************************************* +* Huff0 : Huffman block decompression +*********************************************************/ +typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX2; /* single-symbol decoding */ + +typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX4; /* double-symbols decoding */ + +typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t; + +/*! HUF_readStats + Read compact Huffman tree, saved by HUF_writeCTable + @huffWeight : destination buffer + @return : size read from `src` +*/ +static size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, + U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize) +{ + U32 weightTotal; + U32 tableLog; + const BYTE* ip = (const BYTE*) src; + size_t iSize; + size_t oSize; + U32 n; + + if (!srcSize) return ERROR(srcSize_wrong); + iSize = ip[0]; + //memset(huffWeight, 0, hwSize); /* is not necessary, even though some analyzer complain ... */ + + if (iSize >= 128) /* special header */ + { + if (iSize >= (242)) /* RLE */ + { + static int l[14] = { 1, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128 }; + oSize = l[iSize-242]; + memset(huffWeight, 1, hwSize); + iSize = 0; + } + else /* Incompressible */ + { + oSize = iSize - 127; + iSize = ((oSize+1)/2); + if (iSize+1 > srcSize) return ERROR(srcSize_wrong); + if (oSize >= hwSize) return ERROR(corruption_detected); + ip += 1; + for (n=0; n> 4; + huffWeight[n+1] = ip[n/2] & 15; + } + } + } + else /* header compressed with FSE (normal case) */ + { + if (iSize+1 > srcSize) return ERROR(srcSize_wrong); + oSize = FSE_decompress(huffWeight, hwSize-1, ip+1, iSize); /* max (hwSize-1) values decoded, as last one is implied */ + if (FSE_isError(oSize)) return oSize; + } + + /* collect weight stats */ + memset(rankStats, 0, (HUF_ABSOLUTEMAX_TABLELOG + 1) * sizeof(U32)); + weightTotal = 0; + for (n=0; n= HUF_ABSOLUTEMAX_TABLELOG) return ERROR(corruption_detected); + 
rankStats[huffWeight[n]]++; + weightTotal += (1 << huffWeight[n]) >> 1; + } + if (weightTotal == 0) return ERROR(corruption_detected); + + /* get last non-null symbol weight (implied, total must be 2^n) */ + tableLog = BIT_highbit32(weightTotal) + 1; + if (tableLog > HUF_ABSOLUTEMAX_TABLELOG) return ERROR(corruption_detected); + { + U32 total = 1 << tableLog; + U32 rest = total - weightTotal; + U32 verif = 1 << BIT_highbit32(rest); + U32 lastWeight = BIT_highbit32(rest) + 1; + if (verif != rest) return ERROR(corruption_detected); /* last value must be a clean power of 2 */ + huffWeight[oSize] = (BYTE)lastWeight; + rankStats[lastWeight]++; + } + + /* check tree construction validity */ + if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected); /* by construction : at least 2 elts of rank 1, must be even */ + + /* results */ + *nbSymbolsPtr = (U32)(oSize+1); + *tableLogPtr = tableLog; + return iSize+1; +} + + +/**************************/ +/* single-symbol decoding */ +/**************************/ + +static size_t HUF_readDTableX2 (U16* DTable, const void* src, size_t srcSize) +{ + BYTE huffWeight[HUF_MAX_SYMBOL_VALUE + 1]; + U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1]; /* large enough for values from 0 to 16 */ + U32 tableLog = 0; + const BYTE* ip = (const BYTE*) src; + size_t iSize = ip[0]; + U32 nbSymbols = 0; + U32 n; + U32 nextRankStart; + void* ptr = DTable+1; + HUF_DEltX2* const dt = (HUF_DEltX2*)ptr; + + HUF_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(U16)); /* if compilation fails here, assertion is false */ + //memset(huffWeight, 0, sizeof(huffWeight)); /* is not necessary, even though some analyzer complain ... 
*/ + + iSize = HUF_readStats(huffWeight, HUF_MAX_SYMBOL_VALUE + 1, rankVal, &nbSymbols, &tableLog, src, srcSize); + if (HUF_isError(iSize)) return iSize; + + /* check result */ + if (tableLog > DTable[0]) return ERROR(tableLog_tooLarge); /* DTable is too small */ + DTable[0] = (U16)tableLog; /* maybe should separate sizeof DTable, as allocated, from used size of DTable, in case of DTable re-use */ + + /* Prepare ranks */ + nextRankStart = 0; + for (n=1; n<=tableLog; n++) + { + U32 current = nextRankStart; + nextRankStart += (rankVal[n] << (n-1)); + rankVal[n] = current; + } + + /* fill DTable */ + for (n=0; n> 1; + U32 i; + HUF_DEltX2 D; + D.byte = (BYTE)n; D.nbBits = (BYTE)(tableLog + 1 - w); + for (i = rankVal[w]; i < rankVal[w] + length; i++) + dt[i] = D; + rankVal[w] += length; + } + + return iSize; +} + +static BYTE HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, const U32 dtLog) +{ + const size_t val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */ + const BYTE c = dt[val].byte; + BIT_skipBits(Dstream, dt[val].nbBits); + return c; +} + +#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \ + *ptr++ = HUF_decodeSymbolX2(DStreamPtr, dt, dtLog) + +#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \ + if (MEM_64bits() || (HUF_MAX_TABLELOG<=12)) \ + HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) + +#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \ + if (MEM_64bits()) \ + HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) + +static inline size_t HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX2* const dt, const U32 dtLog) +{ + BYTE* const pStart = p; + + /* up to 4 symbols at a time */ + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd-4)) + { + HUF_DECODE_SYMBOLX2_2(p, bitDPtr); + HUF_DECODE_SYMBOLX2_1(p, bitDPtr); + HUF_DECODE_SYMBOLX2_2(p, bitDPtr); + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); + } + + /* closer to the end */ + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p < pEnd)) 
+ HUF_DECODE_SYMBOLX2_0(p, bitDPtr); + + /* no more data to retrieve from bitstream, hence no need to reload */ + while (p < pEnd) + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); + + return pEnd-pStart; +} + + +static size_t HUF_decompress4X2_usingDTable( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const U16* DTable) +{ + if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ + + { + const BYTE* const istart = (const BYTE*) cSrc; + BYTE* const ostart = (BYTE*) dst; + BYTE* const oend = ostart + dstSize; + + const void* ptr = DTable; + const HUF_DEltX2* const dt = ((const HUF_DEltX2*)ptr) +1; + const U32 dtLog = DTable[0]; + size_t errorCode; + + /* Init */ + BIT_DStream_t bitD1; + BIT_DStream_t bitD2; + BIT_DStream_t bitD3; + BIT_DStream_t bitD4; + const size_t length1 = MEM_readLE16(istart); + const size_t length2 = MEM_readLE16(istart+2); + const size_t length3 = MEM_readLE16(istart+4); + size_t length4; + const BYTE* const istart1 = istart + 6; /* jumpTable */ + const BYTE* const istart2 = istart1 + length1; + const BYTE* const istart3 = istart2 + length2; + const BYTE* const istart4 = istart3 + length3; + const size_t segmentSize = (dstSize+3) / 4; + BYTE* const opStart2 = ostart + segmentSize; + BYTE* const opStart3 = opStart2 + segmentSize; + BYTE* const opStart4 = opStart3 + segmentSize; + BYTE* op1 = ostart; + BYTE* op2 = opStart2; + BYTE* op3 = opStart3; + BYTE* op4 = opStart4; + U32 endSignal; + + length4 = cSrcSize - (length1 + length2 + length3 + 6); + if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ + errorCode = BIT_initDStream(&bitD1, istart1, length1); + if (HUF_isError(errorCode)) return errorCode; + errorCode = BIT_initDStream(&bitD2, istart2, length2); + if (HUF_isError(errorCode)) return errorCode; + errorCode = BIT_initDStream(&bitD3, istart3, length3); + if (HUF_isError(errorCode)) return errorCode; + errorCode = BIT_initDStream(&bitD4, istart4, 
length4); + if (HUF_isError(errorCode)) return errorCode; + + /* 16-32 symbols per loop (4-8 symbols per stream) */ + endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); + for ( ; (endSignal==BIT_DStream_unfinished) && (op4<(oend-7)) ; ) + { + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_1(op1, &bitD1); + HUF_DECODE_SYMBOLX2_1(op2, &bitD2); + HUF_DECODE_SYMBOLX2_1(op3, &bitD3); + HUF_DECODE_SYMBOLX2_1(op4, &bitD4); + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_0(op1, &bitD1); + HUF_DECODE_SYMBOLX2_0(op2, &bitD2); + HUF_DECODE_SYMBOLX2_0(op3, &bitD3); + HUF_DECODE_SYMBOLX2_0(op4, &bitD4); + + endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); + } + + /* check corruption */ + if (op1 > opStart2) return ERROR(corruption_detected); + if (op2 > opStart3) return ERROR(corruption_detected); + if (op3 > opStart4) return ERROR(corruption_detected); + /* note : op4 supposed already verified within main loop */ + + /* finish bitStreams one by one */ + HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog); + HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog); + HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog); + HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog); + + /* check */ + endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); + if (!endSignal) return ERROR(corruption_detected); + + /* decoded size */ + return dstSize; + } +} + + +static size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_MAX_TABLELOG); + const BYTE* ip = (const 
BYTE*) cSrc; + size_t errorCode; + + errorCode = HUF_readDTableX2 (DTable, cSrc, cSrcSize); + if (HUF_isError(errorCode)) return errorCode; + if (errorCode >= cSrcSize) return ERROR(srcSize_wrong); + ip += errorCode; + cSrcSize -= errorCode; + + return HUF_decompress4X2_usingDTable (dst, dstSize, ip, cSrcSize, DTable); +} + + +/***************************/ +/* double-symbols decoding */ +/***************************/ + +static void HUF_fillDTableX4Level2(HUF_DEltX4* DTable, U32 sizeLog, const U32 consumed, + const U32* rankValOrigin, const int minWeight, + const sortedSymbol_t* sortedSymbols, const U32 sortedListSize, + U32 nbBitsBaseline, U16 baseSeq) +{ + HUF_DEltX4 DElt; + U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1]; + U32 s; + + /* get pre-calculated rankVal */ + memcpy(rankVal, rankValOrigin, sizeof(rankVal)); + + /* fill skipped values */ + if (minWeight>1) + { + U32 i, skipSize = rankVal[minWeight]; + MEM_writeLE16(&(DElt.sequence), baseSeq); + DElt.nbBits = (BYTE)(consumed); + DElt.length = 1; + for (i = 0; i < skipSize; i++) + DTable[i] = DElt; + } + + /* fill DTable */ + for (s=0; s= 1 */ + + rankVal[weight] += length; + } +} + +typedef U32 rankVal_t[HUF_ABSOLUTEMAX_TABLELOG][HUF_ABSOLUTEMAX_TABLELOG + 1]; + +static void HUF_fillDTableX4(HUF_DEltX4* DTable, const U32 targetLog, + const sortedSymbol_t* sortedList, const U32 sortedListSize, + const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight, + const U32 nbBitsBaseline) +{ + U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1]; + const int scaleLog = nbBitsBaseline - targetLog; /* note : targetLog >= srcLog, hence scaleLog <= 1 */ + const U32 minBits = nbBitsBaseline - maxWeight; + U32 s; + + memcpy(rankVal, rankValOrigin, sizeof(rankVal)); + + /* fill DTable */ + for (s=0; s= minBits) /* enough room for a second symbol */ + { + U32 sortedRank; + int minWeight = nbBits + scaleLog; + if (minWeight < 1) minWeight = 1; + sortedRank = rankStart[minWeight]; + HUF_fillDTableX4Level2(DTable+start, 
targetLog-nbBits, nbBits, + rankValOrigin[nbBits], minWeight, + sortedList+sortedRank, sortedListSize-sortedRank, + nbBitsBaseline, symbol); + } + else + { + U32 i; + const U32 end = start + length; + HUF_DEltX4 DElt; + + MEM_writeLE16(&(DElt.sequence), symbol); + DElt.nbBits = (BYTE)(nbBits); + DElt.length = 1; + for (i = start; i < end; i++) + DTable[i] = DElt; + } + rankVal[weight] += length; + } +} + +static size_t HUF_readDTableX4 (U32* DTable, const void* src, size_t srcSize) +{ + BYTE weightList[HUF_MAX_SYMBOL_VALUE + 1]; + sortedSymbol_t sortedSymbol[HUF_MAX_SYMBOL_VALUE + 1]; + U32 rankStats[HUF_ABSOLUTEMAX_TABLELOG + 1] = { 0 }; + U32 rankStart0[HUF_ABSOLUTEMAX_TABLELOG + 2] = { 0 }; + U32* const rankStart = rankStart0+1; + rankVal_t rankVal; + U32 tableLog, maxW, sizeOfSort, nbSymbols; + const U32 memLog = DTable[0]; + const BYTE* ip = (const BYTE*) src; + size_t iSize = ip[0]; + void* ptr = DTable; + HUF_DEltX4* const dt = ((HUF_DEltX4*)ptr) + 1; + + HUF_STATIC_ASSERT(sizeof(HUF_DEltX4) == sizeof(U32)); /* if compilation fails here, assertion is false */ + if (memLog > HUF_ABSOLUTEMAX_TABLELOG) return ERROR(tableLog_tooLarge); + //memset(weightList, 0, sizeof(weightList)); /* is not necessary, even though some analyzer complain ... 
*/ + + iSize = HUF_readStats(weightList, HUF_MAX_SYMBOL_VALUE + 1, rankStats, &nbSymbols, &tableLog, src, srcSize); + if (HUF_isError(iSize)) return iSize; + + /* check result */ + if (tableLog > memLog) return ERROR(tableLog_tooLarge); /* DTable can't fit code depth */ + + /* find maxWeight */ + for (maxW = tableLog; rankStats[maxW]==0; maxW--) + {if (!maxW) return ERROR(GENERIC); } /* necessarily finds a solution before maxW==0 */ + + /* Get start index of each weight */ + { + U32 w, nextRankStart = 0; + for (w=1; w<=maxW; w++) + { + U32 current = nextRankStart; + nextRankStart += rankStats[w]; + rankStart[w] = current; + } + rankStart[0] = nextRankStart; /* put all 0w symbols at the end of sorted list*/ + sizeOfSort = nextRankStart; + } + + /* sort symbols by weight */ + { + U32 s; + for (s=0; s> consumed; + } + } + } + + HUF_fillDTableX4(dt, memLog, + sortedSymbol, sizeOfSort, + rankStart0, rankVal, maxW, + tableLog+1); + + return iSize; +} + + +static U32 HUF_decodeSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog) +{ + const size_t val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */ + memcpy(op, dt+val, 2); + BIT_skipBits(DStream, dt[val].nbBits); + return dt[val].length; +} + +static U32 HUF_decodeLastSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog) +{ + const size_t val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */ + memcpy(op, dt+val, 1); + if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits); + else + { + if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) + { + BIT_skipBits(DStream, dt[val].nbBits); + if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8)) + DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8); /* ugly hack; works only because it's the last symbol. 
Note : can't easily extract nbBits from just this symbol */ + } + } + return 1; +} + + +#define HUF_DECODE_SYMBOLX4_0(ptr, DStreamPtr) \ + ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog) + +#define HUF_DECODE_SYMBOLX4_1(ptr, DStreamPtr) \ + if (MEM_64bits() || (HUF_MAX_TABLELOG<=12)) \ + ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog) + +#define HUF_DECODE_SYMBOLX4_2(ptr, DStreamPtr) \ + if (MEM_64bits()) \ + ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog) + +static inline size_t HUF_decodeStreamX4(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, const HUF_DEltX4* const dt, const U32 dtLog) +{ + BYTE* const pStart = p; + + /* up to 8 symbols at a time */ + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p < pEnd-7)) + { + HUF_DECODE_SYMBOLX4_2(p, bitDPtr); + HUF_DECODE_SYMBOLX4_1(p, bitDPtr); + HUF_DECODE_SYMBOLX4_2(p, bitDPtr); + HUF_DECODE_SYMBOLX4_0(p, bitDPtr); + } + + /* closer to the end */ + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd-2)) + HUF_DECODE_SYMBOLX4_0(p, bitDPtr); + + while (p <= pEnd-2) + HUF_DECODE_SYMBOLX4_0(p, bitDPtr); /* no need to reload : reached the end of DStream */ + + if (p < pEnd) + p += HUF_decodeLastSymbolX4(p, bitDPtr, dt, dtLog); + + return p-pStart; +} + + + +static size_t HUF_decompress4X4_usingDTable( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const U32* DTable) +{ + if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ + + { + const BYTE* const istart = (const BYTE*) cSrc; + BYTE* const ostart = (BYTE*) dst; + BYTE* const oend = ostart + dstSize; + + const void* ptr = DTable; + const HUF_DEltX4* const dt = ((const HUF_DEltX4*)ptr) +1; + const U32 dtLog = DTable[0]; + size_t errorCode; + + /* Init */ + BIT_DStream_t bitD1; + BIT_DStream_t bitD2; + BIT_DStream_t bitD3; + BIT_DStream_t bitD4; + const size_t length1 = MEM_readLE16(istart); + const size_t length2 = 
MEM_readLE16(istart+2); + const size_t length3 = MEM_readLE16(istart+4); + size_t length4; + const BYTE* const istart1 = istart + 6; /* jumpTable */ + const BYTE* const istart2 = istart1 + length1; + const BYTE* const istart3 = istart2 + length2; + const BYTE* const istart4 = istart3 + length3; + const size_t segmentSize = (dstSize+3) / 4; + BYTE* const opStart2 = ostart + segmentSize; + BYTE* const opStart3 = opStart2 + segmentSize; + BYTE* const opStart4 = opStart3 + segmentSize; + BYTE* op1 = ostart; + BYTE* op2 = opStart2; + BYTE* op3 = opStart3; + BYTE* op4 = opStart4; + U32 endSignal; + + length4 = cSrcSize - (length1 + length2 + length3 + 6); + if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ + errorCode = BIT_initDStream(&bitD1, istart1, length1); + if (HUF_isError(errorCode)) return errorCode; + errorCode = BIT_initDStream(&bitD2, istart2, length2); + if (HUF_isError(errorCode)) return errorCode; + errorCode = BIT_initDStream(&bitD3, istart3, length3); + if (HUF_isError(errorCode)) return errorCode; + errorCode = BIT_initDStream(&bitD4, istart4, length4); + if (HUF_isError(errorCode)) return errorCode; + + /* 16-32 symbols per loop (4-8 symbols per stream) */ + endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); + for ( ; (endSignal==BIT_DStream_unfinished) && (op4<(oend-7)) ; ) + { + HUF_DECODE_SYMBOLX4_2(op1, &bitD1); + HUF_DECODE_SYMBOLX4_2(op2, &bitD2); + HUF_DECODE_SYMBOLX4_2(op3, &bitD3); + HUF_DECODE_SYMBOLX4_2(op4, &bitD4); + HUF_DECODE_SYMBOLX4_1(op1, &bitD1); + HUF_DECODE_SYMBOLX4_1(op2, &bitD2); + HUF_DECODE_SYMBOLX4_1(op3, &bitD3); + HUF_DECODE_SYMBOLX4_1(op4, &bitD4); + HUF_DECODE_SYMBOLX4_2(op1, &bitD1); + HUF_DECODE_SYMBOLX4_2(op2, &bitD2); + HUF_DECODE_SYMBOLX4_2(op3, &bitD3); + HUF_DECODE_SYMBOLX4_2(op4, &bitD4); + HUF_DECODE_SYMBOLX4_0(op1, &bitD1); + HUF_DECODE_SYMBOLX4_0(op2, &bitD2); + HUF_DECODE_SYMBOLX4_0(op3, &bitD3); + 
HUF_DECODE_SYMBOLX4_0(op4, &bitD4); + + endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); + } + + /* check corruption */ + if (op1 > opStart2) return ERROR(corruption_detected); + if (op2 > opStart3) return ERROR(corruption_detected); + if (op3 > opStart4) return ERROR(corruption_detected); + /* note : op4 supposed already verified within main loop */ + + /* finish bitStreams one by one */ + HUF_decodeStreamX4(op1, &bitD1, opStart2, dt, dtLog); + HUF_decodeStreamX4(op2, &bitD2, opStart3, dt, dtLog); + HUF_decodeStreamX4(op3, &bitD3, opStart4, dt, dtLog); + HUF_decodeStreamX4(op4, &bitD4, oend, dt, dtLog); + + /* check */ + endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); + if (!endSignal) return ERROR(corruption_detected); + + /* decoded size */ + return dstSize; + } +} + + +static size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_MAX_TABLELOG); + const BYTE* ip = (const BYTE*) cSrc; + + size_t hSize = HUF_readDTableX4 (DTable, cSrc, cSrcSize); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; + cSrcSize -= hSize; + + return HUF_decompress4X4_usingDTable (dst, dstSize, ip, cSrcSize, DTable); +} + + +/**********************************/ +/* quad-symbol decoding */ +/**********************************/ +typedef struct { BYTE nbBits; BYTE nbBytes; } HUF_DDescX6; +typedef union { BYTE byte[4]; U32 sequence; } HUF_DSeqX6; + +/* recursive, up to level 3; may benefit from