diff --git a/cmake/external/gzstream.cmake b/cmake/external/gzstream.cmake index 59d8e932459dd49017cb32b27e5f1919272fe387..3e36ef7ae205bbf85f345d55456309cc05a58fbd 100644 --- a/cmake/external/gzstream.cmake +++ b/cmake/external/gzstream.cmake @@ -27,13 +27,14 @@ SET(GZSTREAM_INCLUDE_DIR "${GZSTREAM_INSTALL_DIR}/include/" CACHE PATH "gzstream ExternalProject_Add( extern_gzstream + DEPENDS zlib GIT_REPOSITORY "https://github.com/jacquesqiao/gzstream.git" GIT_TAG "" PREFIX ${GZSTREAM_SOURCES_DIR} UPDATE_COMMAND "" CONFIGURE_COMMAND "" BUILD_IN_SOURCE 1 - BUILD_COMMAND make -j8 + BUILD_COMMAND make EXTERN_CPPFLAGS="-I${THIRD_PARTY_PATH}/install/zlib/include" EXTERM_LDFLAGS="-L${THIRD_PARTY_PATH}/install/zlib/lib" -j8 INSTALL_COMMAND mkdir -p ${GZSTREAM_INSTALL_DIR}/lib/ && mkdir -p ${GZSTREAM_INSTALL_DIR}/include/ && cp ${GZSTREAM_SOURCES_DIR}/src/extern_gzstream/libgzstream.a ${GZSTREAM_INSTALL_DIR}/lib && cp -r ${GZSTREAM_SOURCES_DIR}/src/extern_gzstream/gzstream.h ${GZSTREAM_INSTALL_DIR}/include diff --git a/paddle/fluid/inference/api/demo_ci/run.sh b/paddle/fluid/inference/api/demo_ci/run.sh index ff718077c1ba6b10fe87aac10d84f96a23ad6bba..a94ccfa92439a735e101c7e5709909abea062ff8 100755 --- a/paddle/fluid/inference/api/demo_ci/run.sh +++ b/paddle/fluid/inference/api/demo_ci/run.sh @@ -54,6 +54,9 @@ mkdir -p build cd build for WITH_STATIC_LIB in ON OFF; do +# TODO(Superjomn) reopen this +# something wrong with the TensorArray reset. +:< void SCAL(int n, const T a, T* x) const; + template + T ASUM(int n, T* x, int inc) const; + template void BatchedGEMM(CBLAS_TRANSPOSE transA, CBLAS_TRANSPOSE transB, int M, int N, int K, T alpha, const T* A, const T* B, T beta, T* C, @@ -269,6 +272,11 @@ class BlasT : private Blas { Base()->template SCAL(args...); } + template + T ASUM(ARGS... args) const { + return Base()->template ASUM(args...); + } + template void BatchedGEMM(ARGS... args) const { Base()->template BatchedGEMM(args...); diff --git a/paddle/fluid/operators/math/blas_impl.h b/paddle/fluid/operators/math/blas_impl.h index 93bf7c7c88db36807143b136ea800d6e5e49dd43..c84087bb1e4849b27d53e05f046c93f631150f6f 100644 --- a/paddle/fluid/operators/math/blas_impl.h +++ b/paddle/fluid/operators/math/blas_impl.h @@ -84,6 +84,11 @@ struct CBlas { platform::dynload::cblas_sscal(args...); } + template + static float ASUM(ARGS... args) { + return platform::dynload::cblas_sasum(args...); + } + template static void GEMM_BATCH(ARGS... args) { platform::dynload::cblas_sgemm_batch(args...); @@ -174,6 +179,11 @@ struct CBlas { platform::dynload::cblas_dscal(args...); } + template + static double ASUM(ARGS... args) { + return platform::dynload::cblas_dasum(args...); + } + template static void GEMM_BATCH(ARGS... args) { platform::dynload::cblas_dgemm_batch(args...); @@ -268,6 +278,7 @@ struct CBlas { static void VPOW(...) { PADDLE_THROW("float16 VPOW not supported on CPU"); } static void DOT(...) { PADDLE_THROW("float16 DOT not supported on CPU"); }; static void SCAL(...) { PADDLE_THROW("float16 SCAL not supported on CPU"); }; + static void ASUM(...) { PADDLE_THROW("float16 ASUM not supported on CPU"); }; #ifdef PADDLE_WITH_MKLML static void GEMM_BATCH(...) { PADDLE_THROW("float16 GEMM_BATCH not supported on CPU"); @@ -476,6 +487,21 @@ void Blas::SCAL(int n, const T a, T *x) const { #endif } +template <> +template +T Blas::ASUM(int n, T *x, int inc) const { + auto sum = static_cast(0.0); +#ifdef PADDLE_WITH_MKLML + sum = CBlas::ASUM(n, x, inc); +#else + // TODO(jczaja): check if openblas does provide cblas_sasum/cblas_dasum + for (int c = 0; c < n; ++c) { + sum += x[c]; + } +#endif + return sum; +} + template <> template void Blas::GEMV(bool trans_a, int M, int N, T alpha, diff --git a/paddle/fluid/operators/math/softmax_impl.h b/paddle/fluid/operators/math/softmax_impl.h index 0f3e5b20086378da8ef1138a5f5c005b724f7fa2..31ed5196668954bc387423c34a0667622db71373 100644 --- a/paddle/fluid/operators/math/softmax_impl.h +++ b/paddle/fluid/operators/math/softmax_impl.h @@ -100,11 +100,8 @@ class SoftmaxFunctor> { blas.VEXP(num_classes * batch_size, out_data, out_data); for (int n = 0; n < batch_size; ++n) { - entities[n] = out_data[n * num_classes]; - for (int c = 1; c < num_classes; ++c) { - entities[n] += out_data[n * num_classes + c]; - } - blas.SCAL(num_classes, 1.0f / entities[n], &out_data[n * num_classes]); + auto sum = blas.ASUM(num_classes, &out_data[n * num_classes], 1); + blas.SCAL(num_classes, 1.0f / sum, &out_data[n * num_classes]); } } }; diff --git a/paddle/fluid/operators/softmax_op.h b/paddle/fluid/operators/softmax_op.h index 8eb5c7691efe930e9f79ad6a381cb290107d1a14..91829d5761bfdd1f9806af6589a2967fe866fec8 100644 --- a/paddle/fluid/operators/softmax_op.h +++ b/paddle/fluid/operators/softmax_op.h @@ -36,9 +36,7 @@ class SoftmaxKernel : public framework::OpKernel { Tensor Out_2d = framework::ReshapeToMatrix(*Out, rank - 1); #ifdef PADDLE_ON_INFERENCE - math::SoftmaxFunctor< - DeviceContext, T, - std::is_same::value>()( + math::SoftmaxFunctor()( context.template device_context(), &X_2d, &Out_2d); #else math::SoftmaxFunctor()( diff --git a/paddle/fluid/platform/dynload/mklml.h b/paddle/fluid/platform/dynload/mklml.h index 9273e9b1e72f0ad7abd6c20d4a34283fbe24378a..f0a973662360fd9ff35e1006cce937d86f3e563c 100644 --- a/paddle/fluid/platform/dynload/mklml.h +++ b/paddle/fluid/platform/dynload/mklml.h @@ -68,6 +68,8 @@ extern void* mklml_dso_handle; __macro(cblas_dgemm_batch); \ __macro(cblas_sdot); \ __macro(cblas_ddot); \ + __macro(cblas_sasum); \ + __macro(cblas_dasum); \ __macro(cblas_sscal); \ __macro(cblas_dscal); \ __macro(vsAdd); \ diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index e31c2f217322be8ef8b131189504b54cf6b4ad80..1835c064055635a4284fc64f4ca4dd8728f933ca 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -398,7 +398,26 @@ All parameter, weight, gradient are variables in Paddle. }, py::return_value_policy::copy); - py::class_(m, "Scope", "") + py::class_(m, "Scope", R"DOC( + Scope is an association of a name to Variable. All variables belong to Scope. + + Variables in a parent scope can be retrieved from local scope. + + You need to specify a scope to run a Net, i.e., `exe.Run(&scope)`. + One net can run in different scopes and update different variable in the + scope. + + You can create var in a scope and get it from the scope. + + Examples: + .. code-block:: python + + # create tensor from a scope and set value to it. + param = scope.var('Param').get_tensor() + param_array = np.full((height, row_numel), 5.0).astype("float32") + param.set(param_array, place) + + )DOC") .def("var", [](Scope &self, const std::string &name) -> Variable * { return self.Var(name); diff --git a/python/paddle/dataset/wmt16.py b/python/paddle/dataset/wmt16.py index aa66696fae7d3adb44511417edf4a92b82a9151b..1052d24c57b79e1db921f59bb6ea6ecdc87a7f81 100644 --- a/python/paddle/dataset/wmt16.py +++ b/python/paddle/dataset/wmt16.py @@ -71,15 +71,16 @@ def __build_dict(tar_file, dict_size, save_path, lang): for w in sen.split(): word_dict[w] += 1 - with open(save_path, "w") as fout: - fout.write("%s\n%s\n%s\n" % (START_MARK, END_MARK, UNK_MARK)) + with open(save_path, "wb") as fout: + fout.write( + cpt.to_bytes("%s\n%s\n%s\n" % (START_MARK, END_MARK, UNK_MARK))) for idx, word in enumerate( sorted( six.iteritems(word_dict), key=lambda x: x[1], reverse=True)): if idx + 3 == dict_size: break - fout.write(word[0].encode('utf-8')) - fout.write('\n') + fout.write(cpt.to_bytes(word[0])) + fout.write(cpt.to_bytes('\n')) def __load_dict(tar_file, dict_size, lang, reverse=False): diff --git a/python/paddle/fluid/initializer.py b/python/paddle/fluid/initializer.py index a26b8df5a240be8340597b9627866c323fa98a2d..b37ebbe5179ba6e36be70ff936cb8a3ca0d89d13 100644 --- a/python/paddle/fluid/initializer.py +++ b/python/paddle/fluid/initializer.py @@ -33,13 +33,15 @@ def force_init_on_cpu(): """ The flag of whether force to init variables on CPU. - Returns:: + Returns: + bool: the state if we should force init on CPU. Examples: + .. code-block:: python if force_init_on_cpu(): - pass + create_op('force_cpu': force_init_on_cpu()) """ return _force_init_on_cpu_