Unverified · commit b8975d68 · authored by yuyang18


Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into feature/combine_open_files_and_double_buffer
@@ -66,6 +66,12 @@ option(WITH_ANAKIN "Compile with Anakin library" OFF)
option(WITH_GRPC "Use grpc as the default rpc framework" ${WITH_DISTRIBUTE})
option(WITH_BRPC_RDMA "Use brpc rdma as the rpc protocal" OFF)
option(WITH_SYSTEM_BLAS "Use system blas library" OFF)
option(PY_VERSION "Compile PaddlePaddle with python3 support" ${PY_VERSION})
# PY_VERSION
if(NOT PY_VERSION)
set(PY_VERSION 2.7)
endif()
# CMAKE_BUILD_TYPE
if(NOT CMAKE_BUILD_TYPE)
@@ -146,6 +152,7 @@ endif()
########################################################################################
include(external/mklml)     # download mklml package
include(external/libxsmm) # download, build, install libxsmm
include(external/zlib)      # download, build, install zlib
include(external/gflags)    # download, build, install gflags
include(external/glog)      # download, build, install glog
@@ -232,6 +239,10 @@ if(WITH_MKLML)
  list(APPEND EXTERNAL_LIBS ${MKLML_IOMP_LIB})
endif()
if(WITH_LIBXSMM)
list(APPEND EXTERNAL_LIBS ${LIBXSMM_LIBS})
endif()
if(WITH_MKLDNN)
  list(APPEND EXTERNAL_LIBS ${MKLDNN_LIB})
endif()
......
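Note: with the PY_VERSION and WITH_LIBXSMM options introduced above, a build could presumably be configured by passing them straight to cmake, for example (a minimal sketch, not part of this commit):

    cmake .. -DPY_VERSION=3.5 -DWITH_LIBXSMM=ON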
@@ -80,7 +80,7 @@ RUN pip install pre-commit 'ipython==5.3.0' && \
    pip install opencv-python
#For docstring checker
-RUN pip install pylint pytest astroid isort
+RUN pip install pylint pytest astroid isort LinkChecker
COPY ./python/requirements.txt /root/
RUN pip install -r /root/requirements.txt
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
OPTION(WITH_LIBXSMM "Compile with libxsmm" OFF)
IF(NOT WITH_LIBXSMM)
return()
ENDIF()
IF(WIN32 OR APPLE OR ANDROID OR IOS)
MESSAGE(WARNING "Windows, Mac or Mobile are not supported with libxsmm in Paddle yet.")
SET(WITH_LIBXSMM OFF CACHE STRING "Disable LIBXSMM" FORCE)
return()
ENDIF()
INCLUDE (ExternalProject)
SET(LIBXSMM_SOURCES_DIR ${THIRD_PARTY_PATH}/libxsmm)
SET(LIBXSMM_INSTALL_DIR ${THIRD_PARTY_PATH}/install/libxsmm)
SET(LIBXSMM_INCLUDE_DIR "${LIBXSMM_INSTALL_DIR}/include" CACHE PATH "LIBXSMM include directory." FORCE)
SET(LIBXSMM_LIBRARY_DIR "${LIBXSMM_INSTALL_DIR}/lib" CACHE PATH "LIBXSMM library directory." FORCE)
SET(LIBXSMM_LIBS "${LIBXSMM_LIBRARY_DIR}/libxsmm.a"
"${LIBXSMM_LIBRARY_DIR}/libxsmmnoblas.a")
ExternalProject_Add(
extern_libxsmm
GIT_REPOSITORY "https://github.com/hfp/libxsmm.git"
GIT_TAG "7cc03b5b342fdbc6b6d990b190671c5dbb8489a2"
PREFIX ${LIBXSMM_SOURCES_DIR}
UPDATE_COMMAND ""
CONFIGURE_COMMAND ""
BUILD_IN_SOURCE 1
BUILD_COMMAND $(MAKE) --silent PREFIX=${LIBXSMM_INSTALL_DIR} CXX=g++ CC=gcc WARP=0 install
INSTALL_COMMAND ""
)
ADD_LIBRARY(libxsmm STATIC IMPORTED GLOBAL)
SET_PROPERTY(TARGET libxsmm PROPERTY IMPORTED_LOCATION "${LIBXSMM_LIBRARY_DIR}/libxsmm.a")
SET_PROPERTY(TARGET libxsmm PROPERTY IMPORTED_LOCATION "${LIBXSMM_LIBRARY_DIR}/libxsmmnoblas.a")
MESSAGE(STATUS "Libxsmm library: ${LIBXSMM_LIBS}")
include_directories(${LIBXSMM_INCLUDE_DIR})
ADD_DEFINITIONS(-DPADDLE_WITH_LIBXSMM)
ADD_DEPENDENCIES(libxsmm extern_libxsmm)
LIST(APPEND external_project_dependencies libxsmm)
@@ -121,6 +121,11 @@ ELSE()
  TARGET_LINK_LIBRARIES(cblas ${CBLAS_LIBRARIES})
ENDIF("${CBLAS_PROVIDER}" STREQUAL "MKLML")
IF(WITH_LIBXSMM)
TARGET_LINK_LIBRARIES(cblas ${LIBXSMM_LIBS})
ADD_DEPENDENCIES(cblas extern_libxsmm)
ENDIF()
IF(NOT ${CBLAS_FOUND})
  ADD_DEPENDENCIES(cblas extern_openblas)
  LIST(APPEND external_project_dependencies cblas)
......
@@ -18,8 +18,9 @@ ENDIF()
INCLUDE(python_module)
-FIND_PACKAGE(PythonInterp 2.7)
-FIND_PACKAGE(PythonLibs 2.7)
+FIND_PACKAGE(PythonInterp ${PY_VERSION})
+FIND_PACKAGE(PythonLibs ${PY_VERSION})
# Fixme: Maybe find a static library. Get SHARED/STATIC by FIND_PACKAGE.
ADD_LIBRARY(python SHARED IMPORTED GLOBAL)
SET_PROPERTY(TARGET python PROPERTY IMPORTED_LOCATION ${PYTHON_LIBRARIES})
......
@@ -276,13 +276,22 @@ std::unique_ptr<SSAGraph> MultiDevSSAGraphBuilder::Build(
      }
    }
-  // Insert BCast Ops
-  for (size_t dev_id = 0; dev_id < bcast_var_name_set.size(); ++dev_id) {
-    auto &to_bcast_set = bcast_var_name_set[dev_id];
-    for (auto &bcast_name : to_bcast_set) {
-      CreateBroadcastOp(&result, bcast_name, dev_id);
+  bool use_gpu = false;
+#ifdef PADDLE_WITH_CUDA
+  use_gpu = nccl_ctxs_ != nullptr;
+#endif
+
+  if (use_gpu ||
+      strategy_.reduce_ == BuildStrategy::ReduceStrategy::kAllReduce) {
+    // Insert BCast Ops
+    for (size_t dev_id = 0; dev_id < bcast_var_name_set.size(); ++dev_id) {
+      auto &to_bcast_set = bcast_var_name_set[dev_id];
+      for (auto &bcast_name : to_bcast_set) {
+        CreateBroadcastOp(&result, bcast_name, dev_id);
+      }
    }
  }
  /*
    Dependency graph has been constructed. However, there are still data
    hazards need to be handled.
@@ -412,14 +421,19 @@ int MultiDevSSAGraphBuilder::GetOpDeviceID(const OpDesc &op) const {
  if (strategy_.reduce_ != BuildStrategy::ReduceStrategy::kReduce) {
    return -1;
  }
-  for (auto &varname : op.InputArgumentNames()) {
-    int dev_id = GetVarDeviceID(varname);
-    if (dev_id != -1) {
-      return dev_id;
-    }
+  int op_role = boost::get<int>(
+      op.GetAttr(framework::OpProtoAndCheckerMaker::OpRoleAttrName()));
+  if (op_role != static_cast<int>(framework::OpRole::kOptimize)) {
+    return -1;
  }
-  return -1;
+
+  auto param_grad = boost::get<std::vector<std::string>>(
+      op.GetAttr(OpProtoAndCheckerMaker::OpRoleVarAttrName()));
+
+  PADDLE_ENFORCE_EQ(param_grad.size(), 2U);
+  int dev_id = GetVarDeviceID(param_grad[1]);
+  PADDLE_ENFORCE_NE(dev_id, -1, "dev_id should not be -1.[%s, %s]", op.Type(),
+                    param_grad[0]);
+  return dev_id;
}
int MultiDevSSAGraphBuilder::GetVarDeviceID(const std::string &varname) const {
......
@@ -45,6 +45,7 @@ class ParallelExecutorPrivate {
#endif
  bool own_local_scope_;
  bool use_cuda_;
bool use_all_reduce_;
};

std::vector<Scope *> &ParallelExecutor::GetLocalScopes() {
@@ -62,6 +63,14 @@ ParallelExecutor::ParallelExecutor(
    : member_(new ParallelExecutorPrivate(places)) {
  member_->global_scope_ = scope;
  member_->use_cuda_ = exec_strategy.use_cuda_;
member_->use_all_reduce_ =
build_strategy.reduce_ == BuildStrategy::ReduceStrategy::kAllReduce;
if (!member_->use_all_reduce_) {
PADDLE_ENFORCE(places.size() > 1,
"If you set build_strategy.reduce with 'Reduce',"
"the number of places must be greater than 1.");
}
  // Step 1. Bcast the params to devs.
  // Create local scopes
@@ -117,7 +126,7 @@ ParallelExecutor::ParallelExecutor(
#ifdef PADDLE_WITH_CUDA
    builder_factory.SetNCCLContextMap(member_->nccl_ctxs_.get());
#else
-    PADDLE_THROW("Not compiled with CUDA");
+    PADDLE_THROW("Not compiled with CUDA.");
#endif
  }
@@ -133,7 +142,7 @@ ParallelExecutor::ParallelExecutor(
void ParallelExecutor::BCastParamsToDevs(
    const std::unordered_set<std::string> &vars) const {
-  // the the initializing bcast, all vars would be bcast from device(0),
+  // the initializing bcast, all vars would be bcast from device(0),
  // otherwise
  // bcast from the specified device.
  bool initializing = builder_.get() == nullptr ? true : false;
@@ -209,9 +218,13 @@ void ParallelExecutor::BCastParamsToDevs(
        auto local_scope = member_->local_scopes_[i];
        auto *t = local_scope->Var(var)->GetMutable<LoDTensor>();
-        t->Resize(dims);
-        t->mutable_data(cpu, main_tensor.type());
-        paddle::framework::TensorCopy(main_tensor, cpu, t);
+        if (member_->use_all_reduce_ || member_->use_cuda_) {
+          t->Resize(dims);
+          t->mutable_data(cpu, main_tensor.type());
+          paddle::framework::TensorCopy(main_tensor, cpu, t);
+        } else {
+          t->ShareDataWith(main_tensor);
+        }
      }
    }
  }
......
@@ -21,6 +21,10 @@
#include "paddle/fluid/platform/dynload/mklml.h"
#endif
#ifdef PADDLE_WITH_LIBXSMM
#include <libxsmm.h>
#endif
#ifdef PADDLE_USE_OPENBLAS
#include <cblas.h>
#endif
......
@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <limits>
#include <vector>
#include "paddle/fluid/operators/math/math_function.h"
@@ -30,6 +31,12 @@ struct CBlas<float> {
    platform::dynload::cblas_sgemm(args...);
  }
#ifdef PADDLE_WITH_LIBXSMM
template <typename... ARGS>
static void SMM_GEMM(ARGS... args) {
libxsmm_sgemm(args...);
}
#endif
  template <typename... ARGS>
  static void AXPY(ARGS... args) {
    platform::dynload::cblas_saxpy(args...);
@@ -63,6 +70,12 @@ struct CBlas<double> {
    platform::dynload::cblas_dgemm(args...);
  }
#ifdef PADDLE_WITH_LIBXSMM
template <typename... ARGS>
static void SMM_GEMM(ARGS... args) {
libxsmm_dgemm(args...);
}
#endif
  template <typename... ARGS>
  static void AXPY(ARGS... args) {
    platform::dynload::cblas_daxpy(args...);
@@ -140,6 +153,9 @@ struct CBlas<double> {
template <>
struct CBlas<platform::float16> {
  static void GEMM(...) { PADDLE_THROW("float16 GEMM not supported on CPU"); }
static void SMM_GEMM(...) {
PADDLE_THROW("float16 SMM_GEMM not supported on CPU");
}
#ifdef PADDLE_WITH_MKLML
  static void GEMM_BATCH(...) {
    PADDLE_THROW("float16 GEMM_BATCH not supported on CPU");
@@ -147,6 +163,33 @@ struct CBlas<platform::float16> {
#endif
};
template <typename T>
inline bool UseXSMM(const int &m, const int &n, const int &k, bool transa,
bool transb, const T &alpha, const T &beta) {
#ifdef PADDLE_WITH_LIBXSMM
// Refer to https://github.com/hfp/libxsmm/blob/master/README.md
// But the threshold is custom
constexpr int LIBXSMM_THRESHOLD = 20 * 20 * 20;
if (m * n * k > LIBXSMM_THRESHOLD || transa || transb ||
std::abs<T>(alpha - static_cast<T>(1) >
std::numeric_limits<T>::epsilon()) ||
std::abs<T>(beta) > std::numeric_limits<T>::epsilon()) {
return false;
} else {
return true;
}
#endif
return false;
}
template <>
inline bool UseXSMM<platform::float16>(const int &m, const int &n, const int &k,
bool transa, bool transb,
const platform::float16 &alpha,
const platform::float16 &beta) {
return false;
}
template <>
template <typename T>
void Blas<platform::CPUDeviceContext>::GEMM(CBLAS_TRANSPOSE transA,
@@ -156,8 +199,21 @@ void Blas<platform::CPUDeviceContext>::GEMM(CBLAS_TRANSPOSE transA,
  int lda = (transA == CblasNoTrans) ? K : M;
  int ldb = (transB == CblasNoTrans) ? N : K;
  int ldc = N;
-  CBlas<T>::GEMM(CblasRowMajor, transA, transB, M, N, K, alpha, A, lda, B, ldb,
-                 beta, C, ldc);
+#ifdef PADDLE_WITH_LIBXSMM
+  if (UseXSMM(M, N, K, transA != CblasNoTrans, transB != CblasNoTrans, alpha,
+              beta)) {
+    // Note: SMM use ColMajor
+    const char transa = 'N';
+    const char transb = 'N';
+    CBlas<T>::SMM_GEMM(&transa, &transb, &N, &M, &K, &alpha, B, &ldb, A, &lda,
+                       &beta, C, &ldc);
+  } else {
+#endif
+    CBlas<T>::GEMM(CblasRowMajor, transA, transB, M, N, K, alpha, A, lda, B,
+                   ldb, beta, C, ldc);
+#ifdef PADDLE_WITH_LIBXSMM
+  }
+#endif
}
template <>
......
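Note on the SMM_GEMM branch above: libxsmm's gemm routines are column-major while this wrapper is row-major, so the call appears to rely on the transpose identity

    C = A * B  <=>  C^T = B^T * A^T

passing B before A and swapping M with N while keeping the row-major leading dimensions; this reading is inferred from the code, not stated in the diff.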
@@ -54,8 +54,64 @@ TEST(math_function, gemm_notrans_cblas) {
  EXPECT_EQ(input3_ptr[6], 86);
  EXPECT_EQ(input3_ptr[7], 99);
}
#ifdef PADDLE_WITH_LIBXSMM
template <typename T>
void MklSmmCompare(int m, int n, int k) {
paddle::framework::Tensor mat_a;
paddle::framework::Tensor mat_b;
paddle::framework::Tensor mat_c_smm;
paddle::framework::Tensor mat_c_mkl;
auto* cpu_place = new paddle::platform::CPUPlace();
T* A = mat_a.mutable_data<T>({m, k}, *cpu_place);
T* B = mat_b.mutable_data<T>({k, n}, *cpu_place);
T* CSMM = mat_c_smm.mutable_data<T>({m, n}, *cpu_place);
T* CMKL = mat_c_mkl.mutable_data<T>({m, n}, *cpu_place);
T alpha = static_cast<T>(1);
T beta = static_cast<T>(0);
for (int i = 0; i < mat_a.numel(); ++i) {
A[i] = static_cast<T>(i);
}
for (int i = 0; i < mat_b.numel(); ++i) {
B[i] = static_cast<T>(i);
}
// lda,ldb,ldc follow RowMajor
int lda = k;
int ldb = n;
int ldc = n;
auto smm = [&, m, n, k, lda, ldb, ldc, alpha, beta]() {
const char transa = 'N';
const char transb = 'N';
paddle::operators::math::CBlas<T>::SMM_GEMM(&transa, &transb, &n, &m, &k,
&alpha, B, &ldb, A, &lda, &beta,
CSMM, &ldc);
};
auto mkl = [&, m, n, k, lda, ldb, ldc, alpha, beta]() {
paddle::operators::math::CBlas<T>::GEMM(CblasRowMajor, CblasNoTrans,
CblasNoTrans, m, n, k, alpha, A,
lda, B, ldb, beta, CMKL, ldc);
};
smm();
mkl();
ASSERT_EQ(mat_c_mkl.numel(), mat_c_smm.numel());
for (int i = 0; i < mat_c_mkl.numel(); ++i) {
EXPECT_FLOAT_EQ(CSMM[i], CMKL[i]);
}
}
TEST(math_function, gemm_mkl_vs_smm) {
MklSmmCompare<float>(1, 2, 3);
MklSmmCompare<double>(1, 2, 3);
MklSmmCompare<float>(3, 2, 1);
MklSmmCompare<double>(3, 2, 1);
MklSmmCompare<float>(3, 8, 5);
MklSmmCompare<double>(3, 8, 5);
}
#endif
-TEST(math_function, gemm_trans_clbas) {
+TEST(math_function, gemm_trans_cblas) {
  paddle::framework::Tensor input1;
  paddle::framework::Tensor input2;
  paddle::framework::Tensor input3;
......
@@ -60,6 +60,7 @@ class TopkKernel : public framework::OpKernel<T> {
#endif
    for (size_t i = 0; i < row; i++) {
      std::vector<std::pair<T, size_t>> vec;
vec.reserve(col);
      for (size_t j = 0; j < col; j++) {
        vec.push_back(std::pair<T, size_t>(eg_input(i, j), j));
      }
......
@@ -136,7 +136,13 @@ std::string callPythonFunc(const std::string& moduleName,
                           const std::string& funcName,
                           const std::vector<std::string>& args) {
  PyObjectPtr obj = callPythonFuncRetPyObj(moduleName, funcName, args);
#if PY_MAJOR_VERSION >= 3
Py_ssize_t str_size = 0u;
const char* str = PyUnicode_AsUTF8AndSize(obj.get(), &str_size);
return std::string(str, (size_t)str_size);
#else
  return std::string(PyString_AsString(obj.get()), PyString_Size(obj.get()));
#endif // PY_MAJOR_VERSION >= 3
}

PyObjectPtr createPythonClass(
......
@@ -88,6 +88,33 @@ PyObjectPtr createPythonClass(const std::string& moduleName,
namespace py {
PyObjectPtr import(const std::string& moduleName);
#if PY_MAJOR_VERSION >= 3
/**
* Cast a PyLong to int type T.
* @tparam T return type.
* @param [in] obj PyLong object.
* @param [out] ok status for casting. False if error occured. nullptr if user
* don't care is ok or not.
* @return The value of python object, or 0 if not ok.
*/
template <typename T>
T castInt(PyObject* obj, bool* ok = nullptr) {
// Refer to https://www.python.org/dev/peps/pep-0237/, the int and long object
// were unified to long since python3
if (PyLong_Check(obj)) {
if (ok) *ok = true;
return (T)PyLong_AsUnsignedLong(obj);
} else {
if (ok) *ok = false;
return (T)0;
}
}
// Convert PyAPI from 2.x to 3.x
#define PyString_FromString PyUnicode_FromString
#define PyString_AsString PyUnicode_AsUTF8
#else
/**
 * Cast a PyLong or PyInt to int type T.
 * @tparam T return type.
@@ -109,6 +136,7 @@ T castInt(PyObject* obj, bool* ok = nullptr) {
    return (T)0;
  }
}
#endif // PY_MAJOR_VERSION >= 3
/**
 * Invoke repr of python object.
......
@@ -78,6 +78,12 @@ function cmake_gen() {
            PYTHON_FLAGS="-DPYTHON_EXECUTABLE:FILEPATH=/opt/python/cp27-cp27mu/bin/python
                          -DPYTHON_INCLUDE_DIR:PATH=/opt/python/cp27-cp27mu/include/python2.7
                          -DPYTHON_LIBRARIES:FILEPATH=/opt/_internal/cpython-2.7.11-ucs4/lib/libpython2.7.so"
elif [ "$1" == "cp35-cp35m" ]; then
export LD_LIBRARY_PATH=/opt/_internal/cpython-3.5.1/lib/:${LD_LIBRARY_PATH}
export PATH=/opt/_internal/cpython-3.5.1/bin/:${PATH}
export PYTHON_FLAGS="-DPYTHON_EXECUTABLE:FILEPATH=/opt/_internal/cpython-3.5.1/bin/python3
-DPYTHON_INCLUDE_DIR:PATH=/opt/_internal/cpython-3.5.1/include/python3.5m
-DPYTHON_LIBRARIES:FILEPATH=/opt/_internal/cpython-3.5.1/lib/libpython3.so"
        fi
    fi
@@ -108,6 +114,7 @@ function cmake_gen() {
        -DWITH_CONTRIB=${WITH_CONTRIB:-ON}
        -DWITH_ANAKIN=${WITH_ANAKIN:-OFF}
        -DWITH_INFERENCE_DEMO=${WITH_INFERENCE_DEMO:-ON}
-DPY_VERSION=${PY_VERSION:-2.7}
========================================
EOF
    # Disable UNITTEST_USE_VIRTUALENV in docker because
@@ -136,7 +143,8 @@ EOF
        -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \
        -DWITH_CONTRIB=${WITH_CONTRIB:-ON} \
        -DWITH_ANAKIN=${WITH_ANAKIN:-OFF} \
-        -DWITH_INFERENCE_DEMO=${WITH_INFERENCE_DEMO:-ON}
+        -DWITH_INFERENCE_DEMO=${WITH_INFERENCE_DEMO:-ON} \
+        -DPY_VERSION=${PY_VERSION:-2.7}
}
function abort(){
......
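For reference, with -DPY_VERSION now forwarded by cmake_gen, a Python 3 docker build could presumably be driven by setting the variable before running the script, e.g. (a sketch under assumptions; the exact task name may differ):

    PY_VERSION=3.5 paddle/scripts/paddle_build.sh build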
@@ -12,16 +12,16 @@
# See the License for the specific language governing permissions and
# limitations under the License.
try:
-    from version import full_version as __version__
-    from version import commit as __git_commit__
+    from paddle.version import full_version as __version__
+    from paddle.version import commit as __git_commit__
except ImportError:
    import sys
    sys.stderr.write('''Warning with import paddle: you should not
      import paddle from the source directory; please install paddlepaddle*.whl firstly.'''
                     )

-import reader
-import dataset
-import batch
+import paddle.reader
+import paddle.dataset
+import paddle.batch
batch = batch.batch
@@ -15,20 +15,20 @@
Dataset package.
"""

-import mnist
-import imikolov
-import imdb
-import cifar
-import movielens
-import conll05
-import uci_housing
-import sentiment
-import wmt14
-import wmt16
-import mq2007
-import flowers
-import voc2012
-import image
+import paddle.dataset.mnist
+import paddle.dataset.imikolov
+import paddle.dataset.imdb
+import paddle.dataset.cifar
+import paddle.dataset.movielens
+import paddle.dataset.conll05
+import paddle.dataset.uci_housing
+import paddle.dataset.sentiment
+import paddle.dataset.wmt14
+import paddle.dataset.wmt16
+import paddle.dataset.mq2007
+import paddle.dataset.flowers
+import paddle.dataset.voc2012
+import paddle.dataset.image

__all__ = [
    'mnist',
......
@@ -324,10 +324,12 @@ def set_gradient_clip(clip, param_list=None, program=None):
        param.gradient_clip_attr = copy.deepcopy(clip)


-def append_gradient_clip_ops(param_grad):
+def append_gradient_clip_ops(param_grads):
    context = dict()
-    for p, g in param_grad:
-        with p.block.program.optimized_guard(p):
+    for p, g in param_grads:
+        if g is None:
+            continue
+        with p.block.program.optimized_guard([p, g]):
            clip_attr = getattr(p, 'gradient_clip_attr', NullGradientClipAttr())
            if clip_attr is None:
                clip_attr = NullGradientClipAttr()
@@ -339,8 +341,10 @@ def append_gradient_clip_ops(param_grad):
            clip_attr._process_context(context=context, param=p, grad=g)

    res = []
-    for p, g in param_grad:
-        with p.block.program.optimized_guard(p):
+    for p, g in param_grads:
+        if g is None:
+            continue
+        with p.block.program.optimized_guard([p, g]):
            res.append(clip_attr._create_operators(param=p, grad=g))

    return res
......
@@ -1319,7 +1319,7 @@ class Program(object):
            self._op_role_var = [var_name]

    @contextlib.contextmanager
-    def optimized_guard(self, var):
+    def optimized_guard(self, param_and_grads):
        """
        A with guard to set :code:`Optimization` :code:`OpRole` and
        :code:`OpRoleVar` automatically.
@@ -1327,17 +1327,20 @@ class Program(object):
        Notes: This is a very low level API. Users should not use it directly.

        Args:
-            var(Variable|str): The variable (name) to be optimized.
+            param_and_grads(list): The variables (names) to be optimized.

        Examples:

            >>> p, g = backward(...)
-            >>> with program.optimized_guard(p):
+            >>> with program.optimized_guard([p,g]):
            >>>     p = p - 0.001 * g
        """
        OpRole = core.op_proto_and_checker_maker.OpRole
        self._current_role = OpRole.Optimize
-        self._op_role_var = [var.name if isinstance(var, Variable) else var]
+        self._op_role_var = [
+            var.name if isinstance(var, Variable) else var
+            for var in param_and_grads
+        ]
        yield
        self._op_role_var = []
        self._current_role = OpRole.Forward
......
@@ -123,7 +123,7 @@ class Optimizer(object):
        """
        pass

-    def _finish_update(self, block, parameters):
+    def _finish_update(self, block, parameters_and_grads):
        """Finish any custom updates needed
           before completing an optimization step
@@ -226,18 +226,18 @@ class Optimizer(object):
        optimize_ops = []
        for param_and_grad in parameters_and_grads:
+            if param_and_grad[1] is None:
+                continue
            with param_and_grad[0].block.program.optimized_guard(
-                    param_and_grad[0]):
-                if param_and_grad[0].trainable is True and param_and_grad[
-                        1] is not None:
+                    param_and_grad):
+                if param_and_grad[0].trainable is True:
                    optimize_op = self._append_optimize_op(loss.block,
                                                           param_and_grad)
                    optimize_ops.append(optimize_op)

        # Get custom finish ops for subclasses
        # FIXME: Need to fix this once we figure out how to handle dependencies
-        self._finish_update(loss.block,
-                            [p[0] for p in parameters_and_grads])
+        self._finish_update(loss.block, parameters_and_grads)

        end = len(global_block.ops)
        return global_block.slice_ops(start, end)
@@ -564,13 +564,15 @@ class AdamOptimizer(Optimizer):
        return adam_op

-    def _finish_update(self, block, parameters):
+    def _finish_update(self, block, param_and_grads):
        """Update Beta1 and Beta2 Power accumulators
        """
        assert isinstance(block, framework.Block)
        main_block = block.program.global_block()
-        for param in parameters:
-            with param.block.program.optimized_guard(param):
+        for param, grad in param_and_grads:
+            if grad is None:
+                continue
+            with param.block.program.optimized_guard([param, grad]):
                beta1_pow_acc = self._get_accumulator(self._beta1_pow_acc_str,
                                                      param)
                beta2_pow_acc = self._get_accumulator(self._beta2_pow_acc_str,
@@ -691,13 +693,15 @@ class AdamaxOptimizer(Optimizer):
        return adamax_op

-    def _finish_update(self, block, parameters):
+    def _finish_update(self, block, parameters_and_grads):
        """Update Beta1 Power accumulator
        """
        assert isinstance(block, framework.Block)
        main_block = block.program.global_block()
-        for param in parameters:
-            with param.block.program.optimized_guard(param):
+        for param, grad in parameters_and_grads:
+            if grad is None:
+                continue
+            with param.block.program.optimized_guard([param, grad]):
                beta1_pow_acc = self._get_accumulator(self._beta1_pow_acc_str,
                                                      param)
                main_block.append_op(
@@ -1158,7 +1162,9 @@ class ModelAverage(Optimizer):
            self.params_grads.append((param, grad))

        for param, grad in self.params_grads:
-            with param.block.program.optimized_guard(param):
+            if grad is None:
+                continue
+            with param.block.program.optimized_guard([param, grad]):
                self._append_average_accumulate_op(param)

        self.apply_program = Program()
......
@@ -41,12 +41,11 @@ def append_regularization_ops(parameters_and_grads, regularization=None):
    """
    params_and_grads = []
    for param, grad in parameters_and_grads:
-        with param.block.program.optimized_guard(param):
-            # If no gradient then we don't need to do anything
-            if grad is None:
-                params_and_grads.append((param, grad))
-                continue
+        # If no gradient then we don't need to do anything
+        if grad is None:
+            params_and_grads.append((param, grad))
+            continue
+        with param.block.program.optimized_guard([param, grad]):
            regularization_term = None
            if param.regularizer is not None:
                # Add variable for regularization term in grad block
......
@@ -35,7 +35,7 @@ class TestParallelExecutorBase(unittest.TestCase):
                                  feed_dict=None,
                                  seed=None,
                                  use_parallel_executor=True,
-                                 balance_parameter_opt_between_cards=False):
+                                 use_reduce=False):
        def run_executor(exe, feed, fetch_list, program=None):
            if isinstance(exe, fluid.ParallelExecutor):
                res = exe.run(fetch_list=fetch_list, feed=feed)
@@ -50,14 +50,19 @@ class TestParallelExecutorBase(unittest.TestCase):
        main = fluid.Program()
        startup = fluid.Program()
        startup.random_seed = 1  # Fix random seed
        main.random_seed = 1
        with fluid.program_guard(main, startup):
            if seed is not None:
                startup.random_seed = seed
                main.random_seed = seed
            loss = method(use_feed=feed_dict is not None)
            adam = fluid.optimizer.Adam()
            adam.minimize(loss)
            if memory_opt:
                fluid.memory_optimize(main)

        place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
        startup_exe = fluid.Executor(place)
        startup_exe.run(startup)
@@ -65,7 +70,8 @@ class TestParallelExecutorBase(unittest.TestCase):
        exec_strategy.allow_op_delay = allow_op_delay

        build_strategy = fluid.BuildStrategy()
-        build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.Reduce if balance_parameter_opt_between_cards else fluid.BuildStrategy.ReduceStrategy.AllReduce
+        build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.Reduce \
+            if use_reduce else fluid.BuildStrategy.ReduceStrategy.AllReduce

        if use_parallel_executor:
            exe = fluid.ParallelExecutor(
......
@@ -97,9 +97,7 @@ class TestMNIST(TestParallelExecutorBase):
            fluid.recordio_writer.convert_reader_to_recordio_file(
                MNIST_RECORDIO_FILE, reader, feeder)

-    def check_simple_fc_convergence(self,
-                                    balance_parameter_opt_between_cards,
-                                    use_cuda=True):
+    def check_simple_fc_convergence(self, use_cuda, use_reduce=False):
        self.check_network_convergence(simple_fc_net, use_cuda=use_cuda)
        self.check_network_convergence(
            simple_fc_net, use_cuda=use_cuda, allow_op_delay=True)
@@ -111,20 +109,19 @@ class TestMNIST(TestParallelExecutorBase):
            feed_dict={"image": img,
                       "label": label},
            use_cuda=use_cuda,
-            balance_parameter_opt_between_cards=balance_parameter_opt_between_cards
-        )
+            use_reduce=use_reduce)

    def test_simple_fc(self):
-        self.check_simple_fc_convergence(False, use_cuda=True)
-        self.check_simple_fc_convergence(False, use_cuda=False)
+        # use_cuda
+        self.check_simple_fc_convergence(True)
+        self.check_simple_fc_convergence(False)

    def test_simple_fc_with_new_strategy(self):
-        self.check_simple_fc_convergence(True, use_cuda=True)
-        self.check_simple_fc_convergence(True, use_cuda=False)
+        # use_cuda, use_reduce
+        self.check_simple_fc_convergence(True, True)
+        self.check_simple_fc_convergence(False, True)

-    def check_simple_fc_parallel_accuracy(self,
-                                          balance_parameter_opt_between_cards,
-                                          use_cuda=True):
+    def check_simple_fc_parallel_accuracy(self, use_cuda, use_reduce=False):
        img = np.zeros(shape=[32, 784], dtype='float32')
        label = np.ones(shape=[32, 1], dtype='int64')
        single_first_loss, single_last_loss = self.check_network_convergence(
@@ -141,8 +138,7 @@ class TestMNIST(TestParallelExecutorBase):
                       "label": label},
            use_cuda=use_cuda,
            use_parallel_executor=True,
-            balance_parameter_opt_between_cards=balance_parameter_opt_between_cards
-        )
+            use_reduce=use_reduce)

        for p_f in parallel_first_loss:
            self.assertAlmostEquals(p_f, single_first_loss[0], delta=1e-6)
@@ -150,15 +146,15 @@ class TestMNIST(TestParallelExecutorBase):
            self.assertAlmostEquals(p_l, single_last_loss[0], delta=1e-6)

    def test_simple_fc_parallel_accuracy(self):
-        self.check_simple_fc_parallel_accuracy(False, use_cuda=True)
-        self.check_simple_fc_parallel_accuracy(False, use_cuda=False)
+        self.check_simple_fc_parallel_accuracy(True)
+        self.check_simple_fc_parallel_accuracy(False)

    def test_simple_fc_parallel_accuracy_with_new_strategy(self):
-        self.check_simple_fc_parallel_accuracy(True, use_cuda=True)
-        self.check_simple_fc_parallel_accuracy(True, use_cuda=False)
+        # use_cuda, use_reduce
+        self.check_simple_fc_parallel_accuracy(True, True)
+        self.check_simple_fc_parallel_accuracy(False, True)

-    def check_batchnorm_fc_convergence(
-            self, balance_parameter_opt_between_cards, use_cuda):
+    def check_batchnorm_fc_convergence(self, use_cuda, use_reduce=False):
        self.check_network_convergence(fc_with_batchnorm, use_cuda=use_cuda)
        img = np.zeros(shape=[32, 784], dtype='float32')
        label = np.ones(shape=[32, 1], dtype='int64')
@@ -167,16 +163,16 @@ class TestMNIST(TestParallelExecutorBase):
            feed_dict={"image": img,
                       "label": label},
            use_cuda=use_cuda,
-            balance_parameter_opt_between_cards=balance_parameter_opt_between_cards
-        )
+            use_reduce=use_reduce)

    def test_batchnorm_fc(self):
-        self.check_batchnorm_fc_convergence(False, use_cuda=True)
-        self.check_batchnorm_fc_convergence(False, use_cuda=False)
+        self.check_batchnorm_fc_convergence(True)
+        self.check_batchnorm_fc_convergence(False)

    def test_batchnorm_fc_with_new_strategy(self):
-        self.check_batchnorm_fc_convergence(True, use_cuda=True)
-        self.check_batchnorm_fc_convergence(True, use_cuda=False)
+        # use_cuda, use_reduce
+        self.check_batchnorm_fc_convergence(True, True)
+        self.check_batchnorm_fc_convergence(False, True)

if __name__ == '__main__':
......
@@ -131,10 +131,7 @@ def SE_ResNeXt50Small(batch_size=2, use_feed=False):

class TestResnet(TestParallelExecutorBase):
-    def check_resnet_convergence(self,
-                                 balance_parameter_opt_between_cards,
-                                 use_cuda=True,
-                                 iter=20):
+    def check_resnet_convergence(self, use_cuda, use_reduce=False, iter=20):
        os.environ['CPU_NUM'] = str(4)

        import functools
@@ -145,16 +142,16 @@ class TestResnet(TestParallelExecutorBase):
            iter=iter,
            batch_size=batch_size,
            use_cuda=use_cuda,
-            balance_parameter_opt_between_cards=balance_parameter_opt_between_cards
-        )
+            use_reduce=use_reduce)

    def test_resnet(self):
-        self.check_resnet_convergence(False, use_cuda=True)
-        self.check_resnet_convergence(False, use_cuda=False, iter=5)
+        self.check_resnet_convergence(True)
+        self.check_resnet_convergence(False, iter=5)

    def test_resnet_with_new_strategy(self):
-        self.check_resnet_convergence(True, use_cuda=True)
-        self.check_resnet_convergence(True, use_cuda=False, iter=5)
+        # use_cuda, use_reduce
+        self.check_resnet_convergence(True, True)
+        self.check_resnet_convergence(False, True, iter=5)

if __name__ == '__main__':
......
@@ -66,9 +66,9 @@ An example implementation for multiple item data reader creator:
TODO(yuyang18): Should we add whole design doc here?
"""
-import decorator
-from decorator import *
-import creator
+import paddle.reader.decorator
+from paddle.reader.decorator import *
+import paddle.reader.creator

__all__ = decorator.__all__ + ['creator']
@@ -20,7 +20,7 @@ __all__ = [
from threading import Thread
import subprocess

-from Queue import Queue
+from six.moves.queue import Queue

import itertools
import random
import zlib
......
@@ -8,4 +8,4 @@ scipy>=0.19.0
Pillow
nltk>=3.2.2
graphviz
-LinkChecker
+six
@@ -17,7 +17,8 @@ def git_commit():
        git_commit = subprocess.Popen(cmd, stdout = subprocess.PIPE).communicate()[0].strip()
    except:
        git_commit = 'Unknown'
-    return git_commit
+    git_commit = git_commit.decode()
+    return str(git_commit)
def _get_version_detail(idx):
    assert idx < 3, "vesion info consists of %(major)d.%(minor)d.%(patch)d, \
@@ -44,6 +45,7 @@ def is_taged():
    try:
        cmd = ['git', 'describe', '--exact-match', '--tags', 'HEAD', '2>/dev/null']
        git_tag = subprocess.Popen(cmd, stdout = subprocess.PIPE).communicate()[0].strip()
git_tag = git_tag.decode()
    except:
        return False
@@ -67,13 +69,13 @@ with_mkl = '%(with_mkl)s'
def show():
    if istaged:
-        print 'full_version:', full_version
-        print 'major:', major
-        print 'minor:', minor
-        print 'patch:', patch
-        print 'rc:', rc
+        print('full_version:', full_version)
+        print('major:', major)
+        print('minor:', minor)
+        print('patch:', patch)
+        print('rc:', rc)
    else:
-        print 'commit:', commit
+        print('commit:', commit)
def mkl():
    return with_mkl
......