Commit 8c9a9677 authored by D dangqingqing

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into acc_image_proc

......@@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 2.8)
project(paddle CXX C)
set(PADDLE_MAJOR_VERSION 0)
set(PADDLE_MINOR_VERSION 9)
set(PADDLE_PATCH_VERSION 0a0)
set(PADDLE_PATCH_VERSION 0)
set(PADDLE_VERSION ${PADDLE_MAJOR_VERSION}.${PADDLE_MINOR_VERSION}.${PADDLE_PATCH_VERSION})
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake")
......
# Release v0.9.0
## New Features:
* New Layers
  * bilinear interpolation layer.
  * spatial pyramid-pool layer.
  * de-convolution layer.
  * maxout layer.
* Support rectangle padding, stride, window and input for Pooling Operation.
* Add `--job=time` in trainer, which can be used to print time info without the compile option `-DWITH_TIMER=ON`.
* Expose cost_weight/nce_layer in `trainer_config_helpers`
* Add FAQ, concepts, h-rnn docs.
* Add Bidi-LSTM and DB-LSTM to quick start demo @alvations
* Add usage track scripts.
## Improvements
* Add Travis-CI for Mac OS X. Enable swig unittest in Travis-CI. Skip Travis-CI when only docs are changed.
* Add code coverage tools.
* Refine convolution layer to speedup and reduce GPU memory.
* Speed up PyDataProvider2
* Add ubuntu deb package build scripts.
* Make Paddle use git-flow branching model.
* PServer support no parameter blocks.
## Bug Fixes
* Add zlib link to py_paddle.
* Add input sparse data check for sparse layer at runtime.
* Fix bug in sparse matrix multiplication.
* Fix floating-point overflow problem of tanh.
* Fix some nvcc compile options.
* Fix a bug in yield dictionary in DataProvider.
* Fix SRL hang on exit.
# Release v0.8.0beta.1
New features:
* Mac OSX is supported by source code. #138
* Both GPU and CPU versions of PaddlePaddle are supported.
* Support CUDA 8.0
* Enhance `PyDataProvider2`
  * Add dictionary yield format. `PyDataProvider2` can yield a dictionary whose keys are data_layer names and whose values are the corresponding features.
  * Add `min_pool_size` to control the memory pool in the provider.
* Add `deb` install package & docker image for no_avx machines.
  * Especially useful for cloud computing and virtual machines.
* Automatically disable `avx` instructions in cmake when the machine's CPU does not support `avx`.
* Add Parallel NN api in trainer_config_helpers.
* Add `travis ci` for Github
Bug fixes:
* Fix several bugs in trainer_config_helpers and complete its unit tests.
* Check whether PaddlePaddle is installed when running unit tests.
* Fix bugs in GTX series GPU
* Fix bug in MultinomialSampler
More documentation has also been written since the last release.
# Release v0.8.0beta.0
PaddlePaddle v0.8.0beta.0 release. The install package is not stable yet and it's a pre-release version.
FROM PADDLE_BASE_IMAGE
MAINTAINER PaddlePaddle Dev Team <paddle-dev@baidu.com>
COPY build.sh /root/
ENV GIT_CHECKOUT=v0.9.0a0
ENV GIT_CHECKOUT=v0.9.0
ENV WITH_GPU=PADDLE_WITH_GPU
ENV IS_DEVEL=PADDLE_IS_DEVEL
ENV WITH_DEMO=PADDLE_WITH_DEMO
......
......@@ -32,8 +32,13 @@ list(APPEND DEJPEG_LINKER_LIBS ${Boost_LIBRARIES})
file(GLOB DEJPEG_HEADER "${CMAKE_CURRENT_SOURCE_DIR}" "*.h")
file(GLOB DEJPEG_SOURCES "${CMAKE_CURRENT_SOURCE_DIR}" "*.cpp")
set(CMAKE_CXX_FLAGS "-std=c++11 -O3 -fPIC -Wno-unused-parameter")
set(BUILD_PRIVATE_FLAGS
-Wno-all
-Wno-error
-Wno-non-virtual-dtor
-Wno-delete-non-virtual-dtor)
add_library(DeJpeg SHARED ${DEJPEG_SOURCES})
target_compile_options(DeJpeg BEFORE PRIVATE ${BUILD_PRIVATE_FLAGS})
target_link_libraries(DeJpeg ${DEJPEG_LINKER_LIBS})
set_target_properties(DeJpeg PROPERTIES PREFIX "")
......@@ -50,7 +50,7 @@ DataTransformer::DataTransformer(int threadNum,
prefetchFree_.enqueue(prefetch_[i]);
}
numThreads_ = 12;
numThreads_ = threadNum;
syncThreadPool_.reset(new SyncThreadPool(numThreads_, false));
}
......@@ -154,7 +154,7 @@ void DataTransformer::transform(Mat& cvImgOri, float* target) {
void DataTransformer::start(vector<char*>& data, int* datalen, int* labels) {
auto job = [&](int tid, int numThreads) {
for (int i = tid; i < data.size(); i += numThreads) {
for (size_t i = tid; i < data.size(); i += numThreads) {
DataTypePtr ret = prefetchFree_.dequeue();
char* buf = data[i];
int size = datalen[i];
......
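The loop-index change above silences a signed/unsigned comparison against `data.size()`. For reference, here is a minimal sketch of the strided work-sharding pattern that the pool runs here, using plain `std::thread` as a stand-in (the real `SyncThreadPool` and its job signature are Paddle internals not shown in this hunk):

```cpp
// Sketch only: std::thread stands in for paddle::SyncThreadPool.
#include <cstddef>
#include <thread>
#include <vector>

void processAll(std::vector<char*>& data, int numThreads) {
  std::vector<std::thread> workers;
  for (int tid = 0; tid < numThreads; ++tid) {
    workers.emplace_back([&data, tid, numThreads] {
      // Worker `tid` handles items tid, tid + numThreads, tid + 2 * numThreads, ...
      // size_t matches data.size() and avoids the signed/unsigned warning.
      for (size_t i = tid; i < data.size(); i += numThreads) {
        // decode/transform data[i] here
      }
    });
  }
  for (auto& w : workers) w.join();
}
```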
......@@ -14,7 +14,6 @@ limitations under the License. */
#include <iostream>
#include <fstream>
// #define OPENCV_CAN_BREAK_BINARY_COMPATIBILITY
#include <opencv2/opencv.hpp>
#include <vector>
#include <string>
......@@ -22,7 +21,6 @@ limitations under the License. */
#include "paddle/utils/Thread.h"
using namespace std;
using namespace cv;
using namespace paddle;
......@@ -113,7 +111,7 @@ private:
*/
int Rand(int min, int max);
typedef pair<float*, int> DataType;
typedef std::pair<float*, int> DataType;
typedef std::shared_ptr<DataType> DataTypePtr;
std::vector<DataTypePtr> prefetch_;
std::unique_ptr<SyncThreadPool> syncThreadPool_;
......
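The header keeps a pool of pre-allocated `DataTypePtr` buffers (`prefetch_`), which the .cpp hunk above recycles through `prefetchFree_.enqueue()`/`dequeue()`. The queue type itself is not shown in this diff; a minimal blocking-queue sketch of the idea (an assumption, not Paddle's actual implementation) looks like this:

```cpp
// Sketch of a blocking queue used to recycle prefetch buffers between
// producer and consumer threads. Assumption: simplified stand-in only.
#include <condition_variable>
#include <mutex>
#include <queue>

template <typename T>
class BlockingQueue {
public:
  void enqueue(T v) {
    {
      std::lock_guard<std::mutex> lock(mutex_);
      queue_.push(std::move(v));
    }
    notEmpty_.notify_one();
  }
  T dequeue() {
    std::unique_lock<std::mutex> lock(mutex_);
    notEmpty_.wait(lock, [this] { return !queue_.empty(); });
    T v = std::move(queue_.front());
    queue_.pop();
    return v;
  }

private:
  std::mutex mutex_;
  std::condition_variable notEmpty_;
  std::queue<T> queue_;
};
```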
......@@ -19,13 +19,11 @@ limitations under the License. */
#include <unistd.h>
#include <glog/logging.h>
#include <numpy/arrayobject.h>
#include <boost/python.hpp>
#include "DataTransformer.h"
using namespace boost::python;
using namespace std;
/**
* DecodeJpeg is an image processing API for interfacing Python and C++
......@@ -37,7 +35,7 @@ using namespace std;
class DecodeJpeg {
public:
/**
* The constructor will create and nitialize an object of DataTransformer.
* The constructor will create and initialize an object of DataTransformer.
*/
DecodeJpeg(int threadNum,
int capacity,
......@@ -56,11 +54,11 @@ public:
LOG(FATAL) << "Object is not a numpy array";
}
pyTypeCheck(meanValues);
int size = PyArray_SIZE(meanValues);
int size = PyArray_SIZE(reinterpret_cast<PyArrayObject*>(meanValues));
isChannelMean = (size == channel) ? true : false;
isEltMean = (size == channel * cropSizeH * cropSizeW) ? true : false;
CHECK(isChannelMean != isEltMean);
mean = (float*)PyArray_DATA(meanValues);
mean = (float*)PyArray_DATA(reinterpret_cast<PyArrayObject*>(meanValues));
}
tfhandlerPtr_ = std::make_shared<DataTransformer>(threadNum,
capacity,
......@@ -91,8 +89,9 @@ public:
char* src = boost::python::extract<char*>(pysrc[t]);
data.push_back(src);
}
int* dlen = (int*)PyArray_DATA(pydlen);
int* dlabels = (int*)PyArray_DATA(pylabel);
int* dlen = (int*)PyArray_DATA(reinterpret_cast<PyArrayObject*>(pydlen));
int* dlabels =
(int*)PyArray_DATA(reinterpret_cast<PyArrayObject*>(pylabel));
tfhandlerPtr_->start(data, dlen, dlabels);
}
......@@ -106,8 +105,8 @@ public:
pyWritableCheck(pylab);
pyContinuousCheck(pytrg);
pyContinuousCheck(pylab);
float* data = (float*)PyArray_DATA(pytrg);
int* label = (int*)PyArray_DATA(pylab);
float* data = (float*)PyArray_DATA(reinterpret_cast<PyArrayObject*>(pytrg));
int* label = (int*)PyArray_DATA(reinterpret_cast<PyArrayObject*>(pylab));
tfhandlerPtr_->obtain(data, label);
}
......@@ -121,8 +120,8 @@ private:
/**
* @brief Check whether the type of PyObject is valid or not.
*/
void pyTypeCheck(const PyObject* o) {
int typenum = PyArray_TYPE(o);
void pyTypeCheck(PyObject* o) {
int typenum = PyArray_TYPE(reinterpret_cast<PyArrayObject*>(o));
// clang-format off
int type =
......@@ -143,13 +142,17 @@ private:
/**
* @brief Check whether the PyObject is writable or not.
*/
void pyWritableCheck(PyObject* o) { CHECK(PyArray_ISWRITEABLE(o)); }
void pyWritableCheck(PyObject* o) {
CHECK(PyArray_ISWRITEABLE(reinterpret_cast<PyArrayObject*>(o)));
}
/**
* @brief Check whether the PyObject is c-contiguous or not.
*/
void pyContinuousCheck(PyObject* o) { CHECK(PyArray_IS_C_CONTIGUOUS(o)); }
};
void pyContinuousCheck(PyObject* o) {
CHECK(PyArray_IS_C_CONTIGUOUS(reinterpret_cast<PyArrayObject*>(o)));
}
}; // DecodeJpeg
/**
* @brief Initialize the Python interpreter and numpy.
......
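The repeated `reinterpret_cast<PyArrayObject*>` edits in this file reflect that the NumPy C API accessors (`PyArray_TYPE`, `PyArray_SIZE`, `PyArray_DATA`, `PyArray_ISWRITEABLE`, `PyArray_IS_C_CONTIGUOUS`) are typed against `PyArrayObject*`, so a generic `PyObject*` handle must be cast before use. A minimal sketch of the pattern (an illustrative helper, not part of this patch):

```cpp
// Sketch: reading a float32 buffer out of a numpy array in a C++ extension.
// Assumption: the caller has already verified the object with PyArray_Check().
#include <Python.h>
#include <numpy/arrayobject.h>

float* asFloat32Buffer(PyObject* o) {
  PyArrayObject* arr = reinterpret_cast<PyArrayObject*>(o);
  if (PyArray_TYPE(arr) != NPY_FLOAT32) {
    return nullptr;  // element type mismatch
  }
  // PyArray_DATA returns void*, so cast to the expected element type.
  return static_cast<float*>(PyArray_DATA(arr));
}
```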