Commit b72beee4, authored by zhangjinchao01

Fix dash and space bug.

ISSUE=4586495

git-svn-id: https://svn.baidu.com/idl/trunk/paddle@1408 1ad973e4-5ce8-4261-8a94-b56d1f490c56
# This file is used by clang-format to autoformat paddle source code
#
# The clang-format is part of the llvm toolchain.
# You need to install llvm and clang to format the source code.
#
# The basic usage is,
# clang-format -i -style=file PATH/TO/SOURCE/CODE
#
# The -style=file option implicitly uses the ".clang-format" file located
# in one of the parent directories.
# The -i option means in-place change.
#
# The documentation of clang-format:
# http://clang.llvm.org/docs/ClangFormat.html
# http://clang.llvm.org/docs/ClangFormatStyleOptions.html
#
# TODO(yuyang18): Add python and other language code style
---
Language: Cpp
BasedOnStyle: Google
IndentWidth: 2
TabWidth: 2
ContinuationIndentWidth: 4
AccessModifierOffset: -2 # The private/protected/public has no indent in class
PointerAlignment: Left # int* p/int& p, not int *p/int &p
Standard: Cpp11
AllowAllParametersOfDeclarationOnNextLine: true
...
*.DS_Store
build/
cmake_minimum_required(VERSION 2.8)
project(paddle CXX C)
set(PADDLE_MAJOR_VERSION 0)
set(PADDLE_MINOR_VERSION 8)
set(PADDLE_PATCH_VERSION 0b)
set(PADDLE_VERSION ${PADDLE_MAJOR_VERSION}.${PADDLE_MINOR_VERSION}.${PADDLE_PATCH_VERSION})
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake")
set(PROJ_ROOT ${CMAKE_SOURCE_DIR})
include(package)
include(swig)
find_package(CUDA QUIET)
find_package(Protobuf REQUIRED)
find_package(PythonLibs 2.7 REQUIRED)
find_package(PythonInterp 2.7 REQUIRED)
find_package(NumPy)
find_package(Threads REQUIRED)
find_package(Glog)
find_package(Gflags QUIET)
find_package(GTest)
find_package(Sphinx)
find_package(Doxygen)
include(cblas)
find_program(M4_EXECUTABLE m4)
###################### Configurations ###########################
option(WITH_DSO "Compile PaddlePaddle with dynamically linked libraries" ON)
option(WITH_GPU "Compile PaddlePaddle with GPU support" ${CUDA_FOUND})
option(WITH_DOUBLE "Compile PaddlePaddle with double precision, otherwise use single precision" OFF)
option(WITH_AVX "Compile PaddlePaddle with AVX instructions" ON) # TODO(yuyang18): Check whether AVX is supported as the default value
option(WITH_PYTHON "Compile PaddlePaddle with the Python interpreter" ON)
option(WITH_STYLE_CHECK "Style check for PaddlePaddle" ${PYTHONINTERP_FOUND})
option(WITH_RDMA "Compile PaddlePaddle with RDMA support" OFF)
option(WITH_GLOG "Compile PaddlePaddle with glog, otherwise use an internal log implementation" ${LIBGLOG_FOUND})
option(WITH_GFLAGS "Compile PaddlePaddle with gflags, otherwise use an internal flags implementation" ${GFLAGS_FOUND})
option(WITH_TIMER "Compile PaddlePaddle with timer enabled" OFF)
option(WITH_TESTING "Compile and run unit tests for PaddlePaddle" ${GTEST_FOUND})
option(WITH_DOC "Compile PaddlePaddle with documentation" OFF)
option(WITH_DOC_CN "Compile PaddlePaddle with Chinese documentation" OFF)
option(WITH_SWIG_PY "Compile PaddlePaddle with the SWIG Python prediction API" ${SWIG_FOUND})
if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE "RelWithDebInfo" CACHE STRING
"Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel"
FORCE)
endif()
include(enableCXX11)
include(cpplint)
include(ccache)
include(util)
include(flags)
include(cudnn)
include(FindPythonModule)
include(check_packages)
# add PaddlePaddle version
if(DEFINED ENV{PADDLE_VERSION})
add_definitions(-DPADDLE_VERSION=\"$ENV{PADDLE_VERSION}\")
else()
if(EXISTS ${PROJ_ROOT}/.svn/)
find_package(Subversion REQUIRED)
if(SUBVERSION_FOUND)
Subversion_WC_INFO(${PROJ_ROOT} Project)
add_definitions(-DPADDLE_VERSION=${Project_WC_REVISION})
endif()
endif()
endif()
if(NOT WITH_GPU)
add_definitions(-DPADDLE_ONLY_CPU)
add_definitions(-DHPPL_STUB_FUNC)
list(APPEND CMAKE_CXX_SOURCE_FILE_EXTENSIONS cu)
else()
# TODO(yuyang18): Change it to remove std=c++11 in cuda compile.
set(CUDA_PROPAGATE_HOST_FLAGS OFF)
if(NOT CUDNN_FOUND)
message(FATAL_ERROR "Paddle need cudnn to compile")
endif()
if(WITH_DSO)
set(CUDA_LIBRARIES "")
add_definitions(-DPADDLE_USE_DSO)
endif(WITH_DSO)
# Include cuda and cudnn
include_directories(${CUDNN_INCLUDE_DIR})
include_directories(${CUDA_TOOLKIT_INCLUDE})
endif(NOT WITH_GPU)
if(WITH_DOUBLE)
add_definitions(-DPADDLE_TYPE_DOUBLE -DHPPL_TYPE_DOUBLE)
set(ACCURACY double)
else(WITH_DOUBLE)
set(ACCURACY float)
endif(WITH_DOUBLE)
if(NOT WITH_TIMER)
add_definitions(-DPADDLE_DISABLE_TIMER)
endif(NOT WITH_TIMER)
if(WITH_AVX)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx")
else(WITH_AVX)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse3")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse3")
endif(WITH_AVX)
if(WITH_PYTHON)
include_directories(${PYTHON_INCLUDE_DIR})
include_directories(${PYTHON_NUMPY_INCLUDE_DIR})
else(WITH_PYTHON)
add_definitions(-DPADDLE_NO_PYTHON)
endif(WITH_PYTHON)
if(NOT WITH_RDMA)
add_definitions(-DPADDLE_DISABLE_RDMA)
endif()
if(WITH_GLOG)
add_definitions(-DPADDLE_USE_GLOG)
endif()
if(WITH_GFLAGS)
add_definitions(-DPADDLE_USE_GFLAGS)
add_definitions(-DGFLAGS_NS=${GFLAGS_NAMESPACE})
include_directories(${GFLAGS_INCLUDE_DIRS})
endif()
if(WITH_TESTING)
enable_testing()
include_directories(${GTEST_INCLUDE_DIRS})
endif()
include_directories("${CBLAS_INC_DIR}")
include_directories("${PROJ_ROOT}")
include_directories("${PROJ_ROOT}/paddle/cuda/include")
include_directories(${PROTOBUF_INCLUDE_DIRS})
include_directories("${CMAKE_CURRENT_BINARY_DIR}/proto")
if(EXISTS "${PROJ_ROOT}/paddle/internals/CMakeLists.txt")
set(PADDLE_WITH_INTERNAL ON)
include(paddle/internals/CMakeLists.txt)
else()
set(PADDLE_WITH_INTERNAL OFF)
set(INTERNAL_PROTO_PATH "")
endif()
add_subdirectory(proto)
add_subdirectory(paddle)
add_subdirectory(python)
if(WITH_DOC)
add_subdirectory(doc)
endif()
if(WITH_DOC_CN)
add_subdirectory(doc_cn)
endif()
Copyright (c) 2016 Baidu, Inc. All Rights Reserved
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright (c) 2016 Baidu, Inc. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
# PaddlePaddle
[![Documentation Status](https://readthedocs.org/projects/ctcspeechrecognition/badge/?version=latest)](http://ctcspeechrecognition.readthedocs.io/en/latest/?badge=latest)
PaddlePaddle (PArallel Distributed Deep LEarning) is an easy-to-use,
efficient, flexible and scalable deep learning platform, originally
developed by Baidu scientists and engineers to apply deep learning to
many products at Baidu.
## Features
- **Flexibility**
PaddlePaddle supports a wide range of neural network architectures and
optimization algorithms. It is easy to configure complex models such as a
neural machine translation model with attention mechanism or complex memory
connections.
- **Efficiency**
In order to unleash the power of heterogeneous computing resources,
optimization occurs at different levels of PaddlePaddle, including
computing, memory, architecture and communication. The following are some
examples:
1. Optimized math operations through SSE/AVX intrinsics, BLAS libraries
(e.g. MKL, ATLAS, cuBLAS) or customized CPU/GPU kernels.
2. Highly optimized recurrent networks which can handle **variable-length**
sequences without padding.
3. Optimized local and distributed training for models with high dimensional
sparse data.
- **Scalability**
With PaddlePaddle, it is easy to use many CPUs/GPUs and machines to speed
up your training. PaddlePaddle can achieve high throughput and performance
via optimized communication.
- **Connected to Products**
In addition, PaddlePaddle is also designed to be easily deployable. At Baidu,
PaddlePaddle has been deployed into products and services with a vast number
of users, including ad click-through rate (CTR) prediction, large-scale image
classification, optical character recognition (OCR), search ranking, computer
virus detection, recommendation, etc. It is widely used in products at
Baidu and has achieved significant impact. We hope you can also exploit
the capability of PaddlePaddle to make a huge impact for your product.
## Installation
See [installation guide]() to build and install from the source code or install
the Docker Image.
## Documentation
- [Quick Start]() <br>
You can follow the quick start tutorial to learn how to use PaddlePaddle
step-by-step.
- [Example and Demo]() <br>
We provide five demos: image classification, sentiment analysis,
sequence-to-sequence model, recommendation, and semantic role labelling.
- [Distributed Training]() <br>
This system supports training deep learning models on multiple machines
with data parallelism.
- [Python API]() <br>
PaddlePaddle supports using either the Python interface or C++ to build your
system. We also use SWIG to wrap the C++ source code and create a user-friendly
interface for Python. You can also use SWIG to create an interface for your
favorite programming language.
- [How to Contribute]() <br>
We sincerely appreciate your interest and contributions. If you’d like to
contribute, please read the contribution guide.
- [Source Code Documents]() <br>
## Ask Questions
If you want to ask questions and discuss methods and models, you are welcome
to email paddle-dev@baidu.com. Framework development discussions and
bug reports are collected on [Issues](https://github.com/paddle/paddle/issues).
## Copyright and License
PaddlePaddle is provided under the [Apache-2.0 license](LICENSE).
Cao, Ying
Cheng, Yujuan
Dang, Qingqing
Dong, Tengfei
Du, Dalong
Feng, Shouqiang
Gao, Haoyuan
Han, Baochang
Han, Jinchen
Hao, Nanyu
He, Daoyuan
He, Zhengyan
Hou, Jue
Huang, Chang
Huang, Zhiheng
Hu, Na
Kong, Qi
Liao, Gang
Li, Bo
Li, Jiajie
Li, Jing
Li, Lei
Li, Peng
Liu, Sheng
Liu, Yuan
Li, Yuze
Luo, Heng
Luo, Tao
Lyu, Qin
Mao, Hongyue
Qian, Xiaojun
Qi, Jun
Qin, Duohao
Shen, Guolong
Shi, Guangchuan
Song, Xiang
Wang, Jiang
Wang, Yanfei
Wang, Yong
Weng, Renliang
Xu, Tianbing
Xu, Wei
Xu, Xingyu
Yan, Chong
Yan, Chunwei
Yang, Yi
Yu, Yang
Yu, Yinan
Zhang, Jian
Zhang, Ruiqing
Zhang, Weide
Zhao, Liang
Zhou, Jie
#
# Find libglog
#
# LIBGLOG_INCLUDE_DIR - where to find glog/logging.h, etc.
# LIBGLOG_LIBRARY - List of libraries when using libglog.
# LIBGLOG_FOUND - True if libglog found.
#
# from https://github.com/facebook/hhvm/blob/master/CMake/FindGlog.cmake
IF (LIBGLOG_INCLUDE_DIR)
# Already in cache, be silent
SET(LIBGLOG_FIND_QUIETLY TRUE)
ENDIF ()
FIND_PATH(LIBGLOG_INCLUDE_DIR glog/logging.h)
FIND_LIBRARY(LIBGLOG_LIBRARY glog)
# handle the QUIETLY and REQUIRED arguments and set LIBGLOG_FOUND to TRUE if
# all listed variables are TRUE
INCLUDE(FindPackageHandleStandardArgs)
FIND_PACKAGE_HANDLE_STANDARD_ARGS(LIBGLOG DEFAULT_MSG LIBGLOG_LIBRARY LIBGLOG_INCLUDE_DIR)
MARK_AS_ADVANCED(LIBGLOG_LIBRARY LIBGLOG_INCLUDE_DIR)
# Find the Python NumPy package
# PYTHON_NUMPY_INCLUDE_DIR
# NUMPY_FOUND
# will be set by this script
cmake_minimum_required(VERSION 2.6)
if(NOT PYTHON_EXECUTABLE)
if(NumPy_FIND_QUIETLY)
find_package(PythonInterp QUIET)
else()
find_package(PythonInterp)
set(_numpy_out 1)
endif()
endif()
if (PYTHON_EXECUTABLE)
# write a python script that finds the numpy path
file(WRITE ${PROJECT_BINARY_DIR}/FindNumpyPath.py
"try: import numpy; print(numpy.get_include())\nexcept:pass\n")
# execute the find script
exec_program("${PYTHON_EXECUTABLE}" ${PROJECT_BINARY_DIR}
ARGS "FindNumpyPath.py"
OUTPUT_VARIABLE NUMPY_PATH)
elseif(_numpy_out)
message(STATUS "Python executable not found.")
endif(PYTHON_EXECUTABLE)
find_path(PYTHON_NUMPY_INCLUDE_DIR numpy/arrayobject.h
HINTS "${NUMPY_PATH}" "${PYTHON_INCLUDE_PATH}")
if(PYTHON_NUMPY_INCLUDE_DIR)
set(PYTHON_NUMPY_FOUND 1 CACHE INTERNAL "Python numpy found")
endif(PYTHON_NUMPY_INCLUDE_DIR)
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(NumPy DEFAULT_MSG PYTHON_NUMPY_INCLUDE_DIR)
# Find if a Python module is installed
# Found at http://www.cmake.org/pipermail/cmake/2011-January/041666.html
# To use do: find_python_module(PyQt4 REQUIRED)
function(find_python_module module)
string(TOUPPER ${module} module_upper)
if(NOT PY_${module_upper})
if(ARGC GREATER 1 AND ARGV1 STREQUAL "REQUIRED")
set(${module}_FIND_REQUIRED TRUE)
else()
set(${module}_FIND_REQUIRED FALSE)
endif()
# A module's location is usually a directory, but for binary modules
# it's a .so file.
execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c"
"import re, ${module}; print(re.compile('/__init__.py.*').sub('',${module}.__file__))"
RESULT_VARIABLE _${module}_status
OUTPUT_VARIABLE _${module}_location
ERROR_QUIET
OUTPUT_STRIP_TRAILING_WHITESPACE)
if(NOT _${module}_status)
set(PY_${module_upper} ${_${module}_location} CACHE STRING
"Location of Python module ${module}")
endif(NOT _${module}_status)
endif(NOT PY_${module_upper})
find_package_handle_standard_args(PY_${module} DEFAULT_MSG PY_${module_upper})
if(NOT PY_${module_upper}_FOUND AND ${module}_FIND_REQUIRED)
message(FATAL_ERROR "python module ${module} is not found")
endif()
set(PY_${module_upper}_FOUND ${PY_${module_upper}_FOUND} PARENT_SCOPE)
endfunction(find_python_module)
# - This module looks for Sphinx
# Find the Sphinx documentation generator
#
# This modules defines
# SPHINX_EXECUTABLE
# SPHINX_FOUND
find_program(SPHINX_EXECUTABLE
NAMES sphinx-build
PATHS
/usr/bin
/usr/local/bin
/opt/local/bin
DOC "Sphinx documentation generator"
)
if( NOT SPHINX_EXECUTABLE )
set(_Python_VERSIONS
2.7 2.6 2.5 2.4 2.3 2.2 2.1 2.0 1.6 1.5
)
foreach( _version ${_Python_VERSIONS} )
set( _sphinx_NAMES sphinx-build-${_version} )
find_program( SPHINX_EXECUTABLE
NAMES ${_sphinx_NAMES}
PATHS
/usr/bin
/usr/local/bin
/opt/local/bin
DOC "Sphinx documentation generator"
)
endforeach()
endif()
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(Sphinx DEFAULT_MSG
SPHINX_EXECUTABLE
)
option( SPHINX_HTML_OUTPUT "Build a single HTML with the whole content." ON )
option( SPHINX_DIRHTML_OUTPUT "Build HTML pages, but with a single directory per document." OFF )
option( SPHINX_HTMLHELP_OUTPUT "Build HTML pages with additional information for building a documentation collection in htmlhelp." OFF )
option( SPHINX_QTHELP_OUTPUT "Build HTML pages with additional information for building a documentation collection in qthelp." OFF )
option( SPHINX_DEVHELP_OUTPUT "Build HTML pages with additional information for building a documentation collection in devhelp." OFF )
option( SPHINX_EPUB_OUTPUT "Build HTML pages with additional information for building a documentation collection in epub." OFF )
option( SPHINX_LATEX_OUTPUT "Build LaTeX sources that can be compiled to a PDF document using pdflatex." OFF )
option( SPHINX_MAN_OUTPUT "Build manual pages in groff format for UNIX systems." OFF )
option( SPHINX_TEXT_OUTPUT "Build plain text files." OFF )
mark_as_advanced(
SPHINX_EXECUTABLE
SPHINX_HTML_OUTPUT
SPHINX_DIRHTML_OUTPUT
SPHINX_HTMLHELP_OUTPUT
SPHINX_QTHELP_OUTPUT
SPHINX_DEVHELP_OUTPUT
SPHINX_EPUB_OUTPUT
SPHINX_LATEX_OUTPUT
SPHINX_MAN_OUTPUT
SPHINX_TEXT_OUTPUT
)
function( Sphinx_add_target target_name builder conf cache source destination )
add_custom_target( ${target_name} ALL
COMMAND ${SPHINX_EXECUTABLE} -b ${builder}
-d ${cache}
-c ${conf}
${source}
${destination}
COMMENT "Generating sphinx documentation: ${builder}"
)
set_property(
DIRECTORY APPEND PROPERTY
ADDITIONAL_MAKE_CLEAN_FILES
${destination}
)
endfunction()
# Target dependencies can be optionally listed at the end.
function( Sphinx_add_targets target_base_name conf source base_destination )
set( _dependencies )
foreach( arg IN LISTS ARGN )
set( _dependencies ${_dependencies} ${arg} )
endforeach()
if( ${SPHINX_HTML_OUTPUT} )
Sphinx_add_target( ${target_base_name}_html html ${conf} ${source} ${base_destination}/html )
add_dependencies( ${target_base_name}_html ${_dependencies} )
endif()
if( ${SPHINX_DIRHTML_OUTPUT} )
Sphinx_add_target( ${target_base_name}_dirhtml dirhtml ${conf} ${source} ${base_destination}/dirhtml )
add_dependencies( ${target_base_name}_dirhtml ${_dependencies} )
endif()
if( ${SPHINX_QTHELP_OUTPUT} )
Sphinx_add_target( ${target_base_name}_qthelp qthelp ${conf} ${source} ${base_destination}/qthelp )
add_dependencies( ${target_base_name}_qthelp ${_dependencies} )
endif()
if( ${SPHINX_DEVHELP_OUTPUT} )
Sphinx_add_target( ${target_base_name}_devhelp devhelp ${conf} ${source} ${base_destination}/devhelp )
add_dependencies( ${target_base_name}_devhelp ${_dependencies} )
endif()
if( ${SPHINX_EPUB_OUTPUT} )
Sphinx_add_target( ${target_base_name}_epub epub ${conf} ${source} ${base_destination}/epub )
add_dependencies( ${target_base_name}_epub ${_dependencies} )
endif()
if( ${SPHINX_LATEX_OUTPUT} )
Sphinx_add_target( ${target_base_name}_latex latex ${conf} ${source} ${base_destination}/latex )
add_dependencies( ${target_base_name}_latex ${_dependencies} )
endif()
if( ${SPHINX_MAN_OUTPUT} )
Sphinx_add_target( ${target_base_name}_man man ${conf} ${source} ${base_destination}/man )
add_dependencies( ${target_base_name}_man ${_dependencies} )
endif()
if( ${SPHINX_TEXT_OUTPUT} )
Sphinx_add_target( ${target_base_name}_text text ${conf} ${source} ${base_destination}/text )
add_dependencies( ${target_base_name}_text ${_dependencies} )
endif()
if( ${BUILD_TESTING} )
sphinx_add_target( ${target_base_name}_linkcheck linkcheck ${conf} ${source} ${base_destination}/linkcheck )
add_dependencies( ${target_base_name}_linkcheck ${_dependencies} )
endif()
endfunction()
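# A hypothetical usage sketch (the target, conf and path names are examples only):
#
#   Sphinx_add_targets(docs ${CMAKE_CURRENT_SOURCE_DIR}/conf
#                      ${CMAKE_CURRENT_SOURCE_DIR}/source
#                      ${CMAKE_CURRENT_BINARY_DIR} some_dependency_target)
#
# With the default options this would create a docs_html target invoking
# sphinx-build with the html builder; the other builders stay off unless the
# SPHINX_*_OUTPUT options above are enabled.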
# Find the CBlas libraries
#
# It will search MKL, atlas, OpenBlas, reference-cblas in order.
#
# If any cblas implementation is found, the following variables will be set.
# CBLAS_PROVIDER # one of MKL, ATLAS, OPENBLAS, REFERENCE
# CBLAS_INC_DIR # the include directory for cblas.
# CBLAS_LIBS # a list of libraries that paddle should link against.
# # Each entry is the full path to the library file.
#
# Users should set one of MKL_ROOT, ATLAS_ROOT, OPENBLAS_ROOT, REFERENCE_CBLAS_ROOT
# when running cmake. If none of them is set, it will try to find a cblas
# implementation in the system paths.
#
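# For example (the paths below are hypothetical, adjust to your installation),
# the roots can be passed through the environment or on the cmake command line:
#
#   MKL_ROOT=/opt/intel/mkl cmake ..
#   cmake -DOPENBLAS_ROOT=/opt/OpenBLAS ..
#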
## Find MKL First.
set(MKL_ROOT $ENV{MKL_ROOT} CACHE PATH "Folder contains MKL")
find_path(MKL_INCLUDE_DIR mkl.h PATHS ${MKL_ROOT}/include)
find_library(MKL_CORE_LIB NAMES mkl_core PATHS ${MKL_ROOT}/lib)
find_library(MKL_SEQUENTIAL_LIB NAMES mkl_sequential PATHS ${MKL_ROOT}/lib)
find_library(MKL_INTEL_LP64 NAMES mkl_intel_lp64 PATHS ${MKL_ROOT}/lib)
if(MKL_INCLUDE_DIR AND MKL_CORE_LIB AND MKL_SEQUENTIAL_LIB AND MKL_INTEL_LP64)
set(CBLAS_PROVIDER MKL)
set(CBLAS_INC_DIR ${MKL_INCLUDE_DIR})
set(CBLAS_LIBS ${MKL_INTEL_LP64}
${MKL_SEQUENTIAL_LIB}
${MKL_CORE_LIB})
add_definitions(-DPADDLE_USE_MKL)
return() # return file.
endif()
## Then find atlas.
set(ATLAS_ROOT $ENV{ATLAS_ROOT} CACHE PATH "Folder contains Atlas")
set(ATLAS_INCLUDE_SEARCH_PATHS
${ATLAS_ROOT}/include
/usr/include
/usr/include/atlas)
set(ATLAS_LIB_SEARCH_PATHS
${ATLAS_ROOT}/lib
/usr/lib
/usr/lib/blas/atlas
/usr/lib/atlas
/usr/lib/atlas-base # special for ubuntu 14.04.
)
find_path(ATLAS_INC_DIR NAMES cblas.h
PATHS ${ATLAS_INCLUDE_SEARCH_PATHS})
find_library(ATLAS_CBLAS_LIB NAMES cblas libcblas.so.3
PATHS ${ATLAS_LIB_SEARCH_PATHS})
find_library(ATLAS_LIB NAMES atlas libatlas.so.3
PATHS ${ATLAS_LIB_SEARCH_PATHS})
if(ATLAS_INC_DIR AND ATLAS_CBLAS_LIB AND ATLAS_LIB)
set(CBLAS_PROVIDER ATLAS)
set(CBLAS_INC_DIR ${ATLAS_INC_DIR})
set(CBLAS_LIBS ${ATLAS_LIB} ${ATLAS_CBLAS_LIB})
return()
endif()
## Then find openblas.
set(OPENBLAS_ROOT $ENV{OPENBLAS_ROOT} CACHE PATH "Folder contains Openblas")
set(OPENBLAS_INCLUDE_SEARCH_PATHS
${OPENBLAS_ROOT}/include
/usr/include
/usr/include/openblas)
set(OPENBLAS_LIB_SEARCH_PATHS
${OPENBLAS_ROOT}/lib
/usr/lib
/usr/lib/blas/openblas
/usr/lib/openblas)
find_path(OPENBLAS_INC_DIR NAMES cblas.h
PATHS ${OPENBLAS_INCLUDE_SEARCH_PATHS})
find_library(OPENBLAS_LIB NAMES openblas
PATHS ${OPENBLAS_LIB_SEARCH_PATHS})
if(OPENBLAS_INC_DIR AND OPENBLAS_LIB)
set(CBLAS_PROVIDER OPENBLAS)
set(CBLAS_INC_DIR ${OPENBLAS_INC_DIR})
set(CBLAS_LIBS ${OPENBLAS_LIB})
return()
endif()
## Then find the reference-cblas. www.netlib.org/blas/
set(REFERENCE_CBLAS_ROOT $ENV{REFERENCE_CBLAS_ROOT} CACHE PATH
"Folder contains reference-cblas")
set(REFERENCE_CBLAS_INCLUDE_SEARCH_PATHS
${REFERENCE_CBLAS_ROOT}/include
/usr/include
/usr/include/cblas
)
set(REFERENCE_CBLAS_LIB_SEARCH_PATHS
${REFERENCE_CBLAS_ROOT}/lib
/usr/lib
/usr/lib/blas/reference/
/usr/lib/reference/
)
find_path(REFERENCE_CBLAS_INCLUDE_DIR NAMES cblas.h PATHS
${REFERENCE_CBLAS_INCLUDE_SEARCH_PATHS})
find_library(REFERENCE_CBLAS_LIBRARY NAMES cblas PATHS
${REFERENCE_CBLAS_LIB_SEARCH_PATHS})
if (REFERENCE_CBLAS_INCLUDE_DIR AND REFERENCE_CBLAS_LIBRARY)
set(CBLAS_PROVIDER REFERENCE)
set(CBLAS_INC_DIR ${REFERENCE_CBLAS_INCLUDE_DIR})
set(CBLAS_LIBS ${REFERENCE_CBLAS_LIBRARY})
return()
endif()
message(FATAL_ERROR "CBlas must be set. Paddle support MKL, ATLAS, OpenBlas, reference-cblas."
" Try set MKL_ROOT, ATLAS_ROOT, OPENBLAS_ROOT or REFERENCE_CBLAS_ROOT.")
# Use ccache if the ccache program is found
find_program(CCACHE_FOUND ccache)
if(CCACHE_FOUND)
message(STATUS "ccache found, using it to speed up compilation.")
set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ccache)
set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK ccache)
endif(CCACHE_FOUND)
# Check package for each cmake option
if(WITH_GPU)
find_package(CUDA REQUIRED) # CUDA is required when using GPU
endif()
if(WITH_PYTHON)
find_package(PythonLibs 2.6 REQUIRED)
find_package(PythonInterp REQUIRED)
find_package(NumPy REQUIRED)
endif()
if(WITH_STYLE_CHECK)
find_package(PythonInterp REQUIRED)
endif()
if(WITH_GLOG)
find_package(Glog REQUIRED)
endif()
if(WITH_GFLAGS)
find_package(Gflags REQUIRED)
endif()
if(WITH_TESTING)
find_package(GTest REQUIRED)
endif()
if(WITH_DOC)
find_package(Sphinx REQUIRED)
find_package(Doxygen REQUIRED)
find_python_module(recommonmark REQUIRED)
find_python_module(breathe REQUIRED)
endif()
if(WITH_SWIG_PY)
if(NOT SWIG_FOUND)
message(FATAL_ERROR "SWIG is not found. Please install swig or disable WITH_SWIG_PY")
endif()
find_python_module(wheel REQUIRED) # package wheel
endif()
if(NOT M4_EXECUTABLE)
message(FATAL_ERROR "Paddle need m4 to generate proto file.")
endif()
# Utility to check C++ file style
# * It basically uses Google's cpplint.py.
# * It provides "add_style_check_target" for cmake.
# For usage, see add_style_check_target's documentation.
#
# TODO(yuyang18): Add python style check.
set(STYLE_FILTER)
# disable unwanted filters
# paddle does not indent public/protected/private in classes
set(STYLE_FILTER "${STYLE_FILTER}-whitespace/indent,")
# paddle uses mutable references. BUT THIS IS NOT RECOMMENDED
set(STYLE_FILTER "${STYLE_FILTER}-runtime/references,")
# paddle uses relative paths for includes.
set(STYLE_FILTER "${STYLE_FILTER}-build/include,")
# paddle uses <thread>, <mutex>, etc.
set(STYLE_FILTER "${STYLE_FILTER}-build/c++11,")
# paddle uses C-style casts. BUT THIS IS NOT RECOMMENDED
set(STYLE_FILTER "${STYLE_FILTER}-readability/casting")
# IGNORE SOME FILES
set(IGNORE_PATTERN
.*ImportanceSampler.*
.*cblas\\.h.*
.*LtrDataProvider.*
.*MultiDataProvider.*)
# add_style_check_target
#
# Attach a code style check step to a target.
#
# First argument: the target name to attach to
# Rest arguments: the list of source files to check
#
# NOTE: If WITH_STYLE_CHECK is OFF, this macro does nothing.
macro(add_style_check_target TARGET_NAME)
if(WITH_STYLE_CHECK)
set(SOURCES_LIST ${ARGN})
list(REMOVE_DUPLICATES SOURCES_LIST)
list(SORT SOURCES_LIST)
foreach(filename ${SOURCES_LIST})
set(LINT ON)
foreach(pattern ${IGNORE_PATTERN})
if(filename MATCHES ${pattern})
message(STATUS "DROP LINT ${filename}")
set(LINT OFF)
endif()
endforeach()
if(LINT MATCHES ON)
add_custom_command(TARGET ${TARGET_NAME}
PRE_BUILD
COMMAND "${PYTHON_EXECUTABLE}" "${PROJ_ROOT}/paddle/scripts/cpplint.py"
"--filter=${STYLE_FILTER}" ${filename}
WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR})
endif()
endforeach()
endif()
endmacro()
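# A hypothetical usage sketch (the target and variable names are examples only):
#
#   add_library(example_lib ${EXAMPLE_SOURCES})
#   add_style_check_target(example_lib ${EXAMPLE_SOURCES})
#
# Before example_lib is built, every listed source that does not match
# IGNORE_PATTERN is checked by cpplint.py with the filters above.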
set(CUDNN_ROOT "" CACHE PATH "CUDNN ROOT")
find_path(CUDNN_INCLUDE_DIR cudnn.h
PATHS ${CUDNN_ROOT} ${CUDNN_ROOT}/include
$ENV{CUDNN_ROOT} $ENV{CUDNN_ROOT}/include ${CUDA_TOOLKIT_INCLUDE}
NO_DEFAULT_PATH
)
get_filename_component(__libpath_hist ${CUDA_CUDART_LIBRARY} PATH)
list(APPEND CUDNN_CHECK_LIBRARY_DIRS
${CUDNN_ROOT}
${CUDNN_ROOT}/lib64
${CUDNN_ROOT}/lib
$ENV{CUDNN_ROOT}
$ENV{CUDNN_ROOT}/lib64
$ENV{CUDNN_ROOT}/lib
/usr/lib)
find_library(CUDNN_LIBRARY NAMES libcudnn.so # libcudnn_static.a
PATHS ${CUDNN_CHECK_LIBRARY_DIRS} ${CUDNN_INCLUDE_DIR} ${__libpath_hist}
NO_DEFAULT_PATH
DOC "Path to cuDNN library.")
if(CUDNN_INCLUDE_DIR AND CUDNN_LIBRARY)
set(CUDNN_FOUND ON)
else()
set(CUDNN_FOUND OFF)
endif()
if(CUDNN_FOUND)
file(READ ${CUDNN_INCLUDE_DIR}/cudnn.h CUDNN_VERSION_FILE_CONTENTS)
get_filename_component(CUDNN_LIB_PATH ${CUDNN_LIBRARY} DIRECTORY)
string(REGEX MATCH "define CUDNN_VERSION +([0-9]+)"
CUDNN_VERSION "${CUDNN_VERSION_FILE_CONTENTS}")
string(REGEX REPLACE "define CUDNN_VERSION +([0-9]+)" "\\1"
CUDNN_VERSION "${CUDNN_VERSION}")
if("${CUDNN_VERSION}" STREQUAL "2000")
message(STATUS "Current cuDNN version is v2. ")
else()
string(REGEX MATCH "define CUDNN_MAJOR +([0-9]+)" CUDNN_MAJOR_VERSION
"${CUDNN_VERSION_FILE_CONTENTS}")
string(REGEX REPLACE "define CUDNN_MAJOR +([0-9]+)" "\\1"
CUDNN_MAJOR_VERSION "${CUDNN_MAJOR_VERSION}")
string(REGEX MATCH "define CUDNN_MINOR +([0-9]+)" CUDNN_MINOR_VERSION
"${CUDNN_VERSION_FILE_CONTENTS}")
string(REGEX REPLACE "define CUDNN_MINOR +([0-9]+)" "\\1"
CUDNN_MINOR_VERSION "${CUDNN_MINOR_VERSION}")
string(REGEX MATCH "define CUDNN_PATCHLEVEL +([0-9]+)"
CUDNN_PATCHLEVEL_VERSION "${CUDNN_VERSION_FILE_CONTENTS}")
string(REGEX REPLACE "define CUDNN_PATCHLEVEL +([0-9]+)" "\\1"
CUDNN_PATCHLEVEL_VERSION "${CUDNN_PATCHLEVEL_VERSION}")
if(NOT CUDNN_MAJOR_VERSION)
set(CUDNN_VERSION "???")
else()
math(EXPR CUDNN_VERSION
"${CUDNN_MAJOR_VERSION} * 1000 +
${CUDNN_MINOR_VERSION} * 100 + ${CUDNN_PATCHLEVEL_VERSION}")
endif()
message(STATUS "Current cuDNN header is ${CUDNN_INCLUDE_DIR}/cudnn.h. "
"Current cuDNN version is v${CUDNN_MAJOR_VERSION}. ")
endif()
endif()
# Enable C++11 for GCC.
# NOTE: It's only tested with gcc.
include(CheckCXXCompilerFlag)
CHECK_CXX_COMPILER_FLAG("-std=c++11" COMPILER_SUPPORT_CXX11)
CHECK_CXX_COMPILER_FLAG("-std=c++0x" COMPILER_SUPPORT_CXX0X)
if(COMPILER_SUPPORT_CXX11)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
elseif(COMPILER_SUPPORT_CXX0X)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++0x")
else()
message(FATAL_ERROR "Your compiler must support c++11")
endif()
# Setting Paddle Compile Flags
include(CheckCXXCompilerFlag)
include(CheckCCompilerFlag)
include(CheckCXXSymbolExists)
# safe_set_flag
#
# Set a compile flag only if the compiler supports it.
# is_c: whether it is a C flag or a C++ flag, bool type.
# src_list: the variable name to which the flag will be appended.
# flag_name: the compiler flag, such as '-Werror', '-Wall', etc.
# rest arguments: not used.
function(safe_set_flag is_c src_list flag_name)
string(REPLACE "-" "_" safe_name ${flag_name})
string(REPLACE "=" "_" safe_name ${safe_name})
if(is_c)
CHECK_C_COMPILER_FLAG(${flag_name} C_COMPILER_SUPPORT_FLAG_${safe_name})
set(safe_name C_COMPILER_SUPPORT_FLAG_${safe_name})
else()
CHECK_CXX_COMPILER_FLAG(${flag_name} CXX_COMPILER_SUPPORT_FLAG_${safe_name})
set(safe_name CXX_COMPILER_SUPPORT_FLAG_${safe_name})
endif()
if(${safe_name})
set(${src_list} "${${src_list}} ${flag_name}" PARENT_SCOPE)
if(is_c)
set(CUDA_NVCC_FLAGS
--compiler-options;${flag_name}
${CUDA_NVCC_FLAGS}
PARENT_SCOPE)
endif()
endif()
endfunction()
# helper macro to set cflag
macro(safe_set_cflag src_list flag_name)
safe_set_flag(ON ${src_list} ${flag_name})
endmacro()
# helper macro to set cxxflag
macro(safe_set_cxxflag src_list flag_name)
safe_set_flag(OFF ${src_list} ${flag_name})
endmacro()
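# A minimal sketch of the helpers above (the flag is an example only):
#
#   safe_set_cxxflag(CMAKE_CXX_FLAGS -Wshadow)
#
# This appends -Wshadow to CMAKE_CXX_FLAGS only when the C++ compiler accepts
# it, so unsupported flags are skipped instead of breaking the build.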
CHECK_CXX_SYMBOL_EXISTS(UINT64_MAX "stdint.h" UINT64_MAX_EXISTS)
if(NOT UINT64_MAX_EXISTS)
set(CMAKE_REQUIRED_DEFINITIONS -D__STDC_LIMIT_MACROS)
CHECK_CXX_SYMBOL_EXISTS(UINT64_MAX "stdint.h" UINT64_MAX_EXISTS_HERE)
if(UINT64_MAX_EXISTS_HERE)
set(CMAKE_REQUIRED_DEFINITIONS)
add_definitions(-D__STDC_LIMIT_MACROS)
else()
message(FATAL_ERROR "Cannot find symbol UINT64_MAX")
endif()
endif()
# Common flags: the compiler flags used for C/C++ sources in both release and debug builds.
# Each flag is only applied if the compiler supports it.
set(COMMON_FLAGS
-fPIC
-fno-omit-frame-pointer
-Wall
-Wextra
-Werror
-Wnon-virtual-dtor
-Wdelete-non-virtual-dtor
-Wno-unused-parameter
-Wno-error=literal-suffix
-Wno-error=unused-local-typedefs)
foreach(flag ${COMMON_FLAGS})
safe_set_cflag(CMAKE_C_FLAGS ${flag})
safe_set_cxxflag(CMAKE_CXX_FLAGS ${flag})
endforeach()
# Release/Debug flags are set by cmake itself, such as -O3, -g, -DNDEBUG.
# So don't set these flags here.
foreach(capability 30 35 50)
list(APPEND __arch_flags "-gencode arch=compute_${capability},code=sm_${capability}")
endforeach()
if (CUDA_VERSION VERSION_GREATER "7.0")
list(APPEND __arch_flags "-gencode arch=compute_52,code=sm_52")
endif()
set(CUDA_NVCC_FLAGS ${__arch_flags} ${CUDA_NVCC_FLAGS})
set(CPACK_PACKAGE_NAME paddle)
set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "")
set(CPACK_PACKAGE_VERSION_MAJOR ${PADDLE_MAJOR_VERSION})
set(CPACK_PACKAGE_VERSION_MINOR ${PADDLE_MINOR_VERSION})
set(CPACK_PACKAGE_VERSION_PATCH ${PADDLE_PATCH_VERSION})
set(CPACK_PACKAGE_VERSION ${PADDLE_VERSION})
## DEB Settings
set(CPACK_DEBIAN_PACKAGE_NAME paddle)
set(CPACK_DEBIAN_PACKAGE_ARCHITECTURE amd64)
set(CPACK_DEBIAN_PACKAGE_MAINTAINER PaddlePaddle Dev <paddle-dev@baidu.com>)
set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "Paddle")
set(CPACK_PACKAGE_DESCRIPTION "")
set(CPACK_DEBIAN_PACKAGE_DEPENDS "libatlas3-base, libgflags2, libgoogle-glog0, libprotobuf8, libpython2.7, libstdc++6, python-numpy, python-pip, python-pip-whl, python-protobuf")
set(CPACK_DEBIAN_PACKAGE_SECTION Devel)
set(CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "${PROJ_ROOT}/paddle/scripts/deb/postinst")
#set(CPACK_GENERATOR "DEB")
# Start cpack
include (CMakePackageConfigHelpers)
include (CPack)
find_program(
SWIG_BINARY_PATH
swig)
if(${SWIG_BINARY_PATH} STREQUAL "SWIG_BINARY_PATH-NOTFOUND")
set(SWIG_FOUND OFF)
else()
set(SWIG_FOUND ON)
endif()
set(MIN_SWIG_VERSION 2)
if(SWIG_FOUND)
execute_process(COMMAND sh -c "${SWIG_BINARY_PATH} -version | grep Version | cut -f3 -d' '"
OUTPUT_VARIABLE _SWIG_VERSION
OUTPUT_STRIP_TRAILING_WHITESPACE)
if(${_SWIG_VERSION} VERSION_LESS ${MIN_SWIG_VERSION})
message("swig version ${MIN_SWIG_VERSION} or greater is needed for generating python api. "
"Only version ${_SWIG_VERSION} is found. Set SWIG_FOUND to FALSE")
set(SWIG_FOUND FALSE)
endif(${_SWIG_VERSION} VERSION_LESS ${MIN_SWIG_VERSION})
endif(SWIG_FOUND)
function(generate_python_api target_name)
add_custom_command(OUTPUT ${PROJ_ROOT}/paddle/py_paddle/swig_paddle.py
${PROJ_ROOT}/paddle/Paddle_wrap.cxx
${PROJ_ROOT}/paddle/Paddle_wrap.h
COMMAND swig -python -c++ -outcurrentdir -I../ api/Paddle.swig
&& mv ${PROJ_ROOT}/paddle/swig_paddle.py ${PROJ_ROOT}/paddle/py_paddle/swig_paddle.py
DEPENDS ${PROJ_ROOT}/paddle/api/Paddle.swig
WORKING_DIRECTORY ${PROJ_ROOT}/paddle
COMMENT "Generate Python API from swig")
add_custom_target(${target_name} ALL DEPENDS
${PROJ_ROOT}/paddle/Paddle_wrap.cxx
${PROJ_ROOT}/paddle/Paddle_wrap.h
${PROJ_ROOT}/paddle/py_paddle/swig_paddle.py)
endfunction(generate_python_api)
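# A hypothetical usage sketch (the target name is an example only):
#
#   generate_python_api(python_swig_sources)
#
# This creates an ALL target that re-runs swig whenever api/Paddle.swig
# changes and refreshes paddle/py_paddle/swig_paddle.py.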
# Some common routines for compiling paddle.
# target_circle_link_libraries
# Link libraries to a target that has circular dependencies.
#
# First argument: the target name to be linked with the libraries
# Rest arguments: the libraries to link together.
function(target_circle_link_libraries TARGET_NAME)
target_link_libraries(${TARGET_NAME}
-Wl,--start-group
${ARGN}
-Wl,--end-group)
endfunction()
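# A hypothetical usage sketch (the target and library names are examples only):
#
#   target_circle_link_libraries(my_binary liba libb)
#
# The -Wl,--start-group/-Wl,--end-group pair makes the linker re-scan the
# group until all symbols resolve, so mutually dependent static libraries link.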
# compile_cu_as_cpp
# Compile .cu files as C++
# Arguments: source files
macro(compile_cu_as_cpp)
foreach(s ${ARGN})
set_source_files_properties(${s} PROPERTIES LANGUAGE CXX)
set_source_files_properties(${s} PROPERTIES COMPILE_FLAGS "-x c++")
endforeach()
endmacro()
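# For example (the file name is hypothetical):
#
#   compile_cu_as_cpp(some_kernel.cu)
#
# This marks some_kernel.cu as a C++ source, matching the CPU-only build
# above where "cu" is appended to CMAKE_CXX_SOURCE_FILE_EXTENSIONS.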
# link_paddle_exe
# Link the paddle libraries into a paddle executable, such as trainer or pserver.
#
# It handles WITH_PYTHON/WITH_GLOG etc.
function(link_paddle_exe TARGET_NAME)
if(WITH_METRIC)
if(WITH_GPU)
set(METRIC_LIBS paddle_metric_learning paddle_dserver_lib metric metric_cpu)
else()
set(METRIC_LIBS paddle_metric_learning paddle_dserver_lib metric_cpu)
endif()
else()
set(METRIC_LIBS "")
endif()
if(PADDLE_WITH_INTERNAL)
set(INTERNAL_LIBS paddle_internal_gserver paddle_internal_parameter)
target_circle_link_libraries(${TARGET_NAME}
-Wl,--whole-archive
paddle_internal_gserver
paddle_internal_owlqn
-Wl,--no-whole-archive
paddle_internal_parameter)
else()
set(INTERAL_LIBS "")
endif()
target_circle_link_libraries(${TARGET_NAME}
-Wl,--whole-archive
paddle_gserver
${METRIC_LIBS}
-Wl,--no-whole-archive
paddle_pserver
paddle_trainer_lib
paddle_network
paddle_math
paddle_utils
paddle_parameter
paddle_proto
paddle_cuda
${METRIC_LIBS}
${PROTOBUF_LIBRARY}
${CMAKE_THREAD_LIBS_INIT}
${CBLAS_LIBS}
${CMAKE_DL_LIBS}
${INTERNAL_LIBS}
-lz)
if(WITH_PYTHON)
target_link_libraries(${TARGET_NAME}
${PYTHON_LIBRARIES})
endif()
if(WITH_GLOG)
target_link_libraries(${TARGET_NAME}
${LIBGLOG_LIBRARY})
endif()
if(WITH_GFLAGS)
target_link_libraries(${TARGET_NAME}
${GFLAGS_LIBRARIES})
endif()
if(WITH_GPU)
if(NOT WITH_DSO OR WITH_METRIC)
target_link_libraries(${TARGET_NAME}
${CUDNN_LIBRARY}
${CUDA_curand_LIBRARY})
CUDA_ADD_CUBLAS_TO_TARGET(${TARGET_NAME})
endif()
check_library_exists(rt clock_gettime "time.h" HAVE_CLOCK_GETTIME )
if(HAVE_CLOCK_GETTIME)
target_link_libraries(${TARGET_NAME} rt)
endif()
endif()
endfunction()
# link_paddle_test
# Link a paddle unittest target
# TARGET_NAME: the unittest target name
# Rest arguments: not used.
function(link_paddle_test TARGET_NAME)
link_paddle_exe(${TARGET_NAME})
target_link_libraries(${TARGET_NAME} ${GTEST_MAIN_LIBRARIES}
${GTEST_LIBRARIES})
endfunction()
# add_unittest_without_exec
#
# Create a paddle unittest without defining how to run it.
# TARGET_NAME: the unittest target name, same as the executable file name
# Rest arguments: the source files to compile for this unittest.
macro(add_unittest_without_exec TARGET_NAME)
add_executable(${TARGET_NAME} ${ARGN})
link_paddle_test(${TARGET_NAME})
add_style_check_target(${TARGET_NAME} ${ARGN})
endmacro()
# add_unittest
# Create a paddle unittest and register the binary to be executed as a test.
#
# TARGET_NAME: the unittest target name, same as the executable file name
# Rest arguments: the source files to compile for this unittest.
macro(add_unittest TARGET_NAME)
add_unittest_without_exec(${TARGET_NAME} ${ARGN})
add_test(${TARGET_NAME} ${TARGET_NAME})
endmacro()
# add_simple_unittest
# Create a paddle unittest from a single file: compiles ${TARGET_NAME}.cpp into
# ${TARGET_NAME} and registers it as a test.
macro(add_simple_unittest TARGET_NAME)
add_unittest(${TARGET_NAME} ${TARGET_NAME}.cpp)
endmacro()
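# A hypothetical example: given a source file foo_test.cpp,
#
#   add_simple_unittest(foo_test)
#
# compiles foo_test.cpp into the foo_test binary, links the paddle and gtest
# libraries into it, and registers it with ctest.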
macro(add_paddle_culib TARGET_NAME)
set(NVCC_FLAG ${CUDA_NVCC_FLAGS})
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};--use_fast_math)
cuda_add_library(${TARGET_NAME} STATIC ${ARGN})
set(CUDA_NVCC_FLAGS ${NVCC_FLAG})
endmacro()
data/cifar-10-batches-py
data/cifar-out
cifar_vgg_model/*
plot.png
train.log
image_provider_copy_1.py
*pyc
../model_zoo/resnet/classify.py
#!/bin/bash
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
python classify.py \
--job=predict \
--conf=vgg_16_cifar.py \
--model=./cifar_vgg_model/pass-00299 \
--multi_crop \
--data=./example/test.list
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
wget https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
tar zxf cifar-10-python.tar.gz
rm cifar-10-python.tar.gz
rm -rf cifar-out/*
echo Converting CIFAR data to images.....
python process_cifar.py ./cifar-10-batches-py ./cifar-out
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import sys
import os
import PIL.Image as Image
"""
Usage: python process_cifar input_dir output_dir
"""
def mkdir_not_exist(path):
"""
Make dir if the path does not exist.
path: the path to be created.
"""
if not os.path.exists(path):
os.mkdir(path)
def create_dir_structure(output_dir):
"""
Create the train/test directory structure under the output directory.
output_dir: the output directory path.
"""
mkdir_not_exist(os.path.join(output_dir))
mkdir_not_exist(os.path.join(output_dir, "train"))
mkdir_not_exist(os.path.join(output_dir, "test"))
def convert_batch(batch_path, label_set, label_map,
output_dir, data_split):
"""
Convert a CIFAR batch to the directory structure of the Paddle format.
batch_path: the batch to be converted.
label_set: the set of labels seen so far.
label_map: a map from label index to label name.
output_dir: the output path.
data_split: whether it is training or testing data.
"""
batch = np.load(batch_path)
for data, label, filename in zip(batch['data'], batch['labels'],
batch['filenames']):
data = data.reshape((3, 32, 32))
data = np.transpose(data, (1, 2, 0))
label = label_map[label]
output_dir_this = os.path.join(output_dir, data_split, str(label))
output_filename = os.path.join(output_dir_this, filename)
if label not in label_set:
label_set[label] = True
mkdir_not_exist(output_dir_this)
Image.fromarray(data).save(output_filename)
if __name__ == '__main__':
input_dir = sys.argv[1]
output_dir = sys.argv[2]
num_batch = 5
create_dir_structure(output_dir)
label_map = {0: "airplane", 1: "automobile", 2: "bird", 3: "cat", 4: "deer",
5: "dog", 6: "frog", 7: "horse", 8: "ship", 9: "truck"}
labels = {}
for i in range(1, num_batch + 1):
convert_batch(os.path.join(input_dir, "data_batch_%d" % i), labels,
label_map, output_dir, "train")
convert_batch(os.path.join(input_dir, "test_batch"), {},
label_map, output_dir, "test")
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import numpy as np
from optparse import OptionParser
from py_paddle import swig_paddle, util, DataProviderWrapperConverter
from paddle.trainer.PyDataProviderWrapper import DenseSlot
from paddle.trainer.config_parser import parse_config
"""
Will merge predictor from Qingqing.
"""
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import io
import cPickle  # used by processData below to load the pickled batch files
import random
import paddle.utils.image_util as image_util
from paddle.trainer.PyDataProvider2 import *
#
# {'img_size': 32,
# 'settings': <paddle.trainer.PyDataProviderWrapper.Cls instance at 0x7fea27cb6050>,
# 'color': True,
# 'mean_img_size': 32,
# 'meta': './data/cifar-out/batches/batches.meta',
# 'num_classes': 10,
# 'file_list': ('./data/cifar-out/batches/train_batch_000',),
# 'use_jpeg': True}
def hook(settings, img_size, mean_img_size, num_classes, color, meta, use_jpeg,
is_train, **kwargs):
settings.mean_img_size = mean_img_size
settings.img_size = img_size
settings.num_classes = num_classes
settings.color = color
settings.is_train = is_train
if settings.color:
settings.img_raw_size = settings.img_size * settings.img_size * 3
else:
settings.img_raw_size = settings.img_size * settings.img_size
settings.meta_path = meta
settings.use_jpeg = use_jpeg
settings.img_mean = image_util.load_meta(settings.meta_path,
settings.mean_img_size,
settings.img_size,
settings.color)
settings.logger.info('Image size: %s', settings.img_size)
settings.logger.info('Meta path: %s', settings.meta_path)
settings.input_types = [
dense_vector(settings.img_raw_size), # image feature
integer_value(settings.num_classes)] # labels
settings.logger.info('DataProvider Initialization finished')
@provider(init_hook=hook)
def processData(settings, file_name):
"""
The main function for loading data.
Load the batch, iterate all the images and labels in this batch.
file_name: the batch file name.
"""
data = cPickle.load(io.open(file_name, 'rb'))
indexes = list(range(len(data['images'])))
if settings.is_train:
random.shuffle(indexes)
for i in indexes:
if settings.use_jpeg == 1:
img = image_util.decode_jpeg(data['images'][i])
else:
img = data['images'][i]
img_feat = image_util.preprocess_img(img, settings.img_mean,
settings.img_size, settings.is_train,
settings.color)
label = data['labels'][i]
yield img_feat.tolist(), int(label)
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
from PIL import Image
from cStringIO import StringIO
def resize_image(img, target_size):
"""
Resize an image so that the shorter edge has length target_size.
img: the input image to be resized.
target_size: the target resized image size.
"""
percent = (target_size/float(min(img.size[0], img.size[1])))
resized_size = int(round(img.size[0] * percent)), int(round(img.size[1] * percent))
img = img.resize(resized_size, Image.ANTIALIAS)
return img
def flip(im):
"""
Return the flipped image.
Flip an image along the horizontal direction.
im: input image, (H x W x K) ndarrays
"""
if len(im.shape) == 3:
return im[:, :, ::-1]
else:
return im[:, ::-1]
def crop_img(im, inner_size, color=True, test=True):
"""
Return cropped image.
The size of the cropped image is inner_size * inner_size.
im: (K x H x W) ndarrays
inner_size: the cropped image size.
color: whether it is color image.
test: whether in test mode.
If False, does random cropping and flipping.
If True, crop the center of images.
"""
if color:
height, width = max(inner_size, im.shape[1]), max(inner_size, im.shape[2])
padded_im = np.zeros((3, height, width))
startY = (height - im.shape[1]) / 2
startX = (width - im.shape[2]) / 2
endY, endX = startY + im.shape[1], startX + im.shape[2]
padded_im[:, startY: endY, startX: endX] = im
else:
im = im.astype('float32')
height, width = max(inner_size, im.shape[0]), max(inner_size, im.shape[1])
padded_im = np.zeros((height, width))
startY = (height - im.shape[0]) / 2
startX = (width - im.shape[1]) / 2
endY, endX = startY + im.shape[0], startX + im.shape[1]
padded_im[startY: endY, startX: endX] = im
if test:
startY = (height - inner_size) / 2
startX = (width - inner_size) / 2
else:
startY = np.random.randint(0, height - inner_size + 1)
startX = np.random.randint(0, width - inner_size + 1)
endY, endX = startY + inner_size, startX + inner_size
if color:
pic = padded_im[:, startY: endY, startX: endX]
else:
pic = padded_im[startY: endY, startX: endX]
if (not test) and (np.random.randint(2) == 0):
pic = flip(pic)
return pic
def decode_jpeg(jpeg_string):
np_array = np.array(Image.open(StringIO(jpeg_string)))
if len(np_array.shape) == 3:
np_array = np.transpose(np_array, (2, 0, 1))
return np_array
def preprocess_img(im, img_mean, crop_size, is_train, color=True):
"""
Does data augmentation for images.
If is_train is false, crop the center region of the image.
If is_train is true, randomly crop a region from the image,
and randomly flip it.
im: (K x H x W) ndarrays
"""
im = im.astype('float32')
test = not is_train
pic = crop_img(im, crop_size, color, test)
pic -= img_mean
return pic.flatten()
def load_meta(meta_path, mean_img_size, crop_size, color=True):
"""
Return the loaded meta file.
Load the meta image, which is the mean of the images in the dataset.
The mean image is subtracted from every input image so that the expected mean
of each input image is zero.
"""
mean = np.load(meta_path)['data_mean']
border = (mean_img_size - crop_size) / 2
if color:
assert(mean_img_size * mean_img_size * 3 == mean.shape[0])
mean = mean.reshape(3, mean_img_size, mean_img_size)
mean = mean[:, border: border + crop_size,
border: border + crop_size].astype('float32')
else:
assert(mean_img_size * mean_img_size == mean.shape[0])
mean = mean.reshape(mean_img_size, mean_img_size)
mean = mean[border: border + crop_size,
border: border + crop_size].astype('float32')
return mean
def load_image(img_path, is_color=True):
"""
Load image and return.
img_path: image path.
is_color: is color image or not.
"""
img = Image.open(img_path)
img.load()
return img
def oversample(img, crop_dims):
"""
image : iterable of (H x W x K) ndarrays
crop_dims: (height, width) tuple for the crops.
Returned data contains ten crops of input image, namely,
four corner patches and the center patch as well as their
horizontal reflections.
"""
# Dimensions and center.
im_shape = np.array(img[0].shape)
crop_dims = np.array(crop_dims)
im_center = im_shape[:2] / 2.0
# Make crop coordinates
h_indices = (0, im_shape[0] - crop_dims[0])
w_indices = (0, im_shape[1] - crop_dims[1])
crops_ix = np.empty((5, 4), dtype=int)
curr = 0
for i in h_indices:
for j in w_indices:
crops_ix[curr] = (i, j, i + crop_dims[0], j + crop_dims[1])
curr += 1
crops_ix[4] = np.tile(im_center, (1, 2)) + np.concatenate([
-crop_dims / 2.0,
crop_dims / 2.0
])
crops_ix = np.tile(crops_ix, (2, 1))
# Extract crops
crops = np.empty((10 * len(img), crop_dims[0], crop_dims[1],
im_shape[-1]), dtype=np.float32)
ix = 0
for im in img:
for crop in crops_ix:
crops[ix] = im[crop[0]:crop[2], crop[1]:crop[3], :]
ix += 1
crops[ix-5:ix] = crops[ix-5:ix, :, ::-1, :] # flip for mirrors
return crops
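# A minimal usage sketch of oversample, illustrative only: ten crops are
# returned per input image, so one 256x256 RGB stand-in image cropped to
# 224x224 yields an array of shape (10, 224, 224, 3).
def _example_oversample():
    img = np.zeros((256, 256, 3), dtype=np.float32)  # stand-in image
    crops = oversample([img], (224, 224))
    assert crops.shape == (10, 224, 224, 3)
    return crops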
class ImageTransformer:
    def __init__(self, transpose=None,
                 channel_swap=None, mean=None, is_color=True):
        self.transpose = transpose
        # store the optional channel swap order and mean so that
        # transformer() applies them; they were previously discarded
        # and always reset to None
        self.channel_swap = channel_swap
        self.mean = mean
        self.is_color = is_color
def set_transpose(self, order):
if self.is_color:
assert 3 == len(order)
self.transpose = order
def set_channel_swap(self, order):
if self.is_color:
assert 3 == len(order)
self.channel_swap = order
def set_mean(self, mean):
# mean value, may be one value per channel
if mean.ndim == 1:
mean = mean[:, np.newaxis, np.newaxis]
else:
# elementwise mean
if self.is_color:
assert len(mean.shape) == 3
self.mean = mean
def transformer(self, data):
if self.transpose is not None:
data = data.transpose(self.transpose)
if self.channel_swap is not None:
data = data[self.channel_swap, :, :]
if self.mean is not None:
data -= self.mean
return data
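# A minimal usage sketch of ImageTransformer, mirroring how classify.py in
# this repo configures it (HWC input -> CHW float with the mean removed).
# Illustrative only; the zero array stands in for a real image:
def _example_transformer():
    t = ImageTransformer(is_color=True)
    t.set_transpose((2, 0, 1))       # HWC -> CHW
    t.set_channel_swap([2, 1, 0])    # RGB -> BGR
    t.set_mean(np.array([103.939, 116.779, 123.68]))  # per-channel mean
    data = np.zeros((224, 224, 3), dtype=np.float32)  # stand-in image
    return t.transformer(data)       # shape (3, 224, 224)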
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.utils.preprocess_img import ImageClassificationDatasetCreater
from optparse import OptionParser
def option_parser():
    parser = OptionParser(usage="usage: python preprocess.py "\
"-i data_dir [options]")
parser.add_option("-i", "--input", action="store",
dest="input", help="Input data directory.")
parser.add_option("-s", "--size", action="store",
dest="size", help="Processed image size.")
parser.add_option("-c", "--color", action="store",
dest="color", help="whether to use color images.")
return parser.parse_args()
if __name__ == '__main__':
options, args = option_parser()
data_dir = options.input
processed_image_size = int(options.size)
color = options.color == "1"
data_creator = ImageClassificationDatasetCreater(data_dir,
processed_image_size,
color)
data_creator.num_per_batch = 1000
data_creator.overwrite = True
data_creator.create_batches()
#!/bin/bash
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
export PYTHONPATH=$PYTHONPATH:../../
data_dir=./data/cifar-out
python preprocess.py -i $data_dir -s 32 -c 1
#!/bin/bash
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
config=vgg_16_cifar.py
output=./cifar_vgg_model
log=train.log
paddle train \
--config=$config \
--dot_period=10 \
--log_period=100 \
--test_all_data_in_one_period=1 \
--use_gpu=1 \
--trainer_count=1 \
--num_passes=200 \
--save_dir=$output \
2>&1 | tee $log
python -m paddle.utils.plotcurve -i $log > plot.png
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
hadoop fs -Dhadoop.job.ugi=paddle_demo,paddle_demo -put data/cifar-out/batches/train_batch_* /app/idl/idl-dl/paddle/demo/image_classification/train/
hadoop fs -Dhadoop.job.ugi=paddle_demo,paddle_demo -put data/cifar-out/batches/test_batch_* /app/idl/idl-dl/paddle/demo/image_classification/test/
hadoop fs -Dhadoop.job.ugi=paddle_demo,paddle_demo -put data/cifar-out/batches/batches.meta /app/idl/idl-dl/paddle/demo/image_classification/train_meta
hadoop fs -Dhadoop.job.ugi=paddle_demo,paddle_demo -put data/cifar-out/batches/batches.meta /app/idl/idl-dl/paddle/demo/image_classification/test_meta
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
is_predict = get_config_arg("is_predict", bool, False)
####################Data Configuration ##################
if not is_predict:
data_dir='data/cifar-out/batches/'
meta_path=data_dir+'batches.meta'
args = {'meta':meta_path,'mean_img_size': 32,
'img_size': 32,'num_classes': 10,
'use_jpeg': 1,'color': "color"}
define_py_data_sources2(train_list=data_dir+"train.list",
test_list=data_dir+'test.list',
module='image_provider',
obj='processData',
args=args)
######################Algorithm Configuration #############
settings(
batch_size = 128,
learning_rate = 0.1 / 128.0,
learning_method = MomentumOptimizer(0.9),
regularization = L2Regularization(0.0005 * 128)
)
#######################Network Configuration #############
data_size=3*32*32
label_size=10
img = data_layer(name='image',
size=data_size)
# small_vgg is predefined in trainer_config_helpers.network
predict = small_vgg(input_image=img,
num_channels=3,
num_classes=label_size)
if not is_predict:
lbl = data_layer(name="label", size=label_size)
outputs(classification_cost(input=predict, label=lbl))
else:
outputs(predict)
#!/bin/env python
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Example:
python extract_para.py --preModel PREMODEL --preDict PREDICT \
--usrModel USRMODEL --usrDict USRDICT -d DIM
Options:
-h, --help show this help message and exit
--preModel PREMODEL the name of pretrained embedding model
--preDict PREDICT the name of pretrained dictionary
--usrModel usrModel the name of output usr embedding model
--usrDict usrDict the name of user specified dictionary
-d DIM dimension of parameter
"""
from optparse import OptionParser
import struct
def get_row_index(preDict, usrDict):
"""
Get the row positions for all words in user dictionary from pre-trained dictionary.
return: a list of row positions
Example: preDict='a\nb\nc\n', usrDict='a\nc\n', then return [0,2]
"""
pos = []
index = dict()
with open(preDict, "r") as f:
for line_index, line in enumerate(f):
word = line.strip().split()[0]
index[word] = line_index
with open(usrDict, "r") as f:
for line in f:
word = line.strip().split()[0]
pos.append(index[word])
return pos
def extract_parameters_by_usrDict(preModel, preDict, usrModel, usrDict, paraDim):
"""
Extract desired parameters from a pretrained embedding model based on user dictionary
"""
if paraDim not in [32, 64, 128, 256]:
raise RuntimeError("We only support 32, 64, 128, 256 dimensions now")
fi = open(preModel, "rb")
fo = open(usrModel, "wb")
# write filehead
rowIndex = get_row_index(preDict, usrDict)
newHead = struct.pack("iil", 0, 4, len(rowIndex) * paraDim)
fo.write(newHead)
bytes = 4 * paraDim
for i in range(0, len(rowIndex)):
# find the absolute position of input file
fi.seek(rowIndex[i] * bytes + 16, 0)
fo.write(fi.read(bytes))
print "extract parameters finish, total", len(rowIndex), "lines"
fi.close()
def main():
"""
Main entry for running paraconvert.py
"""
usage = "usage: \n" \
"python %prog --preModel PREMODEL --preDict PREDICT" \
" --usrModel USRMODEL --usrDict USRDICT -d DIM"
parser = OptionParser(usage)
parser.add_option("--preModel", action="store", dest="preModel",
help="the name of pretrained embedding model")
parser.add_option("--preDict", action="store", dest="preDict",
help="the name of pretrained dictionary")
parser.add_option("--usrModel", action="store", dest="usrModel",
help="the name of output usr embedding model")
parser.add_option("--usrDict", action="store", dest="usrDict",
help="the name of user specified dictionary")
parser.add_option("-d", action="store", dest="dim",
help="dimension of parameter")
(options, args) = parser.parse_args()
extract_parameters_by_usrDict(options.preModel, options.preDict,
options.usrModel, options.usrDict, int(options.dim))
if __name__ == '__main__':
main()
#!/bin/env python
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Example:
python paraconvert.py --b2t -i INPUT -o OUTPUT -d DIM
python paraconvert.py --t2b -i INPUT -o OUTPUT
Options:
-h, --help show this help message and exit
--b2t convert parameter file of embedding model from binary to text
--t2b convert parameter file of embedding model from text to binary
-i INPUT input parameter file name
-o OUTPUT output parameter file name
-d DIM dimension of parameter
"""
from optparse import OptionParser
import struct
def binary2text(input, output, paraDim):
"""
Convert a binary parameter file of embedding model to be a text file.
input: the name of input binary parameter file, the format is:
1) the first 16 bytes is filehead:
version(4 bytes): version of paddle, default = 0
floatSize(4 bytes): sizeof(float) = 4
paraCount(8 bytes): total number of parameter
2) the next (paraCount * 4) bytes is parameters, each has 4 bytes
output: the name of output text parameter file, for example:
0,4,32156096
-0.7845433,1.1937413,-0.1704215,...
0.0000909,0.0009465,-0.0008813,...
...
the format is:
1) the first line is filehead:
version=0, floatSize=4, paraCount=32156096
        2) the other lines contain the parameters
           a) each line contains paraDim parameters separated by ','
           b) there are paraCount/paraDim lines (one per embedding word)
paraDim: dimension of parameters
"""
fi = open(input, "rb")
fo = open(output, "w")
"""
"""
version, floatSize, paraCount = struct.unpack("iil", fi.read(16))
newHead = ','.join([str(version), str(floatSize), str(paraCount)])
print >> fo, newHead
bytes = 4 * int(paraDim)
format = "%df" % int(paraDim)
context = fi.read(bytes)
line = 0
while context:
numbers = struct.unpack(format, context)
lst = []
for i in numbers:
lst.append('%8.7f' % i)
print >> fo, ','.join(lst)
context = fi.read(bytes)
line += 1
fi.close()
fo.close()
print "binary2text finish, total", line, "lines"
def get_para_count(input):
"""
Compute the total number of embedding parameters in input text file.
input: the name of input text file
"""
numRows = 1
paraDim = 0
with open(input) as f:
line = f.readline()
paraDim = len(line.split(","))
for line in f:
numRows += 1
return numRows * paraDim
def text2binary(input, output, paddle_head=True):
"""
Convert a text parameter file of embedding model to be a binary file.
input: the name of input text parameter file, for example:
-0.7845433,1.1937413,-0.1704215,...
0.0000909,0.0009465,-0.0008813,...
...
the format is:
1) it doesn't have filehead
2) each line stores the same dimension of parameters,
the separator is commas ','
output: the name of output binary parameter file, the format is:
1) the first 16 bytes is filehead:
version(4 bytes), floatSize(4 bytes), paraCount(8 bytes)
2) the next (paraCount * 4) bytes is parameters, each has 4 bytes
"""
fi = open(input, "r")
fo = open(output, "wb")
newHead = struct.pack("iil", 0, 4, get_para_count(input))
fo.write(newHead)
count = 0
for line in fi:
line = line.strip().split(",")
for i in range(0, len(line)):
binary_data = struct.pack("f", float(line[i]))
fo.write(binary_data)
count += 1
fi.close()
fo.close()
print "text2binary finish, total", count, "lines"
def main():
"""
Main entry for running paraconvert.py
"""
usage = "usage: \n" \
"python %prog --b2t -i INPUT -o OUTPUT -d DIM \n" \
"python %prog --t2b -i INPUT -o OUTPUT"
parser = OptionParser(usage)
parser.add_option("--b2t", action="store_true",
help="convert parameter file of embedding model from binary to text")
parser.add_option("--t2b", action="store_true",
help="convert parameter file of embedding model from text to binary")
parser.add_option("-i", action="store", dest="input",
help="input parameter file name")
parser.add_option("-o", action="store", dest="output",
help="output parameter file name")
parser.add_option("-d", action="store", dest="dim",
help="dimension of parameter")
(options, args) = parser.parse_args()
if options.b2t:
binary2text(options.input, options.output, options.dim)
if options.t2b:
text2binary(options.input, options.output)
if __name__ == '__main__':
main()
#!/bin/bash
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
set -x
# download the dictionary and pretrained model
for file in baidu.dict model_32.emb model_64.emb model_128.emb model_256.emb
do
# following is the google drive address
# you can also directly download from https://pan.baidu.com/s/1o8q577s
wget https://www.googledrive.com/host/0B7Q8d52jqeI9ejh6Q1RpMTFQT1k/embedding/$file --no-check-certificate
done
fea_output/
features/
model.list
ResNet_50.dot
ResNet_50.png
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
import cPickle
import logging
from PIL import Image
import numpy as np
from optparse import OptionParser
import paddle.utils.image_util as image_util
from py_paddle import swig_paddle, util
from py_paddle import DataProviderWrapperConverter
from paddle.trainer.PyDataProviderWrapper import DenseSlot
from paddle.trainer.config_parser import parse_config
logging.basicConfig(format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s')
logging.getLogger().setLevel(logging.INFO)
class ImageClassifier():
def __init__(self, train_conf, model_dir=None,
resize_dim=256, crop_dim=224,
mean_file=None,
output_layer=None,
oversample=False, is_color=True):
"""
train_conf: network configure.
model_dir: string, directory of model.
resize_dim: int, resized image size.
crop_dim: int, crop size.
mean_file: string, image mean file.
oversample: bool, oversample means multiple crops, namely five
patches (the four corner patches and the center
patch) as well as their horizontal reflections,
ten crops in all.
"""
self.train_conf = train_conf
self.model_dir = model_dir
if model_dir is None:
self.model_dir = os.path.dirname(train_conf)
self.resize_dim = resize_dim
self.crop_dims = [crop_dim, crop_dim]
self.oversample = oversample
self.is_color = is_color
self.output_layer = output_layer
if self.output_layer:
assert isinstance(self.output_layer, basestring)
self.output_layer = self.output_layer.split(",")
self.transformer = image_util.ImageTransformer(is_color = is_color)
self.transformer.set_transpose((2,0,1))
self.transformer.set_channel_swap((2,1,0))
self.mean_file = mean_file
if self.mean_file is not None:
mean = np.load(self.mean_file)['data_mean']
mean = mean.reshape(3, self.crop_dims[0], self.crop_dims[1])
self.transformer.set_mean(mean) # mean pixel
else:
            # If no mean file is given, subtract per-channel means instead.
            # These three values are the channel means computed on ImageNet.
self.transformer.set_mean(np.array([103.939,116.779,123.68]))
conf_args = "is_test=1,use_gpu=1,is_predict=1"
conf = parse_config(train_conf, conf_args)
swig_paddle.initPaddle("--use_gpu=1")
self.network = swig_paddle.GradientMachine.createFromConfigProto(conf.model_config)
assert isinstance(self.network, swig_paddle.GradientMachine)
self.network.loadParameters(self.model_dir)
data_size = 3 * self.crop_dims[0] * self.crop_dims[1]
slots = [DenseSlot(data_size)]
is_sequence = False
self.converter = util.DataProviderWrapperConverter(is_sequence, slots)
def get_data(self, img_path):
"""
1. load image from img_path.
2. resize or oversampling.
        3. transform data: transpose, channel swap, subtract mean.
return K x H x W ndarray.
img_path: image path.
"""
image = image_util.load_image(img_path, self.is_color)
        # Another way to extract oversampled features is to crop and
        # average from a large feature map computed on the full-size
        # image; that approach reduces the computation.
if self.oversample:
# image_util.resize_image: short side is self.resize_dim
image = image_util.resize_image(image, self.resize_dim)
image = np.array(image)
input = np.zeros((1, image.shape[0], image.shape[1], 3),
dtype=np.float32)
input[0] = image.astype(np.float32)
input = image_util.oversample(input, self.crop_dims)
else:
image = image.resize(self.crop_dims, Image.ANTIALIAS)
input = np.zeros((1, self.crop_dims[0], self.crop_dims[1], 3),
dtype=np.float32)
input[0] = np.array(image).astype(np.float32)
data_in = []
for img in input:
img = self.transformer.transformer(img).flatten()
data_in.append([img.tolist()])
# paddle input: [[[]],[[]],...], [[]] is one sample.
return data_in
def forward(self, data, output_layer):
"""
        Run a forward pass and return the outputs of the layers given in
        output_layer (the Outputs() in the network config).
        data: py_paddle input data.
        output_layer: name(s) of the layer(s) whose output to fetch.
"""
input = self.converter(data)
self.network.forwardTest(input)
output = self.network.getLayerOutputs(output_layer)
res = {}
if isinstance(output_layer, basestring):
output_layer = [output_layer]
for name in output_layer:
            # For oversampling, average predictions across crops.
            # Without oversampling, output[name] has shape
            # (1, class_number), so taking the mean is still safe.
res[name] = output[name].mean(0)
return res
def predict(self, data_file):
"""
        Run forward and predict the label of every image listed in data_file.
        data_file: path of the input image list.
"""
image_files = open(data_file, 'rb').readlines()
results = {}
if self.output_layer is None:
self.output_layer = ["output"]
for line in image_files:
image = line.split()[0]
data = self.get_data(image)
prob = self.forward(data, self.output_layer)
lab = np.argsort(-prob[self.output_layer[0]])
results[image] = lab[0]
logging.info("Label of %s is: %d", image, lab[0])
return results
def extract(self, data_file, output_dir, batch_size = 10000):
"""
        Extract and save features of the output layers, which are
        specified in Outputs() in the network config.
data_file: file name of input data.
output_dir: saved directory of extracted features.
batch_size: sample number of one batch file.
"""
if not os.path.exists(output_dir):
os.mkdir(output_dir)
sample_num = 0
batch_num = 0
image_feature = {}
image_files = open(data_file, 'rb').readlines()
for idx, line in enumerate(image_files):
image = line.split()[0]
data = self.get_data(image)
feature = self.forward(data, self.output_layer)
# save extracted features
file_name = image.split("/")[-1]
image_feature[file_name] = feature
sample_num += 1
if sample_num == batch_size:
batch_name = os.path.join(output_dir, 'batch_%d' %(batch_num))
self.save_file(image_feature, batch_name)
logging.info('Finish batch %d', batch_num)
batch_num += 1
sample_num = 0
image_feature = {}
if idx % 1000 == 0:
logging.info('%d/%d, %s', idx, len(image_files), file_name)
if sample_num > 0:
batch_name = os.path.join(output_dir, 'batch_%d' %(batch_num))
self.save_file(image_feature, batch_name)
logging.info('Finish batch %d', batch_num)
logging.info('Done: make image feature batch')
def save_file(self, data, file):
of = open(file, 'wb')
cPickle.dump(data, of, protocol=cPickle.HIGHEST_PROTOCOL)
def option_parser():
"""
    Parse command line options for predicting / feature extraction.
"""
usage = "%prog -c config -i data_list -w model_dir [options]"
parser = OptionParser(usage="usage: %s" % usage)
parser.add_option("-j", "--job",
action="store", dest="job_type",
help="job type: predict, extract\
predict: predicting,\
extract: extract features")
parser.add_option("-c", "--conf",
action="store", dest="train_conf",
help="network config")
parser.add_option("-i", "--data",
action="store", dest="data_file",
help="image list")
parser.add_option("-w", "--model",
action="store", dest="model_path",
default=None, help="model path")
parser.add_option("-o", "--output_dir",
action="store", dest="output_dir",
default="output", help="output path")
parser.add_option("-m", "--mean", action="store",
dest="mean", default=None,
help="mean file.")
parser.add_option("-p", "--multi_crop", action="store_true",
dest="multi_crop", default=False,
help="Wether to use multiple crops on image.")
parser.add_option("-l", "--output_layer", action="store",
dest="output_layer", default=None,
help="--job=extract, specify layers to extract "\
"features, --job=predict, specify layer of "
"classification probability, output in resnet.py.")
return parser.parse_args()
def main():
"""
1. parse input arguments.
2. predicting or extract features according job type.
"""
options, args = option_parser()
obj = ImageClassifier(options.train_conf,
options.model_path,
mean_file=options.mean,
output_layer=options.output_layer,
oversample=options.multi_crop)
if options.job_type == "predict":
obj.predict(options.data_file)
elif options.job_type == "extract":
obj.extract(options.data_file,
options.output_dir)
if __name__ == '__main__':
main()
*image_list_provider_copy_1.py
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.utils.image_util import *
from paddle.trainer.PyDataProvider2 import *
def hook(settings, image_size, crop_size, color, file_list,
is_train, **kwargs):
"""
    Description: Initialize with a list of data files.
    file_list is the name list of input files.
    kwargs["load_data_args"] is the value of 'load_data_args'
    which can be set in the config. Each argument is separated by a comma.
    image_size: the crop image size.
    mean_meta: the path of the meta file that stores the mean image.
    mean_value: per-channel mean values, given directly rather than in a file.
                mean_meta and mean_value cannot both be set.
    color: 'color' means a color image. Otherwise, it means a gray image.
    is_train: whether the data provider is used for training.
              Data augmentation may differ between training and testing.
"""
settings.img_size = image_size
settings.crop_size = crop_size
settings.mean_img_size = settings.crop_size
settings.color = color # default is color
settings.is_train = is_train
settings.is_swap_channel = kwargs.get('swap_channel', None)
if settings.is_swap_channel is not None:
settings.swap_channel = settings.is_swap_channel
settings.is_swap_channel = True
if settings.color:
settings.img_input_size = settings.crop_size * settings.crop_size * 3
else:
settings.img_input_size = settings.crop_size * settings.crop_size
settings.file_list = file_list
settings.mean_meta = kwargs.get('mean_meta', None)
settings.mean_value = kwargs.get('mean_value', None)
# can not specify both mean_meta and mean_value.
assert not (settings.mean_meta and settings.mean_value)
if not settings.mean_meta:
settings.mean_value = kwargs.get('mean_value')
sz = settings.crop_size * settings.crop_size
settings.img_mean = np.zeros(sz * 3, dtype=np.single)
for idx, value in enumerate(settings.mean_value):
settings.img_mean[idx * sz: (idx + 1) * sz] = value
settings.img_mean = settings.img_mean.reshape(3, settings.crop_size,
settings.crop_size)
else:
settings.img_mean = load_meta(settings.mean_meta,
settings.mean_img_size,
settings.crop_size, settings.color)
settings.input_types = [
dense_vector(settings.img_input_size), # image feature
integer_value(1)] # labels
settings.logger.info('Image short side: %s', settings.img_size)
settings.logger.info('Crop size: %s', settings.crop_size)
settings.logger.info('Meta path: %s', settings.mean_meta)
if settings.is_swap_channel:
settings.logger.info('swap channel: %s', settings.swap_channel)
settings.logger.info('DataProvider Initialization finished')
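# A small sketch of the mean_value layout built above, illustrative only
# (np comes from paddle.utils.image_util via the star import at the top):
# each per-channel value is tiled over a crop_size x crop_size plane.
def _example_mean_layout(crop_size=2, mean_value=(10, 20, 30)):
    sz = crop_size * crop_size
    img_mean = np.zeros(sz * 3, dtype=np.single)
    for idx, value in enumerate(mean_value):
        img_mean[idx * sz: (idx + 1) * sz] = value
    # flat buffer: [10,10,10,10, 20,20,20,20, 30,30,30,30]
    return img_mean.reshape(3, crop_size, crop_size)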
@provider(init_hook=hook, should_shuffle=False)
def processData(settings, file_list):
"""
    The main function for loading data.
    Each entry of the file list is an "image_path label" pair:
    load the image, preprocess it, and yield it together with its label.
    file_list: one "image_path label" line from the list file.
"""
img_path, lab = file_list.strip().split(' ')
img = Image.open(img_path)
img.load()
img = img.resize((settings.img_size, settings.img_size), Image.ANTIALIAS)
img = np.array(img).astype(np.float32)
if len(img.shape) == 3:
img = np.swapaxes(img, 1, 2)
img = np.swapaxes(img, 1, 0)
# swap channel
if settings.is_swap_channel:
img = img[settings.swap_channel, :, :]
img_feat = preprocess_img(img,
settings.img_mean,
settings.crop_size,
settings.is_train,
settings.color)
yield img_feat.tolist(), int(lab.strip())
example/dog.jpg 0
example/cat.jpg 0
#!/bin/bash
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
# Set the names of the layers whose features you want to extract
# in Outputs() of resnet.py, e.g.:
# Outputs("res5_3_branch2c_conv", "res5_3_branch2c_bn")
layer_num=50
configure=./resnet.py
model_path=./model/resnet_$layer_num
fea_dir=fea_output
# The output is a text file.
# Each line contains one sample's features.
# If you set N layer names in Outputs(),
# each line contains N features separated by ";".
# create model list file.
model_list=./model.list
echo $model_path > $model_list
paddle train \
--local=true \
--job=test \
--config=$configure \
--model_list=$model_list \
--use_gpu=1 \
--predict_output_dir=$fea_dir \
--config_args=is_test=1,layer_num=$layer_num
#!/bin/bash
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
python classify.py \
--job=extract \
--conf=resnet.py \
--mean=model/mean_meta_224/mean.meta \
--model=model/resnet_50 \
--data=./example/test.list \
--output_layer="res5_3_branch2c_conv,res5_3_branch2c_bn" \
--output_dir=features
#!/bin/bash
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
DIR="$( cd "$(dirname "$0")" ; pwd -P )"
cd $DIR
mkdir model
cd model
echo "Downloading ResNet models..."
for file in resnet_50.tar.gz resnet_101.tar.gz resnet_152.tar.gz mean_meta_224.tar.gz
do
# following is the google drive address
# you can also directly download from https://pan.baidu.com/s/1o8q577s
wget https://www.googledrive.com/host/0B7Q8d52jqeI9ejh6Q1RpMTFQT1k/imagenet/$file --no-check-certificate
tar -xvf $file
rm $file
done
echo "Done."
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
import cPickle
import logging
logging.basicConfig(format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s')
logging.getLogger().setLevel(logging.INFO)
def load_feature_c(file):
"""
Load feature extracted by C++ interface.
Return a list.
file: feature file.
"""
features = []
f = open(file, 'r')
for line in f:
sample = []
for slot in line.strip().split(";"):
fea = [float(val) for val in slot.strip().split()]
if fea:
sample.append(fea)
features.append(sample)
f.close()
return features
def load_feature_py(feature_dir):
"""
Load feature extracted by python interface.
Return a dictionary.
feature_dir: directory of feature file.
"""
file_list = os.listdir(feature_dir)
file_list = [os.path.join(feature_dir, f) for f in file_list]
features = {}
for file_name in file_list:
with open(file_name, 'rb') as f:
feature = cPickle.load(f)
features.update(feature)
logging.info('Load feature file %s', file_name)
return features
if __name__ == '__main__':
print load_feature_py(sys.argv[1])
#print load_feature_c(sys.argv[1])
#!/bin/bash
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
: '
Visualize the deep residual network:
1. Use make_model_diagram.py to generate the dot file.
2. Use graphviz to convert the dot file to an image.
Usage:
./net_diagram.sh
'
DIR="$( cd "$(dirname "$0")" ; pwd -P )"
cd $DIR
img_type=png
img_fileprefix=ResNet_50
conf_filename=resnet.py
dot_filename=ResNet_50.dot
config_str="layer_num=50,data_provider=0"
python -m paddle.utils.make_model_diagram $conf_filename $dot_filename $config_str
# If you have graphviz installed, render the diagram like this:
# dot -Tpng -o ResNet_50.png ResNet_50.dot
#!/bin/bash
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
python classify.py \
--job=predict \
    --conf=resnet.py \
--model=model/resnet_50 \
--multi_crop \
--data=./example/test.list
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
"""
paper: https://arxiv.org/abs/1512.03385
"""
is_test = get_config_arg("is_test", bool, False)
is_predict = get_config_arg("is_predict", bool, False)
data_provider = get_config_arg("data_provider", bool, True)
layer_num = get_config_arg("layer_num", int, 50)
if not is_predict and data_provider:
train_list = 'train.list' if not is_test else None
    # mean.meta is the mean file of the ImageNet dataset.
    # mean.meta size: 3 x 224 x 224.
    # To use three per-channel mean values instead of a mean file, set:
    # 'mean_value': [103.939, 116.779, 123.68]
args={
'mean_meta': "model/mean_meta_224/mean.meta",
'image_size': 224, 'crop_size': 224,
        'color': True, 'swap_channel': [2, 1, 0]}
define_py_data_sources2(train_list,
'example/test.list',
module="example.image_list_provider",
obj="processData",
args=args)
batch_size = 1
learning_rate = 0.1 / batch_size
momentum = 0.9
weight_decay = 0.0001 * batch_size
default_momentum(momentum)
default_decay_rate(weight_decay)
Settings(
algorithm='sgd',
batch_size=batch_size,
learning_rate=learning_rate,
    # set the appropriate parameters according to your schedule
learning_method='momentum',
learning_rate_decay_a=0.5,
learning_rate_decay_b=1200000 * 10,
learning_rate_schedule="discexp",
)
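# As we understand the "discexp" schedule in this version of Paddle, the
# effective rate is learning_rate * decay_a ^ floor(samples_processed /
# decay_b); with decay_a=0.5 and decay_b=12e6 it halves every 12M samples.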
def conv_bn_layer(name, input, filter_size, num_filters,
stride, padding, channels=None,
active_type=ReluActivation()):
"""
A wrapper for conv layer with batch normalization layers.
Note:
conv layer has no activation.
"""
tmp = img_conv_layer(name=name + "_conv",
input=input,
filter_size=filter_size,
num_channels=channels,
num_filters=num_filters,
stride=stride,
padding=padding,
act=LinearActivation(),
bias_attr=False)
return batch_norm_layer(name=name + "_bn",
input=tmp,
act=active_type,
use_global_stats=is_test)
def bottleneck_block(name, input, num_filters1, num_filters2):
"""
    A wrapper for a bottleneck building block in ResNet.
Last conv_bn_layer has no activation.
Addto layer has activation of relu.
"""
last_name = conv_bn_layer(name=name + '_branch2a',
input=input,
filter_size=1,
num_filters=num_filters1,
stride=1,
padding=0)
last_name = conv_bn_layer(name=name + '_branch2b',
input=last_name,
filter_size=3,
num_filters=num_filters1,
stride=1,
padding=1)
last_name = conv_bn_layer(name=name + '_branch2c',
input=last_name,
filter_size=1,
num_filters=num_filters2,
stride=1,
padding=0,
active_type=LinearActivation())
return addto_layer(name=name + "_addto",
input=[input, last_name],
act=ReluActivation())
def mid_projection(name, input, num_filters1, num_filters2, stride=2):
"""
    A wrapper for the middle projection in ResNet.
    Projection shortcuts are used for increasing dimensions,
    while the other shortcuts are identity.
branch1: projection shortcuts are used for increasing
dimensions, has no activation.
branch2x: bottleneck building block, shortcuts are identity.
"""
# stride = 2
branch1 = conv_bn_layer(name=name + '_branch1',
input=input,
filter_size=1,
num_filters=num_filters2,
stride=stride,
padding=0,
active_type=LinearActivation())
last_name = conv_bn_layer(name=name + '_branch2a',
input=input,
filter_size=1,
num_filters=num_filters1,
stride=stride,
padding=0)
last_name = conv_bn_layer(name=name + '_branch2b',
input=last_name,
filter_size=3,
num_filters=num_filters1,
stride=1,
padding=1)
last_name = conv_bn_layer(name=name + '_branch2c',
input=last_name,
filter_size=1,
num_filters=num_filters2,
stride=1,
padding=0,
active_type=LinearActivation())
return addto_layer(name=name + "_addto",
input=[branch1, last_name],
act=ReluActivation())
def deep_res_net(res2_num=3, res3_num=4, res4_num=6, res5_num=3):
"""
A wrapper for 50,101,152 layers of ResNet.
res2_num: number of blocks stacked in conv2_x
res3_num: number of blocks stacked in conv3_x
res4_num: number of blocks stacked in conv4_x
res5_num: number of blocks stacked in conv5_x
"""
# For ImageNet
# conv1: 112x112
img = data_layer(name='input', size=224 * 224 * 3)
tmp = conv_bn_layer("conv1", img,
filter_size=7,
channels=3,
num_filters=64,
stride=2,
padding=3)
tmp = img_pool_layer(name="pool1", input=tmp, pool_size=3, stride=2)
# conv2_x: 56x56
tmp = mid_projection(name="res2_1",
input=tmp,
num_filters1=64,
num_filters2=256,
stride=1)
for i in xrange(2, res2_num + 1, 1):
tmp = bottleneck_block(name="res2_" + str(i),
input=tmp,
num_filters1=64,
num_filters2=256)
# conv3_x: 28x28
tmp = mid_projection(name="res3_1",
input=tmp,
num_filters1=128,
num_filters2=512)
for i in xrange(2, res3_num + 1, 1):
tmp = bottleneck_block(name="res3_" + str(i),
input=tmp, num_filters1=128,
num_filters2=512)
# conv4_x: 14x14
tmp = mid_projection(name="res4_1", input=tmp,
num_filters1=256, num_filters2=1024)
for i in xrange(2, res4_num + 1, 1):
tmp = bottleneck_block(name="res4_" + str(i),
input=tmp,
num_filters1=256,
num_filters2=1024)
# conv5_x: 7x7
tmp = mid_projection(name="res5_1", input=tmp,
num_filters1=512, num_filters2=2048)
for i in xrange(2, res5_num + 1, 1):
tmp = bottleneck_block(name="res5_" + str(i),
input=tmp, num_filters1=512,
num_filters2=2048)
tmp = img_pool_layer(name='avgpool',
input=tmp,
pool_size=7,
stride=1,
pool_type=AvgPooling())
output = fc_layer(name='output',
input=tmp,
size=1000,
act=SoftmaxActivation())
if not is_predict:
classification_cost(input=output, label=data_layer(name='label',
size=1))
def res_net_50():
deep_res_net(3, 4, 6, 3)
def res_net_101():
deep_res_net(3, 4, 23, 3)
def res_net_152():
deep_res_net(3, 8, 36, 3)
if not is_predict:
Inputs("input", "label")
else:
Inputs("input")
# Outputs("cost-softmax" if not is_predict else "output")
Outputs("res5_3_branch2c_conv", "res5_3_branch2c_bn")
if layer_num == 50:
res_net_50()
elif layer_num == 101:
res_net_101()
elif layer_num == 152:
res_net_152()
else:
print("Wrong layer number.")
*.pyc
data/dict.txt
data/dict_all.txt
data/labels.list
data/mosesdecoder-master/
data/reviews_Electronics_5.json.gz
data/test.list
data/test.txt
data/train.list
data/train.txt
dataprovider_copy_1.py
train.log
output
#!/bin/bash
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
DIR="$( cd "$(dirname "$0")" ; pwd -P )"
cd $DIR
echo "Downloading Amazon Electronics reviews data..."
# http://jmcauley.ucsd.edu/data/amazon/
wget http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/reviews_Electronics_5.json.gz
echo "Downloading mosesdecoder..."
#https://github.com/moses-smt/mosesdecoder
wget https://github.com/moses-smt/mosesdecoder/archive/master.zip
unzip master.zip
rm master.zip
echo "Done."
the device is cute , but that &apos;s just about all that &apos;s good. the specs are what you &apos;d expect : it &apos;s a wifi mic , with some noise filter options. the app has the option to upload your baby &apos;s name and photo , which is a cutesy touch. but the app is otherwise unstable and useless unless you upgrade for $ 60 / year.set up involves downloading the app , turning on the mic , switching your phone to the wifi network of the mic , telling the app your wifi settings , switching your wifi back to your home router. the app is then directly connected to your mic.the app is adware ! the main screen says &quot; cry notifications on / off : upgrade to evoz premium and receive a text message of email when your baby is crying &quot; .but the adware points out an important limitation , this monitor is only intended to be used from your home network. if you want to access it remotely , get a webcam. this app would make a lot more sense of the premium features were included with the hardware .
don &apos;t be fooled by my one star rating. if there was a zero , i would have selected it. this product was a waste of my money.it has never worked like the company said it supposed to. i only have one device , an iphone 4gs. after charging the the iphone mid way , the i.sound portable power max 16,000 mah is completely drained. the led light no longer lit up. when plugging the isound portable power max into a wall outlet to charge , it would charge for about 20-30 minutes and then all four battery led indicator lit up showing a full charge. i would leave it on to charge for the full 8 hours or more but each time with the same result upon using. don &apos;t buy this thing. put your money to good use elsewhere .
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer.PyDataProvider2 import *
# id of the word not in dictionary
UNK_IDX = 0
# initializer is called by the framework during initialization.
# It allows the user to describe the data types and set up the
# necessary data structures for later use.
# `settings` is an object. initializer needs to properly fill settings.input_types.
# initializer can also store other data structures needed later in process().
# In this example, the dictionary is stored in settings.
# `dictionary` and `kwargs` are arguments passed from trainer_config.lr.py
def initializer(settings, dictionary, **kwargs):
# Put the word dictionary into settings
settings.word_dict = dictionary
    # settings.input_types specifies the data types that the data
    # provider generates.
settings.input_types = [
# The first input is a sparse_binary_vector,
# which means each dimension of the vector is either 0 or 1. It is the
# bag-of-words (BOW) representation of the texts.
sparse_binary_vector(len(dictionary)),
# The second input is an integer. It represents the category id of the
# sample. 2 means there are two labels in the dataset.
# (1 for positive and 0 for negative)
integer_value(2)]
# Declaring a data provider. It uses 'initializer' as its init hook.
# It will cache the data generated during the first pass in memory, so that
# no on-the-fly data generation is needed during later passes.
# `setting` is the same object used by initializer()
# `file_name` is the name of a file listed in the train_list or test_list
# file given
# to define_py_data_sources2(). See trainer_config.lr.py.
@provider(init_hook=initializer, cache=CacheType.CACHE_PASS_IN_MEM)
def process(settings, file_name):
# Open the input data file.
with open(file_name, 'r') as f:
# Read each line.
for line in f:
# Each line contains the label and text of the comment, separated by \t.
label, comment = line.strip().split('\t')
# Split the words into a list.
words = comment.split()
# convert the words into a list of ids by looking them up in word_dict.
word_vector = [settings.word_dict.get(w, UNK_IDX) for w in words]
# Return the features for the current comment. The first is a list
# of ids representing a 0-1 binary sparse vector of the text,
# the second is the integer id of the label.
yield word_vector, int(label)
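# A tiny, self-contained illustration of the word-to-id conversion above.
# Illustrative only; the dictionary here is a toy stand-in, not the real one:
def _example_bow_ids():
    word_dict = {'the': 1, 'cat': 2}
    words = "the cat sat".split()
    # "sat" is out of vocabulary, so it maps to UNK_IDX (0) -> [1, 2, 0]
    return [word_dict.get(w, UNK_IDX) for w in words]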
def predict_initializer(settings, dictionary, **kwargs):
settings.word_dict = dictionary
settings.input_types = [
sparse_binary_vector(len(dictionary))
]
# Declaring a data provider for prediction. The difference from process
# is that no label is generated.
@provider(init_hook=predict_initializer)
def process_predict(settings, file_name):
with open(file_name, 'r') as f:
for line in f:
comment = line.strip()
            # split the comment into words; iterating the raw string would
            # look up single characters instead of words
            word_vector = [settings.word_dict.get(w, UNK_IDX)
                           for w in comment.split()]
yield word_vector
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer.PyDataProvider2 import *
UNK_IDX = 0
def initializer(settings, dictionary, **kwargs):
settings.word_dict = dictionary
settings.input_types = [
        # Define the type of the first input as a sequence of integers.
        # The values of the integers range from 0 to len(dictionary)-1.
integer_value_sequence(len(dictionary)),
# Define the second input for label id
integer_value(2)]
@provider(init_hook=initializer, cache=CacheType.CACHE_PASS_IN_MEM)
def process(settings, file_name):
with open(file_name, 'r') as f:
for line in f:
label, comment = line.strip().split('\t')
words = comment.split()
word_slot = [settings.word_dict.get(w, UNK_IDX) for w in words]
yield word_slot, int(label)
def predict_initializer(settings, dictionary, **kwargs):
settings.word_dict = dictionary
settings.input_types = [
integer_value(len(dictionary), seq_type=SequenceType.SEQUENCE)
]
@provider(init_hook=predict_initializer)
def process_predict(settings, file_name):
with open(file_name, 'r') as f:
for line in f:
comment = line.strip()
            # split the comment into words before looking up ids
            word_slot = [settings.word_dict.get(w, UNK_IDX)
                         for w in comment.split()]
yield word_slot
#!/bin/bash
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
#cfg=trainer_config.lr.py
#cfg=trainer_config.emb.py
#cfg=trainer_config.cnn.py
cfg=trainer_config.lstm.py
model="output/pass-00003"
paddle train \
--config=$cfg \
--use_gpu=false \
--job=test \
--init_model_path=$model \
--config_args=is_predict=1 \
    --predict_output_dir=.
mv rank-00000 result.txt
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
'''
1. Remove HTML before tokenizing.
2. Positive samples: rating score 5; negative samples: rating score 1-2.
3. Size of pos : neg = 1 : 1.
4. Size of the test set = min(25k, len(all_data) * 0.1); the rest is the training set.
5. The train set and test set are disjoint.
Usage:
python preprocess.py -i data_file [random seed]
'''
import sys,os
import re
import operator
import gzip,math
import random
import numpy as np
from bs4 import BeautifulSoup
from subprocess import Popen, PIPE
from optparse import OptionParser
def parse(path):
"""
Open .gz file.
"""
g = gzip.open(path, 'r')
for l in g:
yield eval(l)
def clean(review):
"""
Clean input review: remove HTML, convert words to lower cases.
"""
# Remove HTML
review_text = BeautifulSoup(review, "html.parser").get_text()
# Convert words to lower case
review_text = review_text.lower()
return review_text
def tokenize(sentences):
"""
Use tokenizer.perl to tokenize input sentences.
    tokenizer.perl is a tool from the Moses decoder.
sentences : a list of input sentences.
return: a list of processed text.
"""
dir = './data/mosesdecoder-master/scripts/tokenizer/tokenizer.perl'
tokenizer_cmd = [dir, '-l', 'en', '-q', '-']
assert isinstance(sentences, list)
text = "\n".join(sentences)
tokenizer = Popen(tokenizer_cmd, stdin=PIPE, stdout=PIPE)
tok_text, _ = tokenizer.communicate(text)
toks = tok_text.split('\n')[:-1]
return toks
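# Illustrative usage (requires the Moses scripts fetched by get_data.sh);
# the tokenizer escapes punctuation by default, e.g. an apostrophe becomes
# &apos;, which is why such entities appear in this demo's sample data:
#   tokenize(["It's a test."])  ->  ["It &apos;s a test ."]  (approximately)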
def create_dict(data, data_dir):
"""
Create dictionary based on data, and saved in data_dir/dict.txt.
The first line is unk \t -1.
data: list, input data.
data_dir: path to save dict.
"""
word_count = {}
for seq in data:
try:
for w in seq.lower().split():
if w not in word_count:
word_count[w] = 1
else:
word_count[w] += 1
except:
sys.stderr.write(seq+"\tERROR\n")
f = open(os.path.join(data_dir, 'dict.txt'), 'w')
f.write('%s\t%s\n' % ('unk', '-1'))
for k, v in sorted(word_count.items(), key=operator.itemgetter(1),\
reverse=True):
f.write('%s\t%s\n' % (k, v))
f.close()
def save_data(data, data_dir, prefix = ""):
file_name = os.path.join(data_dir, "%s.txt" % (prefix))
file(file_name,'w').write('\n'.join(data)+'\n')
file(os.path.join(data_dir, prefix+'.list'),'w').write('%s\n' % file_name)
def split_data(raw_txt):
"""
Extract positive and negative sample.
"""
pos = []
neg = []
count = 0
dup_cnt = 0
sys.stderr.write("extract raw data")
for l in raw_txt:
rating = l["overall"]
text = clean(l["reviewText"])
if rating == 5.0 and text:
pos.append(text)
if rating < 3.0 and text:
neg.append(text)
count += 1
if count % 20000==0:
sys.stderr.write(".")
sys.stderr.write("\n")
return pos, neg
def preprocess(pos_in, neg_in, data_dir, rand_seed):
# tokenize
sys.stderr.write("tokenize...\n")
tmppos = tokenize(pos_in)
tmpneg = tokenize(neg_in)
cnt = len(tmppos) + len(tmpneg)
    # unique samples
tmppos = list(set(tmppos))
tmpneg = list(set(tmpneg))
dup_cnt = cnt - len(tmppos) - len(tmpneg)
sys.stderr.write("\ntotal size of data set: %d, duplicate data: %d\n" % (cnt, dup_cnt))
# keep same size of positive and negative sample
min_len = min(len(tmppos), len(tmpneg))
tmppos = tmppos[0:min_len]
tmpneg = tmpneg[0:min_len]
    # create dictionary
sys.stderr.write("create dict with train and test data...\n")
all_data = tmppos + tmpneg
create_dict(all_data, data_dir)
# split into train set and test set
sys.stderr.write("split data...\n")
pos = ["1\t"+i for i in tmppos]
neg = ["0\t"+i for i in tmpneg]
random.seed(rand_seed)
random.shuffle(pos)
random.shuffle(neg)
    # take at most 12500 samples per class for the test set (25k in total)
test_len = min(12500, int(min_len * 0.1))
test = pos[0:test_len] + neg[0:test_len]
train = pos[test_len:] + neg[test_len:]
# save data
sys.stderr.write("save data...\n")
save_data(train, data_dir, prefix = 'train')
save_data(test, data_dir, prefix = 'test')
file(os.path.join(data_dir,'labels.list'),'w').write('neg\t0\npos\t1\n')
def option_parser():
    parser = OptionParser(usage="usage: python preprocess.py "\
"-i data_path [options]")
parser.add_option("-i", "--data", action="store",
dest="input", help="Input data path.")
parser.add_option("-s", "--seed", action="store",
dest="seed", default=1024,
help="Set random seed.")
return parser.parse_args()
def main():
reload(sys)
sys.setdefaultencoding('utf-8')
options, args = option_parser()
data=options.input
seed=options.seed
data_dir = os.path.dirname(data)
pos, neg = split_data(parse(data))
preprocess(pos, neg, data_dir, seed)
sys.stderr.write("Done.\n")
if __name__ == '__main__':
main()
#!/bin/bash
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
python preprocess.py -i data/reviews_Electronics_5.json.gz
# keep the unk entry plus the 30k most frequent words
mv data/dict.txt data/dict_all.txt
cat data/dict_all.txt | head -n 30001 > data/dict.txt
#!/bin/bash
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
cfg=trainer_config.lr.py
#cfg=trainer_config.emb.py
#cfg=trainer_config.cnn.py
#cfg=trainer_config.lstm.py
paddle train \
--config=$cfg \
--save_dir=./output \
--trainer_count=4 \
--log_period=20 \
--num_passes=15 \
--use_gpu=false \
--show_parameter_stats_period=100 \
--test_all_data_in_one_period=1 \
2>&1 | tee 'train.log'
# edit-mode: -*- python -*-
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
dict_file = "./data/dict.txt"
word_dict = dict()
with open(dict_file, 'r') as f:
for i, line in enumerate(f):
w = line.strip().split()[0]
word_dict[w] = i
is_predict = get_config_arg('is_predict', bool, False)
trn = 'data/train.list' if not is_predict else None
tst = 'data/test.list' if not is_predict else 'data/pred.list'
process = 'process' if not is_predict else 'process_predict'
define_py_data_sources2(train_list=trn,
test_list=tst,
module="dataprovider_emb",
obj=process,
args={"dictionary": word_dict})
batch_size = 128 if not is_predict else 1
settings(
batch_size=batch_size,
learning_rate=2e-3,
learning_method=AdamOptimizer(),
regularization=L2Regularization(8e-4),
gradient_clipping_threshold=25
)
data = data_layer(name="word", size=len(word_dict))
embedding = embedding_layer(input=data, size=128)
conv = sequence_conv_pool(input=embedding, context_len=3, hidden_size=512)
output = fc_layer(input=conv, size=2, act=SoftmaxActivation())
if is_predict:
maxid = maxid_layer(output)
outputs([maxid, output])
else:
label = data_layer(name="label", size=2)
cls = classification_cost(input=output, label=label)
outputs(cls)
# edit-mode: -*- python -*-
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
dict_file = "./data/dict.txt"
word_dict = dict()
with open(dict_file, 'r') as f:
for i, line in enumerate(f):
w = line.strip().split()[0]
word_dict[w] = i
is_predict = get_config_arg('is_predict', bool, False)
trn = 'data/train.list' if not is_predict else None
tst = 'data/test.list' if not is_predict else 'data/pred.list'
process = 'process' if not is_predict else 'process_predict'
define_py_data_sources2(train_list=trn,
test_list=tst,
module="dataprovider_emb",
obj=process,
args={"dictionary": word_dict})
batch_size = 128 if not is_predict else 1
settings(
batch_size=batch_size,
learning_rate=2e-3,
learning_method=AdamOptimizer()
)
data = data_layer(name="word", size=len(word_dict))
embedding = embedding_layer(input=data, size=128)
avg = pooling_layer(input=embedding, pooling_type=AvgPooling())
output = fc_layer(input=avg, size=2, act=SoftmaxActivation())
if is_predict:
maxid = maxid_layer(output)
outputs([maxid, output])
else:
label = data_layer(name="label", size=2)
cls = classification_cost(input=output, label=label)
outputs(cls)
# edit-mode: -*- python -*-
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
dict_file = "./data/dict.txt"
word_dict = dict()
with open(dict_file, 'r') as f:
for i, line in enumerate(f):
w = line.strip().split()[0]
word_dict[w] = i
is_predict = get_config_arg('is_predict', bool, False)
trn = 'data/train.list' if not is_predict else None
tst = 'data/test.list' if not is_predict else 'data/pred.list'
process = 'process' if not is_predict else 'process_predict'
# Define the data sources for the model.
# We need to use different process functions for training and prediction.
# For training, the input data includes both word IDs and labels.
# For prediction, the input data only includes word IDs.
define_py_data_sources2(train_list=trn,
test_list=tst,
module="dataprovider_bow",
obj=process,
args={"dictionary": word_dict})
batch_size = 128 if not is_predict else 1
settings(
batch_size=batch_size,
learning_rate=2e-3,
learning_method=AdamOptimizer(),
regularization=L2Regularization(8e-4),
gradient_clipping_threshold=25
)
# Define the data for text features. The size of the data layer is the number
# of words in the dictionary.
data = data_layer(name="word", size=len(word_dict))
# Define a fully connected layer with softmax activation
# (the multi-class generalization of logistic activation).
output = fc_layer(input=data, size=2, act=SoftmaxActivation())
if not is_predict:
    # For training, we need the label and the cost.
    # Define the category id for each example.
    # The size of the data layer is the number of labels.
    label = data_layer(name="label", size=2)
    # Define the cross-entropy classification loss and error.
    cls = classification_cost(input=output, label=label)
outputs(cls)
else:
    # For prediction, no label is needed. We only need to output the
    # classification result and the class probabilities.
maxid = maxid_layer(output)
outputs([maxid, output])
# edit-mode: -*- python -*-
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
dict_file = "./data/dict.txt"
word_dict = dict()
with open(dict_file, 'r') as f:
for i, line in enumerate(f):
w = line.strip().split()[0]
word_dict[w] = i
is_predict = get_config_arg('is_predict', bool, False)
trn = 'data/train.list' if not is_predict else None
tst = 'data/test.list' if not is_predict else 'data/pred.list'
process = 'process' if not is_predict else 'process_predict'
define_py_data_sources2(train_list=trn,
test_list=tst,
module="dataprovider_emb",
obj=process,
args={"dictionary": word_dict})
batch_size = 128 if not is_predict else 1
settings(
batch_size=batch_size,
learning_rate=2e-3,
learning_method=AdamOptimizer(),
regularization=L2Regularization(8e-4),
gradient_clipping_threshold=25
)
bias_attr = ParamAttr(initial_std=0., l2_rate=0.)
data = data_layer(name="word", size=len(word_dict))
emb = embedding_layer(input=data, size=128)
fc = fc_layer(input=emb, size=512,
act=LinearActivation(),
bias_attr=bias_attr,
layer_attr=ExtraAttr(drop_rate=0.1))
lstm = lstmemory(input=fc, act=TanhActivation(),
bias_attr=bias_attr,
layer_attr=ExtraAttr(drop_rate=0.25))
lstm_last = pooling_layer(input=lstm, pooling_type=MaxPooling())
output = fc_layer(input=lstm_last, size=2,
bias_attr=bias_attr,
act=SoftmaxActivation())
if is_predict:
maxid = maxid_layer(output)
outputs([maxid, output])
else:
label = data_layer(name="label", size=2)
cls = classification_cost(input=output, label=label)
outputs(cls)
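# Network sketch of the config above: word ids -> 128-d embedding -> 512-d
# linear fc with dropout 0.1 -> LSTM (tanh, dropout 0.25) -> max pooling over
# time -> 2-way softmax.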
log.txt
data/meta.bin
data/ml-1m
data/ratings.dat.train
data/ratings.dat.test
data/train.list
data/test.list
dataprovider_copy_1.py
*.pyc
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer.PyDataProvider2 import *
def meta_to_header(meta, name):
metas = meta[name]['__meta__']['raw_meta']
for each_meta in metas:
if each_meta['type'] == 'id':
yield integer_value(each_meta['max'])
elif each_meta['type'] == 'embedding':
is_seq = each_meta['seq'] == 'sequence'
yield integer_value(len(each_meta['dict']),
seq_type=SequenceType.SEQUENCE if is_seq
else SequenceType.NO_SEQUENCE)
elif each_meta['type'] == 'one_hot_dense':
yield dense_vector(len(each_meta['dict']))
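# A sketch of how these headers are consumed (field layout follows the
# "movie" entry of config.json below; the names here are illustrative):
#
#   headers = list(meta_to_header(meta, 'movie'))
#   # e.g. [integer_value(max_movie_id),                                # "id"
#   #       integer_value(title_dict_len, seq_type=SequenceType.SEQUENCE),
#   #       dense_vector(genres_dict_len)]                          # "genres"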
{
"user": {
"file": {
"name": "users.dat",
"delimiter": "::"
},
"fields": ["id", "gender", "age", "occupation"]
},
"movie": {
"file": {
"name": "movies.dat",
"delimiter": "::"
},
"fields": ["id", "title", "genres"]
}
}
#!/bin/env python2
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
config_generator.py
Usage:
./config_generator.py <config_file> [--output_format=<output_format>]
./config_generator.py -h | --help
Options:
-h --help Show this screen.
  --output_format=<output_format>  Output config format (json or yaml) [default: json].
"""
import json
import docopt
import copy
DEFAULT_FILE = {
"type": "split",
"delimiter": ","
}
DEFAULT_FIELD = {
"id": {
"type": "id"
},
"gender": {
"name": "gender",
"type": "embedding",
"dict": {
"type": "char_based"
}
},
"age": {
"name": "age",
"type": "embedding",
"dict": {
"type": "whole_content",
"sort": True
}
},
"occupation": {
"name": "occupation",
"type": "embedding",
"dict": {
"type": "whole_content",
"sort": "true"
}
},
"title": {
"regex": {
"pattern": r"^(.*)\((\d+)\)$",
"group_id": 1,
"strip": True
},
"name": "title",
"type": {
"name": "embedding",
"seq_type": "sequence",
},
"dict": {
"type": "char_based"
}
},
"genres": {
"type": "one_hot_dense",
"dict": {
"type": "split",
"delimiter": "|"
},
"name": "genres"
}
}
def merge_dict(master_dict, slave_dict):
return dict(((k, master_dict.get(k) or slave_dict.get(k))
for k in set(slave_dict) | set(master_dict)))
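# merge_dict prefers values from master_dict and falls back to slave_dict,
# e.g. merge_dict({"a": 1}, {"a": 2, "b": 3}) == {"a": 1, "b": 3}.
# Because it uses `or`, a falsy master value (0, "", None) is also replaced
# by the slave value.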
def main(filename, fmt):
with open(filename, 'r') as f:
conf = json.load(f)
obj = dict()
for k in conf:
val = conf[k]
file_dict = val['file']
file_dict = merge_dict(file_dict, DEFAULT_FILE)
fields = []
for pos, field_key in enumerate(val['fields']):
assert isinstance(field_key, basestring)
field = copy.deepcopy(DEFAULT_FIELD[field_key])
field['pos'] = pos
fields.append(field)
obj[k] = {
"file": file_dict,
"fields": fields
}
meta = {
"meta": obj
}
# print meta
if fmt == 'json':
def formatter(x):
import json
return json.dumps(x, indent=2)
elif fmt == 'yaml':
def formatter(x):
import yaml
return yaml.safe_dump(x, default_flow_style=False)
else:
raise NotImplementedError("Dump format %s is not implemented" % fmt)
print formatter(meta)
if __name__ == '__main__':
args = docopt.docopt(__doc__, version="0.1.0")
main(args["<config_file>"], args["--output_format"])
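# Usage sketch, matching preprocess.sh later in this demo:
#
#   python config_generator.py config.json > meta_config.json
#   python config_generator.py config.json --output_format=yaml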
{
"meta": {
"movie": {
"fields": [
{
"type": "id",
"pos": 0
},
{
"regex": {
"pattern": "^(.*)\\((\\d+)\\)$",
"group_id": 1,
"strip": true
},
"type": {
"seq_type": "sequence",
"name": "embedding"
},
"dict": {
"type": "char_based"
},
"name": "title",
"pos": 1
},
{
"type": "one_hot_dense",
"dict": {
"delimiter": "|",
"type": "split"
},
"name": "genres",
"pos": 2
}
],
"file": {
"delimiter": "::",
"type": "split",
"name": "movies.dat"
}
},
"user": {
"fields": [
{
"type": "id",
"pos": 0
},
{
"type": "embedding",
"dict": {
"type": "char_based"
},
"name": "gender",
"pos": 1
},
{
"type": "embedding",
"dict": {
"sort": true,
"type": "whole_content"
},
"name": "age",
"pos": 2
},
{
"type": "embedding",
"dict": {
"sort": "true",
"type": "whole_content"
},
"name": "occupation",
"pos": 3
}
],
"file": {
"delimiter": "::",
"type": "split",
"name": "users.dat"
}
}
}
}
#!/bin/bash
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -ex
cd "$(dirname "$0")"
# download the dataset
wget http://files.grouplens.org/datasets/movielens/ml-1m.zip
# unzip the dataset
unzip ml-1m.zip
# remove the unused zip file
rm ml-1m.zip
#!/bin/env python2
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Split the movielens 1m dataset into train/test files.
Usage:
    ./split.py <input_file> [--test_ratio=<test_ratio>] [--delimiter=<delimiter>]
    ./split.py -h | --help
Options:
-h --help Show this screen.
--version Show version.
--test_ratio=<test_ratio> Test ratio for separate [default: 0.1].
--delimiter=<delimiter> File delimiter [default: ,].
"""
import docopt
import collections
import random
def process(test_ratio, input_file, delimiter, **kwargs):
test_ratio = float(test_ratio)
rating_dict = collections.defaultdict(list)
with open(input_file, 'r') as f:
for line in f:
user_id = int(line.split(delimiter)[0])
rating_dict[user_id].append(line.strip())
with open(input_file + ".train", 'w') as train_file:
with open(input_file + ".test", 'w') as test_file:
for k in rating_dict.keys():
lines = rating_dict[k]
assert isinstance(lines, list)
random.shuffle(lines)
test_len = int(len(lines) * test_ratio)
for line in lines[:test_len]:
print >> test_file, line
for line in lines[test_len:]:
print >> train_file, line
if __name__ == '__main__':
args = docopt.docopt(__doc__, version='0.1.0')
kwargs = dict()
for key in args.keys():
if key != '--help':
param_name = key
assert isinstance(param_name, str)
param_name = param_name.replace('<', '')
param_name = param_name.replace('>', '')
param_name = param_name.replace('--', '')
kwargs[param_name] = args[key]
process(**kwargs)
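# Usage sketch, matching preprocess.sh later in this demo:
#
#   python split.py ml-1m/ratings.dat --delimiter=:: --test_ratio=0.1
#
# This writes ml-1m/ratings.dat.train and ml-1m/ratings.dat.test, keeping
# roughly test_ratio of each user's ratings in the test split.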
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
try:
import cPickle as pickle
except ImportError:
import pickle
from paddle.trainer.PyDataProvider2 import *
import common_utils # parse
def hook(settings, meta, **kwargs):
"""
Init hook is invoked before process data. It will set obj.slots and store
data meta.
:param obj: global object. It will passed to process routine.
:type obj: object
:param meta: the meta file object, which passed from trainer_config. Meta
file record movie/user features.
:param kwargs: unused other arguments.
"""
del kwargs # unused kwargs
    # Headers define the slots used by paddle:
    # the first part is movie features,
    # the second part is user features,
    # and the final part is the rating score.
    # Each header is [USE_SEQ_OR_NOT?, SlotType].
headers = list(common_utils.meta_to_header(meta, 'movie'))
headers.extend(list(common_utils.meta_to_header(meta, 'user')))
headers.append(dense_vector(1)) # Score
# slot types.
settings.input_types = headers
settings.meta = meta
@provider(init_hook=hook, cache=CacheType.CACHE_PASS_IN_MEM)
def process(settings, filename):
with open(filename, 'r') as f:
for line in f:
            # Get a rating record (user_id::movie_id::score::timestamp);
            # the trailing timestamp is dropped.
            user_id, movie_id, score = map(int, line.split('::')[:-1])
            # Rescale the rating: score in [1, 5] maps linearly to [-3, +5].
            score = float(score) * 2 - 5.0
# Get movie/user features by movie_id, user_id
movie_meta = settings.meta['movie'][movie_id]
user_meta = settings.meta['user'][user_id]
outputs = [movie_id - 1]
# Then add movie features
for each_meta in movie_meta:
outputs.append(each_meta)
# Then add user id.
outputs.append(user_id - 1)
# Then add user features.
for each_meta in user_meta:
outputs.append(each_meta)
# Finally, add score
outputs.append([score])
# Return data to paddle
yield outputs
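# Each yielded record is, in order:
#   [movie_id - 1, <movie features...>, user_id - 1, <user features...>, [score]]
# which matches the header list assembled in hook() above.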
#!/bin/bash
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
function get_best_pass() {
    # Join each "Test" log line with its following "pass-" line, extract
    # "<cost> <pass-id>" pairs, then sort by cost and keep the smallest.
    cat $1 | grep -Pzo 'Test .*\n.*pass-.*' | sed -r 'N;s/Test.* cost=([0-9]+\.[0-9]+).*\n.*pass-([0-9]+)/\1 \2/g' | sort | head -n 1
}
LOG=`get_best_pass log.txt`
LOG=(${LOG})
echo "Best pass is ${LOG[1]}, error is ${LOG[0]}, which gives a prediction error of "`echo ${LOG[0]} | python -c 'import math; print math.sqrt(float(raw_input()))/2'`
evaluate_pass="output/pass-${LOG[1]}"
echo 'evaluating from pass '$evaluate_pass
#!/bin/env python2
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from py_paddle import swig_paddle, DataProviderWrapperConverter
from common_utils import *
from paddle.trainer.config_parser import parse_config
try:
import cPickle as pickle
except ImportError:
import pickle
import sys
if __name__ == '__main__':
model_path = sys.argv[1]
swig_paddle.initPaddle('--use_gpu=0')
conf = parse_config("trainer_config.py", "is_predict=1")
network = swig_paddle.GradientMachine.createFromConfigProto(conf.model_config)
assert isinstance(network, swig_paddle.GradientMachine)
network.loadParameters(model_path)
with open('meta.bin', 'rb') as f:
meta = pickle.load(f)
headers = list(meta_to_header(meta, 'movie'))
headers.extend(list(meta_to_header(meta, 'user')))
cvt = DataProviderWrapperConverter(True, map(lambda x: x[1], headers))
while True:
movie_id = int(raw_input("Input movie_id: "))
user_id = int(raw_input("Input user_id: "))
movie_meta = meta['movie'][movie_id] # Query Data From Meta.
user_meta = meta['user'][user_id]
data = [movie_id - 1]
data.extend(movie_meta)
data.append(user_id - 1)
data.extend(user_meta)
data = map(lambda (header, val): val if header[0] else [val],
zip(headers, data))
print "Prediction Score is %.2f" % ((network.forwardTest(cvt([
data]))[0]['value'][0][0] + 5) / 2)
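# Usage sketch (the pass directory is produced by train.sh; the best pass
# number can be found with evaluate.sh):
#
#   python prediction.py output/pass-00000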
#!/bin/bash
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
cd "$(dirname "$0")"
delimiter='::'
dir=ml-1m
cd data
echo 'generate meta config file'
python config_generator.py config.json > meta_config.json
echo 'generate meta file'
python meta_generator.py $dir meta.bin --config=meta_config.json
echo 'split train/test file'
python split.py $dir/ratings.dat --delimiter=${delimiter} --test_ratio=0.1
echo 'shuffle train file'
shuf $dir/ratings.dat.train > ratings.dat.train
cp $dir/ratings.dat.test .
echo "./data/ratings.dat.train" > train.list
echo "./data/ratings.dat.test" > test.list
#!/bin/bash
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
paddle train \
--config=trainer_config.py \
--save_dir=./output \
--use_gpu=false \
         --trainer_count=4 \
--test_all_data_in_one_period=true \
--log_period=100 \
--dot_period=1 \
--num_passes=50 2>&1 | tee 'log.txt'