提交 d9a1f3e5 编写于 作者: W wopeizl 提交者: dzhwinter

Windows/online (#14474)

* add recordio support

* disable the openblas multi-thread on windows since no support
adjust the python script

* code style

* code style
test=develop

* add create_recordio_file_reader back

* fix code style
test=develop

* fix the gtest.cmake on windows

* fix cc_test on windows

* fix the win build
test=develop

* remove fused compile support on windows
test=develop

* add the jit support
test=develop

* add the jit support, test=develop

* add the jit support, test=develop

* add the jit back
fix compile error on windows

* rollback test=develop

* test case fix

* disable DSO by default on windows

* exclude warpctc_op on windows

* exclude the dynload_warpctc out on windows
test=develop

* fix the scripts error
test=develop

* disable avx on windows by default
test=develop

* re-organize the cmake file

* disable mkl on windows by default

* add warp_ctc back

* fix the dependency

* fix the dependency

* fix the build issue on windows

* remove unsupported flag on windows

* code style

* code style
test=develop

* fix issue

* add profiler, parallel_executor back

* clean up the pre-definitions on windows

* fix build issue

* test=develop
上级 dc75cc91
...@@ -130,6 +130,21 @@ if (APPLE OR WIN32) ...@@ -130,6 +130,21 @@ if (APPLE OR WIN32)
"Disable MKL for building on mac and windows" FORCE) "Disable MKL for building on mac and windows" FORCE)
endif() endif()
if (WIN32)
set(WITH_AVX OFF CACHE STRING
"Disable AVX when compiling for Windows" FORCE)
set(WITH_DSO OFF CACHE STRING
"Disable DSO when compiling for Windows" FORCE)
set(WITH_MKL OFF CACHE STRING
"Disable MKL when compiling for Windows" FORCE)
set(WITH_DISTRIBUTE OFF CACHE STRING
"Disable DISTRIBUTE when compiling for Windows" FORCE)
set(WITH_C_API OFF CACHE STRING
"Disable C_API when compiling for Windows" FORCE)
set(WITH_FLUID_ONLY ON CACHE STRING
"Enable FLUID_ONLY when compiling for Windows" FORCE)
endif()
set(THIRD_PARTY_PATH "${CMAKE_BINARY_DIR}/third_party" CACHE STRING set(THIRD_PARTY_PATH "${CMAKE_BINARY_DIR}/third_party" CACHE STRING
"A path setting third party libraries download & build directories.") "A path setting third party libraries download & build directories.")
...@@ -190,11 +205,11 @@ include(external/pybind11) # download pybind11 ...@@ -190,11 +205,11 @@ include(external/pybind11) # download pybind11
include(external/cares) include(external/cares)
include(external/cub) include(external/cub)
include(external/xxhash) # download xxhash include(external/xxhash) # download xxhash
if (NOT WIN32)
# there is no official support of snappystream, warpctc, nccl, cupti in windows
include(external/snappy) # download snappy include(external/snappy) # download snappy
include(external/snappystream) # download snappystream include(external/snappystream) # download snappystream
if (NOT WIN32)
# there is no official support of warpctc, nccl, cupti in windows
include(external/warpctc) # download, build, install warpctc include(external/warpctc) # download, build, install warpctc
include(cupti) include(cupti)
endif (NOT WIN32) endif (NOT WIN32)
......
...@@ -50,7 +50,11 @@ IF(WITH_TESTING) ...@@ -50,7 +50,11 @@ IF(WITH_TESTING)
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
-DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
-DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}
-DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}
-DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
-DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG}
-DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE}
-DCMAKE_INSTALL_PREFIX=${GTEST_INSTALL_DIR} -DCMAKE_INSTALL_PREFIX=${GTEST_INSTALL_DIR}
-DCMAKE_POSITION_INDEPENDENT_CODE=ON -DCMAKE_POSITION_INDEPENDENT_CODE=ON
-DBUILD_GMOCK=ON -DBUILD_GMOCK=ON
......
...@@ -24,7 +24,11 @@ set(SNAPPY_SOURCES_DIR ${THIRD_PARTY_PATH}/snappy) ...@@ -24,7 +24,11 @@ set(SNAPPY_SOURCES_DIR ${THIRD_PARTY_PATH}/snappy)
set(SNAPPY_INSTALL_DIR ${THIRD_PARTY_PATH}/install/snappy) set(SNAPPY_INSTALL_DIR ${THIRD_PARTY_PATH}/install/snappy)
set(SNAPPY_INCLUDE_DIR "${SNAPPY_INSTALL_DIR}/include" CACHE PATH "snappy include directory." FORCE) set(SNAPPY_INCLUDE_DIR "${SNAPPY_INSTALL_DIR}/include" CACHE PATH "snappy include directory." FORCE)
set(SNAPPY_LIBRARIES "${SNAPPY_INSTALL_DIR}/lib/libsnappy.a") if (WIN32)
set(SNAPPY_LIBRARIES "${SNAPPY_INSTALL_DIR}/lib/snappy.lib")
else(WIN32)
set(SNAPPY_LIBRARIES "${SNAPPY_INSTALL_DIR}/lib/libsnappy.a")
endif (WIN32)
ExternalProject_Add( ExternalProject_Add(
extern_snappy extern_snappy
...@@ -34,8 +38,12 @@ ExternalProject_Add( ...@@ -34,8 +38,12 @@ ExternalProject_Add(
UPDATE_COMMAND "" UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
-DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
-DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
-DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG}
-DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE}
-DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
-DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}
-DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}
-DCMAKE_INSTALL_PREFIX=${SNAPPY_INSTALL_DIR} -DCMAKE_INSTALL_PREFIX=${SNAPPY_INSTALL_DIR}
-DCMAKE_INSTALL_LIBDIR=${SNAPPY_INSTALL_DIR}/lib -DCMAKE_INSTALL_LIBDIR=${SNAPPY_INSTALL_DIR}/lib
-DCMAKE_POSITION_INDEPENDENT_CODE=ON -DCMAKE_POSITION_INDEPENDENT_CODE=ON
......
...@@ -18,36 +18,45 @@ ENDIF() ...@@ -18,36 +18,45 @@ ENDIF()
include (ExternalProject) include (ExternalProject)
# NOTE: snappy is needed when linking with recordio
set(SNAPPYSTREAM_SOURCES_DIR ${THIRD_PARTY_PATH}/snappy_stream) set(SNAPPYSTREAM_SOURCES_DIR ${THIRD_PARTY_PATH}/snappy_stream)
set(SNAPPYSTREAM_INSTALL_DIR ${THIRD_PARTY_PATH}/install/snappy_stream) set(SNAPPYSTREAM_INSTALL_DIR ${THIRD_PARTY_PATH}/install/snappy_stream)
set(SNAPPYSTREAM_INCLUDE_DIR "${SNAPPYSTREAM_INSTALL_DIR}/include" CACHE PATH "snappy stream include directory." FORCE) set(SNAPPYSTREAM_INCLUDE_DIR "${SNAPPYSTREAM_INSTALL_DIR}/include" CACHE PATH "snappy stream include directory." FORCE)
set(SNAPPYSTREAM_LIBRARIES "${SNAPPYSTREAM_INSTALL_DIR}/lib/libsnappystream.a") if(WIN32)
# Fix me, VS2015 come without VLA support
ExternalProject_Add( set(SNAPPYSTREAM_LIBRARIES "${SNAPPYSTREAM_INSTALL_DIR}/lib/snappystream.lib")
extern_snappystream MESSAGE(WARNING, "In windows, snappystream has no compile support for windows,
GIT_REPOSITORY "https://github.com/hoxnox/snappystream.git" please build it manually and put it at " ${SNAPPYSTREAM_INSTALL_DIR})
GIT_TAG "0.2.8" else(WIN32)
PREFIX ${SNAPPYSTREAM_SOURCES_DIR} set(SNAPPYSTREAM_LIBRARIES "${SNAPPYSTREAM_INSTALL_DIR}/lib/libsnappystream.a")
UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} ExternalProject_Add(
-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} extern_snappystream
-DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} GIT_REPOSITORY "https://github.com/hoxnox/snappystream.git"
-DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} GIT_TAG "0.2.8"
-DCMAKE_INSTALL_PREFIX=${SNAPPY_INSTALL_DIR} PREFIX ${SNAPPYSTREAM_SOURCES_DIR}
-DCMAKE_INSTALL_LIBDIR=${SNAPPY_INSTALL_DIR}/lib UPDATE_COMMAND ""
-DCMAKE_POSITION_INDEPENDENT_CODE=ON CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
-DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
-DSNAPPY_ROOT=${SNAPPY_INSTALL_DIR} -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
${EXTERNAL_OPTIONAL_ARGS} -DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG}
CMAKE_CACHE_ARGS -DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE}
-DCMAKE_INSTALL_PREFIX:PATH=${SNAPPYSTREAM_INSTALL_DIR} -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
-DCMAKE_INSTALL_LIBDIR:PATH=${SNAPPYSTREAM_INSTALL_DIR}/lib -DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}
-DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} -DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}
DEPENDS snappy -DCMAKE_INSTALL_PREFIX=${SNAPPY_INSTALL_DIR}
) -DCMAKE_INSTALL_LIBDIR=${SNAPPY_INSTALL_DIR}/lib
-DCMAKE_POSITION_INDEPENDENT_CODE=ON
-DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
-DSNAPPY_ROOT=${SNAPPY_INSTALL_DIR}
${EXTERNAL_OPTIONAL_ARGS}
CMAKE_CACHE_ARGS
-DCMAKE_INSTALL_PREFIX:PATH=${SNAPPYSTREAM_INSTALL_DIR}
-DCMAKE_INSTALL_LIBDIR:PATH=${SNAPPYSTREAM_INSTALL_DIR}/lib
-DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
DEPENDS snappy
)
endif(WIN32)
add_library(snappystream STATIC IMPORTED GLOBAL) add_library(snappystream STATIC IMPORTED GLOBAL)
set_property(TARGET snappystream PROPERTY IMPORTED_LOCATION ${SNAPPYSTREAM_LIBRARIES}) set_property(TARGET snappystream PROPERTY IMPORTED_LOCATION ${SNAPPYSTREAM_LIBRARIES})
......
...@@ -351,6 +351,9 @@ function(cc_test TARGET_NAME) ...@@ -351,6 +351,9 @@ function(cc_test TARGET_NAME)
cmake_parse_arguments(cc_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) cmake_parse_arguments(cc_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
add_executable(${TARGET_NAME} ${cc_test_SRCS}) add_executable(${TARGET_NAME} ${cc_test_SRCS})
target_link_libraries(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main lod_tensor memory gtest gflags glog) target_link_libraries(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main lod_tensor memory gtest gflags glog)
if(WIN32)
target_link_libraries(${TARGET_NAME} shlwapi)
endif(WIN32)
add_dependencies(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main lod_tensor memory gtest gflags glog) add_dependencies(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main lod_tensor memory gtest gflags glog)
add_test(NAME ${TARGET_NAME} add_test(NAME ${TARGET_NAME}
COMMAND ${TARGET_NAME} ${cc_test_ARGS} COMMAND ${TARGET_NAME} ${cc_test_ARGS}
......
...@@ -84,9 +84,7 @@ function(op_library TARGET) ...@@ -84,9 +84,7 @@ function(op_library TARGET)
endif() endif()
if (WIN32) if (WIN32)
# remove windows unsupported op, because windows has no nccl, no warpctc such ops. # remove windows unsupported op, because windows has no nccl, no warpctc such ops.
foreach(windows_unsupport_op "nccl_op" "gen_nccl_id_op" "warpctc_op" "hierarchical_sigmoid_op" foreach(windows_unsupport_op "nccl_op" "gen_nccl_id_op" "warpctc_op")
"crf_decoding_op" "select_op" "lstmp_op" "gru_op" "fusion_gru_op" "lstm_op" "fusion_lstm_op" "cumsum_op"
"fusion_seqconv_eltadd_relu_op" "channel_send_op" "channel_create_op" "channel_close_op" "channel_recv_op")
if ("${TARGET}" STREQUAL "${windows_unsupport_op}") if ("${TARGET}" STREQUAL "${windows_unsupport_op}")
return() return()
endif() endif()
......
...@@ -57,43 +57,46 @@ int main() ...@@ -57,43 +57,46 @@ int main()
return 0; return 0;
}" SSE3_FOUND) }" SSE3_FOUND)
# Check AVX # disable AVX by default on windows
set(CMAKE_REQUIRED_FLAGS ${AVX_FLAG}) if(NOT WIN32)
set(AVX_FOUND_EXITCODE 1 CACHE STRING "Result from TRY_RUN" FORCE) # Check AVX
CHECK_CXX_SOURCE_RUNS(" set(CMAKE_REQUIRED_FLAGS ${AVX_FLAG})
#include <immintrin.h> set(AVX_FOUND_EXITCODE 1 CACHE STRING "Result from TRY_RUN" FORCE)
int main() CHECK_CXX_SOURCE_RUNS("
{ #include <immintrin.h>
__m256 a = _mm256_set_ps (-1.0f, 2.0f, -3.0f, 4.0f, -1.0f, 2.0f, -3.0f, 4.0f); int main()
__m256 b = _mm256_set_ps (1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f); {
__m256 result = _mm256_add_ps (a, b); __m256 a = _mm256_set_ps (-1.0f, 2.0f, -3.0f, 4.0f, -1.0f, 2.0f, -3.0f, 4.0f);
return 0; __m256 b = _mm256_set_ps (1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f);
}" AVX_FOUND) __m256 result = _mm256_add_ps (a, b);
return 0;
}" AVX_FOUND)
# Check AVX 2 # Check AVX 2
set(CMAKE_REQUIRED_FLAGS ${AVX2_FLAG}) set(CMAKE_REQUIRED_FLAGS ${AVX2_FLAG})
set(AVX2_FOUND_EXITCODE 1 CACHE STRING "Result from TRY_RUN" FORCE) set(AVX2_FOUND_EXITCODE 1 CACHE STRING "Result from TRY_RUN" FORCE)
CHECK_CXX_SOURCE_RUNS(" CHECK_CXX_SOURCE_RUNS("
#include <immintrin.h> #include <immintrin.h>
int main() int main()
{ {
__m256i a = _mm256_set_epi32 (-1, 2, -3, 4, -1, 2, -3, 4); __m256i a = _mm256_set_epi32 (-1, 2, -3, 4, -1, 2, -3, 4);
__m256i result = _mm256_abs_epi32 (a); __m256i result = _mm256_abs_epi32 (a);
return 0; return 0;
}" AVX2_FOUND) }" AVX2_FOUND)
# Check AVX512F # Check AVX512F
set(CMAKE_REQUIRED_FLAGS ${AVX512F_FLAG}) set(CMAKE_REQUIRED_FLAGS ${AVX512F_FLAG})
set(AVX512F_FOUND_EXITCODE 1 CACHE STRING "Result from TRY_RUN" FORCE) set(AVX512F_FOUND_EXITCODE 1 CACHE STRING "Result from TRY_RUN" FORCE)
CHECK_CXX_SOURCE_RUNS(" CHECK_CXX_SOURCE_RUNS("
#include <immintrin.h> #include <immintrin.h>
int main() int main()
{ {
__m512i a = _mm512_set_epi32 (-1, 2, -3, 4, -1, 2, -3, 4, __m512i a = _mm512_set_epi32 (-1, 2, -3, 4, -1, 2, -3, 4,
13, -5, 6, -7, 9, 2, -6, 3); 13, -5, 6, -7, 9, 2, -6, 3);
__m512i result = _mm512_abs_epi32 (a); __m512i result = _mm512_abs_epi32 (a);
return 0; return 0;
}" AVX512F_FOUND) }" AVX512F_FOUND)
endif(NOT WIN32)
set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_RETAINED}) set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_RETAINED})
mark_as_advanced(MMX_FOUND SSE2_FOUND SSE3_FOUND AVX_FOUND AVX2_FOUND AVX512F_FOUND) mark_as_advanced(MMX_FOUND SSE2_FOUND SSE3_FOUND AVX_FOUND AVX2_FOUND AVX512F_FOUND)
...@@ -3,13 +3,9 @@ add_subdirectory(platform) ...@@ -3,13 +3,9 @@ add_subdirectory(platform)
add_subdirectory(framework) add_subdirectory(framework)
add_subdirectory(operators) add_subdirectory(operators)
add_subdirectory(string) add_subdirectory(string)
add_subdirectory(pybind)
if (NOT WIN32)
add_subdirectory(recordio) add_subdirectory(recordio)
endif(NOT WIN32) add_subdirectory(pybind)
# NOTE: please add subdirectory inference at last. # NOTE: please add subdirectory inference at last.
add_subdirectory(inference) add_subdirectory(inference)
add_subdirectory(train) add_subdirectory(train)
...@@ -31,9 +31,7 @@ function(windows_symbolic TARGET) ...@@ -31,9 +31,7 @@ function(windows_symbolic TARGET)
endfunction() endfunction()
add_subdirectory(ir) add_subdirectory(ir)
if (NOT WIN32)
add_subdirectory(details) add_subdirectory(details)
endif (NOT WIN32)
# ddim lib # ddim lib
proto_library(framework_proto SRCS framework.proto) proto_library(framework_proto SRCS framework.proto)
...@@ -68,11 +66,7 @@ if(WITH_GPU) ...@@ -68,11 +66,7 @@ if(WITH_GPU)
else() else()
cc_test(mixed_vector_test SRCS mixed_vector_test.cc DEPS place memory device_context tensor) cc_test(mixed_vector_test SRCS mixed_vector_test.cc DEPS place memory device_context tensor)
endif() endif()
if (NOT WIN32) cc_library(lod_tensor SRCS lod_tensor.cc DEPS ddim place tensor framework_proto recordio version)
cc_library(lod_tensor SRCS lod_tensor.cc DEPS ddim place tensor framework_proto recordio version)
else()
cc_library(lod_tensor SRCS lod_tensor.cc DEPS ddim place tensor framework_proto version)
endif (NOT WIN32)
cc_test(lod_tensor_test SRCS lod_tensor_test.cc DEPS lod_tensor memory) cc_test(lod_tensor_test SRCS lod_tensor_test.cc DEPS lod_tensor memory)
nv_test(lod_tensor_gpu_test SRCS lod_tensor_test.cu DEPS lod_tensor) nv_test(lod_tensor_gpu_test SRCS lod_tensor_test.cu DEPS lod_tensor)
...@@ -122,13 +116,8 @@ cc_test(op_proto_maker_test SRCS op_proto_maker_test.cc DEPS op_proto_maker) ...@@ -122,13 +116,8 @@ cc_test(op_proto_maker_test SRCS op_proto_maker_test.cc DEPS op_proto_maker)
cc_library(op_info SRCS op_info.cc DEPS attribute framework_proto) cc_library(op_info SRCS op_info.cc DEPS attribute framework_proto)
cc_library(shape_inference SRCS shape_inference.cc DEPS ddim attribute device_context) cc_library(shape_inference SRCS shape_inference.cc DEPS ddim attribute device_context)
if (NOT WIN32)
cc_library(operator SRCS operator.cc DEPS op_info device_context tensor scope glog cc_library(operator SRCS operator.cc DEPS op_info device_context tensor scope glog
shape_inference data_transform lod_tensor profiler) shape_inference data_transform lod_tensor profiler)
else()
cc_library(operator SRCS operator.cc DEPS op_info device_context tensor scope glog
shape_inference data_transform lod_tensor)
endif(NOT WIN32)
cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry device_context) cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry device_context)
...@@ -183,12 +172,10 @@ else() ...@@ -183,12 +172,10 @@ else()
cc_test(test_naive_executor SRCS naive_executor_test.cc DEPS naive_executor elementwise_add_op) cc_test(test_naive_executor SRCS naive_executor_test.cc DEPS naive_executor elementwise_add_op)
endif() endif()
if (NOT WIN32)
cc_library(parallel_executor SRCS parallel_executor.cc DEPS cc_library(parallel_executor SRCS parallel_executor.cc DEPS
threaded_ssa_graph_executor scope_buffered_ssa_graph_executor threaded_ssa_graph_executor scope_buffered_ssa_graph_executor
graph build_strategy graph build_strategy
fast_threaded_ssa_graph_executor) fast_threaded_ssa_graph_executor)
endif() # NOT WIN32
cc_library(prune SRCS prune.cc DEPS framework_proto) cc_library(prune SRCS prune.cc DEPS framework_proto)
cc_test(prune_test SRCS prune_test.cc DEPS op_info prune recurrent_op device_context) cc_test(prune_test SRCS prune_test.cc DEPS op_info prune recurrent_op device_context)
......
...@@ -13,9 +13,9 @@ ...@@ -13,9 +13,9 @@
// limitations under the License. // limitations under the License.
#pragma once #pragma once
#include <ThreadPool.h>
#include <string> #include <string>
#include <vector> #include <vector>
#include "ThreadPool.h"
#include "paddle/fluid/framework/blocking_queue.h" #include "paddle/fluid/framework/blocking_queue.h"
#include "paddle/fluid/framework/details/exception_holder.h" #include "paddle/fluid/framework/details/exception_holder.h"
#include "paddle/fluid/framework/details/execution_strategy.h" #include "paddle/fluid/framework/details/execution_strategy.h"
......
...@@ -13,11 +13,6 @@ See the License for the specific language governing permissions and ...@@ -13,11 +13,6 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#pragma once #pragma once
// logging.h and windows.h conflict
#define GLOG_NO_ABBREVIATED_SEVERITIES
// solve static linking error in windows
// https://github.com/google/glog/issues/301
#define GOOGLE_GLOG_DLL_DECL
#include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/framework/tensor.h"
#include "unsupported/Eigen/CXX11/Tensor" #include "unsupported/Eigen/CXX11/Tensor"
......
...@@ -23,11 +23,6 @@ limitations under the License. */ ...@@ -23,11 +23,6 @@ limitations under the License. */
#include <unordered_map> #include <unordered_map>
#include <unordered_set> #include <unordered_set>
#if defined(_WIN32)
#define GLOG_NO_ABBREVIATED_SEVERITIES // msvc conflict logging with windows.h
#define GOOGLE_GLOG_DLL_DECL
#endif
#include "glog/logging.h" // For VLOG() #include "glog/logging.h" // For VLOG()
#include "paddle/fluid/framework/attribute.h" #include "paddle/fluid/framework/attribute.h"
#include "paddle/fluid/framework/details/op_registry.h" #include "paddle/fluid/framework/details/op_registry.h"
......
...@@ -11,8 +11,6 @@ distributed under the License is distributed on an "AS IS" BASIS, ...@@ -11,8 +11,6 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#define GLOG_NO_ABBREVIATED_SEVERITIES
#define GOOGLE_GLOG_DLL_DECL
#include <gflags/gflags.h> #include <gflags/gflags.h>
#include <glog/logging.h> #include <glog/logging.h>
......
...@@ -20,8 +20,6 @@ limitations under the License. */ ...@@ -20,8 +20,6 @@ limitations under the License. */
#include <tuple> #include <tuple>
#include <unordered_map> #include <unordered_map>
#include <vector> #include <vector>
#define GLOG_NO_ABBREVIATED_SEVERITIES
#define GOOGLE_GLOG_DLL_DECL
#include "glog/logging.h" // For VLOG #include "glog/logging.h" // For VLOG
#include "paddle/fluid/framework/attribute.h" #include "paddle/fluid/framework/attribute.h"
......
...@@ -14,12 +14,6 @@ limitations under the License. */ ...@@ -14,12 +14,6 @@ limitations under the License. */
#pragma once #pragma once
// logging.h and windows.h conflict
#define GLOG_NO_ABBREVIATED_SEVERITIES
// solve static linking error in windows
// https://github.com/google/glog/issues/301
#define GOOGLE_GLOG_DLL_DECL
#include <glog/logging.h> #include <glog/logging.h>
#include <map> #include <map>
#include <memory> #include <memory>
......
...@@ -15,6 +15,12 @@ ...@@ -15,6 +15,12 @@
#pragma once #pragma once
#include "paddle/fluid/memory/allocation/allocator.h" #include "paddle/fluid/memory/allocation/allocator.h"
#ifdef _WIN32
#define posix_memalign_free _aligned_free
#define posix_memalign(p, a, s) \
(((*(p)) = _aligned_malloc((s), (a))), *(p) ? 0 : errno)
#endif
namespace paddle { namespace paddle {
namespace memory { namespace memory {
namespace allocation { namespace allocation {
......
...@@ -22,9 +22,7 @@ if(WITH_DISTRIBUTE) ...@@ -22,9 +22,7 @@ if(WITH_DISTRIBUTE)
add_subdirectory(distributed_ops) add_subdirectory(distributed_ops)
endif() endif()
if (NOT WIN32) add_subdirectory(reader)
add_subdirectory(reader)
endif()
if (NOT WIN32) if (NOT WIN32)
add_subdirectory(nccl) add_subdirectory(nccl)
...@@ -42,7 +40,7 @@ endif() ...@@ -42,7 +40,7 @@ endif()
register_operators(EXCLUDES warpctc_op conv_fusion_op DEPS ${OP_HEADER_DEPS}) register_operators(EXCLUDES warpctc_op conv_fusion_op DEPS ${OP_HEADER_DEPS})
# warpctc_op needs cudnn 7 above # warpctc_op needs cudnn 7 above
if (WITH_GPU) if (WITH_GPU AND NOT WIN32)
if (${CUDNN_MAJOR_VERSION} VERSION_LESS 7) if (${CUDNN_MAJOR_VERSION} VERSION_LESS 7)
op_library(warpctc_op DEPS dynload_warpctc sequence_padding sequence_scale SRCS warpctc_op.cc warpctc_op.cu.cc) op_library(warpctc_op DEPS dynload_warpctc sequence_padding sequence_scale SRCS warpctc_op.cc warpctc_op.cu.cc)
else() else()
...@@ -59,10 +57,12 @@ endif() ...@@ -59,10 +57,12 @@ endif()
set(COMMON_OP_DEPS ${OP_HEADER_DEPS}) set(COMMON_OP_DEPS ${OP_HEADER_DEPS})
set(COMMON_OP_DEPS ${COMMON_OP_DEPS} selected_rows_functor selected_rows lod_tensor maxouting unpooling pooling lod_rank_table context_project sequence_pooling executor dynload_warpctc sequence_padding sequence_scale cos_sim_functor memory jit_kernel concat_and_split cross_entropy softmax vol2col im2col sampler) set(COMMON_OP_DEPS ${COMMON_OP_DEPS} selected_rows_functor selected_rows lod_tensor maxouting unpooling pooling lod_rank_table context_project sequence_pooling executor)
if (NOT WIN32) if (NOT WIN32)
set(COMMON_OP_DEPS ${COMMON_OP_DEPS} sequence2batch lstm_compute matrix_bit_code gru_compute activation_functions) set(COMMON_OP_DEPS ${COMMON_OP_DEPS} dynload_warpctc)
endif() endif()
set(COMMON_OP_DEPS ${COMMON_OP_DEPS} sequence_padding sequence_scale cos_sim_functor memory jit_kernel concat_and_split cross_entropy softmax vol2col im2col sampler)
set(COMMON_OP_DEPS ${COMMON_OP_DEPS} sequence2batch lstm_compute matrix_bit_code gru_compute activation_functions)
if (WITH_GPU) if (WITH_GPU)
set(COMMON_OP_DEPS ${COMMON_OP_DEPS} depthwise_conv) set(COMMON_OP_DEPS ${COMMON_OP_DEPS} depthwise_conv)
endif() endif()
......
...@@ -111,7 +111,7 @@ class HierarchicalSigmoidGradOpKernel : public framework::OpKernel<T> { ...@@ -111,7 +111,7 @@ class HierarchicalSigmoidGradOpKernel : public framework::OpKernel<T> {
auto pre_out_mat = EigenMatrix<T>::From(*pre_out); auto pre_out_mat = EigenMatrix<T>::From(*pre_out);
auto pre_out_grad_mat = EigenMatrix<T>::From(pre_out_grad); auto pre_out_grad_mat = EigenMatrix<T>::From(pre_out_grad);
auto out_grad_mat = EigenMatrix<T>::From(*out_grad); auto out_grad_mat = EigenMatrix<T>::From(*out_grad);
Eigen::array<int, 2> bcast({{1, static_cast<int>(pre_out_grad.dims()[1])}}); Eigen::array<int, 2> bcast{1, static_cast<int>(pre_out_grad.dims()[1])};
// softrelu derivative // softrelu derivative
pre_out_grad_mat.device(place) = pre_out_grad_mat.device(place) =
......
if (NOT WIN32) add_subdirectory(detail)
add_subdirectory(detail)
endif(NOT WIN32)
function(math_library TARGET) function(math_library TARGET)
# math_library is a function to create math library. # math_library is a function to create math library.
...@@ -43,10 +41,8 @@ math_library(depthwise_conv) ...@@ -43,10 +41,8 @@ math_library(depthwise_conv)
math_library(im2col) math_library(im2col)
math_library(sampler) math_library(sampler)
if (NOT WIN32) # windows do not support avx functions yet. math_library(gru_compute DEPS activation_functions math_function)
math_library(gru_compute DEPS activation_functions math_function) math_library(lstm_compute DEPS activation_functions)
math_library(lstm_compute DEPS activation_functions)
endif (NOT WIN32)
cc_library(blas SRCS blas.cc DEPS cblas framework_proto device_context) cc_library(blas SRCS blas.cc DEPS cblas framework_proto device_context)
math_library(math_function DEPS blas) math_library(math_function DEPS blas)
...@@ -58,9 +54,9 @@ math_library(sequence_padding) ...@@ -58,9 +54,9 @@ math_library(sequence_padding)
math_library(sequence_pooling DEPS math_function) math_library(sequence_pooling DEPS math_function)
math_library(sequence_scale) math_library(sequence_scale)
math_library(softmax DEPS math_function) math_library(softmax DEPS math_function)
if (NOT WIN32)
math_library(matrix_bit_code) math_library(matrix_bit_code)
endif (NOT WIN32)
math_library(unpooling) math_library(unpooling)
math_library(vol2col) math_library(vol2col)
...@@ -76,13 +72,12 @@ if(WITH_GPU) ...@@ -76,13 +72,12 @@ if(WITH_GPU)
endif() endif()
cc_test(concat_test SRCS concat_test.cc DEPS concat_and_split) cc_test(concat_test SRCS concat_test.cc DEPS concat_and_split)
cc_test(cpu_vec_test SRCS cpu_vec_test.cc DEPS blas cpu_info) cc_test(cpu_vec_test SRCS cpu_vec_test.cc DEPS blas cpu_info)
if (NOT WIN32)
set(JIT_KERNEL_SRCS jit_kernel.cc jit_kernel_blas.cc jit_kernel_exp.cc jit_kernel_rnn.cc jit_kernel_crf_decode.cc jit_kernel_layer_norm.cc) set(JIT_KERNEL_SRCS jit_kernel.cc jit_kernel_blas.cc jit_kernel_exp.cc jit_kernel_rnn.cc jit_kernel_crf_decode.cc jit_kernel_layer_norm.cc)
set(JIT_KERNEL_DEPS cpu_info cblas gflags enforce) set(JIT_KERNEL_DEPS cpu_info cblas gflags enforce)
if(WITH_XBYAK) if(WITH_XBYAK)
list(APPEND JIT_KERNEL_SRCS jit_gen.cc jit_code.cc) list(APPEND JIT_KERNEL_SRCS jit_gen.cc jit_code.cc)
list(APPEND JIT_KERNEL_DEPS xbyak) list(APPEND JIT_KERNEL_DEPS xbyak)
endif() endif()
cc_library(jit_kernel SRCS ${JIT_KERNEL_SRCS} DEPS ${JIT_KERNEL_DEPS}) cc_library(jit_kernel SRCS ${JIT_KERNEL_SRCS} DEPS ${JIT_KERNEL_DEPS})
cc_test(jit_kernel_test SRCS jit_kernel_test.cc DEPS jit_kernel) cc_test(jit_kernel_test SRCS jit_kernel_test.cc DEPS jit_kernel)
endif (NOT WIN32)
...@@ -15,6 +15,7 @@ limitations under the License. */ ...@@ -15,6 +15,7 @@ limitations under the License. */
#pragma once #pragma once
#include <math.h> #include <math.h>
#include <string> #include <string>
#include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/hostdevice.h" #include "paddle/fluid/platform/hostdevice.h"
......
...@@ -67,7 +67,7 @@ inline constexpr size_t FindLastSet(size_t x) { ...@@ -67,7 +67,7 @@ inline constexpr size_t FindLastSet(size_t x) {
: (std::is_same<size_t, unsigned long>::value // NOLINT : (std::is_same<size_t, unsigned long>::value // NOLINT
? (x ? 8 * sizeof(x) - __builtin_clzl(x) : 0) ? (x ? 8 * sizeof(x) - __builtin_clzl(x) : 0)
: (x ? 8 * sizeof(x) - __builtin_clzll(x) : 0)); : (x ? 8 * sizeof(x) - __builtin_clzll(x) : 0));
}
#else #else
// windows don't have built-in clz, ctz function // windows don't have built-in clz, ctz function
template <typename T> template <typename T>
...@@ -92,7 +92,6 @@ inline int clz(const T& value) { ...@@ -92,7 +92,6 @@ inline int clz(const T& value) {
inline size_t FindLastSet(size_t x) { return sizeof(size_t) * 8 - clz(x); } inline size_t FindLastSet(size_t x) { return sizeof(size_t) * 8 - clz(x); }
#endif // !_WIN32 #endif // !_WIN32
}
struct SimpleCode { struct SimpleCode {
SimpleCode(size_t code, size_t num_classes) : c_(code + num_classes) {} SimpleCode(size_t code, size_t num_classes) : c_(code + num_classes) {}
......
...@@ -74,7 +74,7 @@ class CreatePyReaderOpMaker : public FileReaderMakerBase { ...@@ -74,7 +74,7 @@ class CreatePyReaderOpMaker : public FileReaderMakerBase {
"Name of the `LoDTensorBlockingQueueHolder` variable"); "Name of the `LoDTensorBlockingQueueHolder` variable");
AddComment(R"DOC( AddComment(R"DOC(
Create PyReader to support LoDTensor data feeding in Python side. Create PyReader to support LoDTensor data feeding in Python side.
)DOC"); )DOC");
} }
}; };
......
...@@ -35,10 +35,10 @@ class ROIAlignOp : public framework::OperatorWithKernel { ...@@ -35,10 +35,10 @@ class ROIAlignOp : public framework::OperatorWithKernel {
"The format of input tensor is NCHW."); "The format of input tensor is NCHW.");
PADDLE_ENFORCE(rois_dims.size() == 2, PADDLE_ENFORCE(rois_dims.size() == 2,
"ROIs should be a 2-D LoDTensor of shape (num_rois, 4)" "ROIs should be a 2-D LoDTensor of shape (num_rois, 4)"
"given as [[x1, y1, x2, y2], ]."); "given as [[x1, y1, x2, y2], ...].");
PADDLE_ENFORCE(rois_dims[1] == 4, PADDLE_ENFORCE(rois_dims[1] == 4,
"ROIs should be a 2-D LoDTensor of shape (num_rois, 4)" "ROIs should be a 2-D LoDTensor of shape (num_rois, 4)"
"given as [[x1, y1, x2, y2], ]."); "given as [[x1, y1, x2, y2], ...].");
int pooled_height = ctx->Attrs().Get<int>("pooled_height"); int pooled_height = ctx->Attrs().Get<int>("pooled_height");
int pooled_width = ctx->Attrs().Get<int>("pooled_width"); int pooled_width = ctx->Attrs().Get<int>("pooled_width");
float spatial_scale = ctx->Attrs().Get<float>("spatial_scale"); float spatial_scale = ctx->Attrs().Get<float>("spatial_scale");
...@@ -103,7 +103,7 @@ class ROIAlignOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -103,7 +103,7 @@ class ROIAlignOpMaker : public framework::OpProtoAndCheckerMaker {
"(LoDTensor), " "(LoDTensor), "
"ROIs (Regions of Interest) to pool over. " "ROIs (Regions of Interest) to pool over. "
"should be a 2-D LoDTensor of shape (num_rois, 4)" "should be a 2-D LoDTensor of shape (num_rois, 4)"
"given as [[x1, y1, x2, y2], ]. " "given as [[x1, y1, x2, y2], ...]. "
"(x1, y1) is the top left coordinates, and " "(x1, y1) is the top left coordinates, and "
"(x2, y2) is the bottom right coordinates."); "(x2, y2) is the bottom right coordinates.");
AddOutput("Out", AddOutput("Out",
......
...@@ -40,10 +40,10 @@ class ROIPoolOp : public framework::OperatorWithKernel { ...@@ -40,10 +40,10 @@ class ROIPoolOp : public framework::OperatorWithKernel {
"The format of input tensor is NCHW."); "The format of input tensor is NCHW.");
PADDLE_ENFORCE(rois_dims.size() == 2, PADDLE_ENFORCE(rois_dims.size() == 2,
"ROIs should be a 2-D LoDTensor of shape (num_rois, 4)" "ROIs should be a 2-D LoDTensor of shape (num_rois, 4)"
"given as [[x1, y1, x2, y2], ]."); "given as [[x1, y1, x2, y2], ...].");
PADDLE_ENFORCE(rois_dims[1] == kROISize, PADDLE_ENFORCE(rois_dims[1] == kROISize,
"ROIs should be a 2-D LoDTensor of shape (num_rois, 4)" "ROIs should be a 2-D LoDTensor of shape (num_rois, 4)"
"given as [[x1, y1, x2, y2], ]."); "given as [[x1, y1, x2, y2], ...].");
int pooled_height = ctx->Attrs().Get<int>("pooled_height"); int pooled_height = ctx->Attrs().Get<int>("pooled_height");
int pooled_width = ctx->Attrs().Get<int>("pooled_width"); int pooled_width = ctx->Attrs().Get<int>("pooled_width");
...@@ -110,7 +110,7 @@ class ROIPoolOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -110,7 +110,7 @@ class ROIPoolOpMaker : public framework::OpProtoAndCheckerMaker {
"(LoDTensor), " "(LoDTensor), "
"ROIs (Regions of Interest) to pool over. " "ROIs (Regions of Interest) to pool over. "
"should be a 2-D LoDTensor of shape (num_rois, 4)" "should be a 2-D LoDTensor of shape (num_rois, 4)"
"given as [[x1, y1, x2, y2], ]. " "given as [[x1, y1, x2, y2], ...]. "
"Where batch_id is the id of the data, " "Where batch_id is the id of the data, "
"(x1, y1) is the top left coordinates, and " "(x1, y1) is the top left coordinates, and "
"(x2, y2) is the bottom right coordinates."); "(x2, y2) is the bottom right coordinates.");
......
...@@ -86,7 +86,7 @@ class SpaceToDepthOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -86,7 +86,7 @@ class SpaceToDepthOpMaker : public framework::OpProtoAndCheckerMaker {
.GreaterThan(1); .GreaterThan(1);
AddComment(R"DOC( AddComment(R"DOC(
reorg operator used in Yolo v2. reorg operator used in Yolo v2.
The equation is: C2 = C1/blocksize * blocksize, W2 = W1 * blocksize + offset % blocksize, H2 = H1 * blocksize + offset / blocksize, The equation is: C2 = C1/blocksize * blocksize, W2 = W1 * blocksize + offset % blocksize, H2 = H1 * blocksize + offset / blocksize,
Reshape Input(X) into the shape according to Attr(blocksize). The Reshape Input(X) into the shape according to Attr(blocksize). The
data in Input(X) are unchanged. data in Input(X) are unchanged.
......
if (NOT WIN32)
proto_library(profiler_proto SRCS profiler.proto DEPS framework_proto) proto_library(profiler_proto SRCS profiler.proto DEPS framework_proto)
py_proto_compile(profiler_py_proto SRCS profiler.proto) py_proto_compile(profiler_py_proto SRCS profiler.proto)
...@@ -6,11 +5,19 @@ add_custom_target(profiler_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch _ ...@@ -6,11 +5,19 @@ add_custom_target(profiler_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch _
add_dependencies(profiler_py_proto profiler_py_proto_init) add_dependencies(profiler_py_proto profiler_py_proto_init)
if (NOT WIN32)
add_custom_command(TARGET profiler_py_proto POST_BUILD add_custom_command(TARGET profiler_py_proto POST_BUILD
COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_BINARY_DIR}/python/paddle/fluid/proto/profiler COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_BINARY_DIR}/python/paddle/fluid/proto/profiler
COMMAND cp *.py ${PADDLE_BINARY_DIR}/python/paddle/fluid/proto/profiler COMMAND cp *.py ${PADDLE_BINARY_DIR}/python/paddle/fluid/proto/profiler
COMMENT "Copy generated python proto into directory paddle/fluid/proto/profiler." COMMENT "Copy generated python proto into directory paddle/fluid/proto/profiler."
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
else(NOT WIN32)
string(REPLACE "/" "\\" proto_dstpath "${PADDLE_BINARY_DIR}/python/paddle/fluid/proto/profiler/")
add_custom_command(TARGET profiler_py_proto POST_BUILD
COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_BINARY_DIR}/python/paddle/fluid/proto/profiler
COMMAND copy /Y *.py ${proto_dstpath}
COMMENT "Copy generated python proto into directory paddle/fluid/proto/profiler."
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
endif(NOT WIN32) endif(NOT WIN32)
if(WITH_GPU) if(WITH_GPU)
...@@ -60,12 +67,9 @@ cc_test(init_test SRCS init_test.cc DEPS device_context) ...@@ -60,12 +67,9 @@ cc_test(init_test SRCS init_test.cc DEPS device_context)
nv_test(cudnn_helper_test SRCS cudnn_helper_test.cc DEPS dynload_cuda) nv_test(cudnn_helper_test SRCS cudnn_helper_test.cc DEPS dynload_cuda)
nv_test(transform_test SRCS transform_test.cu DEPS memory place device_context) nv_test(transform_test SRCS transform_test.cu DEPS memory place device_context)
if (NOT WIN32)
cc_library(device_tracer SRCS device_tracer.cc DEPS boost profiler_proto framework_proto ${GPU_CTX_DEPS}) cc_library(device_tracer SRCS device_tracer.cc DEPS boost profiler_proto framework_proto ${GPU_CTX_DEPS})
cc_library(profiler SRCS profiler.cc DEPS device_context device_tracer) cc_library(profiler SRCS profiler.cc DEPS device_context device_tracer)
cc_test(profiler_test SRCS profiler_test.cc DEPS profiler) cc_test(profiler_test SRCS profiler_test.cc DEPS profiler)
endif(NOT WIN32)
nv_test(float16_gpu_test SRCS float16_test.cu DEPS lod_tensor) nv_test(float16_gpu_test SRCS float16_test.cu DEPS lod_tensor)
cc_test(float16_test SRCS float16_test.cc DEPS lod_tensor) cc_test(float16_test SRCS float16_test.cc DEPS lod_tensor)
......
...@@ -29,6 +29,13 @@ namespace platform { ...@@ -29,6 +29,13 @@ namespace platform {
void SetNumThreads(int num_threads) { void SetNumThreads(int num_threads) {
#ifdef PADDLE_USE_OPENBLAS #ifdef PADDLE_USE_OPENBLAS
// windows has no support for openblas multi-thread
// please refer to: https://github.com/PaddlePaddle/Paddle/issues/7234
#ifdef _WIN32
if (num_threads > 1) {
num_threads = 1;
}
#endif
int real_num_threads = num_threads > 1 ? num_threads : 1; int real_num_threads = num_threads > 1 ? num_threads : 1;
openblas_set_num_threads(real_num_threads); openblas_set_num_threads(real_num_threads);
#elif defined(PADDLE_WITH_MKLML) #elif defined(PADDLE_WITH_MKLML)
......
...@@ -13,17 +13,11 @@ See the License for the specific language governing permissions and ...@@ -13,17 +13,11 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#pragma once #pragma once
#if !defined(_WIN32)
#include <sys/time.h>
#else
#include <windows.h>
#endif // !_WIN32
#include <time.h>
#include <chrono> // NOLINT #include <chrono> // NOLINT
#include <string> #include <string>
#include "paddle/fluid/platform/dynload/cupti.h" #include "paddle/fluid/platform/dynload/cupti.h"
#include "paddle/fluid/platform/port.h"
#include "paddle/fluid/platform/profiler.pb.h" #include "paddle/fluid/platform/profiler.pb.h"
namespace paddle { namespace paddle {
...@@ -32,15 +26,11 @@ namespace platform { ...@@ -32,15 +26,11 @@ namespace platform {
/////////////////////// ///////////////////////
// WARN: Under Development. Don't depend on it yet. // WARN: Under Development. Don't depend on it yet.
////////////////////// //////////////////////
#if !defined(_WIN32)
inline uint64_t PosixInNsec() { inline uint64_t PosixInNsec() {
struct timeval tv; struct timeval tv;
gettimeofday(&tv, nullptr); gettimeofday(&tv, nullptr);
return 1000 * (static_cast<uint64_t>(tv.tv_sec) * 1000000 + tv.tv_usec); return 1000 * (static_cast<uint64_t>(tv.tv_sec) * 1000000 + tv.tv_usec);
} }
#else
inline uint64_t PosixInNsec() { return static_cast<uint64_t>(0); }
#endif // !_WIN32
// DeviceTracer performs the following tasks: // DeviceTracer performs the following tasks:
// 1. Register cuda callbacks for various events: kernel, memcpy, etc. // 1. Register cuda callbacks for various events: kernel, memcpy, etc.
......
...@@ -13,8 +13,6 @@ See the License for the specific language governing permissions and ...@@ -13,8 +13,6 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#pragma once #pragma once
#define GLOG_NO_ABBREVIATED_SEVERITIES
#define GOOGLE_GLOG_DLL_DECL
#include <glog/logging.h> #include <glog/logging.h>
#include <cudnn.h> #include <cudnn.h>
......
...@@ -18,12 +18,6 @@ limitations under the License. */ ...@@ -18,12 +18,6 @@ limitations under the License. */
#include <cxxabi.h> // for __cxa_demangle #include <cxxabi.h> // for __cxa_demangle
#endif // __GNUC__ #endif // __GNUC__
#if defined(_WIN32)
#define NOMINMAX // msvc max/min macro conflict with std::min/max
#define GLOG_NO_ABBREVIATED_SEVERITIES // msvc conflict logging with windows.h
#define GOOGLE_GLOG_DLL_DECL
#endif
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
#include <cublas_v2.h> #include <cublas_v2.h>
#include <cudnn.h> #include <cudnn.h>
...@@ -127,14 +121,14 @@ struct EOFException : public std::exception { ...@@ -127,14 +121,14 @@ struct EOFException : public std::exception {
#define UNLIKELY(condition) __builtin_expect(static_cast<bool>(condition), 0) #define UNLIKELY(condition) __builtin_expect(static_cast<bool>(condition), 0)
#else #else
// there is no equivalent intrinsics in msvc. // there is no equivalent intrinsics in msvc.
#define UNLIKELY(condition) (condition == 0) #define UNLIKELY(condition) (condition)
#endif #endif
#if !defined(_WIN32) #if !defined(_WIN32)
#define LIKELY(condition) __builtin_expect(static_cast<bool>(condition), 1) #define LIKELY(condition) __builtin_expect(static_cast<bool>(condition), 1)
#else #else
// there is no equivalent intrinsics in msvc. // there is no equivalent intrinsics in msvc.
#define LIKELY(condition) (condition != 0) #define LIKELY(condition) (condition)
#endif #endif
template <typename... Args> template <typename... Args>
...@@ -248,7 +242,6 @@ inline void throw_on_error(T e) { ...@@ -248,7 +242,6 @@ inline void throw_on_error(T e) {
throw_on_error(e, ""); throw_on_error(e, "");
} }
#if !defined(_WIN32)
#define PADDLE_THROW(...) \ #define PADDLE_THROW(...) \
do { \ do { \
throw ::paddle::platform::EnforceNotMet( \ throw ::paddle::platform::EnforceNotMet( \
...@@ -272,17 +265,6 @@ inline void throw_on_error(T e) { ...@@ -272,17 +265,6 @@ inline void throw_on_error(T e) {
#define PADDLE_ENFORCE(...) ::paddle::platform::throw_on_error(__VA_ARGS__); #define PADDLE_ENFORCE(...) ::paddle::platform::throw_on_error(__VA_ARGS__);
#endif // REPLACE_ENFORCE_GLOG #endif // REPLACE_ENFORCE_GLOG
#else // !_WIN32
// disable enforce, caused by the varardic macro exception error
#define PADDLE_THROW(x) \
do { \
throw std::make_exception_ptr( \
std::runtime_error("Windows disable the enforce.")); \
} while (false)
#define PADDLE_ENFORCE(x, ...) x
#endif // !_WIN32
#define PADDLE_THROW_EOF() \ #define PADDLE_THROW_EOF() \
do { \ do { \
throw ::paddle::platform::EOFException("There is no next data.", __FILE__, \ throw ::paddle::platform::EOFException("There is no next data.", __FILE__, \
...@@ -302,20 +284,6 @@ inline void throw_on_error(T e) { ...@@ -302,20 +284,6 @@ inline void throw_on_error(T e) {
* extra messages is also supported, for example: * extra messages is also supported, for example:
* PADDLE_ENFORCE(a, b, "some simple enforce failed between %d numbers", 2) * PADDLE_ENFORCE(a, b, "some simple enforce failed between %d numbers", 2)
*/ */
#if !defined(_WIN32)
#define PADDLE_ENFORCE_EQ(__VAL0, __VAL1, ...) \
__PADDLE_BINARY_COMPARE(__VAL0, __VAL1, ==, !=, __VA_ARGS__)
#define PADDLE_ENFORCE_NE(__VAL0, __VAL1, ...) \
__PADDLE_BINARY_COMPARE(__VAL0, __VAL1, !=, ==, __VA_ARGS__)
#define PADDLE_ENFORCE_GT(__VAL0, __VAL1, ...) \
__PADDLE_BINARY_COMPARE(__VAL0, __VAL1, >, <=, __VA_ARGS__)
#define PADDLE_ENFORCE_GE(__VAL0, __VAL1, ...) \
__PADDLE_BINARY_COMPARE(__VAL0, __VAL1, >=, <, __VA_ARGS__)
#define PADDLE_ENFORCE_LT(__VAL0, __VAL1, ...) \
__PADDLE_BINARY_COMPARE(__VAL0, __VAL1, <, >=, __VA_ARGS__)
#define PADDLE_ENFORCE_LE(__VAL0, __VAL1, ...) \
__PADDLE_BINARY_COMPARE(__VAL0, __VAL1, <=, >, __VA_ARGS__)
#define PADDLE_ENFORCE_NOT_NULL(__VAL, ...) \ #define PADDLE_ENFORCE_NOT_NULL(__VAL, ...) \
do { \ do { \
if (UNLIKELY(nullptr == (__VAL))) { \ if (UNLIKELY(nullptr == (__VAL))) { \
...@@ -335,27 +303,19 @@ inline void throw_on_error(T e) { ...@@ -335,27 +303,19 @@ inline void throw_on_error(T e) {
paddle::string::Sprintf("" __VA_ARGS__)); \ paddle::string::Sprintf("" __VA_ARGS__)); \
} \ } \
} while (0) } while (0)
#else
#define PADDLE_ENFORCE_EQ(__VAL0, __VAL1, ...) ((__VAL0) == (__VAL1)) #define PADDLE_ENFORCE_EQ(__VAL0, __VAL1, ...) \
#define PADDLE_ENFORCE_NE(__VAL0, __VAL1, ...) ((__VAL0) != (__VAL1)) __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, ==, !=, __VA_ARGS__)
#define PADDLE_ENFORCE_GT(__VAL0, __VAL1, ...) ((__VAL0) > (__VAL1)) #define PADDLE_ENFORCE_NE(__VAL0, __VAL1, ...) \
#define PADDLE_ENFORCE_GE(__VAL0, __VAL1, ...) ((__VAL0) >= (__VAL1)) __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, !=, ==, __VA_ARGS__)
#define PADDLE_ENFORCE_LT(__VAL0, __VAL1, ...) ((__VAL0) < (__VAL1)) #define PADDLE_ENFORCE_GT(__VAL0, __VAL1, ...) \
#define PADDLE_ENFORCE_LE(__VAL0, __VAL1, ...) ((__VAL0) <= (__VAL1)) __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, >, <=, __VA_ARGS__)
#define PADDLE_ENFORCE_GE(__VAL0, __VAL1, ...) \
#define __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, __CMP, __INV_CMP, ...) \ __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, >=, <, __VA_ARGS__)
do { \ #define PADDLE_ENFORCE_LT(__VAL0, __VAL1, ...) \
if (!((__VAL0)__CMP(__VAL1))) { \ __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, <, >=, __VA_ARGS__)
PADDLE_THROW("Windows disable the enforce. Enforce failed."); \ #define PADDLE_ENFORCE_LE(__VAL0, __VAL1, ...) \
} \ __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, <=, >, __VA_ARGS__)
} while (0)
#define PADDLE_ENFORCE_NOT_NULL(__VAL1, ...) \
do { \
if (nullptr == (__VAL1)) { \
PADDLE_THROW("Windows disable the enforce. Enforce failed"); \
} \
} while (0)
#endif // !_WIN32
} // namespace platform } // namespace platform
} // namespace paddle } // namespace paddle
...@@ -117,13 +117,6 @@ void InitDevices(bool init_p2p, const std::vector<int> devices) { ...@@ -117,13 +117,6 @@ void InitDevices(bool init_p2p, const std::vector<int> devices) {
places.emplace_back(platform::CPUPlace()); places.emplace_back(platform::CPUPlace());
platform::DeviceContextPool::Init(places); platform::DeviceContextPool::Init(places);
// windows has no support for openblas multi-thread
#ifdef _WIN32
if (FLAGS_paddle_num_threads > 1) {
FLAGS_paddle_num_threads = 1;
}
#endif
#ifndef PADDLE_WITH_MKLDNN #ifndef PADDLE_WITH_MKLDNN
platform::SetNumThreads(FLAGS_paddle_num_threads); platform::SetNumThreads(FLAGS_paddle_num_threads);
#endif #endif
......
...@@ -16,9 +16,6 @@ limitations under the License. */ ...@@ -16,9 +16,6 @@ limitations under the License. */
#include <string> #include <string>
#include <vector> #include <vector>
#define GLOG_NO_ABBREVIATED_SEVERITIES
#define GOOGLE_GLOG_DLL_DECL
#include "gflags/gflags.h" #include "gflags/gflags.h"
#include "glog/logging.h" #include "glog/logging.h"
......
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
#include <cstdio> #include <cstdio>
#include <stdexcept> #include <stdexcept>
#include <time.h>
#include <memory> #include <memory>
#include <string> #include <string>
...@@ -27,8 +28,13 @@ ...@@ -27,8 +28,13 @@
#include <dlfcn.h> // dladdr #include <dlfcn.h> // dladdr
#include <execinfo.h> // backtrace #include <execinfo.h> // backtrace
#include <sys/stat.h> #include <sys/stat.h>
#include <sys/time.h>
#include <algorithm> // std::accumulate #include <algorithm> // std::accumulate
#else #else
#define NOMINMAX // msvc max/min macro conflict with std::min/max
// solve static linking error in windows
// https://github.com/google/glog/issues/301
#define GOOGLE_GLOG_DLL_DECL
#include <io.h> // _popen, _pclose #include <io.h> // _popen, _pclose
#include <stdio.h> #include <stdio.h>
#include <windows.h> #include <windows.h>
...@@ -57,6 +63,25 @@ static void *dlopen(const char *filename, int flag) { ...@@ -57,6 +63,25 @@ static void *dlopen(const char *filename, int flag) {
return reinterpret_cast<void *>(hModule); return reinterpret_cast<void *>(hModule);
} }
static int gettimeofday(struct timeval *tp, void *tzp) {
time_t clock;
struct tm tm;
SYSTEMTIME wtm;
GetLocalTime(&wtm);
tm.tm_year = wtm.wYear - 1900;
tm.tm_mon = wtm.wMonth - 1;
tm.tm_mday = wtm.wDay;
tm.tm_hour = wtm.wHour;
tm.tm_min = wtm.wMinute;
tm.tm_sec = wtm.wSecond;
tm.tm_isdst = -1;
clock = mktime(&tm);
tp->tv_sec = clock;
tp->tv_usec = wtm.wMilliseconds * 1000;
return (0);
}
#endif // !_WIN32 #endif // !_WIN32
static void ExecShellCommand(const std::string &cmd, std::string *message) { static void ExecShellCommand(const std::string &cmd, std::string *message) {
...@@ -132,10 +157,12 @@ static void MkDir(const char *path) { ...@@ -132,10 +157,12 @@ static void MkDir(const char *path) {
} }
} }
#else #else
CreateDirectory(path, NULL); BOOL return_value = CreateDirectory(path, NULL);
auto errorno = GetLastError(); if (!return_value) {
if (errorno != ERROR_ALREADY_EXISTS) { auto errorno = GetLastError();
throw std::runtime_error(path_error); if (errorno != ERROR_ALREADY_EXISTS) {
throw std::runtime_error(path_error);
}
} }
#endif // !_WIN32 #endif // !_WIN32
} }
......
...@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and ...@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/platform/profiler.h" #include "paddle/fluid/platform/profiler.h"
#include "paddle/fluid/platform/port.h"
#include <sys/time.h>
#include <algorithm> #include <algorithm>
#include <iomanip> #include <iomanip>
#include <limits> #include <limits>
......
...@@ -69,7 +69,6 @@ void PushEvent(const std::string& name, const DeviceContext* dev_ctx); ...@@ -69,7 +69,6 @@ void PushEvent(const std::string& name, const DeviceContext* dev_ctx);
void PopEvent(const std::string& name, const DeviceContext* dev_ctx); void PopEvent(const std::string& name, const DeviceContext* dev_ctx);
#if !defined(_WIN32)
struct RecordEvent { struct RecordEvent {
// dev_ctx can be set to nullptr if device is cpu. // dev_ctx can be set to nullptr if device is cpu.
RecordEvent(const std::string& name, const DeviceContext* dev_ctx); RecordEvent(const std::string& name, const DeviceContext* dev_ctx);
...@@ -106,15 +105,6 @@ struct RecordBlock { ...@@ -106,15 +105,6 @@ struct RecordBlock {
std::string name_; std::string name_;
uint64_t start_ns_; uint64_t start_ns_;
}; };
#else
// windows do not support profiler temporarily.
struct RecordEvent {
RecordEvent(const std::string& name, const DeviceContext* dev_ctx) {}
};
struct RecordBlock {
explicit RecordBlock(int block_id) {}
};
#endif
// Return the event list of all threads. Assumed the returned value calls // Return the event list of all threads. Assumed the returned value calls
// event_lists, event_lists[i][j] represents the j-th Event of i-th thread. // event_lists, event_lists[i][j] represents the j-th Event of i-th thread.
......
...@@ -45,16 +45,15 @@ class StreamCallbackManager { ...@@ -45,16 +45,15 @@ class StreamCallbackManager {
inline void AddCallback(Callback &&callback) const { inline void AddCallback(Callback &&callback) const {
auto *stream_callback_context = auto *stream_callback_context =
new StreamCallbackContext(this, std::forward<Callback>(callback)); new StreamCallbackContext(this, std::forward<Callback>(callback));
PADDLE_ENFORCE(
#if CUDA_VERSION >= 10000 #if CUDA_VERSION >= 10000
cudaLaunchHostFunc(stream_, StreamCallbackManager::StreamCallbackFunc, PADDLE_ENFORCE(cudaLaunchHostFunc(stream_,
stream_callback_context) StreamCallbackManager::StreamCallbackFunc,
stream_callback_context)); // NOLINT
#else #else
cudaStreamAddCallback(stream_, PADDLE_ENFORCE(cudaStreamAddCallback(
StreamCallbackManager::StreamCallbackFunc, stream_, StreamCallbackManager::StreamCallbackFunc,
stream_callback_context, 0) stream_callback_context, 0)); // NOLINT
#endif #endif
); // NOLINT
} }
void Wait() const { thread_pool_.reset(new ThreadPool(1)); } void Wait() const { thread_pool_.reset(new ThreadPool(1)); }
......
set(PYBIND_DEPS pybind python proto_desc memory executor prune feed_fetch_method pass_builder) set(PYBIND_DEPS pybind python proto_desc memory executor prune feed_fetch_method pass_builder parallel_executor profiler)
set(PYBIND_SRCS pybind.cc exception.cc protobuf.cc const_value.cc) set(PYBIND_SRCS pybind.cc exception.cc protobuf.cc const_value.cc recordio.cc)
if(NOT WIN32)
list(APPEND PYBIND_DEPS parallel_executor profiler)
list(APPEND PYBIND_SRCS recordio.cc)
endif(NOT WIN32)
if(WITH_PYTHON) if(WITH_PYTHON)
if(WITH_AMD_GPU) if(WITH_AMD_GPU)
hip_library(paddle_pybind SHARED hip_library(paddle_pybind SHARED
......
...@@ -21,13 +21,6 @@ limitations under the License. */ ...@@ -21,13 +21,6 @@ limitations under the License. */
#include <utility> #include <utility>
#include <vector> #include <vector>
#if defined(_WIN32)
#define NOMINMAX
#define GLOG_NO_ABBREVIATED_SEVERITIES // msvc conflict logging with windows.h
#define GOOGLE_GLOG_DLL_DECL
#include <Windows.h>
#endif
#include "paddle/fluid/framework/executor.h" #include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/feed_fetch_method.h" #include "paddle/fluid/framework/feed_fetch_method.h"
#include "paddle/fluid/framework/framework.pb.h" #include "paddle/fluid/framework/framework.pb.h"
...@@ -36,9 +29,7 @@ limitations under the License. */ ...@@ -36,9 +29,7 @@ limitations under the License. */
#include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/lod_tensor_array.h" #include "paddle/fluid/framework/lod_tensor_array.h"
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
#ifndef _WIN32
#include "paddle/fluid/framework/parallel_executor.h" #include "paddle/fluid/framework/parallel_executor.h"
#endif
#include "paddle/fluid/framework/prune.h" #include "paddle/fluid/framework/prune.h"
#include "paddle/fluid/framework/reader.h" #include "paddle/fluid/framework/reader.h"
#include "paddle/fluid/framework/selected_rows.h" #include "paddle/fluid/framework/selected_rows.h"
...@@ -359,22 +350,16 @@ All parameter, weight, gradient are variables in Paddle. ...@@ -359,22 +350,16 @@ All parameter, weight, gradient are variables in Paddle.
return self.GetMutable<platform::Communicator>(); return self.GetMutable<platform::Communicator>();
}, },
py::return_value_policy::reference) py::return_value_policy::reference)
#endif #endif
#ifndef _WIN32
.def("get_reader", .def("get_reader",
[](Variable &self) -> framework::ReaderHolder * { [](Variable &self) -> framework::ReaderHolder * {
PADDLE_ENFORCE(self.IsType<framework::ReaderHolder>()); PADDLE_ENFORCE(self.IsType<framework::ReaderHolder>());
return self.GetMutable<framework::ReaderHolder>(); return self.GetMutable<framework::ReaderHolder>();
}, },
py::return_value_policy::reference) py::return_value_policy::reference);
#endif
; // NOLINT
#if !defined(_WIN32)
py::class_<framework::ReaderHolder>(m, "Reader", "") py::class_<framework::ReaderHolder>(m, "Reader", "")
.def("reset", &framework::ReaderHolder::ResetAll); .def("reset", &framework::ReaderHolder::ResetAll);
#endif
using LoDTensorBlockingQueue = using LoDTensorBlockingQueue =
::paddle::operators::reader::LoDTensorBlockingQueue; ::paddle::operators::reader::LoDTensorBlockingQueue;
...@@ -643,7 +628,6 @@ All parameter, weight, gradient are variables in Paddle. ...@@ -643,7 +628,6 @@ All parameter, weight, gradient are variables in Paddle.
#endif #endif
#endif #endif
#ifndef _WIN32
py::enum_<platform::ProfilerState>(m, "ProfilerState", py::arithmetic()) py::enum_<platform::ProfilerState>(m, "ProfilerState", py::arithmetic())
.value("kDisabled", platform::ProfilerState::kDisabled) .value("kDisabled", platform::ProfilerState::kDisabled)
.value("kCPU", platform::ProfilerState::kCPU) .value("kCPU", platform::ProfilerState::kCPU)
...@@ -664,7 +648,6 @@ All parameter, weight, gradient are variables in Paddle. ...@@ -664,7 +648,6 @@ All parameter, weight, gradient are variables in Paddle.
m.def("disable_profiler", platform::DisableProfiler); m.def("disable_profiler", platform::DisableProfiler);
m.def("is_profiler_enabled", platform::IsProfileEnabled); m.def("is_profiler_enabled", platform::IsProfileEnabled);
m.def("reset_profiler", platform::ResetProfiler); m.def("reset_profiler", platform::ResetProfiler);
#endif
py::class_<ir::Pass, std::shared_ptr<ir::Pass>> pass(m, "Pass"); py::class_<ir::Pass, std::shared_ptr<ir::Pass>> pass(m, "Pass");
pass.def(py::init()) pass.def(py::init())
...@@ -693,7 +676,6 @@ All parameter, weight, gradient are variables in Paddle. ...@@ -693,7 +676,6 @@ All parameter, weight, gradient are variables in Paddle.
.def("remove_pass", .def("remove_pass",
[](ir::PassBuilder &self, size_t idx) { self.RemovePass(idx); }); [](ir::PassBuilder &self, size_t idx) { self.RemovePass(idx); });
#ifndef _WIN32
// -- python binds for parallel executor. // -- python binds for parallel executor.
py::class_<ParallelExecutor> pe(m, "ParallelExecutor"); py::class_<ParallelExecutor> pe(m, "ParallelExecutor");
py::class_<ExecutionStrategy> exec_strategy(pe, "ExecutionStrategy", R"DOC( py::class_<ExecutionStrategy> exec_strategy(pe, "ExecutionStrategy", R"DOC(
...@@ -921,7 +903,6 @@ All parameter, weight, gradient are variables in Paddle. ...@@ -921,7 +903,6 @@ All parameter, weight, gradient are variables in Paddle.
}); });
BindRecordIOWriter(&m); BindRecordIOWriter(&m);
#endif
return m.ptr(); return m.ptr();
} }
} // namespace pybind } // namespace pybind
......
...@@ -115,9 +115,8 @@ def __bootstrap__(): ...@@ -115,9 +115,8 @@ def __bootstrap__():
'use_pinned_memory', 'check_nan_inf', 'benchmark', 'eager_delete_scope', 'use_pinned_memory', 'check_nan_inf', 'benchmark', 'eager_delete_scope',
'use_mkldnn', 'use_ngraph', 'initial_cpu_memory_in_mb', 'use_mkldnn', 'use_ngraph', 'initial_cpu_memory_in_mb',
'init_allocated_mem', 'free_idle_memory', 'paddle_num_threads', 'init_allocated_mem', 'free_idle_memory', 'paddle_num_threads',
"dist_threadpool_size", 'cpu_deterministic', 'eager_delete_tensor_gb', "dist_threadpool_size", 'eager_delete_tensor_gb', 'allocator_strategy',
'allocator_strategy', 'reader_queue_speed_test_mode', 'reader_queue_speed_test_mode', 'print_sub_graph_dir'
'print_sub_graph_dir'
] ]
if os.name != 'nt': if os.name != 'nt':
read_env_flags.append('warpctc_dir') read_env_flags.append('warpctc_dir')
......
...@@ -15,15 +15,13 @@ ...@@ -15,15 +15,13 @@
from __future__ import print_function from __future__ import print_function
import contextlib import contextlib
import os
from .. import core from .. import core
from .. import executor from .. import executor
from .. import framework from .. import framework
from .. import io from .. import io
if os.name != 'nt': from .. import parallel_executor
from .. import parallel_executor
from .. import unique_name from .. import unique_name
from .trainer import check_and_get_place from .trainer import check_and_get_place
......
...@@ -28,8 +28,7 @@ from .. import framework ...@@ -28,8 +28,7 @@ from .. import framework
from .. import io from .. import io
# optimizer is same as the parameter of Trainer.__init__. Rename it to opt_module # optimizer is same as the parameter of Trainer.__init__. Rename it to opt_module
from .. import optimizer as opt_module from .. import optimizer as opt_module
if os.name != 'nt': from .. import parallel_executor
from .. import parallel_executor
from ..transpiler import distribute_transpiler from ..transpiler import distribute_transpiler
__all__ = [ __all__ = [
......
...@@ -347,72 +347,70 @@ def _copy_reader_create_op_(block, op): ...@@ -347,72 +347,70 @@ def _copy_reader_create_op_(block, op):
return new_op return new_op
if os.name != 'nt': @templatedoc(op_type='create_recordio_file_reader')
def open_recordio_file(filename,
@templatedoc(op_type='create_recordio_file_reader') shapes,
def open_recordio_file(filename, lod_levels,
shapes, dtypes,
lod_levels, pass_num=1,
dtypes, for_parallel=True):
pass_num=1, """
for_parallel=True): ${comment}
"""
${comment}
Args:
filename(${filename_type}): ${filename_comment}.
shapes(list): List of tuples which declaring data shapes.
lod_levels(${lod_levels_type}): ${lod_levels_comment}.
dtypes(list): List of strs which declaring data type.
pass_num(int): Number of passes to run.
for_parallel(Bool): Set it as True if you are going to run
subsequent operators in parallel.
Returns:
${out_comment}.
Examples:
>>> import paddle.fluid as fluid
>>> reader = fluid.layers.io.open_recordio_file(
>>> filename='./data.recordio',
>>> shapes=[(3,224,224), (1)],
>>> lod_levels=[0, 0],
>>> dtypes=['float32', 'int64'])
>>> # Via the reader, we can use 'read_file' layer to get data:
>>> image, label = fluid.layers.io.read_file(reader)
"""
dtypes = [convert_np_dtype_to_dtype_(dt) for dt in dtypes]
shape_concat = []
ranks = []
for shape in shapes: Args:
shape_concat.extend(shape) filename(${filename_type}): ${filename_comment}.
ranks.append(len(shape)) shapes(list): List of tuples which declaring data shapes.
lod_levels(${lod_levels_type}): ${lod_levels_comment}.
dtypes(list): List of strs which declaring data type.
pass_num(int): Number of passes to run.
for_parallel(Bool): Set it as True if you are going to run
subsequent operators in parallel.
var_name = unique_name('open_recordio_file') Returns:
${out_comment}.
startup_blk = default_startup_program().current_block() Examples:
startup_var = startup_blk.create_var(name=var_name)
startup_blk.append_op(
type='create_recordio_file_reader',
outputs={'Out': [startup_var]},
attrs={
'shape_concat': shape_concat,
'lod_levels': lod_levels,
'filename': filename,
'ranks': ranks
})
startup_var.desc.set_dtypes(dtypes) >>> import paddle.fluid as fluid
startup_var.persistable = True >>> reader = fluid.layers.io.open_recordio_file(
main_prog_var = _copy_reader_var_( >>> filename='./data.recordio',
default_main_program().current_block(), startup_var) >>> shapes=[(3,224,224), (1)],
>>> lod_levels=[0, 0],
>>> dtypes=['float32', 'int64'])
>>> # Via the reader, we can use 'read_file' layer to get data:
>>> image, label = fluid.layers.io.read_file(reader)
"""
dtypes = [convert_np_dtype_to_dtype_(dt) for dt in dtypes]
shape_concat = []
ranks = []
if pass_num > 1: for shape in shapes:
main_prog_var = multi_pass(reader=main_prog_var, pass_num=pass_num) shape_concat.extend(shape)
ranks.append(len(shape))
var_name = unique_name('open_recordio_file')
startup_blk = default_startup_program().current_block()
startup_var = startup_blk.create_var(name=var_name)
startup_blk.append_op(
type='create_recordio_file_reader',
outputs={'Out': [startup_var]},
attrs={
'shape_concat': shape_concat,
'lod_levels': lod_levels,
'filename': filename,
'ranks': ranks
})
return monkey_patch_reader_methods(main_prog_var) startup_var.desc.set_dtypes(dtypes)
startup_var.persistable = True
main_prog_var = _copy_reader_var_(default_main_program().current_block(),
startup_var)
if pass_num > 1:
main_prog_var = multi_pass(reader=main_prog_var, pass_num=pass_num)
return monkey_patch_reader_methods(main_prog_var)
def random_data_generator(low, high, shapes, lod_levels, for_parallel=True): def random_data_generator(low, high, shapes, lod_levels, for_parallel=True):
......
...@@ -343,128 +343,126 @@ def embedding(input, ...@@ -343,128 +343,126 @@ def embedding(input,
return tmp return tmp
if os.name != 'nt': @templatedoc(op_type="lstm")
def dynamic_lstm(input,
size,
h_0=None,
c_0=None,
param_attr=None,
bias_attr=None,
use_peepholes=True,
is_reverse=False,
gate_activation='sigmoid',
cell_activation='tanh',
candidate_activation='tanh',
dtype='float32',
name=None):
"""
${comment}
@templatedoc(op_type="lstm") Args:
def dynamic_lstm(input, input (Variable): ${input_comment}
size, size (int): 4 * hidden size.
h_0=None, h_0(Variable): The initial hidden state is an optional input, default is zero.
c_0=None, This is a tensor with shape (N x D), where N is the
param_attr=None, batch size and D is the hidden size.
bias_attr=None, c_0(Variable): The initial cell state is an optional input, default is zero.
use_peepholes=True, This is a tensor with shape (N x D), where N is the
is_reverse=False, batch size. `h_0` and `c_0` can be NULL but only at the same time.
gate_activation='sigmoid', param_attr(ParamAttr|None): The parameter attribute for the learnable
cell_activation='tanh', hidden-hidden weights.
candidate_activation='tanh',
dtype='float32',
name=None):
"""
${comment}
Args:
input (Variable): ${input_comment}
size (int): 4 * hidden size.
h_0(Variable): The initial hidden state is an optional input, default is zero.
This is a tensor with shape (N x D), where N is the
batch size and D is the hidden size.
c_0(Variable): The initial cell state is an optional input, default is zero.
This is a tensor with shape (N x D), where N is the
batch size. `h_0` and `c_0` can be NULL but only at the same time.
param_attr(ParamAttr|None): The parameter attribute for the learnable
hidden-hidden weights.
- Weights = {:math:`W_{ch}, W_{ih}, \
W_{fh}, W_{oh}`}
- The shape is (D x 4D), where D is the hidden
size.
If it is set to None or one attribute of ParamAttr,
dynamic_lstm will create ParamAttr as param_attr.
If the Initializer of the param_attr is not set, the
parameter is initialized with Xavier. Default: None.
bias_attr (ParamAttr|None): The bias attribute for the learnable bias
weights, which contains two parts, input-hidden
bias weights and peephole connections weights if
setting `use_peepholes` to `True`.
1. `use_peepholes = False`
- Biases = {:math:`b_c, b_i, b_f, b_o`}.
- The shape is (1 x 4D).
2. `use_peepholes = True`
- Biases = { :math:`b_c, b_i, b_f, b_o, W_{ic}, \
W_{fc}, W_{oc}`}.
- The shape is (1 x 7D).
If it is set to None or one attribute of ParamAttr,
dynamic_lstm will create ParamAttr as bias_attr.
If the Initializer of the bias_attr is not set,
the bias is initialized zero. Default: None.
use_peepholes (bool): ${use_peepholes_comment}
is_reverse (bool): ${is_reverse_comment}
gate_activation (str): ${gate_activation_comment}
cell_activation (str): ${cell_activation_comment}
candidate_activation (str): ${candidate_activation_comment}
dtype (str): Data type. Choices = ["float32", "float64"], default "float32".
name (str|None): A name for this layer(optional). If set None, the layer
will be named automatically.
Returns:
tuple: The hidden state, and cell state of LSTM. The shape of both \
is (T x D), and lod is the same with the `input`.
Examples:
.. code-block:: python
hidden_dim = 512
forward_proj = fluid.layers.fc(input=input_seq, size=hidden_dim * 4,
bias_attr=False)
forward, _ = fluid.layers.dynamic_lstm(
input=forward_proj, size=hidden_dim * 4, use_peepholes=False)
"""
assert bias_attr is not False, "bias_attr should not be False in dynamic_lstmp."
helper = LayerHelper('lstm', **locals())
size = size // 4
weight = helper.create_parameter(
attr=helper.param_attr, shape=[size, 4 * size], dtype=dtype)
bias_size = [1, 7 * size]
if not use_peepholes:
bias_size[1] = 4 * size
bias = helper.create_parameter(
attr=helper.bias_attr, shape=bias_size, dtype=dtype, is_bias=True)
hidden = helper.create_variable_for_type_inference(dtype) - Weights = {:math:`W_{ch}, W_{ih}, \
cell = helper.create_variable_for_type_inference(dtype) W_{fh}, W_{oh}`}
batch_gate = helper.create_variable_for_type_inference(dtype) - The shape is (D x 4D), where D is the hidden
batch_cell_pre_act = helper.create_variable_for_type_inference(dtype) size.
inputs = {'Input': input, 'Weight': weight, 'Bias': bias}
batch_size = input.shape[0]
if h_0:
assert h_0.shape == (batch_size, size), \
'The shape of h0 should be (batch_size, %d)' % size
inputs['H0'] = h_0
if c_0:
assert c_0.shape == (batch_size, size), \
'The shape of c0 should be (batch_size, %d)' % size
inputs['C0'] = c_0
helper.append_op( If it is set to None or one attribute of ParamAttr,
type='lstm', dynamic_lstm will create ParamAttr as param_attr.
inputs=inputs, If the Initializer of the param_attr is not set, the
outputs={ parameter is initialized with Xavier. Default: None.
'Hidden': hidden, bias_attr (ParamAttr|None): The bias attribute for the learnable bias
'Cell': cell, weights, which contains two parts, input-hidden
'BatchGate': batch_gate, bias weights and peephole connections weights if
'BatchCellPreAct': batch_cell_pre_act setting `use_peepholes` to `True`.
},
attrs={ 1. `use_peepholes = False`
'use_peepholes': use_peepholes, - Biases = {:math:`b_c, b_i, b_f, b_o`}.
'is_reverse': is_reverse, - The shape is (1 x 4D).
'gate_activation': gate_activation, 2. `use_peepholes = True`
'cell_activation': cell_activation, - Biases = { :math:`b_c, b_i, b_f, b_o, W_{ic}, \
'candidate_activation': candidate_activation W_{fc}, W_{oc}`}.
}) - The shape is (1 x 7D).
return hidden, cell
If it is set to None or one attribute of ParamAttr,
dynamic_lstm will create ParamAttr as bias_attr.
If the Initializer of the bias_attr is not set,
the bias is initialized zero. Default: None.
use_peepholes (bool): ${use_peepholes_comment}
is_reverse (bool): ${is_reverse_comment}
gate_activation (str): ${gate_activation_comment}
cell_activation (str): ${cell_activation_comment}
candidate_activation (str): ${candidate_activation_comment}
dtype (str): Data type. Choices = ["float32", "float64"], default "float32".
name (str|None): A name for this layer(optional). If set None, the layer
will be named automatically.
Returns:
tuple: The hidden state, and cell state of LSTM. The shape of both \
is (T x D), and lod is the same with the `input`.
Examples:
.. code-block:: python
hidden_dim = 512
forward_proj = fluid.layers.fc(input=input_seq, size=hidden_dim * 4,
bias_attr=False)
forward, _ = fluid.layers.dynamic_lstm(
input=forward_proj, size=hidden_dim * 4, use_peepholes=False)
"""
assert bias_attr is not False, "bias_attr should not be False in dynamic_lstmp."
helper = LayerHelper('lstm', **locals())
size = size // 4
weight = helper.create_parameter(
attr=helper.param_attr, shape=[size, 4 * size], dtype=dtype)
bias_size = [1, 7 * size]
if not use_peepholes:
bias_size[1] = 4 * size
bias = helper.create_parameter(
attr=helper.bias_attr, shape=bias_size, dtype=dtype, is_bias=True)
hidden = helper.create_variable_for_type_inference(dtype)
cell = helper.create_variable_for_type_inference(dtype)
batch_gate = helper.create_variable_for_type_inference(dtype)
batch_cell_pre_act = helper.create_variable_for_type_inference(dtype)
inputs = {'Input': input, 'Weight': weight, 'Bias': bias}
batch_size = input.shape[0]
if h_0:
assert h_0.shape == (batch_size, size), \
'The shape of h0 should be (batch_size, %d)' % size
inputs['H0'] = h_0
if c_0:
assert c_0.shape == (batch_size, size), \
'The shape of c0 should be (batch_size, %d)' % size
inputs['C0'] = c_0
helper.append_op(
type='lstm',
inputs=inputs,
outputs={
'Hidden': hidden,
'Cell': cell,
'BatchGate': batch_gate,
'BatchCellPreAct': batch_cell_pre_act
},
attrs={
'use_peepholes': use_peepholes,
'is_reverse': is_reverse,
'gate_activation': gate_activation,
'cell_activation': cell_activation,
'candidate_activation': candidate_activation
})
return hidden, cell
def dynamic_lstmp(input, def dynamic_lstmp(input,
...@@ -963,43 +961,39 @@ def linear_chain_crf(input, label, param_attr=None): ...@@ -963,43 +961,39 @@ def linear_chain_crf(input, label, param_attr=None):
return log_likelihood return log_likelihood
if os.name != 'nt': @templatedoc()
def crf_decoding(input, param_attr, label=None):
@templatedoc() """
def crf_decoding(input, param_attr, label=None): ${comment}
"""
${comment}
Args: Args:
input(${emission_type}): ${emission_comment} input(${emission_type}): ${emission_comment}
param_attr(ParamAttr): The parameter attribute for training. param_attr(ParamAttr): The parameter attribute for training.
label(${label_type}): ${label_comment} label(${label_type}): ${label_comment}
Returns: Returns:
Variable: ${viterbi_path_comment} Variable: ${viterbi_path_comment}
Examples: Examples:
.. code-block:: python .. code-block:: python
crf_decode = layers.crf_decoding( crf_decode = layers.crf_decoding(
input=hidden, param_attr=ParamAttr(name="crfw")) input=hidden, param_attr=ParamAttr(name="crfw"))
""" """
helper = LayerHelper('crf_decoding', **locals()) helper = LayerHelper('crf_decoding', **locals())
transition = helper.get_parameter(param_attr.name) transition = helper.get_parameter(param_attr.name)
viterbi_path = helper.create_variable_for_type_inference( viterbi_path = helper.create_variable_for_type_inference(
dtype=helper.input_dtype()) dtype=helper.input_dtype())
helper.append_op( helper.append_op(
type='crf_decoding', type='crf_decoding',
inputs={ inputs={"Emission": [input],
"Emission": [input],
"Transition": transition, "Transition": transition,
"Label": label "Label": label},
}, outputs={"ViterbiPath": [viterbi_path]})
outputs={"ViterbiPath": [viterbi_path]})
return viterbi_path return viterbi_path
@templatedoc() @templatedoc()
...@@ -5593,48 +5587,42 @@ def label_smooth(label, ...@@ -5593,48 +5587,42 @@ def label_smooth(label,
return smooth_label return smooth_label
if os.name != 'nt': @templatedoc()
def roi_pool(input, rois, pooled_height=1, pooled_width=1, spatial_scale=1.0):
@templatedoc() """
def roi_pool(input, ${comment}
rois,
pooled_height=1, Args:
pooled_width=1, input (Variable): ${x_comment}
spatial_scale=1.0): rois (Variable): ROIs (Regions of Interest) to pool over.
""" pooled_height (integer): ${pooled_height_comment} Default: 1
${comment} pooled_width (integer): ${pooled_width_comment} Default: 1
spatial_scale (float): ${spatial_scale_comment} Default: 1.0
Args:
input (Variable): ${x_comment} Returns:
rois (Variable): ROIs (Regions of Interest) to pool over. Variable: ${out_comment}.
pooled_height (integer): ${pooled_height_comment} Default: 1
pooled_width (integer): ${pooled_width_comment} Default: 1 Examples:
spatial_scale (float): ${spatial_scale_comment} Default: 1.0 .. code-block:: python
Returns: pool_out = fluid.layers.roi_pool(input=x, rois=rois, 7, 7, 1.0)
Variable: ${out_comment}. """
helper = LayerHelper('roi_pool', **locals())
Examples: dtype = helper.input_dtype()
.. code-block:: python pool_out = helper.create_variable_for_type_inference(dtype)
argmaxes = helper.create_variable_for_type_inference(dtype='int32')
pool_out = fluid.layers.roi_pool(input=x, rois=rois, 7, 7, 1.0) helper.append_op(
""" type="roi_pool",
helper = LayerHelper('roi_pool', **locals()) inputs={"X": input,
dtype = helper.input_dtype() "ROIs": rois},
pool_out = helper.create_variable_for_type_inference(dtype) outputs={"Out": pool_out,
argmaxes = helper.create_variable_for_type_inference(dtype='int32') "Argmax": argmaxes},
helper.append_op( attrs={
type="roi_pool", "pooled_height": pooled_height,
inputs={"X": input, "pooled_width": pooled_width,
"ROIs": rois}, "spatial_scale": spatial_scale
outputs={"Out": pool_out, })
"Argmax": argmaxes}, return pool_out
attrs={
"pooled_height": pooled_height,
"pooled_width": pooled_width,
"spatial_scale": spatial_scale
})
return pool_out
@templatedoc() @templatedoc()
......
...@@ -100,26 +100,27 @@ Examples: ...@@ -100,26 +100,27 @@ Examples:
>>> result = fluid.layers.hard_shrink(x=data, threshold=0.3) >>> result = fluid.layers.hard_shrink(x=data, threshold=0.3)
""" """
if os.name != 'nt': __all__ += ['cumsum']
__all__ += ['cumsum']
_cum_sum_ = generate_layer_fn('cumsum')
_cum_sum_ = generate_layer_fn('cumsum')
def cumsum(x, axis=None, exclusive=None, reverse=None): def cumsum(x, axis=None, exclusive=None, reverse=None):
locals_var = locals().keys() locals_var = locals().keys()
kwargs = dict() kwargs = dict()
for name in locals_var: for name in locals_var:
val = locals()[name] val = locals()[name]
if val is not None: if val is not None:
kwargs[name] = val kwargs[name] = val
return _cum_sum_(**kwargs) return _cum_sum_(**kwargs)
cumsum.__doc__ = _cum_sum_.__doc__ + """
Examples: cumsum.__doc__ = _cum_sum_.__doc__ + """
Examples:
>>> data = fluid.layers.data(name="input", shape=[32, 784])
>>> result = fluid.layers.cumsum(data, axis=0) >>> data = fluid.layers.data(name="input", shape=[32, 784])
""" >>> result = fluid.layers.cumsum(data, axis=0)
"""
__all__ += ['thresholded_relu'] __all__ += ['thresholded_relu']
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册