From 6dd52c5b255f7399188818a29991f5d375cd175d Mon Sep 17 00:00:00 2001 From: hutuxian Date: Fri, 15 Jan 2021 14:50:46 +0800 Subject: [PATCH] Ascend rc (#30483) --- CMakeLists.txt | 5 + cmake/configure.cmake | 4 + cmake/external/ascend.cmake | 61 ++ cmake/external/cryptopp.cmake | 4 +- cmake/external/dlpack.cmake | 2 +- cmake/external/gflags.cmake | 6 +- cmake/external/glog.cmake | 6 +- cmake/external/grpc.cmake | 2 +- cmake/external/openblas.cmake | 2 +- cmake/external/protobuf.cmake | 8 +- cmake/external/pybind11.cmake | 4 +- cmake/external/threadpool.cmake | 2 +- cmake/external/warpctc.cmake | 5 +- cmake/external/xbyak.cmake | 2 +- cmake/external/xxhash.cmake | 2 +- cmake/external/zlib.cmake | 4 +- cmake/third_party.cmake | 5 + paddle/fluid/framework/fleet/CMakeLists.txt | 4 + .../fluid/framework/fleet/ascend_wrapper.cc | 22 + paddle/fluid/framework/fleet/ascend_wrapper.h | 183 +++++ paddle/fluid/operators/CMakeLists.txt | 3 + paddle/fluid/operators/ascend_trigger_op.cc | 52 ++ paddle/fluid/operators/ascend_trigger_op.h | 46 ++ paddle/fluid/pybind/CMakeLists.txt | 5 + paddle/fluid/pybind/ascend_wrapper_py.cc | 694 ++++++++++++++++++ paddle/fluid/pybind/ascend_wrapper_py.h | 31 + paddle/fluid/pybind/pybind.cc | 7 + .../ascend/ascend_optimizer.py | 179 +++++ .../meta_optimizers/ascend/ascend_parser.py | 529 +++++++++++++ .../tests/unittests/test_ascend_trigger.py | 49 ++ 30 files changed, 1904 insertions(+), 24 deletions(-) create mode 100644 cmake/external/ascend.cmake create mode 100644 paddle/fluid/framework/fleet/ascend_wrapper.cc create mode 100644 paddle/fluid/framework/fleet/ascend_wrapper.h create mode 100644 paddle/fluid/operators/ascend_trigger_op.cc create mode 100644 paddle/fluid/operators/ascend_trigger_op.h create mode 100644 paddle/fluid/pybind/ascend_wrapper_py.cc create mode 100644 paddle/fluid/pybind/ascend_wrapper_py.h create mode 100644 python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_optimizer.py create mode 100644 python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_parser.py create mode 100644 python/paddle/fluid/tests/unittests/test_ascend_trigger.py diff --git a/CMakeLists.txt b/CMakeLists.txt index a58640d942..d919dc5ac0 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -31,9 +31,13 @@ option(WITH_GPU "Compile PaddlePaddle with NVIDIA GPU" ${CUDA_F option(WITH_TENSORRT "Compile PaddlePaddle with NVIDIA TensorRT" OFF) option(WITH_XPU "Compile PaddlePaddle with BAIDU KUNLUN XPU" OFF) option(WITH_WIN_DUMP_DBG "Compile with windows core dump debug mode" OFF) +option(WITH_ASCEND "Compile PaddlePaddle with ASCEND" OFF) if (WITH_GPU AND WITH_XPU) message(FATAL_ERROR "Error when compile GPU and XPU at the same time") endif() +if (WITH_GPU AND WITH_ASCEND) + message(FATAL_ERROR "Error when compile GPU and ASCEND at the same time") +endif() # cmake 3.12, 3.13, 3.14 will append gcc link options to nvcc, and nvcc doesn't recognize them. if(WITH_GPU AND (${CMAKE_VERSION} VERSION_GREATER_EQUAL 3.12) AND (${CMAKE_VERSION} VERSION_LESS 3.15)) message(FATAL_ERROR "cmake ${CMAKE_VERSION} is not supported when WITH_GPU=ON because of bug https://cmake.org/pipermail/cmake/2018-September/068195.html. 
" @@ -322,6 +326,7 @@ set(PADDLE_PYTHON_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/python/build") set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -g -DNDEBUG") set(CMAKE_C_FLAGS_RELWITHDEBINFO "-O3 -g -DNDEBUG") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_GLIBCXX_USE_CXX11_ABI=0") if(ON_INFER) # you can trun off the paddle fluid and inference lib by set ON_INFER=OFF diff --git a/cmake/configure.cmake b/cmake/configure.cmake index aeec7da2e6..fc1e72ba3f 100644 --- a/cmake/configure.cmake +++ b/cmake/configure.cmake @@ -78,6 +78,10 @@ if(WITH_BOX_PS) add_definitions(-DPADDLE_WITH_BOX_PS) endif() +if(WITH_ASCEND) + add_definitions(-DPADDLE_WITH_ASCEND) +endif() + if(WITH_XPU) message(STATUS "Compile with XPU!") add_definitions(-DPADDLE_WITH_XPU) diff --git a/cmake/external/ascend.cmake b/cmake/external/ascend.cmake new file mode 100644 index 0000000000..bcf0c0a064 --- /dev/null +++ b/cmake/external/ascend.cmake @@ -0,0 +1,61 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +INCLUDE(ExternalProject) + +SET(ASCEND_PROJECT "extern_ascend") +IF((NOT DEFINED ASCEND_VER) OR (NOT DEFINED ASCEND_URL)) + MESSAGE(STATUS "use pre defined download url") + SET(ASCEND_VER "0.1.1" CACHE STRING "" FORCE) + SET(ASCEND_NAME "ascend" CACHE STRING "" FORCE) + SET(ASCEND_URL "http://paddle-ascend.bj.bcebos.com/ascend.tar.gz" CACHE STRING "" FORCE) +ENDIF() +MESSAGE(STATUS "ASCEND_NAME: ${ASCEND_NAME}, ASCEND_URL: ${ASCEND_URL}") +SET(ASCEND_SOURCE_DIR "${THIRD_PARTY_PATH}/ascend") +SET(ASCEND_DOWNLOAD_DIR "${ASCEND_SOURCE_DIR}/src/${ASCEND_PROJECT}") +SET(ASCEND_DST_DIR "ascend") +SET(ASCEND_INSTALL_ROOT "${THIRD_PARTY_PATH}/install") +SET(ASCEND_INSTALL_DIR ${ASCEND_INSTALL_ROOT}/${ASCEND_DST_DIR}) +SET(ASCEND_ROOT ${ASCEND_INSTALL_DIR}) +SET(ASCEND_INC_DIR ${ASCEND_ROOT}/include) +SET(ASCEND_LIB_DIR ${ASCEND_ROOT}/lib) +SET(ASCEND_LIB ${ASCEND_LIB_DIR}/libge_runner.so) +SET(ASCEND_GRAPH_LIB ${ASCEND_LIB_DIR}/libgraph.so) +SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${ASCEND_ROOT}/lib") + +INCLUDE_DIRECTORIES(${ASCEND_INC_DIR}) +FILE(WRITE ${ASCEND_DOWNLOAD_DIR}/CMakeLists.txt + "PROJECT(ASCEND)\n" + "cmake_minimum_required(VERSION 3.0)\n" + "install(DIRECTORY ${ASCEND_NAME}/include ${ASCEND_NAME}/lib \n" + " DESTINATION ${ASCEND_DST_DIR})\n") +ExternalProject_Add( + ${ASCEND_PROJECT} + ${EXTERNAL_PROJECT_LOG_ARGS} + PREFIX ${ASCEND_SOURCE_DIR} + DOWNLOAD_DIR ${ASCEND_DOWNLOAD_DIR} + DOWNLOAD_COMMAND wget --no-check-certificate ${ASCEND_URL} -c -q -O ${ASCEND_NAME}.tar.gz + && tar zxvf ${ASCEND_NAME}.tar.gz + DOWNLOAD_NO_PROGRESS 1 + UPDATE_COMMAND "" + CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${ASCEND_INSTALL_ROOT} + CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${ASCEND_INSTALL_ROOT} +) +ADD_LIBRARY(ascend SHARED IMPORTED GLOBAL) +SET_PROPERTY(TARGET ascend PROPERTY IMPORTED_LOCATION ${ASCEND_LIB}) + +ADD_LIBRARY(ascend_graph SHARED IMPORTED GLOBAL) +SET_PROPERTY(TARGET ascend_graph PROPERTY IMPORTED_LOCATION ${ASCEND_GRAPH_LIB}) +ADD_DEPENDENCIES(ascend 
ascend_graph ${ASCEND_PROJECT}) + diff --git a/cmake/external/cryptopp.cmake b/cmake/external/cryptopp.cmake index 3176e2a665..a9e1a4d67b 100644 --- a/cmake/external/cryptopp.cmake +++ b/cmake/external/cryptopp.cmake @@ -17,7 +17,7 @@ INCLUDE(ExternalProject) SET(CRYPTOPP_PREFIX_DIR ${THIRD_PARTY_PATH}/cryptopp) SET(CRYPTOPP_INSTALL_DIR ${THIRD_PARTY_PATH}/install/cryptopp) SET(CRYPTOPP_INCLUDE_DIR "${CRYPTOPP_INSTALL_DIR}/include" CACHE PATH "cryptopp include directory." FORCE) -SET(CRYPTOPP_REPOSITORY ${GIT_URL}/weidai11/cryptopp.git) +SET(CRYPTOPP_REPOSITORY https://gitee.com/tianjianhe/cryptopp.git) SET(CRYPTOPP_TAG CRYPTOPP_8_2_0) IF(WIN32) @@ -33,7 +33,7 @@ set(CRYPTOPP_CMAKE_ARGS ${COMMON_CMAKE_ARGS} -DCMAKE_INSTALL_LIBDIR=${CRYPTOPP_INSTALL_DIR}/lib -DCMAKE_INSTALL_PREFIX=${CRYPTOPP_INSTALL_DIR} -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} - -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} + "-DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} -D_GLIBCXX_USE_CXX11_ABI=0" -DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE} -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} diff --git a/cmake/external/dlpack.cmake b/cmake/external/dlpack.cmake index 87db181d95..fa6f8e8d4c 100644 --- a/cmake/external/dlpack.cmake +++ b/cmake/external/dlpack.cmake @@ -17,7 +17,7 @@ include(ExternalProject) set(DLPACK_PREFIX_DIR ${THIRD_PARTY_PATH}/dlpack) set(DLPACK_SOURCE_DIR ${THIRD_PARTY_PATH}/dlpack/src/extern_dlpack) -set(DLPACK_REPOSITORY ${GIT_URL}/dmlc/dlpack.git) +set(DLPACK_REPOSITORY https://gitee.com/tianjianhe/dlpack.git) set(DLPACK_TAG v0.2) cache_third_party(extern_dlpack diff --git a/cmake/external/gflags.cmake b/cmake/external/gflags.cmake index 34f5d7e2be..8ee0c4cdcd 100644 --- a/cmake/external/gflags.cmake +++ b/cmake/external/gflags.cmake @@ -18,8 +18,8 @@ SET(GFLAGS_PREFIX_DIR ${THIRD_PARTY_PATH}/gflags) SET(GFLAGS_SOURCE_DIR ${THIRD_PARTY_PATH}/gflags/src/extern_gflags) SET(GFLAGS_INSTALL_DIR ${THIRD_PARTY_PATH}/install/gflags) SET(GFLAGS_INCLUDE_DIR "${GFLAGS_INSTALL_DIR}/include" CACHE PATH "gflags include directory." FORCE) -set(GFLAGS_REPOSITORY ${GIT_URL}/gflags/gflags.git) -set(GFLAGS_TAG "v2.2.2") +set(GFLAGS_REPOSITORY https://gitee.com/tianjianhe/gflags.git) +set(GFLAGS_TAG 77592648e3f3be87d6c7123eb81cbad75f9aef5a) IF(WIN32) set(GFLAGS_LIBRARIES "${GFLAGS_INSTALL_DIR}/lib/gflags_static.lib" CACHE FILEPATH "GFLAGS_LIBRARIES" FORCE) ELSE(WIN32) @@ -48,7 +48,7 @@ ExternalProject_Add( INSTALL_COMMAND ${INSTALL_COMMAND} CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} - -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} + "-DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} -D_GLIBCXX_USE_CXX11_ABI=0" -DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE} -DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG} -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} diff --git a/cmake/external/glog.cmake b/cmake/external/glog.cmake index 05b98e2b56..64410e99bd 100644 --- a/cmake/external/glog.cmake +++ b/cmake/external/glog.cmake @@ -18,8 +18,8 @@ SET(GLOG_PREFIX_DIR ${THIRD_PARTY_PATH}/glog) SET(GLOG_SOURCE_DIR ${THIRD_PARTY_PATH}/glog/src/extern_glog) SET(GLOG_INSTALL_DIR ${THIRD_PARTY_PATH}/install/glog) SET(GLOG_INCLUDE_DIR "${GLOG_INSTALL_DIR}/include" CACHE PATH "glog include directory." FORCE) -SET(GLOG_REPOSITORY ${GIT_URL}/google/glog.git) -SET(GLOG_TAG v0.4.0) +SET(GLOG_REPOSITORY https://gitee.com/tianjianhe/glog.git) +SET(GLOG_TAG v0.3.5) IF(WIN32) SET(GLOG_LIBRARIES "${GLOG_INSTALL_DIR}/lib/glog.lib" CACHE FILEPATH "glog library." 
FORCE) @@ -47,7 +47,7 @@ ExternalProject_Add( SOURCE_DIR ${GLOG_SOURCE_DIR} CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} - -DCMAKE_CXX_FLAGS=${GLOG_CMAKE_CXX_FLAGS} + "-DCMAKE_CXX_FLAGS=${GLOG_CMAKE_CXX_FLAGS} -D_GLIBCXX_USE_CXX11_ABI=0" -DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE} -DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG} -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} diff --git a/cmake/external/grpc.cmake b/cmake/external/grpc.cmake index 536e95c1dc..bd2f4d11ed 100644 --- a/cmake/external/grpc.cmake +++ b/cmake/external/grpc.cmake @@ -28,7 +28,7 @@ IF(APPLE) SET(GRPC_INSTALL_CMD make prefix=${GRPC_INSTALL_DIR} install) ELSE() SET(GRPC_CFLAGS "-Wno-error -std=c11 ${CLFAGS}") - SET(GRPC_CXXFLAGS "-Wno-error -std=c++11 ${CXXFLAGS}") + SET(GRPC_CXXFLAGS "-Wno-error -std=c++11 ${CXXFLAGS} -D_GLIBCXX_USE_CXX11_ABI=0") SET(BUILD_CMD make CFLAGS=${GRPC_CFLAGS} CXXFLAGS=${GRPC_CXXFLAGS} HAS_SYSTEM_PROTOBUF=false -s -j ${NUM_OF_PROCESSOR} static grpc_cpp_plugin) SET(GRPC_INSTALL_CMD make prefix=${GRPC_INSTALL_DIR} install CFLAGS=${GRPC_CFLAGS} CXXFLAGS=${GRPC_CXXFLAGS}) ENDIF() diff --git a/cmake/external/openblas.cmake b/cmake/external/openblas.cmake index 19ba6d15c5..f459bbfd47 100644 --- a/cmake/external/openblas.cmake +++ b/cmake/external/openblas.cmake @@ -17,7 +17,7 @@ INCLUDE(ExternalProject) SET(CBLAS_PREFIX_DIR ${THIRD_PARTY_PATH}/openblas) SET(CBLAS_SOURCE_DIR ${THIRD_PARTY_PATH}/openblas/src/extern_openblas) SET(CBLAS_INSTALL_DIR ${THIRD_PARTY_PATH}/install/openblas) -SET(CBLAS_REPOSITORY ${GIT_URL}/xianyi/OpenBLAS.git) +SET(CBLAS_REPOSITORY https://gitee.com/tianjianhe/OpenBLAS.git) SET(CBLAS_TAG v0.3.7) if(WITH_MIPS) SET(CBLAS_TAG v0.3.13) diff --git a/cmake/external/protobuf.cmake b/cmake/external/protobuf.cmake index 905c17b930..dd0de0d086 100644 --- a/cmake/external/protobuf.cmake +++ b/cmake/external/protobuf.cmake @@ -183,7 +183,7 @@ FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST) "-DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}" "-DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG}" "-DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE}" - "-DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}" + "-DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} -D_GLIBCXX_USE_CXX11_ABI=0" "-DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}" "-DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}" "-Dprotobuf_WITH_ZLIB=ON" @@ -198,8 +198,8 @@ FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST) "-Dprotobuf_MSVC_STATIC_RUNTIME=${MSVC_STATIC_CRT}") ENDIF() - SET(PROTOBUF_REPOSITORY ${GIT_URL}/protocolbuffers/protobuf.git) - SET(PROTOBUF_TAG 9f75c5aa851cd877fb0d93ccc31b8567a6706546) + SET(PROTOBUF_REPOSITORY https://gitee.com/tianjianhe/protobuf.git) + SET(PROTOBUF_TAG v3.8.0) cache_third_party(${TARGET_NAME} REPOSITORY ${PROTOBUF_REPOSITORY} @@ -234,7 +234,7 @@ FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST) ) ENDFUNCTION() -SET(PROTOBUF_VERSION 3.1.0) +# SET(PROTOBUF_VERSION 3.1.0) IF(NOT PROTOBUF_FOUND) build_protobuf(extern_protobuf FALSE) diff --git a/cmake/external/pybind11.cmake b/cmake/external/pybind11.cmake index 69bd68c277..c6be74811d 100644 --- a/cmake/external/pybind11.cmake +++ b/cmake/external/pybind11.cmake @@ -16,8 +16,8 @@ include(ExternalProject) set(PYBIND_PREFIX_DIR ${THIRD_PARTY_PATH}/pybind) set(PYBIND_SOURCE_DIR ${THIRD_PARTY_PATH}/pybind/src/extern_pybind) -SET(PYBIND_REPOSITORY ${GIT_URL}/pybind/pybind11.git) -SET(PYBIND_TAG v2.4.3) +SET(PYBIND_REPOSITORY https://gitee.com/tianjianhe/pybind11.git) +SET(PYBIND_TAG v2.6.0) cache_third_party(extern_pybind REPOSITORY 
${PYBIND_REPOSITORY} diff --git a/cmake/external/threadpool.cmake b/cmake/external/threadpool.cmake index 205e8d26d9..6c7ff3d6d7 100644 --- a/cmake/external/threadpool.cmake +++ b/cmake/external/threadpool.cmake @@ -16,7 +16,7 @@ INCLUDE(ExternalProject) SET(THREADPOOL_PREFIX_DIR ${THIRD_PARTY_PATH}/threadpool) SET(THREADPOOL_SOURCE_DIR ${THIRD_PARTY_PATH}/threadpool/src/extern_threadpool) -SET(THREADPOOL_REPOSITORY ${GIT_URL}/progschj/ThreadPool.git) +SET(THREADPOOL_REPOSITORY https://gitee.com/tianjianhe/ThreadPool.git) SET(THREADPOOL_TAG 9a42ec1329f259a5f4881a291db1dcb8f2ad9040) cache_third_party(extern_threadpool diff --git a/cmake/external/warpctc.cmake b/cmake/external/warpctc.cmake index 0ee3e2116a..e5d79cf558 100644 --- a/cmake/external/warpctc.cmake +++ b/cmake/external/warpctc.cmake @@ -17,8 +17,9 @@ INCLUDE(ExternalProject) SET(WARPCTC_PREFIX_DIR ${THIRD_PARTY_PATH}/warpctc) SET(WARPCTC_SOURCE_DIR ${THIRD_PARTY_PATH}/warpctc/src/extern_warpctc) SET(WARPCTC_INSTALL_DIR ${THIRD_PARTY_PATH}/install/warpctc) -set(WARPCTC_REPOSITORY ${GIT_URL}/baidu-research/warp-ctc.git) +set(WARPCTC_REPOSITORY https://gitee.com/tianjianhe/warp-ctc.git) set(WARPCTC_TAG 95a461eddeabd51099ef059dcfada1117eb1bfb8) +# set(WARPCTC_TAG bc29dcfff07ced1c7a19a4ecee48e5ad583cef8e) SET(WARPCTC_INCLUDE_DIR "${WARPCTC_INSTALL_DIR}/include" CACHE PATH "Warp-ctc Directory" FORCE) @@ -52,7 +53,7 @@ ExternalProject_Add( -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} -DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG} -DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE} - -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} + "-DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} -D_GLIBCXX_USE_CXX11_ABI=0" -DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE} -DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG} -DCMAKE_INSTALL_PREFIX=${WARPCTC_INSTALL_DIR} diff --git a/cmake/external/xbyak.cmake b/cmake/external/xbyak.cmake index 6627c4eed1..c4c04c98bc 100644 --- a/cmake/external/xbyak.cmake +++ b/cmake/external/xbyak.cmake @@ -19,7 +19,7 @@ set(XBYAK_PREFIX_DIR ${THIRD_PARTY_PATH}/xbyak) SET(XBYAK_SOURCE_DIR ${THIRD_PARTY_PATH}/xbyak/src/extern_xbyak) set(XBYAK_INSTALL_ROOT ${THIRD_PARTY_PATH}/install/xbyak) set(XBYAK_INC_DIR ${XBYAK_INSTALL_ROOT}/include) -set(XBYAK_REPOSITORY ${GIT_URL}/herumi/xbyak.git) +set(XBYAK_REPOSITORY https://gitee.com/tianjianhe/xbyak.git) set(XBYAK_TAG v5.661) # Jul 26th include_directories(${XBYAK_INC_DIR}) diff --git a/cmake/external/xxhash.cmake b/cmake/external/xxhash.cmake index bdd7df190f..4033237b9e 100644 --- a/cmake/external/xxhash.cmake +++ b/cmake/external/xxhash.cmake @@ -18,7 +18,7 @@ set(XXHASH_PREFIX_DIR ${THIRD_PARTY_PATH}/xxhash) set(XXHASH_SOURCE_DIR ${THIRD_PARTY_PATH}/xxhash/src/extern_xxhash) set(XXHASH_INSTALL_DIR ${THIRD_PARTY_PATH}/install/xxhash) set(XXHASH_INCLUDE_DIR "${XXHASH_INSTALL_DIR}/include") -set(XXHASH_REPOSITORY ${GIT_URL}/Cyan4973/xxHash.git) +set(XXHASH_REPOSITORY https://gitee.com/tianjianhe/xxHash.git) set(XXHASH_TAG v0.6.5) cache_third_party(extern_xxhash diff --git a/cmake/external/zlib.cmake b/cmake/external/zlib.cmake index 4464787a0c..334fe5c355 100644 --- a/cmake/external/zlib.cmake +++ b/cmake/external/zlib.cmake @@ -19,7 +19,7 @@ SET(ZLIB_SOURCE_DIR ${THIRD_PARTY_PATH}/zlib/src/extern_zlib) SET(ZLIB_INSTALL_DIR ${THIRD_PARTY_PATH}/install/zlib) SET(ZLIB_ROOT ${ZLIB_INSTALL_DIR} CACHE FILEPATH "zlib root directory." FORCE) SET(ZLIB_INCLUDE_DIR "${ZLIB_INSTALL_DIR}/include" CACHE PATH "zlib include directory." 
FORCE) -set(ZLIB_REPOSITORY ${GIT_URL}/madler/zlib.git) +set(ZLIB_REPOSITORY https://gitee.com/tianjianhe/zlib.git) set(ZLIB_TAG v1.2.8) INCLUDE_DIRECTORIES(${ZLIB_INCLUDE_DIR}) # For zlib code to include its own headers. @@ -41,7 +41,7 @@ ExternalProject_Add( CMAKE_ARGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} - -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} + "-DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} -D_GLIBCXX_USE_CXX11_ABI=0" -DCMAKE_INSTALL_PREFIX=${ZLIB_INSTALL_DIR} -DBUILD_SHARED_LIBS=OFF -DCMAKE_POSITION_INDEPENDENT_CODE=ON diff --git a/cmake/third_party.cmake b/cmake/third_party.cmake index 84020f57f1..d576a299b8 100644 --- a/cmake/third_party.cmake +++ b/cmake/third_party.cmake @@ -274,6 +274,11 @@ if(WITH_BOX_PS) list(APPEND third_party_deps extern_box_ps) endif(WITH_BOX_PS) +if(WITH_ASCEND) + include(external/ascend) + list(APPEND third_party_deps extern_ascend) +endif (WITH_ASCEND) + if (WITH_PSCORE) include(external/snappy) list(APPEND third_party_deps extern_snappy) diff --git a/paddle/fluid/framework/fleet/CMakeLists.txt b/paddle/fluid/framework/fleet/CMakeLists.txt index c774a58e05..4d0cfb6297 100644 --- a/paddle/fluid/framework/fleet/CMakeLists.txt +++ b/paddle/fluid/framework/fleet/CMakeLists.txt @@ -31,3 +31,7 @@ endif(WITH_GLOO) cc_library(heter_wrapper SRCS heter_wrapper.cc DEPS framework_proto device_context heter_service_proto) cc_test(test_fleet_cc SRCS test_fleet.cc DEPS fleet_wrapper gloo_wrapper fs shell) + +if(WITH_ASCEND) + cc_library(ascend_wrapper SRCS ascend_wrapper.cc DEPS framework_proto lod_tensor ascend ascend_graph) +endif(WITH_ASCEND) diff --git a/paddle/fluid/framework/fleet/ascend_wrapper.cc b/paddle/fluid/framework/fleet/ascend_wrapper.cc new file mode 100644 index 0000000000..d1b2f51f70 --- /dev/null +++ b/paddle/fluid/framework/fleet/ascend_wrapper.cc @@ -0,0 +1,22 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifdef PADDLE_WITH_ASCEND +#include "paddle/fluid/framework/fleet/ascend_wrapper.h" +namespace paddle { +namespace framework { +std::shared_ptr AscendInstance::ascend_instance_ = nullptr; +} // end namespace framework +} // end namespace paddle +#endif diff --git a/paddle/fluid/framework/fleet/ascend_wrapper.h b/paddle/fluid/framework/fleet/ascend_wrapper.h new file mode 100644 index 0000000000..da79fccb8c --- /dev/null +++ b/paddle/fluid/framework/fleet/ascend_wrapper.h @@ -0,0 +1,183 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#ifdef PADDLE_WITH_ASCEND +#include + +#include +#include +#include +#include + +#include "paddle/fluid/framework/data_type.h" +#include "paddle/fluid/framework/lod_tensor.h" +#include "paddle/fluid/platform/gpu_info.h" +#include "paddle/fluid/platform/place.h" +#include "paddle/fluid/platform/timer.h" + +#include "ge/ge_api.h" +#include "ge/ge_api_types.h" +#include "graph/attr_value.h" +#include "graph/tensor.h" +#include "graph/types.h" + +namespace paddle { +namespace framework { + +// typedef std::vector AscendGraphDesc; +typedef ge::Graph AscendGraphDesc; + +class AscendInstance { + public: + virtual ~AscendInstance() {} + AscendInstance() {} + + std::map GetDefaultInitSessionOptions() { + std::map init_options; + init_options["a"] = "b"; + init_options["ge.trainFlag"] = "1"; + return init_options; + } + + // add other parameters here to init + void InitGlobalResouces() { + session_.reset(new ge::Session(GetDefaultInitSessionOptions())); + VLOG(1) << "InitGlobalResouces Done"; + } + + static std::shared_ptr GetInstance() { + if (nullptr == ascend_instance_) { + ascend_instance_.reset(new paddle::framework::AscendInstance()); + VLOG(1) << "Initialize AscendInstance Done"; + } + return ascend_instance_; + } + + void AddAscendSubgraph(int graph_idx, const AscendGraphDesc &graph) { + ge::Status status = session_->AddGraph(graph_idx, graph); + PADDLE_ENFORCE_EQ(status, ge::SUCCESS, + paddle::platform::errors::PreconditionNotMet( + "Calling addGraph of graph engine failed, please " + "check Ascend Log.")); + VLOG(1) << "AddAscendSubgraph " << graph_idx << " Done"; + } + + ge::DataType VarTypeToGeType(proto::VarType::Type type) { + if (type == proto::VarType::FP16) { + return ge::DataType::DT_FLOAT16; + } else if (type == proto::VarType::FP32) { + return ge::DataType::DT_FLOAT; + } else if (type == proto::VarType::FP64) { + return ge::DataType::DT_DOUBLE; + } else if (type == proto::VarType::INT32) { + return ge::DataType::DT_INT32; + } else if (type == proto::VarType::INT64) { + return ge::DataType::DT_INT64; + } else { + PADDLE_THROW(platform::errors::Unimplemented( + "Not support %s as tensor type.", DataTypeToString(type))); + } + } + int GeTypeSize(proto::VarType::Type type) { + if (type == proto::VarType::FP16) { + return 2; + } else if (type == proto::VarType::FP32) { + return 4; + } else if (type == proto::VarType::FP64) { + return 8; + } else if (type == proto::VarType::INT32) { + return 4; + } else if (type == proto::VarType::INT64) { + return 8; + } else { + PADDLE_THROW(platform::errors::Unimplemented( + "Not support %s as tensor type.", DataTypeToString(type))); + } + } + ge::Tensor ConvertToGeTensor(const Tensor *tensor) { + auto numel = tensor->numel(); + std::vector vec_dim; + auto dimen = arity(tensor->dims()); + for (auto i = 0; i < dimen; ++i) { + vec_dim.push_back(tensor->dims()[i]); + } + // For Debug + // VLOG(1) << "input numel: " << numel << ", dimen is " << vec_dim.size() << + // ", and shape is"; + // for (const auto e : vec_dim) { + // VLOG(0) << e; + // } + + ge::Shape shape(vec_dim); + ge::TensorDesc tensor_desc(shape, ge::Format::FORMAT_ND, + VarTypeToGeType(tensor->type())); + tensor_desc.SetRealDimCnt(vec_dim.size()); + + const uint8_t *data = + reinterpret_cast(tensor->data()); + std::vector dst(numel * GeTypeSize(tensor->type())); + memcpy(dst.data(), data, GeTypeSize(tensor->type()) * numel); + ge::Tensor ge_tensor(tensor_desc, dst); + 
return ge_tensor; + } + + void RunAscendSubgraph(int graph_idx, + const std::vector &inputs, + std::vector *outputs) { + VLOG(1) << "Ascend Graph[" << graph_idx << "] is about to run."; + // Convert paddle Tensor to GE Tensor + std::vector ge_inputs; + for (const auto &e : inputs) { + ge_inputs.push_back(ConvertToGeTensor(e)); + } + + // Run Graph + std::vector ge_outputs; + ge::Status status = session_->RunGraph(graph_idx, ge_inputs, ge_outputs); + PADDLE_ENFORCE_EQ(status, ge::SUCCESS, + paddle::platform::errors::PreconditionNotMet( + "Calling RunGraph of graph engine failed, please " + "check Ascend Log.")); + VLOG(1) << "Run Ascend Graph[" << graph_idx << "] Done"; + + // change tensor back, note all tensor's type computed in GE is uint8 + for (size_t i = 0; i < ge_outputs.size(); ++i) { + const uint8_t *ret_data = ge_outputs[i].GetData(); + size_t size = ge_outputs[i].GetSize(); + VLOG(1) << "GE Tensor size of the " << i << "th output var is " << size; + auto *dst = (*outputs)[i]->mutable_data({(int64_t)size}, + platform::CPUPlace()); + memcpy(dst, ret_data, size); + + // Following for debug: + // VLOG(0) << "output for " << i << " var: "; + // float *tmp = reinterpret_cast(dst); + // for (size_t j = 0; j < size / 4; ++j) { + // printf("%f ", tmp[j]); + // } + // printf("\n"); + } + } + + protected: + std::shared_ptr session_; + + private: + static std::shared_ptr ascend_instance_; +}; +} // end namespace framework +} // end namespace paddle +#endif diff --git a/paddle/fluid/operators/CMakeLists.txt b/paddle/fluid/operators/CMakeLists.txt index 28741ce947..f46320acf1 100644 --- a/paddle/fluid/operators/CMakeLists.txt +++ b/paddle/fluid/operators/CMakeLists.txt @@ -115,6 +115,9 @@ set(COMMON_OP_DEPS ${COMMON_OP_DEPS} device_memory_aligment) set(COMMON_OP_DEPS ${COMMON_OP_DEPS} layer) set(COMMON_OP_DEPS ${COMMON_OP_DEPS} tensor_formatter) set(COMMON_OP_DEPS ${COMMON_OP_DEPS} op_version_registry) +if (WITH_ASCEND) + set(COMMON_OP_DEPS ${COMMON_OP_DEPS} ascend_wrapper) +endif() # FIXME(typhoonzero): operator deps may not needed. # op_library(lod_tensor_to_array_op DEPS lod_rank_table_op) diff --git a/paddle/fluid/operators/ascend_trigger_op.cc b/paddle/fluid/operators/ascend_trigger_op.cc new file mode 100644 index 0000000000..b699ceec87 --- /dev/null +++ b/paddle/fluid/operators/ascend_trigger_op.cc @@ -0,0 +1,52 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
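+
+// ascend_trigger_op runs a previously added Ascend (GE) subgraph: the tensors
+// in "FeedList" are fed to the graph selected by the "graph_idx" attribute and
+// the results are written back into the "FetchList" outputs (see
+// AscendTriggerCPUKernel in ascend_trigger_op.h).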
+ +#include "paddle/fluid/operators/ascend_trigger_op.h" + +namespace paddle { +namespace operators { + +class AscendTriggerOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + void InferShape(framework::InferShapeContext* ctx) const override {} + + protected: + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override { + return framework::OpKernelType(framework::proto::VarType::FP32, + ctx.device_context()); + } +}; + +class AscendTriggerOpMaker : public framework::OpProtoAndCheckerMaker { + public: + void Make() override { + AddInput("FeedList", "FeedList of Ascend SubGraph").AsDuplicable(); + AddOutput("FetchList", "FetchList of Ascend SubGraph").AsDuplicable(); + AddAttr("graph_idx", "(int, the graph index").SetDefault(-1); + AddComment(R"DOC( +Trigger Ascend SubGraph + +)DOC"); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OPERATOR(ascend_trigger, ops::AscendTriggerOp, + ops::AscendTriggerOpMaker); +REGISTER_OP_CPU_KERNEL(ascend_trigger, ops::AscendTriggerCPUKernel) diff --git a/paddle/fluid/operators/ascend_trigger_op.h b/paddle/fluid/operators/ascend_trigger_op.h new file mode 100644 index 0000000000..eaa79da2ba --- /dev/null +++ b/paddle/fluid/operators/ascend_trigger_op.h @@ -0,0 +1,46 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
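+
+// Defines AscendTriggerCPUKernel: when built WITH_ASCEND it forwards the
+// "FeedList"/"FetchList" tensors to AscendInstance::RunAscendSubgraph for the
+// graph given by "graph_idx"; without WITH_ASCEND it throws a
+// PreconditionNotMet error.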
+ +#pragma once +#include +#include +#include "paddle/fluid/framework/op_registry.h" +#ifdef PADDLE_WITH_ASCEND +#include "paddle/fluid/framework/fleet/ascend_wrapper.h" +#include "paddle/fluid/framework/tensor.h" +#endif + +namespace paddle { +namespace operators { + +template +class AscendTriggerCPUKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext &ctx) const override { +#ifdef PADDLE_WITH_ASCEND + auto ascend_ptr = paddle::framework::AscendInstance::GetInstance(); + auto graph_idx = ctx.Attr("graph_idx"); + VLOG(4) << "AscendTrigger Kernel, begin to run graph: " << graph_idx; + auto inputs = ctx.MultiInput("FeedList"); + auto outputs = ctx.MultiOutput("FetchList"); + ascend_ptr->RunAscendSubgraph(graph_idx, inputs, &outputs); +#else + PADDLE_THROW(platform::errors::PreconditionNotMet( + "Please compile WITH_ASCEND option to enable ascend_trigger op")); +#endif + } +}; + +} // namespace operators +} // namespace paddle diff --git a/paddle/fluid/pybind/CMakeLists.txt b/paddle/fluid/pybind/CMakeLists.txt index 1e4bf43f62..0f52d7344c 100644 --- a/paddle/fluid/pybind/CMakeLists.txt +++ b/paddle/fluid/pybind/CMakeLists.txt @@ -39,6 +39,11 @@ set(PYBIND_SRCS compatible.cc generator_py.cc) +if(WITH_ASCEND) + set(PYBIND_DEPS ${PYBIND_DEPS} ascend_wrapper) + set(PYBIND_SRCS ${PYBIND_SRCS} ascend_wrapper_py.cc) +endif(WITH_ASCEND) + if(WITH_GLOO) set(PYBIND_DEPS ${PYBIND_DEPS} gloo_context) set(PYBIND_SRCS ${PYBIND_SRCS} gloo_context_py.cc) diff --git a/paddle/fluid/pybind/ascend_wrapper_py.cc b/paddle/fluid/pybind/ascend_wrapper_py.cc new file mode 100644 index 0000000000..00eca38085 --- /dev/null +++ b/paddle/fluid/pybind/ascend_wrapper_py.cc @@ -0,0 +1,694 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#ifdef PADDLE_WITH_ASCEND +#include + +#ifdef _POSIX_C_SOURCE +#undef _POSIX_C_SOURCE +#endif + +#ifdef _XOPEN_SOURCE +#undef _XOPEN_SOURCE +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include "paddle/fluid/framework/fleet/ascend_wrapper.h" +#include "paddle/fluid/pybind/ascend_wrapper_py.h" + +using namespace ge; // NOLINT +namespace py = pybind11; + +namespace paddle { +namespace pybind { + +void BindAscendWrapper(py::module *m) { + py::class_>(*m, "AscendInstance") + .def(py::init([]() { return framework::AscendInstance::GetInstance(); })) + .def("init_global_resources", + &framework::AscendInstance::InitGlobalResouces, + py::call_guard()) + .def("add_ascend_subgraph", &framework::AscendInstance::AddAscendSubgraph, + py::call_guard()); +} // end AscendWrapper + +Status ge_initialize(std::map &options) { // NOLINT + py::gil_scoped_release release; + Status res = GEInitialize(options); + py::gil_scoped_acquire acquire; + return res; +} + +enum AttrType { + AT_INT64 = 0, + AT_INT32, + AT_UINT32, + AT_LIST_INT64, + AT_LIST_INT32, + AT_LIST_UINT32, + AT_FLOAT, + AT_LIST_FLOAT, + AT_ATTR_VALUE, + AT_STRING, + AT_LIST_STRING, + AT_BOOL, + AT_LIST_BOOL, + AT_TENSOR, + AT_LIST_TENSOR, + AT_LIST_UINT8, + AT_LIST_LIST_INT64, + AT_LIST_DT, + AT_DT, + AT_LIST_NAMEATTR, + AT_NAMEATTR +}; + +void BindAscendGraph(py::module *m) { + m->def("ge_initialize", &ge_initialize, "GEInitialize"); + m->def("ge_finalize", &GEFinalize, "GEFinalize"); + + //枚举封装 + py::enum_(*m, "GEGraphRunMode") + .value("PREDICTION", GraphRunMode::PREDICTION) + .value("TRAIN", GraphRunMode::TRAIN) + .export_values(); + + py::enum_(*m, "GEDataType") + .value("DT_FLOAT", DataType::DT_FLOAT) + .value("DT_FLOAT16", DataType::DT_FLOAT16) + .value("DT_INT8", DataType::DT_INT8) + .value("DT_INT16", DataType::DT_INT16) + .value("DT_UINT16", DataType::DT_UINT16) + .value("DT_UINT8", DataType::DT_UINT8) + .value("DT_INT32", DataType::DT_INT32) + .value("DT_INT64", DataType::DT_INT64) + .value("DT_UINT32", DataType::DT_UINT32) + .value("DT_UINT64", DataType::DT_UINT64) + .value("DT_BOOL", DataType::DT_BOOL) + .value("DT_DOUBLE", DataType::DT_DOUBLE) + .value("DT_STRING", DataType::DT_STRING) + .value("DT_DUAL_SUB_INT8", DataType::DT_DUAL_SUB_INT8) + .value("DT_DUAL_SUB_UINT8", DataType::DT_DUAL_SUB_UINT8) + .value("DT_COMPLEX64", DataType::DT_COMPLEX64) + .value("DT_COMPLEX128", DataType::DT_COMPLEX128) + .value("DT_QINT8", DataType::DT_QINT8) + .value("DT_QINT16", DataType::DT_QINT16) + .value("DT_QINT32", DataType::DT_QINT32) + .value("DT_QUINT8", DataType::DT_QUINT8) + .value("DT_QUINT16", DataType::DT_QUINT16) + .value("DT_RESOURCE", DataType::DT_RESOURCE) + .value("DT_STRING_REF", DataType::DT_STRING_REF) + .value("DT_DUAL", DataType::DT_DUAL) + .value("DT_UNDEFINED", DataType::DT_UNDEFINED) + .export_values(); + + py::enum_(*m, "GEFormat") + .value("FORMAT_NCHW", Format::FORMAT_NCHW) + .value("FORMAT_NHWC", Format::FORMAT_NHWC) + .value("FORMAT_ND", Format::FORMAT_ND) + .value("FORMAT_NC1HWC0", Format::FORMAT_NC1HWC0) + .value("FORMAT_FRACTAL_Z", Format::FORMAT_FRACTAL_Z) + .value("FORMAT_NC1C0HWPAD", Format::FORMAT_NC1C0HWPAD) + .value("FORMAT_NHWC1C0", Format::FORMAT_NHWC1C0) + .value("FORMAT_FSR_NCHW", Format::FORMAT_FSR_NCHW) + .value("FORMAT_FRACTAL_DECONV", Format::FORMAT_FRACTAL_DECONV) + .value("FORMAT_C1HWNC0", Format::FORMAT_C1HWNC0) + .value("FORMAT_FRACTAL_DECONV_TRANSPOSE", + Format::FORMAT_FRACTAL_DECONV_TRANSPOSE) + .value("FORMAT_FRACTAL_DECONV_SP_STRIDE_TRANS", + 
Format::FORMAT_FRACTAL_DECONV_SP_STRIDE_TRANS) + .value("FORMAT_NC1HWC0_C04", Format::FORMAT_NC1HWC0_C04) + .value("FORMAT_FRACTAL_Z_C04", Format::FORMAT_FRACTAL_Z_C04) + .value("FORMAT_CHWN", Format::FORMAT_CHWN) + .value("FORMAT_FRACTAL_DECONV_SP_STRIDE8_TRANS", + Format::FORMAT_FRACTAL_DECONV_SP_STRIDE8_TRANS) + .value("FORMAT_HWCN", Format::FORMAT_HWCN) + .value("FORMAT_NC1KHKWHWC0", Format::FORMAT_NC1KHKWHWC0) + .value("FORMAT_BN_WEIGHT", Format::FORMAT_BN_WEIGHT) + .value("FORMAT_FILTER_HWCK", Format::FORMAT_FILTER_HWCK) + .value("FORMAT_HASHTABLE_LOOKUP_LOOKUPS", + Format::FORMAT_HASHTABLE_LOOKUP_LOOKUPS) + .value("FORMAT_HASHTABLE_LOOKUP_KEYS", + Format::FORMAT_HASHTABLE_LOOKUP_KEYS) + .value("FORMAT_HASHTABLE_LOOKUP_VALUE", + Format::FORMAT_HASHTABLE_LOOKUP_VALUE) + .value("FORMAT_HASHTABLE_LOOKUP_OUTPUT", + Format::FORMAT_HASHTABLE_LOOKUP_OUTPUT) + .value("FORMAT_HASHTABLE_LOOKUP_HITS", + Format::FORMAT_HASHTABLE_LOOKUP_HITS) + .value("FORMAT_C1HWNCoC0", Format::FORMAT_C1HWNCoC0) + .value("FORMAT_MD", Format::FORMAT_MD) + .value("FORMAT_NDHWC", Format::FORMAT_NDHWC) + .value("FORMAT_FRACTAL_ZZ", Format::FORMAT_FRACTAL_ZZ) + .value("FORMAT_FRACTAL_NZ", Format::FORMAT_FRACTAL_NZ) + .value("FORMAT_NCDHW", Format::FORMAT_NCDHW) + .value("FORMAT_DHWCN", Format::FORMAT_DHWCN) + .value("FORMAT_NDC1HWC0", Format::FORMAT_NDC1HWC0) + .value("FORMAT_FRACTAL_Z_3D", Format::FORMAT_FRACTAL_Z_3D) + .value("FORMAT_CN", Format::FORMAT_CN) + .value("FORMAT_NC", Format::FORMAT_NC) + .value("FORMAT_DHWNC", Format::FORMAT_DHWNC) + .value("FORMAT_FRACTAL_Z_3D_TRANSPOSE", + Format::FORMAT_FRACTAL_Z_3D_TRANSPOSE) + .value("FORMAT_FRACTAL_ZN_LSTM", Format::FORMAT_FRACTAL_ZN_LSTM) + .value("FORMAT_FRACTAL_Z_G", Format::FORMAT_FRACTAL_Z_G) + .value("FORMAT_RESERVED", Format::FORMAT_RESERVED) + .value("FORMAT_ALL", Format::FORMAT_ALL) + .value("FORMAT_NULL", Format::FORMAT_NULL) + .export_values(); + + py::enum_(*m, "GEUnknowShapeOpType") + .value("DEPEND_IN_SHAPE", UnknowShapeOpType::DEPEND_IN_SHAPE) + .value("DEPEND_CONST_VALUE", UnknowShapeOpType::DEPEND_CONST_VALUE) + .value("DEPEND_SHAPE_RANGE", UnknowShapeOpType::DEPEND_SHAPE_RANGE) + .value("DEPEND_COMPUTE", UnknowShapeOpType::DEPEND_COMPUTE) + .export_values(); + + py::enum_(*m, "GEDeviceType") + .value("NPU", DeviceType::NPU) + .value("CPU", DeviceType::CPU) + .export_values(); + + py::enum_(*m, "GEAttrType") + .value("AT_INT64", AttrType::AT_INT64) + .value("AT_INT32", AttrType::AT_INT32) + .value("AT_UINT32", AttrType::AT_UINT32) + .value("AT_LIST_INT64", AttrType::AT_LIST_INT64) + .value("AT_LIST_INT32", AttrType::AT_LIST_INT32) + .value("AT_LIST_UINT32", AttrType::AT_LIST_UINT32) + .value("AT_FLOAT", AttrType::AT_FLOAT) + .value("AT_LIST_FLOAT", AttrType::AT_LIST_FLOAT) + .value("AT_ATTR_VALUE", AttrType::AT_ATTR_VALUE) + .value("AT_STRING", AttrType::AT_STRING) + .value("AT_LIST_STRING", AttrType::AT_LIST_STRING) + .value("AT_BOOL", AttrType::AT_BOOL) + .value("AT_LIST_BOOL", AttrType::AT_LIST_BOOL) + .value("AT_TENSOR", AttrType::AT_TENSOR) + .value("AT_LIST_TENSOR", AttrType::AT_LIST_TENSOR) + .value("AT_LIST_UINT8", AttrType::AT_LIST_UINT8) + .value("AT_LIST_LIST_INT64", AttrType::AT_LIST_LIST_INT64) + .value("AT_LIST_DT", AttrType::AT_LIST_DT) + .value("AT_DT", AttrType::AT_DT) + .value("AT_LIST_NAMEATTR", AttrType::AT_LIST_NAMEATTR) + .value("AT_NAMEATTR", AttrType::AT_NAMEATTR) + .export_values(); + + // 类封装 + py::class_(*m, "GESession") + .def(py::init &>()) + .def("add_graph", + (Status (Session::*)(uint32_t, const Graph &)) & 
Session::AddGraph) + .def("add_graph", + (Status (Session::*)(uint32_t, const Graph &, + const std::map &)) & + Session::AddGraph) + .def("remove_graph", &Session::RemoveGraph) + .def("run_graph", + [](Session &ss, uint32_t graphId, + const std::vector &inputs) -> py::tuple { + std::vector outputs; + Status res = ss.RunGraph(graphId, inputs, outputs); + return py::make_tuple(outputs, res); + }, + py::call_guard()) + .def("build_graph", &Session::BuildGraph) + .def("run_graph_async", &Session::RunGraphAsync) + .def("register_call_back_func", + (Status (Session::*)( // NOLINT + const std::string &, + std::function ¶ms_list)>)) & + Session::RegisterCallBackFunc) + .def("is_graph_need_rebuild", &Session::IsGraphNeedRebuild); + + py::class_(*m, "GEGraph") + .def(py::init<>()) + .def(py::init()) + .def("set_inputs", &Graph::SetInputs) + .def("set_outputs", (Graph & (Graph::*)(const std::vector &)) & + Graph::SetOutputs) + .def("set_outputs", + (Graph & (Graph::*)(const std::vector< + std::pair>> &)) & + Graph::SetOutputs) + .def("set_outputs", + (Graph & + (Graph::*)(const std::vector> + &)) & + Graph::SetOutputs) + .def("set_targets", &Graph::SetTargets) + .def("is_valid", &Graph::IsValid) + .def("add_op", &Graph::AddOp) + .def("find_op_by_name", + [](Graph &graph, const std::string &name) -> py::tuple { + ge::Operator op; + graphStatus status = graph.FindOpByName(name, op); + return py::make_tuple(op, status); + }) + .def("find_op_by_type", + [](Graph &graph, const std::string &type) -> py::tuple { + std::vector ops; + graphStatus status = graph.FindOpByType(type, ops); + return py::make_tuple(ops, status); + }) + .def("get_all_op_name", + [](Graph &graph) -> py::tuple { + std::vector op_name; + graphStatus status = graph.GetAllOpName(op_name); + return py::make_tuple(op_name, status); + }) + .def("save_to_file", &Graph::SaveToFile) + .def("load_from_file", &Graph::LoadFromFile) + .def("get_name", &Graph::GetName) + .def("set_need_iteration", &Graph::SetNeedIteration); + + py::class_(*m, "GEOperator") + .def(py::init<>()) + .def(py::init()) + .def(py::init()) + .def("is_empty", &Operator::IsEmpty) + .def("get_name", &Operator::GetName) + .def("get_op_type", &Operator::GetOpType) + .def("set_input", + (Operator & (Operator::*)(const std::string &, const Operator &)) & + Operator::SetInput) + .def("set_input", + (Operator & (Operator::*)(const std::string &, const Operator &, + const std::string &)) & + Operator::SetInput) + .def("set_input", (Operator & (Operator::*)(const std::string &, + const Operator &, uint32_t)) & + Operator::SetInput) + .def("add_control_input", &Operator::AddControlInput) + .def("get_input_const_data", + [](Operator &op, const std::string &dst_name) -> py::tuple { + Tensor data; + graphStatus res = op.GetInputConstData(dst_name, data); + return py::make_tuple(data, res); + }) + .def("get_input_desc", + (TensorDesc (Operator::*)(const std::string &) const) & + Operator::GetInputDesc) + .def("get_input_desc", + (TensorDesc (Operator::*)(uint32_t) const) & Operator::GetInputDesc) + .def("get_dynamic_output_num", &Operator::GetDynamicOutputNum) + .def("get_dynamic_input_num", &Operator::GetDynamicInputNum) + .def("try_get_input_desc", + [](Operator &op, const std::string &name) -> py::tuple { + TensorDesc tensor_desc; + graphStatus status = op.TryGetInputDesc(name, tensor_desc); + return py::make_tuple(tensor_desc, status); + }) + .def("update_input_desc", &Operator::UpdateInputDesc) + .def("get_output_desc", + (TensorDesc (Operator::*)(const std::string &) const) & + 
Operator::GetOutputDesc) + .def("get_output_desc", + (TensorDesc (Operator::*)(uint32_t) const) & Operator::GetOutputDesc) + .def("update_output_desc", &Operator::UpdateOutputDesc) + .def("get_dynamic_input_desc", &Operator::GetDynamicInputDesc) + .def("update_dynamic_input_desc", &Operator::UpdateDynamicInputDesc) + .def("get_dynamic_output_desc", &Operator::GetDynamicOutputDesc) + .def("update_dynamic_output_desc", &Operator::UpdateDynamicOutputDesc) + .def("infer_shape_and_type", &Operator::InferShapeAndType) + .def("set_inference_context", &Operator::SetInferenceContext) + .def("get_inference_context", &Operator::GetInferenceContext) + .def("verify_all_attr", &Operator::VerifyAllAttr) + .def("get_inputs_size", &Operator::GetInputsSize) + .def("get_outputs_size", &Operator::GetOutputsSize) + .def("get_all_attr_names_and_types", &Operator::GetAllAttrNamesAndTypes) + .def("set_attr_int64", + [](Operator &op, const std::string &name, + int64_t value) -> Operator & { + int64_t tar = (int64_t)value; + return op.SetAttr(name, tar); + }) + .def("set_attr_int32", + [](Operator &op, const std::string &name, + int32_t value) -> Operator & { + int32_t tar = (int32_t)value; + return op.SetAttr(name, tar); + }) + .def("set_attr_uint32", + [](Operator &op, const std::string &name, + uint32_t value) -> Operator & { + uint32_t tar = (uint32_t)value; + return op.SetAttr(name, tar); + }) + .def("set_attr_vec_int64", + [](Operator &op, const std::string &name, + const std::vector &value) -> Operator & { + int len = value.size(); + std::vector tar; + int64_t tmp; + for (int i = 0; i < len; i++) { + tmp = (int64_t)value[i]; + tar.push_back(tmp); + } + return op.SetAttr(name, tar); + }) + .def("set_attr_vec_int32", + [](Operator &op, const std::string &name, + const std::vector &value) -> Operator & { + int len = value.size(); + std::vector tar; + int32_t tmp; + for (int i = 0; i < len; i++) { + tmp = (int32_t)value[i]; + tar.push_back(tmp); + } + return op.SetAttr(name, tar); + }) + .def("set_attr_vec_uint32", + [](Operator &op, const std::string &name, + const std::vector &value) -> Operator & { + int len = value.size(); + std::vector tar; + uint32_t tmp; + for (int i = 0; i < len; i++) { + tmp = (uint32_t)value[i]; + tar.push_back(tmp); + } + return op.SetAttr(name, tar); + }) + .def("set_attr_list_int64", + [](Operator &op, const std::string &name, + std::initializer_list &attrValue) -> Operator & { + return op.SetAttr(name, std::move(attrValue)); + }) + .def("set_attr_attrvalue", + [](Operator &op, const std::string &name, AttrValue &attrValue) + -> Operator & { return op.SetAttr(name, std::move(attrValue)); }) + .def( + "set_attr_float", + [](Operator &op, const std::string &name, float value) -> Operator & { + float tar = static_cast(value); + return op.SetAttr(name, tar); + }) + .def("set_attr_vec_float", + [](Operator &op, const std::string &name, + const std::vector &value) -> Operator & { + int len = value.size(); + std::vector tar; + float tmp; + for (int i = 0; i < len; i++) { + tmp = static_cast(value[i]); + tar.push_back(tmp); + } + return op.SetAttr(name, tar); + }) + .def("set_attr_string", (Operator & (Operator::*)(const std::string &, + const std::string &)) & + Operator::SetAttr) + .def("set_attr_vec_string", + (Operator & (Operator::*)(const std::string &, + const std::vector &)) & + Operator::SetAttr) + .def("set_attr_bool", + [](Operator &op, const std::string &name, bool value) -> Operator & { + if (value) + return op.SetAttr(name, true); + else + return op.SetAttr(name, false); + }) 
+ .def("set_attr_vec_bool", + [](Operator &op, const std::string &name, + const std::vector &value) -> Operator & { + int len = value.size(); + std::vector tar; + for (int i = 0; i < len; i++) { + if (value[i]) + tar.push_back(true); + else + tar.push_back(false); + } + return op.SetAttr(name, tar); + }) + .def("set_attr_tensor", + (Operator & (Operator::*)(const std::string &, const Tensor &)) & + Operator::SetAttr) + .def("set_attr_vec_tensor", + (Operator & + (Operator::*)(const std::string &, const std::vector &)) & + Operator::SetAttr) + .def("set_attr_vec_uint8", + [](Operator &op, const std::string &name, + const std::vector &value) -> Operator & { + int len = value.size(); + std::vector tar; + uint8_t tmp; + for (int i = 0; i < len; i++) { + tmp = (uint8_t)value[i]; + tar.push_back(tmp); + } + return op.SetAttr(name, tar); + }) + .def("set_attr_vec_vec_int64", + (Operator & + (Operator::*)(const std::string &, + const std::vector> &)) & + Operator::SetAttr) + .def("set_attr_vec_dtype", + [](Operator &op, const std::string &name, + const std::vector &value) -> Operator & { + int len = value.size(); + std::vector tar; + ge::DataType tmp; + for (int i = 0; i < len; i++) { + tmp = (ge::DataType)value[i]; + tar.push_back(tmp); + } + return op.SetAttr(name, tar); + }) + .def("set_attr_dtype", + [](Operator &op, const std::string &name, + const DataType &value) -> Operator & { + ge::DataType tar = (ge::DataType)value; + return op.SetAttr(name, tar); + }) + + .def("get_attr", + [](Operator &op, const std::string &name, + AttrType type) -> py::tuple { + graphStatus res = -1; + switch (type) { + case AT_INT64: { + int64_t i_64_av; + res = op.GetAttr(name, i_64_av); + return py::make_tuple(i_64_av, res); + } break; + case AT_INT32: { + int32_t i_32_av; + res = op.GetAttr(name, i_32_av); + return py::make_tuple(i_32_av, res); + } break; + case AT_UINT32: { + uint32_t ui_32_av; + res = op.GetAttr(name, ui_32_av); + return py::make_tuple(ui_32_av, res); + } break; + case AT_LIST_INT64: { + std::vector v_i_64_av; + res = op.GetAttr(name, v_i_64_av); + return py::make_tuple(v_i_64_av, res); + } break; + case AT_LIST_INT32: { + std::vector v_i_32_av; + res = op.GetAttr(name, v_i_32_av); + return py::make_tuple(v_i_32_av, res); + } break; + case AT_LIST_UINT32: { + std::vector v_ui_32_av; + res = op.GetAttr(name, v_ui_32_av); + return py::make_tuple(v_ui_32_av, res); + } break; + case AT_FLOAT: { + float f_av; + res = op.GetAttr(name, f_av); + return py::make_tuple(f_av, res); + } break; + case AT_LIST_FLOAT: { + std::vector v_f_av; + res = op.GetAttr(name, v_f_av); + return py::make_tuple(v_f_av, res); + } break; + case AT_ATTR_VALUE: { + AttrValue o_av; + res = op.GetAttr(name, o_av); + return py::make_tuple(o_av, res); + } break; + case AT_STRING: { + std::string s_av; + res = op.GetAttr(name, s_av); + return py::make_tuple(s_av, res); + } break; + case AT_LIST_STRING: { + std::vector v_s_av; + res = op.GetAttr(name, v_s_av); + return py::make_tuple(v_s_av, res); + } break; + case AT_BOOL: { + bool b_av; + res = op.GetAttr(name, b_av); + return py::make_tuple(b_av, res); + } break; + case AT_LIST_BOOL: { + std::vector v_b_av; + res = op.GetAttr(name, v_b_av); + return py::make_tuple(v_b_av, res); + } break; + case AT_TENSOR: { + Tensor t_av; + res = op.GetAttr(name, t_av); + return py::make_tuple(t_av, res); + } break; + case AT_LIST_TENSOR: { + std::vector v_t_av; + res = op.GetAttr(name, v_t_av); + return py::make_tuple(v_t_av, res); + } break; + case AT_LIST_UINT8: { + std::vector v_ui_8_av; + 
res = op.GetAttr(name, v_ui_8_av); + return py::make_tuple(v_ui_8_av, res); + } break; + case AT_LIST_LIST_INT64: { + std::vector> v_v_i_64_av; + res = op.GetAttr(name, v_v_i_64_av); + return py::make_tuple(v_v_i_64_av, res); + } break; + case AT_DT: { + ge::DataType dt_av; + res = op.GetAttr(name, dt_av); + return py::make_tuple(dt_av, res); + } break; + case AT_LIST_DT: { + std::vector v_dt_av; + res = op.GetAttr(name, v_dt_av); + return py::make_tuple(v_dt_av, res); + } break; + default: + return py::make_tuple(0, res); + break; + } + }) + .def("break_connect", &Operator::BreakConnect) + .def("get_subgraph_names_count", &Operator::GetSubgraphNamesCount) + .def("get_subgraph_names", &Operator::GetSubgraphNames) + .def("get_subgraph_builder", &Operator::GetSubgraphBuilder) + .def("get_subgraph", &Operator::GetSubgraph) + .def("get_dynamic_subgraph_builder", &Operator::GetDynamicSubgraphBuilder) + .def("get_dynamic_subgraph", &Operator::GetDynamicSubgraph); + + py::class_(*m, "GETensor") + .def(py::init<>()) + .def(py::init()) + .def(py::init &>()) + .def(py::init()) + .def("set_tensor_desc", &Tensor::SetTensorDesc) + .def("get_tensor_desc", &Tensor::GetTensorDesc) + // .def("set_data", (graphStatus(Tensor::*)(std::vector &&)) & + // Tensor::SetData) + .def("set_data", (graphStatus (Tensor::*)(const std::vector &)) & + Tensor::SetData) + .def("set_data", + (graphStatus (Tensor::*)(const uint8_t *, size_t)) & Tensor::SetData) + .def("set_data", + (graphStatus (Tensor::*)(const std::string &)) & Tensor::SetData) + .def("set_data", + (graphStatus (Tensor::*)(const std::vector &)) & + Tensor::SetData) + + .def("get_data", + [](Tensor &ts) -> py::list { + py::list v_data; + uint8_t *data = ts.GetData(); + size_t size = ts.GetSize(); + for (size_t i = 0; i < size; ++i) { + v_data.append(data[i]); + } + return v_data; + }) + .def("get_size", &Tensor::GetSize) + .def("is_valid", &Tensor::IsValid) + .def("clone", &Tensor::Clone); + + py::class_(*m, "GETensorDesc") + .def(py::init<>()) + .def(py::init(), py::arg("shape"), + py::arg("format") = FORMAT_ND, py::arg("dt") = DT_FLOAT) + .def(py::init()) + .def("update", + (void (TensorDesc::*)(Shape, Format, DataType)) & TensorDesc::Update, + py::arg("shape"), py::arg("format") = FORMAT_ND, + py::arg("dt") = DT_FLOAT) + .def("set_shape", &TensorDesc::SetShape) + .def("get_shape", &TensorDesc::GetShape) + .def("set_unknown_dim_num_shape", &TensorDesc::SetUnknownDimNumShape) + .def("set_shape_range", &TensorDesc::SetShapeRange) + .def("get_shape_range", + [](TensorDesc &tensorDesc) -> py::tuple { + std::vector> range; + graphStatus status = tensorDesc.GetShapeRange(range); + return py::make_tuple(range, status); + }) + .def("set_format", &TensorDesc::SetFormat) + .def("get_format", &TensorDesc::GetFormat) + .def("get_origin_shape", &TensorDesc::GetOriginShape) + .def("set_origin_shape", &TensorDesc::SetOriginShape) + .def("set_origin_format", &TensorDesc::SetOriginFormat) + .def("get_origin_format", &TensorDesc::GetOriginFormat) + .def("set_data_type", &TensorDesc::SetDataType) + .def("get_data_type", &TensorDesc::GetDataType) + .def("set_name", &TensorDesc::SetName) + .def("get_name", &TensorDesc::GetName) + .def("set_size", &TensorDesc::SetSize) + .def("get_size", &TensorDesc::GetSize) + .def("set_real_dim_cnt", &TensorDesc::SetRealDimCnt) + .def("get_real_dim_cnt", &TensorDesc::GetRealDimCnt); + + py::class_(*m, "GEShape") + .def(py::init<>()) + .def(py::init &>()) + .def("get_dim_num", &Shape::GetDimNum) + .def("set_dim", &Shape::SetDim) + 
.def("get_dim", &Shape::GetDim) + .def("get_dims", &Shape::GetDims) + .def("get_shape_size", &Shape::GetShapeSize); + + py::class_(*m, "GEAttrValue").def(py::init<>()); + + py::class_(*m, "GEOperatorFactory") + .def("create_operator", &OperatorFactory::CreateOperator) + .def("get_ops_type_list", + []() -> py::tuple { + std::vector all_ops; + graphStatus status = OperatorFactory::GetOpsTypeList(all_ops); + return py::make_tuple(all_ops, status); + }) + .def("is_exist_op", &OperatorFactory::IsExistOp); +} + +} // end namespace pybind +} // end namespace paddle +#endif diff --git a/paddle/fluid/pybind/ascend_wrapper_py.h b/paddle/fluid/pybind/ascend_wrapper_py.h new file mode 100644 index 0000000000..4af96d6ef4 --- /dev/null +++ b/paddle/fluid/pybind/ascend_wrapper_py.h @@ -0,0 +1,31 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#ifdef PADDLE_WITH_ASCEND +#include "pybind11/pybind11.h" +#include "pybind11/stl.h" + +namespace py = pybind11; + +namespace paddle { +namespace pybind { + +void BindAscendGraph(py::module* m); +void BindAscendWrapper(py::module* m); + +} // namespace pybind +} // namespace paddle +#endif diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index b66dd17bbc..72b3c9645b 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -65,6 +65,9 @@ limitations under the License. */ #include "paddle/fluid/platform/monitor.h" #include "paddle/fluid/platform/place.h" #include "paddle/fluid/platform/profiler.h" +#ifdef PADDLE_WITH_ASCEND +#include "paddle/fluid/pybind/ascend_wrapper_py.h" +#endif #include "paddle/fluid/pybind/box_helper_py.h" #include "paddle/fluid/pybind/compatible.h" #include "paddle/fluid/pybind/const_value.h" @@ -2837,6 +2840,10 @@ All parameter, weight, gradient are variables in Paddle. BindCompatible(&m); BindDataset(&m); BindGenerator(&m); +#ifdef PADDLE_WITH_ASCEND + BindAscendWrapper(&m); + BindAscendGraph(&m); +#endif #ifdef PADDLE_WITH_CRYPTO BindCrypto(&m); #endif diff --git a/python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_optimizer.py new file mode 100644 index 0000000000..d7ac81bb5c --- /dev/null +++ b/python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_optimizer.py @@ -0,0 +1,179 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
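+
+# AscendIRParser translates a Paddle Program into GE graphs: it maps every
+# input/parameter variable to a GE operator, dispatches each op to a parser
+# registered in ascend_parser, and finally replaces the original ops with a
+# single ascend_trigger op. AscendOptimizer wraps a basic optimizer,
+# initializes the GE session and registers the startup/main graphs on it.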
+ +import paddle.fluid.framework as framework +from paddle.fluid.optimizer import Optimizer +import paddle.fluid.core as core +import numpy as np +import ascend_parser + + +class AscendIRParser(object): + def __init__(self): + self.graph_idx = 0 + + def _construct_input_map(self, input_varlist): + ret_map = {} + ge_in_operator = [] + for id, var in enumerate(input_varlist): + if var.is_data: # input data + ge_input = core.GEOperatorFactory.create_operator( + var.name, "Data").set_attr_int32("index", id) + ret_map[var.name] = ge_input + ge_in_operator.append(ge_input) + else: # param, learning ... + ge_input = core.GEOperatorFactory.create_operator(var.name, + "Variable") + ge_input.update_output_desc("y", + core.GETensorDesc( + core.GEShape(var.shape), + core.GEFormat.FORMAT_ND, + core.GEDataType.DT_FLOAT)) + ret_map[var.name] = ge_input + return ge_in_operator, ret_map + + def parse_op(self, op): + if op.type in ascend_parser.registerd_op: + print("Op[%s] has been registered, begin to parse it" % (op.type)) + op_parser = self.parser_factory.create_parse( + ascend_parser.registerd_op[op.type]) + op_parser.apply(op) + else: + print("Op[%s] has not been registered, so we have to skip it" % + (op.type)) + + def _parse_program(self, + graph_name, + program, + input_varlist=[], + fetch_list=[]): + begin_graph_idx = self.graph_idx + ge_in_operator = [] + ge_out_operator = [] + self.var2geop = {} + + block = program.global_block() + if len(block.ops) == 0: + print("There is no ops in program %s" % (graph_name)) + return [] + + graph = core.GEGraph(graph_name) + + ge_in_operator, self.var2geop = self._construct_input_map(input_varlist) + + self.parser_factory = ascend_parser.AscendParserFactory(graph, + self.var2geop) + for i, curop in list(enumerate(block.ops)): + self.parse_op(curop) + + # Set fetch_var for GE + for e in fetch_list: + name = e + if not isinstance(e, str): + name = e.name + ge_out_operator.append(self.var2geop[name]) + + # (Debug) If you want to print back prop vars, append/assign the varname in ge_out_operator here, such as: + # if graph_name == "main": + # ge_out_operator.append(self.var2geop["reduce_sum_0.tmp_0@GRAD"]) + + # Add ops that may be input of a graph, such as const. 
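+ # These are registered in var2geop under a "geinput." prefix
+ # (see AscendParserBase._mark_as_input), so collect them as graph inputs too.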
+
+        for varname, geop in self.var2geop.items():
+            if varname.startswith("geinput"):
+                ge_in_operator.append(geop)
+
+        graph.set_inputs(ge_in_operator).set_outputs(ge_out_operator)
+
+        # Remove the ops of the original program
+        op_num = len(block.ops)
+        for i in range(op_num - 1, -1, -1):
+            block._remove_op(i)
+
+        input_varlist = [var for var in input_varlist if var.is_data]
+
+        block.append_op(
+            type="ascend_trigger",
+            inputs={"FeedList": input_varlist},
+            outputs={"FetchList": fetch_list},
+            attrs={'graph_idx': self.graph_idx})
+        self.graph_idx += 1
+        return graph
+
+    def parse_program(self, startup_program, main_program, input_varlist,
+                      fetch_list):
+        startup_graph = self._parse_program("startup", startup_program)
+        main_graph = self._parse_program("main", main_program, input_varlist,
+                                         fetch_list)
+        return startup_graph, main_graph
+
+
+# AscendOptimizer is a wrapper around a basic optimizer for now.
+# It will become part of the fleet meta_optimizers in the future.
+class AscendOptimizer(Optimizer):
+    def __init__(self, optimizer, fetch_list=[]):
+        self.inner_opt = optimizer
+        self.fetch_list = fetch_list
+
+    def __del__(self):
+        core.ge_finalize()
+
+    def _can_apply(self):
+        if not self.user_defined_strategy.ascend:
+            return False
+        # TODO(hutuxian): add other checks here
+        return True
+
+    def _disable_strategy(self, dist_strategy):
+        dist_strategy.ascend = False
+        dist_strategy.ascend_configs = {}
+
+    def _get_input_varlist(self, program):
+        ret_list = []
+        for var in program.list_vars():
+            if var.is_data or var.persistable:
+                ret_list.append(var)
+        return ret_list
+
+    def minimize(self,
+                 loss,
+                 startup_program=None,
+                 parameter_list=None,
+                 no_grad_set=None):
+        minimized = self.inner_opt.minimize(
+            loss, startup_program=startup_program)
+
+        self.ascend_instance = core.AscendInstance()
+
+        # Configuration options for the Graph Engine are documented at https://support.huaweicloud.com/
+        config = {
+            "ge.exec.deviceId": "0",
+            "ge.graphRunMode": "1",
+            "ge.exec.precision_mode": "must_keep_origin_dtype"
+        }
+        core.ge_initialize(config)
+
+        # Initialize the GE session
+        self.ascend_instance.init_global_resources()
+
+        main_block = loss.block
+        self.parser = AscendIRParser()
+
+        input_varlist = self._get_input_varlist(main_block.program)
+        startup_graph, main_graph = self.parser.parse_program(
+            startup_program, main_block.program, input_varlist, self.fetch_list)
+
+        self.ascend_instance.add_ascend_subgraph(0, startup_graph)
+        self.ascend_instance.add_ascend_subgraph(1, main_graph)
+
+        return minimized
diff --git a/python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_parser.py b/python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_parser.py
new file mode 100644
index 0000000000..b497b5eecd
--- /dev/null
+++ b/python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_parser.py
@@ -0,0 +1,529 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
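Editor's note: a hedged usage sketch for the AscendOptimizer defined in ascend_optimizer.py above. It wraps a basic fluid optimizer, and minimize() rewrites the startup/main programs into ascend_trigger ops while registering the corresponding GE subgraphs. The network and names are illustrative; actually running it needs an Ascend device, a WITH_ASCEND build, and the module path shown being importable.

# Illustrative sketch only, not part of this patch. Assumes a WITH_ASCEND build,
# an available Ascend device, and that the import path below is valid.
import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.distributed.fleet.meta_optimizers.ascend.ascend_optimizer import \
    AscendOptimizer

paddle.enable_static()
main_prog, startup_prog = fluid.Program(), fluid.Program()
with fluid.program_guard(main_prog, startup_prog):
    x = fluid.data(name='x', shape=[8, 4], dtype='float32')
    out = fluid.layers.fc(input=x, size=2)   # lowers to mul/elementwise_add, both registered
    loss = fluid.layers.reduce_sum(out)      # reduce_sum also has a parser

    # minimize() replaces the ops of both programs with ascend_trigger ops and
    # registers the "startup" and "main" GE graphs with core.AscendInstance.
    opt = AscendOptimizer(fluid.optimizer.SGD(learning_rate=0.01),
                          fetch_list=[loss.name])
    opt.minimize(loss, startup_program=startup_prog)

exe = paddle.static.Executor(paddle.CPUPlace())
exe.run(startup_prog)  # graph_idx 0: the "startup" GE subgraph
exe.run(main_prog, feed={'x': np.random.rand(8, 4).astype('float32')})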
+
+import paddle.fluid.framework as framework
+from paddle.fluid.optimizer import Optimizer
+import paddle.fluid.core as core
+import numpy as np
+
+registerd_op = {
+    "elementwise_add": "AddParser",
+    "matmul": "MatMulParser",
+    "mul": "MulParser",
+    "relu": "ReluParser",
+    "softmax_with_cross_entropy": "SoftmaxWithCrossEntropyParser",
+    "shape": "ShapeParser",
+    "fill_constant": "FillConstantParser",
+    "reduce_sum": "ReduceSumParser",
+    "reduce_sum_grad": "ReduceSumGradParser",
+    "matmul_grad": "MatMulGradParser",
+    "mul_grad": "MulGradParser",
+    "relu_grad": "ReluGradParser",
+    "softmax_with_cross_entropy_grad": "SoftmaxWithCrossEntropyGradParser",
+    "truncated_gaussian_random": "TruncatedNormalParser",
+    "sgd": "SGDParser"
+}
+global_cnt = -1
+global_input_cnt = -1
+
+
+class AscendHelper(object):
+    def __init__(self):
+        self.dtype2ge_map = {
+            0: core.GEDataType.DT_BOOL,
+            1: core.GEDataType.DT_INT16,
+            2: core.GEDataType.DT_INT32,
+            3: core.GEDataType.DT_INT64,
+            4: core.GEDataType.DT_FLOAT16,
+            5: core.GEDataType.DT_FLOAT,
+            6: core.GEDataType.DT_DOUBLE
+        }
+        self.dtype2np_map = {
+            0: "bool",
+            1: "int16",
+            2: "int32",
+            3: "int64",
+            4: "float16",
+            5: "float32",
+            6: "float64"
+        }
+
+    def dtype2ge(self, dtype):
+        assert dtype in self.dtype2ge_map, "dtype[%d] is not supported" % (
+            dtype)
+        return self.dtype2ge_map[dtype]
+
+    def dtype2np(self, index):
+        assert index in self.dtype2np_map, "index[%d] is not supported" % (
+            index)
+        return self.dtype2np_map[index]
+
+
+class AscendParserFactory(object):
+    def __init__(self, graph, var2geop):
+        self.graph = graph
+        self.var2geop = var2geop
+
+    def create_parse(self, parser_class):
+        try:
+            parser = globals()[parser_class](self.graph, self.var2geop)
+            return parser
+        except:
+            raise ValueError("parser class %s does not exist" % parser_class)
+
+
+class AscendParserBase(object):
+    def __init__(self, graph, var2geop):
+        self.graph = graph
+        self.var2geop = var2geop
+        self.op = None
+        self.ascend_helper = AscendHelper()
+
+    def _get_ge_input(self, input_var_name):
+        assert input_var_name in self.var2geop, "var %s was not created before" % (
+            input_var_name)
+        return self.var2geop[input_var_name]
+
+    def update_output(self, geop_list, index_list):
+        output_num = len(self.op.output_names)
+        assert output_num == len(
+            index_list
+        ), "Parser[%s]'s output number[%d] is not equal to parameters number[%d]" % (
+            self.parser_name, len(index_list), output_num)
+        for output_id in range(output_num):
+            arguments = self.op.output(self.op.output_names[output_id])
+            print("%d argument: %s" % (output_id, str(arguments)))
+            if len(arguments) > 0:
+                assert len(arguments) == len(
+                    index_list[output_id]
+                ), "Parser[%s]'s %dth argument number[%d] is not equal to paddle's number[%d]" % (
+                    self.parser_name, output_id, len(index_list[output_id]),
+                    len(arguments))
+                for i in range(len(arguments)):
+                    print("assign index_list[%d][%d] to %s" %
+                          (output_id, i, arguments[i]))
+                    self.var2geop[arguments[i]] = geop_list[index_list[
+                        output_id][i]]
+
+        for geop in geop_list:
+            self.graph.add_op(geop)
+
+    def apply(self, op):
+        self.op = op
+        assert self.op.type == self.parser_name, "op [%s] != parser_name[%s]" % (
+            self.op.type, self.parser_name)
+        print("begin to parse op %s" % (self.parser_name))
+        geop_list, index_list = self._apply()
+        self.update_output(geop_list, index_list)
+
+    def _mark_as_input(self, ge_tensor):
+        global global_input_cnt
+        global_input_cnt += 1
+        self.var2geop["geinput."
+ str(global_input_cnt)] = ge_tensor + + def _accumulated_op_id(self): + global global_cnt + global_cnt += 1 + return "." + str(global_cnt) + + def _create_ge_tensor(self, shape, dtype, value): + tensor_desc = core.GETensorDesc( + core.GEShape(shape), core.GEFormat.FORMAT_ND, + self.ascend_helper.dtype2ge(dtype)) + tensor = core.GETensor(tensor_desc) + + data = (value * np.ones(( + shape))).reshape(shape).astype(self.ascend_helper.dtype2np(dtype)) + buf = data.tobytes() + data_8 = np.frombuffer(buf, dtype=np.uint8) + tensor.set_data(data_8) + return tensor + + +class AddParser(AscendParserBase): + def __init__(self, graph, var2geop): + super(AddParser, self).__init__(graph, var2geop) + self.parser_name = "elementwise_add" + + def _apply(self): + x = self._get_ge_input(self.op.input_arg_names[0]) + y = self._get_ge_input(self.op.input_arg_names[1]) + add = core.GEOperatorFactory.create_operator( + "add" + self._accumulated_op_id(), "Add").set_input( + "x1", x).set_input("x2", y) + return [add], [[0]] + + +class ReduceSumParser(AscendParserBase): + def __init__(self, graph, var2geop): + super(ReduceSumParser, self).__init__(graph, var2geop) + self.parser_name = "reduce_sum" + + def _apply(self): + x = self._get_ge_input(self.op.input_arg_names[0]) + axes = self.op.attr("dim") + keep_dims = self.op.attr("keep_dim") + reduce_sum = core.GEOperatorFactory.create_operator( + "reduce_sum" + self._accumulated_op_id(), "ReduceSumD").set_input( + "x", x, 0).set_attr_vec_int32("axes", axes).set_attr_bool( + "keep_dims", keep_dims) + return [reduce_sum], [[0]] + + +class ReduceSumGradParser(AscendParserBase): + def __init__(self, graph, var2geop): + super(ReduceSumGradParser, self).__init__(graph, var2geop) + self.parser_name = "reduce_sum_grad" + + def _apply(self): + x = self._get_ge_input(self.op.input_arg_names[0]) + input = self._get_ge_input(self.op.input_arg_names[1]) + + shape_tensor = core.GEOperatorFactory.create_operator( + "shape" + self._accumulated_op_id(), "Shape").set_input("x", input, + 0) + axis_const = core.GEOperatorFactory.create_operator( + "const" + self._accumulated_op_id(), "Const").set_attr_tensor( + "value", self._create_ge_tensor([1], 2, -1)) + self._mark_as_input(axis_const) + + broadcast = core.GEOperatorFactory.create_operator( + "broadcast_to_d" + self._accumulated_op_id(), + "BroadcastTo").set_input("x", x).set_input("shape", shape_tensor) + # unsqueeze cannot get right result, but ExpandDims seems have the same functionality. 
+ reduce_sum_grad = core.GEOperatorFactory.create_operator( + "expand" + self._accumulated_op_id(), "ExpandDims").set_input( + "x", broadcast).set_input("axis", axis_const) + return [shape_tensor, axis_const, broadcast, reduce_sum_grad], [[3]] + + +class MatMulParser(AscendParserBase): + def __init__(self, graph, var2geop): + super(MatMulParser, self).__init__(graph, var2geop) + self.parser_name = "matmul" + + def _apply(self): + x1 = self._get_ge_input(self.op.input_arg_names[0]) + x2 = self._get_ge_input(self.op.input_arg_names[1]) + matmul = core.GEOperatorFactory.create_operator( + "matmul" + self._accumulated_op_id(), "MatMul").set_input( + "x1", x1).set_input("x2", x2) + return [matmul], [[0]] + + +class MatMulGradParser(AscendParserBase): + def __init__(self, graph, var2geop): + super(MatMulGradParser, self).__init__(graph, var2geop) + self.parser_name = "matmul_grad" + + def _apply(self): + out_grad = self._get_ge_input(self.op.input_arg_names[0]) + x = self._get_ge_input(self.op.input_arg_names[1]) + y = self._get_ge_input(self.op.input_arg_names[2]) + + x_grad = core.GEOperatorFactory.create_operator( + self.parser_name + self._accumulated_op_id(), "MatMul").set_input( + "x1", out_grad).set_input("x2", y).set_attr_bool( + "transpose_x1", False).set_attr_bool("transpose_x2", True) + y_grad = core.GEOperatorFactory.create_operator( + self.parser_name + self._accumulated_op_id(), "MatMul").set_input( + "x1", x).set_input("x2", out_grad).set_attr_bool( + "transpose_x1", True).set_attr_bool("transpose_x2", False) + return [x_grad, y_grad], [[0], [1]] + + +class MulGradParser(AscendParserBase): + def __init__(self, graph, var2geop): + super(MulGradParser, self).__init__(graph, var2geop) + self.parser_name = "mul_grad" + + def _apply(self): + out_grad = self._get_ge_input(self.op.input_arg_names[0]) + x = self._get_ge_input(self.op.input_arg_names[1]) + y = self._get_ge_input(self.op.input_arg_names[2]) + + x_grad = core.GEOperatorFactory.create_operator( + self.parser_name + self._accumulated_op_id(), "MatMul").set_input( + "x1", out_grad).set_input("x2", y).set_attr_bool( + "transpose_x1", False).set_attr_bool("transpose_x2", True) + y_grad = core.GEOperatorFactory.create_operator( + self.parser_name + self._accumulated_op_id(), "MatMul").set_input( + "x1", x).set_input("x2", out_grad).set_attr_bool( + "transpose_x1", True).set_attr_bool("transpose_x2", False) + + return [x_grad, y_grad], [[0], [1]] + + +class MulParser(AscendParserBase): + def __init__(self, graph, var2geop): + super(MulParser, self).__init__(graph, var2geop) + self.parser_name = "mul" + + def _apply(self): + x = self._get_ge_input(self.op.input_arg_names[0]) + y = self._get_ge_input(self.op.input_arg_names[1]) + + matmul = core.GEOperatorFactory.create_operator( + "mul" + self._accumulated_op_id(), "MatMul").set_input( + "x1", x).set_input("x2", y) + return [matmul], [[0]] + + +class ReluParser(AscendParserBase): + def __init__(self, graph, var2geop): + super(ReluParser, self).__init__(graph, var2geop) + self.parser_name = "relu" + + def _apply(self): + x = self._get_ge_input(self.op.input_arg_names[0]) + relu = core.GEOperatorFactory.create_operator( + "relu" + self._accumulated_op_id(), "Relu").set_input("x", x) + return [relu], [[0]] + + +class ReluGradParser(AscendParserBase): + def __init__(self, graph, var2geop): + super(ReluGradParser, self).__init__(graph, var2geop) + self.parser_name = "relu_grad" + + def _apply(self): + out = self._get_ge_input(self.op.input_arg_names[0]) + out_grad = 
self._get_ge_input(self.op.input_arg_names[1])
+        relu_grad = core.GEOperatorFactory.create_operator(
+            self.parser_name + self._accumulated_op_id(), "ReluGrad").set_input(
+                "gradients", out_grad).set_input("features", out)
+        return [relu_grad], [[0]]
+
+
+class SoftmaxWithCrossEntropyParser(AscendParserBase):
+    def __init__(self, graph, var2geop):
+        super(SoftmaxWithCrossEntropyParser, self).__init__(graph, var2geop)
+        self.parser_name = "softmax_with_cross_entropy"
+
+    def _apply(self):
+        label = self._get_ge_input(self.op.input_arg_names[0])
+        logits = self._get_ge_input(self.op.input_arg_names[1])
+
+        cls_num = self.op.block.var(self.op.input_arg_names[1]).shape[1]
+        softmax = core.GEOperatorFactory.create_operator(
+            "softmax" + self._accumulated_op_id(), "SoftmaxV2").set_input(
+                "x", logits)
+        label = core.GEOperatorFactory.create_operator(
+            "cast" + self._accumulated_op_id(), "Cast").set_input(
+                "x", label).set_attr_int32("dst_type", 3)
+
+        tensoron = self._create_ge_tensor([1], 5, 1)
+        on_const = core.GEOperatorFactory.create_operator(
+            "const" + self._accumulated_op_id(), "Const").set_attr_tensor(
+                "value", tensoron)
+        self._mark_as_input(on_const)
+        tensoroff = self._create_ge_tensor([1], 5, 0)
+        off_const = core.GEOperatorFactory.create_operator(
+            "const" + self._accumulated_op_id(), "Const").set_attr_tensor(
+                "value", tensoroff)
+        self._mark_as_input(off_const)
+        onehot = core.GEOperatorFactory.create_operator(
+            "onehot" + self._accumulated_op_id(), "OneHotD").set_input(
+                "x", label).set_input("on_value", on_const).set_input(
+                    "off_value", off_const).set_attr_int32("depth", cls_num)
+        squeeze = core.GEOperatorFactory.create_operator(
+            "mul" + self._accumulated_op_id(), "Squeeze").set_input("x", onehot)
+        loss = core.GEOperatorFactory.create_operator(
+            "loss" + self._accumulated_op_id(),
+            "SoftmaxCrossEntropyWithLogits").set_input(
+                "features", logits).set_input("labels", squeeze)
+
+        return [label, softmax, on_const, off_const, onehot, squeeze,
+                loss], [[6], [1]]
+
+
+class SoftmaxWithCrossEntropyGradParser(AscendParserBase):
+    def __init__(self, graph, var2geop):
+        super(SoftmaxWithCrossEntropyGradParser, self).__init__(graph, var2geop)
+        self.parser_name = "softmax_with_cross_entropy_grad"
+
+    def _apply(self):
+        label = self._get_ge_input(self.op.input_arg_names[0])
+        loss_grad = self._get_ge_input(self.op.input_arg_names[1])
+        softmax = self._get_ge_input(self.op.input_arg_names[2])
+        cls_num = self.op.block.var(self.op.input_arg_names[2]).shape[1]
+
+        tensoron = self._create_ge_tensor([1], 5, 1)
+        on_const = core.GEOperatorFactory.create_operator(
+            "const" + self._accumulated_op_id(), "Const").set_attr_tensor(
+                "value", tensoron)
+        self._mark_as_input(on_const)
+        tensoroff = self._create_ge_tensor([1], 5, 0)
+        off_const = core.GEOperatorFactory.create_operator(
+            "const" + self._accumulated_op_id(), "Const").set_attr_tensor(
+                "value", tensoroff)
+        self._mark_as_input(off_const)
+        label = core.GEOperatorFactory.create_operator(
+            "cast" + self._accumulated_op_id(), "Cast").set_input(
+                "x", label).set_attr_int32("dst_type", 3)
+        onehot = core.GEOperatorFactory.create_operator(
+            "onehot" + self._accumulated_op_id(), "OneHotD").set_input(
+                "x", label).set_input("on_value", on_const).set_input(
+                    "off_value", off_const).set_attr_int32("depth", cls_num)
+        # OneHotD adds an extra dimension, so a Squeeze must be applied afterwards
+        squeeze = core.GEOperatorFactory.create_operator(
+            "mul" + self._accumulated_op_id(), "Squeeze").set_input("x", onehot)
+        sub = 
core.GEOperatorFactory.create_operator( + "sub" + self._accumulated_op_id(), "Sub").set_input( + "x1", softmax).set_input("x2", squeeze) + grad = core.GEOperatorFactory.create_operator( + "mul" + self._accumulated_op_id(), "Mul").set_input( + "x1", loss_grad).set_input("x2", sub) + return [on_const, off_const, label, onehot, squeeze, sub, grad], [[-1]] + + +class ShapeParser(AscendParserBase): + def __init__(self, graph, var2geop): + super(ShapeParser, self).__init__(graph, var2geop) + self.parser_name = "shape" + + def _apply(self): + x = self._get_ge_input(self.op.input_arg_names[0]) + shape = core.GEOperatorFactory.create_operator( + "shape" + self._accumulated_op_id(), "Shape").set_input("x", x) + return [shape], [[0]] + + +class FillConstantParser(AscendParserBase): + def __init__(self, graph, var2geop): + super(FillConstantParser, self).__init__(graph, var2geop) + self.parser_name = "fill_constant" + + def _apply(self): + shape = self.op.attr("shape") + dtype = self.op.attr("dtype") + value = self.op.attr("value") + print("shape: ", shape) + print("dtype: ", dtype) + print("value: ", value) + tensor = self._create_ge_tensor(shape, dtype, value) + const = core.GEOperatorFactory.create_operator( + "const" + self._accumulated_op_id(), "Const").set_attr_tensor( + "value", tensor) + self._mark_as_input(const) + if self.op.block.var(self.op.output('Out')[0]).persistable: + print("%s fill_constant" % (self.op.output('Out')[0])) + var = core.GEOperatorFactory.create_operator( + self.op.output('Out')[0], "Variable") + var.update_output_desc("y", + core.GETensorDesc( + core.GEShape(shape), + core.GEFormat.FORMAT_ND, + core.GEDataType.DT_FLOAT)) + assign = core.GEOperatorFactory.create_operator( + "assign" + self._accumulated_op_id(), "Assign").set_input( + "value", const).set_input("ref", var) + return [const], [[0]] + else: + print( + "self.op.output('Out')[0] is not persistable in fill_constant") + return [const], [[0]] + + +class SGDParser(AscendParserBase): + def __init__(self, graph, var2geop): + super(SGDParser, self).__init__(graph, var2geop) + self.parser_name = "sgd" + + def _apply(self): + grad = self._get_ge_input(self.op.input_arg_names[0]) + lr = self._get_ge_input(self.op.input_arg_names[1]) + param = self._get_ge_input(self.op.input_arg_names[2]) + sgd = core.GEOperatorFactory.create_operator( + "momentum" + self._accumulated_op_id(), + "ApplyGradientDescent").set_input("var", param).set_input( + "alpha", lr).set_input("delta", grad) + return [sgd], [[0]] + + +class TruncatedNormalParser(AscendParserBase): + def __init__(self, graph, var2geop): + super(TruncatedNormalParser, self).__init__(graph, var2geop) + self.parser_name = "truncated_gaussian_random" + + def _apply(self): + shape = self.op.attr("shape") + dtype = self.op.attr("dtype") + mean = self.op.attr("mean") + std = self.op.attr("std") + seed = self.op.attr("seed") + tensor1 = self._create_ge_tensor([len(shape)], 2, shape) + shape_tensor = core.GEOperatorFactory.create_operator( + "const" + self._accumulated_op_id(), "Const").set_attr_tensor( + "value", tensor1) + + tensor2 = self._create_ge_tensor([1], dtype, mean) + mean_tensor = core.GEOperatorFactory.create_operator( + "const" + self._accumulated_op_id(), "Const").set_attr_tensor( + "value", tensor2) + + tensor3 = self._create_ge_tensor([1], dtype, std) + std_tensor = core.GEOperatorFactory.create_operator( + "const" + self._accumulated_op_id(), "Const").set_attr_tensor( + "value", tensor3) + + tensor4 = self._create_ge_tensor([1], dtype, mean - 2 * std) + 
min_tensor = core.GEOperatorFactory.create_operator(
+            "const" + self._accumulated_op_id(), "Const").set_attr_tensor(
+                "value", tensor4)
+
+        tensor5 = self._create_ge_tensor([1], dtype, mean + 2 * std)
+        max_tensor = core.GEOperatorFactory.create_operator(
+            "const" + self._accumulated_op_id(), "Const").set_attr_tensor(
+                "value", tensor5)
+
+        self._mark_as_input(shape_tensor)
+        self._mark_as_input(mean_tensor)
+        self._mark_as_input(std_tensor)
+        self._mark_as_input(min_tensor)
+        self._mark_as_input(max_tensor)
+
+        truncated_normal = core.GEOperatorFactory.create_operator(
+            "truncated_normal" + self._accumulated_op_id(),
+            "ParameterizedTruncatedNormal").set_input(
+                "shape", shape_tensor).set_input(
+                    "means", mean_tensor).set_input(
+                        "stdevs", std_tensor).set_input(
+                            "min", min_tensor).set_input(
+                                "max", max_tensor).set_attr_int32("seed", 0)
+
+        ## write the output of truncated_normal from startup_program to main_program
+        if self.op.block.var(self.op.output('Out')[0]).persistable:
+            print("%s is persistable in truncated_normal" %
+                  (self.op.output('Out')[0]))
+            #var = core.GEOperatorFactory.create_operator(self.op.output('Out')[0], "Variable").set_input("x", truncated_normal)
+            var = core.GEOperatorFactory.create_operator(
+                self.op.output('Out')[0], "Variable")
+            var.update_output_desc("y",
+                                   core.GETensorDesc(
+                                       core.GEShape(shape),
+                                       core.GEFormat.FORMAT_ND,
+                                       core.GEDataType.DT_FLOAT))
+            assign = core.GEOperatorFactory.create_operator(
+                "assign" + self._accumulated_op_id(), "Assign").set_input(
+                    "value", truncated_normal).set_input("ref", var)
+            return [
+                shape_tensor, mean_tensor, std_tensor, min_tensor, max_tensor,
+                truncated_normal
+            ], [[-1]]
+        else:
+            print(
+                "self.op.output('Out')[0] is not persistable in truncated_normal"
+            )
+            return [truncated_normal], [[0]] #[assign]
diff --git a/python/paddle/fluid/tests/unittests/test_ascend_trigger.py b/python/paddle/fluid/tests/unittests/test_ascend_trigger.py
new file mode 100644
index 0000000000..644b550bc4
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/test_ascend_trigger.py
@@ -0,0 +1,49 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
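Editor's note: the parsers in ascend_parser.py above all follow one pattern: fetch GE inputs with _get_ge_input, create GE operators through core.GEOperatorFactory, and return (geop_list, index_list) so that update_output can map each Paddle output argument back to a GE node. A hedged sketch of wiring in one more op is shown below; it assumes GE exposes a "Tanh" operator taking a single "x" input, which should be verified against the GE operator reference before use.

# Illustrative sketch only, not part of this patch. Assumes the names below are
# importable from ascend_parser.py and that GE provides a "Tanh" operator.
import paddle.fluid.core as core
import paddle.distributed.fleet.meta_optimizers.ascend.ascend_parser as ascend_parser
from paddle.distributed.fleet.meta_optimizers.ascend.ascend_parser import (
    AscendParserBase, registerd_op)


class TanhParser(AscendParserBase):
    def __init__(self, graph, var2geop):
        super(TanhParser, self).__init__(graph, var2geop)
        self.parser_name = "tanh"

    def _apply(self):
        x = self._get_ge_input(self.op.input_arg_names[0])
        tanh = core.GEOperatorFactory.create_operator(
            "tanh" + self._accumulated_op_id(), "Tanh").set_input("x", x)
        # Paddle's single output argument maps to node 0 of the returned list.
        return [tanh], [[0]]


# create_parse() resolves parser classes via globals() inside ascend_parser, so
# an out-of-tree parser must also be injected into that module's namespace.
registerd_op["tanh"] = "TanhParser"
ascend_parser.TanhParser = TanhParser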
+ +import paddle +import paddle.fluid as fluid +import paddle.fluid.layers as layers +import unittest + + +class TestAscendTriggerOP(unittest.TestCase): + """ TestCases for ascend_trigger op""" + + def test_ascend_trigger_op(self): + paddle.enable_static() + program = fluid.Program() + block = program.global_block() + with fluid.program_guard(program): + x = fluid.data(name='x', shape=[1], dtype='int64', lod_level=0) + y = fluid.data(name='y', shape=[1], dtype='int64', lod_level=0) + block.append_op( + type="ascend_trigger", + inputs={"FeedList": [x]}, + outputs={"FetchList": [y]}, + attrs={'graph_idx': 0}) + + exe = paddle.static.Executor(paddle.CPUPlace()) + try: + exe.run(program) + except RuntimeError as e: + pass + except: + self.assertTrue(False) + + paddle.disable_static() + + +if __name__ == '__main__': + unittest.main() -- GitLab