PaddlePaddle / Serving: commit 9ba05c4d

Authored on Mar 22, 2020 by Dong Daxiang; committed via GitHub on Mar 22, 2020.

Merge pull request #313 from guru4elephant/add_app

Add app

Parents: 22dd55e4, df92104f

Showing 17 changed files with 861 additions and 32 deletions (+861 / -32)
Changed files:

CMakeLists.txt  (+12 / -10)
core/CMakeLists.txt  (+10 / -3)
core/configure/CMakeLists.txt  (+2 / -2)
core/general-client/CMakeLists.txt  (+1 / -1)
python/CMakeLists.txt  (+24 / -9)
python/examples/bert/benchmark_batch.py  (+6 / -4)
python/examples/bert/benchmark_batch.sh  (+7 / -0)
python/examples/bert/benchmark_with_profile.sh  (+10 / -0, new file)
python/paddle_serving_app/__init__.py  (+14 / -0, new file)
python/paddle_serving_app/reader/__init__.py  (+13 / -0, new file)
python/paddle_serving_app/reader/batching.py  (+126 / -0, new file)
python/paddle_serving_app/reader/bert_base_reader.py  (+24 / -0, new file)
python/paddle_serving_app/reader/chinese_bert_reader.py  (+128 / -0, new file)
python/paddle_serving_app/reader/reader.py  (+24 / -0, new file)
python/paddle_serving_app/reader/tokenization.py  (+441 / -0, new file)
python/paddle_serving_app/version.py  (+15 / -0, new file)
tools/serving_build.sh  (+4 / -3)
CMakeLists.txt

@@ -49,7 +49,9 @@ set(THIRD_PARTY_BUILD_TYPE Release)
 option(WITH_AVX "Compile Paddle Serving with AVX intrinsics" OFF)
 option(WITH_MKL "Compile Paddle Serving with MKL support." OFF)
 option(WITH_GPU "Compile Paddle Serving with NVIDIA GPU" OFF)
-option(CLIENT_ONLY "Compile client libraries and demos only" OFF)
+option(CLIENT "Compile Paddle Serving Client" OFF)
+option(SERVER "Compile Paddle Serving Server" OFF)
+option(APP "Compile Paddle Serving App package" OFF)
 option(WITH_ELASTIC_CTR "Compile ELASITC-CTR solution" OFF)
 option(PACK "Compile for whl" OFF)

@@ -63,12 +65,12 @@ if (NOT DEFINED WITH_MKLDNN)
 endif()
 endif()

-if (NOT CLIENT_ONLY)
+if (SERVER)
 include(external/jsoncpp)
 #include(external/rocksdb)
 endif()

-#include(external/gtest)
+if (SERVER OR CLIENT)
 include(external/snappy)
 include(external/leveldb)
 include(external/zlib)

@@ -81,8 +83,9 @@ include(external/pybind11)
 include(external/python)
 include(generic)
 include(flags)
+endif()

-if (NOT CLIENT_ONLY)
+if (SERVER)
 include(external/cudnn)
 include(paddlepaddle)
 endif()

@@ -91,7 +94,7 @@ message("paddle serving source dir: " ${PADDLE_SERVING_SOURCE_DIR})
 include_directories(${PADDLE_SERVING_SOURCE_DIR})
 include_directories(${PADDLE_SERVING_BINARY_DIR})
-if (NOT CLIENT_ONLY)
+if (SERVER)
 set(EXTERNAL_LIBS
 jsoncpp
 gflags

@@ -109,28 +112,27 @@ set(EXTERNAL_LIBS
 brpc
 )

-if (NOT CLIENT_ONLY)
+if (SERVER)
 if(WITH_MKLML)
 list(APPEND EXTERNAL_LIBS ${MKLML_IOMP_LIB})
 endif()
 endif()

-if (NOT CLIENT_ONLY)
+if (SERVER)
 if(WITH_MKLDNN)
 list(APPEND EXTERNAL_LIBS ${MKLDNN_LIB})
 endif()
 endif()

-if (NOT CLIENT_ONLY)
+if (SERVER)
 list(APPEND EXTERNAL_LIBS paddlepaddle)
 endif()

 add_subdirectory(core)
-if (NOT CLIENT_ONLY)
+if (SERVER)
 add_subdirectory(paddle_inference)
 endif()
 add_subdirectory(python)
-#add_subdirectory(examples)
core/CMakeLists.txt

@@ -12,19 +12,26 @@
 # See the License for the specific language governing permissions and
 # limitations under the License

-if (NOT CLIENT_ONLY)
+if (SERVER)
 add_subdirectory(cube)
 #add_subdirectory(kvdb)
 endif()
+
+if (CLIENT OR SERVER)
 add_subdirectory(configure)
 add_subdirectory(pdcodegen)
 add_subdirectory(sdk-cpp)
-if (CLIENT_ONLY)
+endif()
+
+if (CLIENT)
 add_subdirectory(general-client)
 endif()

-if (NOT CLIENT_ONLY)
+if (SERVER)
 add_subdirectory(predictor)
 add_subdirectory(general-server)
 endif()
+
+if (CLIENT OR SERVER)
 add_subdirectory(util)
+endif()
core/configure/CMakeLists.txt

@@ -33,7 +33,7 @@ py_proto_compile(general_model_config_py_proto SRCS proto/general_model_config.proto)
 add_custom_target(general_model_config_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py)
 add_dependencies(general_model_config_py_proto general_model_config_py_proto_init)

-if (CLIENT_ONLY)
+if (CLIENT)
 py_proto_compile(sdk_configure_py_proto SRCS proto/sdk_configure.proto)
 add_custom_target(sdk_configure_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py)
 add_dependencies(sdk_configure_py_proto sdk_configure_py_proto_init)

@@ -51,7 +51,7 @@ add_custom_command(TARGET general_model_config_py_proto POST_BUILD
 endif()

-if (NOT CLIENT_ONLY)
+if (SERVER)
 py_proto_compile(server_config_py_proto SRCS proto/server_configure.proto)
 add_custom_target(server_config_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py)
 add_dependencies(server_config_py_proto server_config_py_proto_init)
core/general-client/CMakeLists.txt

-if(CLIENT_ONLY)
+if(CLIENT)
 add_subdirectory(pybind11)
 pybind11_add_module(serving_client src/general_model.cpp src/pybind_general_model.cpp)
 target_link_libraries(serving_client PRIVATE -Wl,--whole-archive utils sdk-cpp pybind python -Wl,--no-whole-archive -lpthread -lcrypto -lm -lrt -lssl -ldl -lz)
python/CMakeLists.txt

-if (CLIENT_ONLY)
+if (CLIENT)
 file(GLOB_RECURSE SERVING_CLIENT_PY_FILES paddle_serving_client/*.py)
 set(PY_FILES ${SERVING_CLIENT_PY_FILES})
 SET(PACKAGE_NAME "serving_client")
 set(SETUP_LOG_FILE "setup.py.client.log")
 endif()

-if (NOT CLIENT_ONLY)
+if (SERVER)
 if (NOT WITH_GPU)
 file(GLOB_RECURSE SERVING_SERVER_PY_FILES paddle_serving_server/*.py)
 else()

@@ -16,12 +16,17 @@ if (NOT CLIENT_ONLY)
 set(SETUP_LOG_FILE "setup.py.server.log")
 endif()

-if (CLIENT_ONLY)
+if (CLIENT)
 configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.client.in
     ${CMAKE_CURRENT_BINARY_DIR}/setup.py)
 endif()

-if (NOT CLIENT_ONLY)
+if (APP)
+configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.app.in
+    ${CMAKE_CURRENT_BINARY_DIR}/setup.py)
+endif()
+
+if (SERVER)
 if (NOT WITH_GPU)
 configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.server.in
     ${CMAKE_CURRENT_BINARY_DIR}/setup.py)

@@ -34,7 +39,15 @@ endif()
 set(SERVING_CLIENT_CORE ${PADDLE_SERVING_BINARY_DIR}/core/general-client/*.so)
 message("python env: " ${py_env})

-if (CLIENT_ONLY)
+if (APP)
+add_custom_command(
+    OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp
+    COMMAND cp -r ${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_app/ ${PADDLE_SERVING_BINARY_DIR}/python/
+    COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel)
+add_custom_target(paddle_python ALL DEPENDS ${PADDLE_SERVING_BINARY_DIR}/.timestamp)
+endif()
+
+if (CLIENT)
 add_custom_command(
     OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp
     COMMAND cp -r ${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_client/ ${PADDLE_SERVING_BINARY_DIR}/python/

@@ -44,7 +57,7 @@ add_custom_command(
 add_custom_target(paddle_python ALL DEPENDS serving_client ${PADDLE_SERVING_BINARY_DIR}/.timestamp)
 endif()

-if (NOT CLIENT_ONLY)
+if (SERVER)
 if (NOT WITH_GPU)
 add_custom_command(
     OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp

@@ -66,20 +79,22 @@ endif()
 set(SERVING_CLIENT_PYTHON_PACKAGE_DIR ${CMAKE_CURRENT_BINARY_DIR}/dist/)
 set(SERVING_SERVER_PYTHON_PACKAGE_DIR ${CMAKE_CURRENT_BINARY_DIR}/dist/)
-if (CLIENT_ONLY)
+if (CLIENT)
 install(DIRECTORY ${SERVING_CLIENT_PYTHON_PACKAGE_DIR}
     DESTINATION opt/serving_client/share/wheels
 )
 endif()

-if (NOT CLIENT_ONLY)
+if (SERVER)
 install(DIRECTORY ${SERVING_SERVER_PYTHON_PACKAGE_DIR}
     DESTINATION opt/serving_server/share/wheels
 )
 endif()

+if (CLIENT OR SERVER)
 find_program(PATCHELF_EXECUTABLE patchelf)
 if (NOT PATCHELF_EXECUTABLE)
     message(FATAL_ERROR "patchelf not found, please install it.\n"
             "For Ubuntu, the command is: apt-get install -y patchelf.")
 endif()
+endif()
python/examples/bert/benchmark_batch.py

@@ -41,13 +41,13 @@ def single_func(idx, resource):
         client = Client()
         client.load_client_config(args.model)
         client.connect([resource["endpoint"][idx % len(resource["endpoint"])]])
-        feed_batch = []
-        for bi in range(args.batch_size):
-            feed_batch.append(reader.process(dataset[bi]))
         start = time.time()
         for i in range(1000):
             if args.batch_size >= 1:
+                feed_batch = []
+                for bi in range(args.batch_size):
+                    feed_batch.append(reader.process(dataset[i]))
                 result = client.batch_predict(
                     feed_batch=feed_batch, fetch=fetch)
             else:

@@ -61,7 +61,9 @@ def single_func(idx, resource):
 if __name__ == '__main__':
     multi_thread_runner = MultiThreadRunner()
-    endpoint_list = ["127.0.0.1:9292"]
+    endpoint_list = [
+        "127.0.0.1:9295", "127.0.0.1:9296", "127.0.0.1:9297", "127.0.0.1:9298"
+    ]
     result = multi_thread_runner.run(single_func, args.thread,
                                      {"endpoint": endpoint_list})
     avg_cost = 0
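For context: single_func picks its server endpoint with idx % len(resource["endpoint"]), so with the new four-endpoint list the benchmark's worker threads round-robin over the four GPU servers started by the companion shell script. A standalone sketch of that mapping (not part of the commit, purely illustrative):

# Sketch (not in the commit): how idx % len(endpoint_list) spreads
# benchmark threads across the four server endpoints.
endpoint_list = [
    "127.0.0.1:9295", "127.0.0.1:9296", "127.0.0.1:9297", "127.0.0.1:9298"
]
for idx in range(8):  # e.g. running with --thread 8
    print(idx, endpoint_list[idx % len(endpoint_list)])
# threads 0-3 and 4-7 wrap around the same four servers in order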
python/examples/bert/benchmark_batch.sh

 rm profile_log
+export CUDA_VISIBLE_DEVICES=0,1,2,3
+python -m paddle_serving_server_gpu.serve --model bert_seq20_model/ --port 9295 --thread 4 --gpu_ids 0,1,2,3 2> elog > stdlog &
+sleep 5
+
 for thread_num in 1 2 4 8 16
 do
     for batch_size in 1 2 4 8 16 32 64 128 256 512
     do
         $PYTHONROOT/bin/python benchmark_batch.py --thread $thread_num --batch_size $batch_size --model serving_client_conf/serving_client_conf.prototxt --request rpc > profile 2>&1
         echo "========================================"
+        echo "thread num: ", $thread_num
+        echo "batch size: ", $batch_size
         echo "batch size : $batch_size" >> profile_log
         $PYTHONROOT/bin/python ../util/show_profile.py profile $thread_num >> profile_log
         tail -n 1 profile >> profile_log
python/examples/bert/benchmark_with_profile.sh (new file, mode 100644)

export CUDA_VISIBLE_DEVICES=0,1,2,3
python -m paddle_serving_server_gpu.serve --model bert_seq20_model/ --port 9295 --thread 4 --gpu_ids 0,1,2,3 2> elog > stdlog &
export FLAGS_profile_client=1
export FLAGS_profile_server=1
sleep 5
thread_num=4
python benchmark_batch.py --thread ${thread_num} --batch_size 64 --model serving_client_conf/serving_client_conf.prototxt 2> profile
python show_profile.py profile ${thread_num}
python timeline_trace.py profile trace
python/paddle_serving_app/__init__.py (new file, mode 100644)

# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .reader.chinese_bert_reader import ChineseBertReader
python/paddle_serving_app/reader/__init__.py (new file, mode 100644)

# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
python/paddle_serving_app/reader/batching.py (new file, mode 100644)

#coding:utf-8
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Mask, padding and batching."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np


def prepare_batch_data(insts,
                       total_token_num,
                       max_seq_len=128,
                       pad_id=None,
                       cls_id=None,
                       sep_id=None,
                       mask_id=None,
                       return_input_mask=True,
                       return_max_len=True,
                       return_num_token=False):
    """
    1. generate Tensor of data
    2. generate Tensor of position
    3. generate self attention mask, [shape: batch_size * max_len * max_len]
    """
    batch_src_ids = [inst[0] for inst in insts]
    batch_sent_ids = [inst[1] for inst in insts]
    batch_pos_ids = [inst[2] for inst in insts]
    labels_list = []
    # compatible with squad, whose example includes start/end positions,
    # or unique id
    for i in range(3, len(insts[0]), 1):
        labels = [inst[i] for inst in insts]
        labels = np.array(labels).astype("int64").reshape([-1, 1])
        labels_list.append(labels)

    out = batch_src_ids
    # Second step: padding
    src_id, self_input_mask = pad_batch_data(
        out, pad_idx=pad_id, max_seq_len=max_seq_len, return_input_mask=True)
    pos_id = pad_batch_data(
        batch_pos_ids,
        pad_idx=pad_id,
        max_seq_len=max_seq_len,
        return_pos=False,
        return_input_mask=False)
    sent_id = pad_batch_data(
        batch_sent_ids,
        pad_idx=pad_id,
        max_seq_len=max_seq_len,
        return_pos=False,
        return_input_mask=False)

    return_list = [src_id, pos_id, sent_id, self_input_mask] + labels_list

    return return_list if len(return_list) > 1 else return_list[0]


def pad_batch_data(insts,
                   pad_idx=0,
                   max_seq_len=128,
                   return_pos=False,
                   return_input_mask=False,
                   return_max_len=False,
                   return_num_token=False,
                   return_seq_lens=False):
    """
    Pad the instances to the max sequence length in batch, and generate the
    corresponding position data and input mask.
    """
    return_list = []
    #max_len = max(len(inst) for inst in insts)
    max_len = max_seq_len
    # Any token included in dict can be used to pad, since the paddings' loss
    # will be masked out by weights and make no effect on parameter gradients.

    inst_data = np.array([
        list(inst) + list([pad_idx] * (max_len - len(inst))) for inst in insts
    ])
    return_list += [inst_data.astype("int64").reshape([-1, max_len, 1])]

    # position data
    if return_pos:
        inst_pos = np.array([
            list(range(0, len(inst))) + [pad_idx] * (max_len - len(inst))
            for inst in insts
        ])
        return_list += [inst_pos.astype("int64").reshape([-1, max_len, 1])]

    if return_input_mask:
        # This is used to avoid attention on paddings.
        input_mask_data = np.array(
            [[1] * len(inst) + [0] * (max_len - len(inst)) for inst in insts])
        input_mask_data = np.expand_dims(input_mask_data, axis=-1)
        return_list += [input_mask_data.astype("float32")]

    if return_max_len:
        return_list += [max_len]

    if return_num_token:
        num_token = 0
        for inst in insts:
            num_token += len(inst)
        return_list += [num_token]

    if return_seq_lens:
        seq_lens = np.array([len(inst) for inst in insts])
        return_list += [seq_lens.astype("int64").reshape([-1, 1])]

    return return_list if len(return_list) > 1 else return_list[0]
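The workhorse here is pad_batch_data, which right-pads every instance to max_seq_len and can also emit the matching attention mask. A minimal sketch of what it returns (not part of the commit; the token ids below are made up for illustration):

# Sketch (not in the commit): pad_batch_data on a toy two-sequence batch.
import numpy as np
from paddle_serving_app.reader.batching import pad_batch_data

insts = [[5, 6, 7], [8, 9]]  # two made-up sequences of token ids
padded, mask = pad_batch_data(
    insts, pad_idx=0, max_seq_len=4, return_input_mask=True)
print(padded.shape)  # (2, 4, 1): each row padded with pad_idx up to max_seq_len
print(mask.shape)    # (2, 4, 1): 1.0 over real tokens, 0.0 over the padding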
python/paddle_serving_app/reader/bert_base_reader.py (new file, mode 100644)

# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .reader import ReaderBase


class BertBaseReader(ReaderBase):
    def __init__(self):
        super(BertBaseReader, self).__init__()
        pass

    def process(self, line):
        super(BertBaseReader, self).process(line)
        pass
python/paddle_serving_app/reader/chinese_bert_reader.py (new file, mode 100644)

# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# coding=utf-8
from .bert_base_reader import BertBaseReader
from .batching import pad_batch_data
from .tokenization import FullTokenizer, convert_to_unicode


class ChineseBertReader(BertBaseReader):
    """
    ChineseBertReader handles the most traditional Chinese Bert
    preprocessing, a user can define the vocab file through initialization

    Examples:
        from paddle_serving_app import ChineseBertReader

        line = ["this is China"]
        reader = ChineseBertReader()
        reader.process(line[0])
    """

    def __init__(self, args={}):
        super(ChineseBertReader, self).__init__()
        vocab_file = ""
        if "vocab_file" in args:
            vocab_file = args["vocab_file"]
        else:
            vocab_file = self._download_or_not()

        self.tokenizer = FullTokenizer(vocab_file=vocab_file)
        if "max_seq_len" in args:
            self.max_seq_len = args["max_seq_len"]
        else:
            self.max_seq_len = 20
        self.vocab = self.tokenizer.vocab
        self.pad_id = self.vocab["[PAD]"]
        self.cls_id = self.vocab["[CLS]"]
        self.sep_id = self.vocab["[SEP]"]
        self.mask_id = self.vocab["[MASK]"]
        self.feed_keys = [
            "input_ids", "position_ids", "segment_ids", "input_mask"
        ]

    """
    inner function
    """

    def _download_or_not(self):
        import os
        import paddle_serving_app
        module_path = os.path.dirname(paddle_serving_app.__file__)
        full_path = "{}/tmp/chinese_bert".format(module_path)
        os.system("mkdir -p {}".format(full_path))
        if os.path.exists("{}/vocab.txt".format(full_path)):
            pass
        else:
            url = "https://paddle-serving.bj.bcebos.com/reader/chinese_bert/vocab.txt"
            r = os.system("wget --no-check-certificate " + url)
            os.system("mv vocab.txt {}".format(full_path))
            if r != 0:
                raise SystemExit('Download failed, please check your network')
        return "{}/vocab.txt".format(full_path)

    """
    inner function
    """

    def _pad_batch(self, token_ids, text_type_ids, position_ids):
        batch_token_ids = [token_ids]
        batch_text_type_ids = [text_type_ids]
        batch_position_ids = [position_ids]

        padded_token_ids, input_mask = pad_batch_data(
            batch_token_ids,
            max_seq_len=self.max_seq_len,
            pad_idx=self.pad_id,
            return_input_mask=True)
        padded_text_type_ids = pad_batch_data(
            batch_text_type_ids,
            max_seq_len=self.max_seq_len,
            pad_idx=self.pad_id)
        padded_position_ids = pad_batch_data(
            batch_position_ids,
            max_seq_len=self.max_seq_len,
            pad_idx=self.pad_id)
        return padded_token_ids, padded_position_ids, padded_text_type_ids, input_mask

    """
    process function deals with a raw Chinese string as a sentence
    this function returns a feed_dict
    default key of the returned feed_dict: input_ids, position_ids, segment_ids, input_mask
    """

    def process(self, line):
        text_a = convert_to_unicode(line)
        tokens_a = self.tokenizer.tokenize(text_a)
        if len(tokens_a) > self.max_seq_len - 2:
            tokens_a = tokens_a[0:(self.max_seq_len - 2)]
        tokens = []
        text_type_ids = []
        tokens.append("[CLS]")
        text_type_ids.append(0)
        for token in tokens_a:
            tokens.append(token)
            text_type_ids.append(0)
        token_ids = self.tokenizer.convert_tokens_to_ids(tokens)
        position_ids = list(range(len(token_ids)))
        p_token_ids, p_pos_ids, p_text_type_ids, input_mask = \
            self._pad_batch(token_ids, text_type_ids, position_ids)
        feed_result = {
            self.feed_keys[0]: p_token_ids.reshape(-1).tolist(),
            self.feed_keys[1]: p_pos_ids.reshape(-1).tolist(),
            self.feed_keys[2]: p_text_type_ids.reshape(-1).tolist(),
            self.feed_keys[3]: input_mask.reshape(-1).tolist()
        }
        return feed_result
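Taken together, ChineseBertReader.process turns one raw string into a flattened, padded feed dict keyed by feed_keys. A usage sketch (not part of the commit; it assumes the vocab download succeeds and uses a made-up input sentence):

# Sketch (not in the commit): one string in, one feed dict out.
from paddle_serving_app import ChineseBertReader

reader = ChineseBertReader({"max_seq_len": 20})
feed_dict = reader.process("this is China")  # made-up example input
# feed_dict carries flattened, padded lists under input_ids /
# position_ids / segment_ids / input_mask, ready to pass as a single
# feed instance to the serving client.
print(len(feed_dict["input_ids"]))  # 20, i.e. max_seq_len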
python/paddle_serving_app/reader/reader.py (new file, mode 100644)

# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


class ReaderBase(object):
    def __init__(self):
        self.feed_keys = []

    def set_feed_keys(self, keys):
        self.feed_keys = keys

    def get_feed_keys(self):
        return self.feed_keys
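ReaderBase only fixes the feed-keys contract that BertBaseReader and ChineseBertReader build on: a reader announces which keys its process() output will carry. A hypothetical subclass (purely illustrative, not in the commit) showing the intended shape:

# Sketch (not in the commit): a made-up reader against the ReaderBase contract.
from paddle_serving_app.reader.reader import ReaderBase

class UpperCaseReader(ReaderBase):  # hypothetical example subclass
    def process(self, line):
        # emit one value per declared feed key
        return {key: line.upper() for key in self.get_feed_keys()}

r = UpperCaseReader()
r.set_feed_keys(["text"])
print(r.process("hello"))  # {'text': 'HELLO'}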
python/paddle_serving_app/reader/tokenization.py (new file, mode 100644)

# coding=utf-8
# Copyright 2018 The Google AI Language Team Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tokenization classes."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import collections
import io
import unicodedata
import six
import sentencepiece as spm
import pickle


def convert_to_unicode(text):
    # pylint: disable=doc-string-with-all-args
    """Converts `text` to Unicode (if it's not already), assuming utf-8 input."""
    if six.PY3:
        if isinstance(text, str):
            return text
        elif isinstance(text, bytes):
            return text.decode("utf-8", "ignore")
        else:
            raise ValueError("Unsupported string type: %s" % (type(text)))
    elif six.PY2:
        if isinstance(text, str):
            return text.decode("utf-8", "ignore")
        elif isinstance(text, unicode):  # noqa
            return text
        else:
            raise ValueError("Unsupported string type: %s" % (type(text)))
    else:
        raise ValueError("Not running on Python2 or Python 3?")


def printable_text(text):
    # pylint: disable=doc-string-with-all-args
    """Returns text encoded in a way suitable for print or `tf.logging`."""

    # These functions want `str` for both Python2 and Python3, but in one case
    # it's a Unicode string and in the other it's a byte string.
    if six.PY3:
        if isinstance(text, str):
            return text
        elif isinstance(text, bytes):
            return text.decode("utf-8", "ignore")
        else:
            raise ValueError("Unsupported string type: %s" % (type(text)))
    elif six.PY2:
        if isinstance(text, str):
            return text
        elif isinstance(text, unicode):  # noqa
            return text.encode("utf-8")
        else:
            raise ValueError("Unsupported string type: %s" % (type(text)))
    else:
        raise ValueError("Not running on Python2 or Python 3?")


def load_vocab(vocab_file):
    # pylint: disable=doc-string-with-all-args, doc-string-with-returns
    """Loads a vocabulary file into a dictionary."""
    vocab = collections.OrderedDict()
    fin = io.open(vocab_file, "r", encoding="UTF-8")
    for num, line in enumerate(fin):
        items = convert_to_unicode(line.strip()).split("\t")
        if len(items) > 2:
            break
        token = items[0]
        index = items[1] if len(items) == 2 else num
        token = token.strip()
        vocab[token] = int(index)
    fin.close()
    return vocab


def convert_by_vocab(vocab, items):
    """Converts a sequence of [tokens|ids] using the vocab."""
    output = []
    for item in items:
        output.append(vocab[item])
    return output


def convert_tokens_to_ids(vocab, tokens):
    return convert_by_vocab(vocab, tokens)


def convert_ids_to_tokens(inv_vocab, ids):
    return convert_by_vocab(inv_vocab, ids)


def whitespace_tokenize(text):
    """Runs basic whitespace cleaning and splitting on a piece of text."""
    text = text.strip()
    if not text:
        return []
    tokens = text.split()
    return tokens


class FullTokenizer(object):
    """Runs end-to-end tokenization."""

    def __init__(self,
                 vocab_file,
                 do_lower_case=True,
                 use_sentence_piece_vocab=False):
        self.vocab = load_vocab(vocab_file)
        self.inv_vocab = {v: k for k, v in self.vocab.items()}
        self.basic_tokenizer = BasicTokenizer(do_lower_case=do_lower_case)
        self.use_sentence_piece_vocab = use_sentence_piece_vocab
        self.wordpiece_tokenizer = WordpieceTokenizer(
            vocab=self.vocab,
            use_sentence_piece_vocab=self.use_sentence_piece_vocab)

    def tokenize(self, text):
        split_tokens = []
        for token in self.basic_tokenizer.tokenize(text):
            for sub_token in self.wordpiece_tokenizer.tokenize(token):
                split_tokens.append(sub_token)
        return split_tokens

    def convert_tokens_to_ids(self, tokens):
        return convert_by_vocab(self.vocab, tokens)

    def convert_ids_to_tokens(self, ids):
        return convert_by_vocab(self.inv_vocab, ids)


class CharTokenizer(object):
    """Runs end-to-end tokenization."""

    def __init__(self, vocab_file, do_lower_case=True):
        self.vocab = load_vocab(vocab_file)
        self.inv_vocab = {v: k for k, v in self.vocab.items()}
        self.wordpiece_tokenizer = WordpieceTokenizer(vocab=self.vocab)

    def tokenize(self, text):
        split_tokens = []
        for token in text.lower().split(" "):
            for sub_token in self.wordpiece_tokenizer.tokenize(token):
                split_tokens.append(sub_token)
        return split_tokens

    def convert_tokens_to_ids(self, tokens):
        return convert_by_vocab(self.vocab, tokens)

    def convert_ids_to_tokens(self, ids):
        return convert_by_vocab(self.inv_vocab, ids)


class WSSPTokenizer(object):
    # pylint: disable=doc-string-missing
    def __init__(self, vocab_file, sp_model_dir, word_dict, ws=True,
                 lower=True):
        self.vocab = load_vocab(vocab_file)
        self.inv_vocab = {v: k for k, v in self.vocab.items()}
        self.ws = ws
        self.lower = lower
        self.dict = pickle.load(open(word_dict, 'rb'))
        self.sp_model = spm.SentencePieceProcessor()
        self.window_size = 5
        self.sp_model.Load(sp_model_dir)

    def cut(self, chars):
        # pylint: disable=doc-string-missing
        words = []
        idx = 0
        while idx < len(chars):
            matched = False
            for i in range(self.window_size, 0, -1):
                cand = chars[idx:idx + i]
                if cand in self.dict:
                    words.append(cand)
                    matched = True
                    break
            if not matched:
                i = 1
                words.append(chars[idx])
            idx += i
        return words

    def tokenize(self, text, unk_token="[UNK]"):
        # pylint: disable=doc-string-missing
        text = convert_to_unicode(text)
        if self.ws:
            text = [s for s in self.cut(text) if s != ' ']
        else:
            text = text.split(' ')
        if self.lower:
            text = [s.lower() for s in text]
        text = ' '.join(text)
        tokens = self.sp_model.EncodeAsPieces(text)
        in_vocab_tokens = []
        for token in tokens:
            if token in self.vocab:
                in_vocab_tokens.append(token)
            else:
                in_vocab_tokens.append(unk_token)
        return in_vocab_tokens

    def convert_tokens_to_ids(self, tokens):
        return convert_by_vocab(self.vocab, tokens)

    def convert_ids_to_tokens(self, ids):
        return convert_by_vocab(self.inv_vocab, ids)


class BasicTokenizer(object):
    """Runs basic tokenization (punctuation splitting, lower casing, etc.)."""

    def __init__(self, do_lower_case=True):
        """Constructs a BasicTokenizer.

        Args:
            do_lower_case: Whether to lower case the input.
        """
        self.do_lower_case = do_lower_case

    def tokenize(self, text):
        # pylint: disable=doc-string-with-all-args, doc-string-with-returns
        """Tokenizes a piece of text."""
        text = convert_to_unicode(text)
        text = self._clean_text(text)

        # This was added on November 1st, 2018 for the multilingual and Chinese
        # models. This is also applied to the English models now, but it doesn't
        # matter since the English models were not trained on any Chinese data
        # and generally don't have any Chinese data in them (there are Chinese
        # characters in the vocabulary because Wikipedia does have some Chinese
        # words in the English Wikipedia.).
        text = self._tokenize_chinese_chars(text)

        orig_tokens = whitespace_tokenize(text)
        split_tokens = []
        for token in orig_tokens:
            if self.do_lower_case:
                token = token.lower()
                token = self._run_strip_accents(token)
            split_tokens.extend(self._run_split_on_punc(token))

        output_tokens = whitespace_tokenize(" ".join(split_tokens))
        return output_tokens

    def _run_strip_accents(self, text):
        """Strips accents from a piece of text."""
        text = unicodedata.normalize("NFD", text)
        output = []
        for char in text:
            cat = unicodedata.category(char)
            if cat == "Mn":
                continue
            output.append(char)
        return "".join(output)

    def _run_split_on_punc(self, text):
        """Splits punctuation on a piece of text."""
        chars = list(text)
        i = 0
        start_new_word = True
        output = []
        while i < len(chars):
            char = chars[i]
            if _is_punctuation(char):
                output.append([char])
                start_new_word = True
            else:
                if start_new_word:
                    output.append([])
                start_new_word = False
                output[-1].append(char)
            i += 1

        return ["".join(x) for x in output]

    def _tokenize_chinese_chars(self, text):
        """Adds whitespace around any CJK character."""
        output = []
        for char in text:
            cp = ord(char)
            if self._is_chinese_char(cp):
                output.append(" ")
                output.append(char)
                output.append(" ")
            else:
                output.append(char)
        return "".join(output)

    def _is_chinese_char(self, cp):
        """Checks whether CP is the codepoint of a CJK character."""
        # This defines a "chinese character" as anything in the CJK Unicode block:
        #     https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block)
        #
        # Note that the CJK Unicode block is NOT all Japanese and Korean characters,
        # despite its name. The modern Korean Hangul alphabet is a different block,
        # as is Japanese Hiragana and Katakana. Those alphabets are used to write
        # space-separated words, so they are not treated specially and handled
        # like all of the other languages.
        if ((cp >= 0x4E00 and cp <= 0x9FFF) or  #
            (cp >= 0x3400 and cp <= 0x4DBF) or  #
            (cp >= 0x20000 and cp <= 0x2A6DF) or  #
            (cp >= 0x2A700 and cp <= 0x2B73F) or  #
            (cp >= 0x2B740 and cp <= 0x2B81F) or  #
            (cp >= 0x2B820 and cp <= 0x2CEAF) or
            (cp >= 0xF900 and cp <= 0xFAFF) or  #
            (cp >= 0x2F800 and cp <= 0x2FA1F)):  #
            return True

        return False

    def _clean_text(self, text):
        """Performs invalid character removal and whitespace cleanup on text."""
        output = []
        for char in text:
            cp = ord(char)
            if cp == 0 or cp == 0xfffd or _is_control(char):
                continue
            if _is_whitespace(char):
                output.append(" ")
            else:
                output.append(char)
        return "".join(output)


class WordpieceTokenizer(object):
    """Runs WordPiece tokenization."""

    def __init__(self,
                 vocab,
                 unk_token="[UNK]",
                 max_input_chars_per_word=100,
                 use_sentence_piece_vocab=False):
        self.vocab = vocab
        self.unk_token = unk_token
        self.max_input_chars_per_word = max_input_chars_per_word
        self.use_sentence_piece_vocab = use_sentence_piece_vocab

    def tokenize(self, text):
        # pylint: disable=doc-string-with-all-args
        """Tokenizes a piece of text into its word pieces.

        This uses a greedy longest-match-first algorithm to perform tokenization
        using the given vocabulary.

        For example:
            input = "unaffable"
            output = ["un", "##aff", "##able"]

        Args:
            text: A single token or whitespace separated tokens. This should have
                already been passed through `BasicTokenizer`.

        Returns:
            A list of wordpiece tokens.
        """

        text = convert_to_unicode(text)

        output_tokens = []
        for token in whitespace_tokenize(text):
            chars = list(token)
            if len(chars) > self.max_input_chars_per_word:
                output_tokens.append(self.unk_token)
                continue

            is_bad = False
            start = 0
            sub_tokens = []
            while start < len(chars):
                end = len(chars)
                cur_substr = None
                while start < end:
                    substr = "".join(chars[start:end])
                    if start == 0 and self.use_sentence_piece_vocab:
                        substr = u'\u2581' + substr
                    if start > 0 and not self.use_sentence_piece_vocab:
                        substr = "##" + substr
                    if substr in self.vocab:
                        cur_substr = substr
                        break
                    end -= 1
                if cur_substr is None:
                    is_bad = True
                    break
                sub_tokens.append(cur_substr)
                start = end

            if is_bad:
                output_tokens.append(self.unk_token)
            else:
                output_tokens.extend(sub_tokens)

        return output_tokens


def _is_whitespace(char):
    """Checks whether `chars` is a whitespace character."""
    # \t, \n, and \r are technically control characters but we treat them
    # as whitespace since they are generally considered as such.
    if char == " " or char == "\t" or char == "\n" or char == "\r":
        return True
    cat = unicodedata.category(char)
    if cat == "Zs":
        return True
    return False


def _is_control(char):
    """Checks whether `chars` is a control character."""
    # These are technically control characters but we count them as whitespace
    # characters.
    if char == "\t" or char == "\n" or char == "\r":
        return False
    cat = unicodedata.category(char)
    if cat.startswith("C"):
        return True
    return False


def _is_punctuation(char):
    """Checks whether `chars` is a punctuation character."""
    cp = ord(char)
    # We treat all non-letter/number ASCII as punctuation.
    # Characters such as "^", "$", and "`" are not in the Unicode
    # Punctuation class but we treat them as punctuation anyways, for
    # consistency.
    if ((cp >= 33 and cp <= 47) or (cp >= 58 and cp <= 64) or
            (cp >= 91 and cp <= 96) or (cp >= 123 and cp <= 126)):
        return True
    cat = unicodedata.category(char)
    if cat.startswith("P"):
        return True
    return False
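WordpieceTokenizer.tokenize implements the greedy longest-match-first loop described in its docstring: it repeatedly takes the longest prefix of the remaining characters that is in the vocab, prefixing continuation pieces with "##". A toy demonstration (not part of the commit; the four-entry vocab is invented, and importing the module requires six and sentencepiece to be installed):

# Sketch (not in the commit): greedy longest-match-first on a made-up vocab.
from paddle_serving_app.reader.tokenization import WordpieceTokenizer

vocab = {"un": 0, "##aff": 1, "##able": 2, "[UNK]": 3}  # invented for the demo
tokenizer = WordpieceTokenizer(vocab=vocab)
print(tokenizer.tokenize("unaffable"))  # ['un', '##aff', '##able']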
python/paddle_serving_app/version.py (new file, mode 100644)

# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" Paddle Serving App version string """
serving_app_version = "0.0.1"
tools/serving_build.sh

@@ -58,7 +58,7 @@ function build_client() {
 cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ \
     -DPYTHON_LIBRARIES=$PYTHONROOT/lib64/libpython2.7.so \
     -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \
-    -DCLIENT_ONLY=ON ..
+    -DCLIENT=ON ..
 rerun "make -j2 >/dev/null" 3 # due to some network reasons, compilation may fail
 pip install -U python/dist/paddle_serving_client* >/dev/null
 ;;

@@ -82,7 +82,7 @@ function build_server() {
 cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ \
     -DPYTHON_LIBRARIES=$PYTHONROOT/lib64/libpython2.7.so \
     -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \
-    -DCLIENT_ONLY=OFF ..
+    -DSERVER=ON ..
 rerun "make -j2 >/dev/null" 3 # due to some network reasons, compilation may fail
 check_cmd "make install -j2 >/dev/null"
 pip install -U python/dist/paddle_serving_server* >/dev/null

@@ -91,7 +91,7 @@ function build_server() {
 cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ \
     -DPYTHON_LIBRARIES=$PYTHONROOT/lib64/libpython2.7.so \
     -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \
-    -DCLIENT_ONLY=OFF \
+    -DSERVER=ON \
     -DWITH_GPU=ON ..
 rerun "make -j2 >/dev/null" 3 # due to some network reasons, compilation may fail
 check_cmd "make install -j2 >/dev/null"

@@ -111,6 +111,7 @@ function kill_server_process() {
     ps -ef | grep "serving" | grep -v serving_build | grep -v grep | awk '{print $2}' | xargs kill
 }

 function python_test_fit_a_line() {
     # pwd: /Serving/python/examples
     cd fit_a_line # pwd: /Serving/python/examples/fit_a_line