Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleHub
提交
b5a4bb1b
P
PaddleHub
项目概览
PaddlePaddle
/
PaddleHub
1 年多 前同步成功
通知
283
Star
12117
Fork
2091
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
200
列表
看板
标记
里程碑
合并请求
4
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleHub
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
200
Issue
200
列表
看板
标记
里程碑
合并请求
4
合并请求
4
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
b5a4bb1b
编写于
6月 27, 2019
作者:
W
wuzewu
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Fix encoding bug
上级
d34e9473
变更
4
隐藏空白更改
内联
并排
Showing
4 changed file
with
37 addition
and
6 deletion
+37
-6
paddlehub/commands/hub.py
paddlehub/commands/hub.py
+3
-2
paddlehub/common/utils.py
paddlehub/common/utils.py
+27
-0
paddlehub/io/parser.py
paddlehub/io/parser.py
+5
-3
paddlehub/reader/nlp_reader.py
paddlehub/reader/nlp_reader.py
+2
-1
未找到文件。
paddlehub/commands/hub.py
浏览文件 @
b5a4bb1b
...
...
@@ -22,6 +22,7 @@ import sys
import
requests
from
paddlehub.common.logger
import
logger
from
paddlehub.common.utils
import
sys_stdin_encoding
from
paddlehub.common
import
stats
from
paddlehub.commands.base_command
import
BaseCommand
from
paddlehub.commands
import
show
...
...
@@ -63,7 +64,7 @@ def main():
argv
=
[]
for
item
in
sys
.
argv
:
if
six
.
PY2
:
argv
.
append
(
item
.
decode
(
sys
.
stdin
.
encoding
).
decode
(
"utf8"
))
argv
.
append
(
item
.
decode
(
sys
_stdin_encoding
()
).
decode
(
"utf8"
))
else
:
argv
.
append
(
item
)
command
.
execute
(
argv
[
1
:])
...
...
@@ -73,7 +74,7 @@ if __name__ == "__main__":
argv
=
[]
for
item
in
sys
.
argv
:
if
six
.
PY2
:
argv
.
append
(
item
.
decode
(
sys
.
stdin
.
encoding
).
decode
(
"utf8"
))
argv
.
append
(
item
.
decode
(
sys
_stdin_encoding
()
).
decode
(
"utf8"
))
else
:
argv
.
append
(
item
)
command
.
execute
(
argv
[
1
:])
paddlehub/common/utils.py
浏览文件 @
b5a4bb1b
...
...
@@ -17,6 +17,7 @@ from __future__ import absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
sys
import
os
import
time
import
multiprocessing
...
...
@@ -231,3 +232,29 @@ def get_running_device_info(config):
dev_count
=
int
(
os
.
environ
.
get
(
'CPU_NUM'
,
multiprocessing
.
cpu_count
()))
return
place
,
dev_count
def get_platform_default_encoding():
    """Return a fallback text encoding name chosen by operating system.

    Returns:
        str: "gbk" when the lower-cased ``platform.platform()`` string starts
        with "windows", otherwise "utf8".
    """
    # Imported locally so this helper does not depend on the module header
    # importing ``platform``.
    import platform

    if platform.platform().lower().startswith("windows"):
        return "gbk"
    return "utf8"
def sys_stdin_encoding():
    """Return the encoding to use for text coming from standard input.

    The lookup order is ``sys.stdin.encoding``, then
    ``sys.getdefaultencoding()``, then ``get_platform_default_encoding()``.

    Returns:
        str: the first encoding name in that chain that is not None.
    """
    # sys.stdin may be None (no console attached) or may have been replaced by
    # a file-like object that has no ``encoding`` attribute; treat both cases
    # as "unknown" instead of raising AttributeError.
    encoding = getattr(sys.stdin, "encoding", None)
    if encoding is None:
        encoding = sys.getdefaultencoding()

    # Defensive last resort, kept from the original fallback chain.
    if encoding is None:
        encoding = get_platform_default_encoding()
    return encoding
def sys_stdout_encoding():
    """Return the encoding to use for text written to standard output.

    The lookup order is ``sys.stdout.encoding``, then
    ``sys.getdefaultencoding()``, then ``get_platform_default_encoding()``.

    Returns:
        str: the first encoding name in that chain that is not None.
    """
    # sys.stdout may be None (no console attached) or may have been replaced
    # by a file-like object that has no ``encoding`` attribute; treat both
    # cases as "unknown" instead of raising AttributeError.
    encoding = getattr(sys.stdout, "encoding", None)
    if encoding is None:
        encoding = sys.getdefaultencoding()

    # Defensive last resort, kept from the original fallback chain.
    if encoding is None:
        encoding = get_platform_default_encoding()
    return encoding
paddlehub/io/parser.py
浏览文件 @
b5a4bb1b
...
...
@@ -21,6 +21,8 @@ import codecs
import
sys
import
yaml
from
paddlehub.common.utils
import
sys_stdin_encoding
class
CSVFileParser
(
object
):
def
__init__
(
self
):
...
...
@@ -30,7 +32,7 @@ class CSVFileParser(object):
pass
def
parse
(
self
,
csv_file
):
with
codecs
.
open
(
csv_file
,
"r"
,
sys
.
stdin
.
encoding
)
as
file
:
with
codecs
.
open
(
csv_file
,
"r"
,
sys
_stdin_encoding
()
)
as
file
:
content
=
file
.
read
()
content
=
content
.
split
(
'
\n
'
)
self
.
title
=
content
[
0
].
split
(
','
)
...
...
@@ -57,7 +59,7 @@ class YAMLFileParser(object):
pass
def
parse
(
self
,
yaml_file
):
with
codecs
.
open
(
yaml_file
,
"r"
,
sys
.
stdin
.
encoding
)
as
file
:
with
codecs
.
open
(
yaml_file
,
"r"
,
sys
_stdin_encoding
()
)
as
file
:
content
=
file
.
read
()
return
yaml
.
load
(
content
,
Loader
=
yaml
.
BaseLoader
)
...
...
@@ -70,7 +72,7 @@ class TextFileParser(object):
pass
def
parse
(
self
,
txt_file
):
with
codecs
.
open
(
txt_file
,
"r"
,
sys
.
stdin
.
encoding
)
as
file
:
with
codecs
.
open
(
txt_file
,
"r"
,
sys
_stdin_encoding
()
)
as
file
:
contents
=
[]
for
line
in
file
:
line
=
line
.
strip
()
...
...
paddlehub/reader/nlp_reader.py
浏览文件 @
b5a4bb1b
...
...
@@ -29,6 +29,7 @@ import paddle
from
paddlehub.reader
import
tokenization
from
paddlehub.common.logger
import
logger
from
paddlehub.common.utils
import
sys_stdout_encoding
from
paddlehub.dataset.dataset
import
InputExample
from
.batching
import
pad_batch_data
import
paddlehub
as
hub
...
...
@@ -527,7 +528,7 @@ class LACClassifyReader(object):
]
if
len
(
processed
)
==
0
:
if
six
.
PY2
:
text
=
text
.
encode
(
sys
.
stdout
.
encoding
)
text
=
text
.
encode
(
sys
_stdout_encoding
()
)
logger
.
warning
(
"The words in text %s can't be found in the vocabulary."
%
(
text
))
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录