PaddlePaddle / PALM

Commit b0d11b3b
Authored on Oct 17, 2019 by xixiaoyao

fix bugs

Parent: 5649e623
Showing 13 changed files with 40 additions and 16 deletions (+40, -16)
mtl_run.py                              +1  -1
paradigm/answer_matching.py             +1  -1
paradigm/mask_language_model.py         +1  -0
paradigm/reading_comprehension.py       +1  -1
reader/answer_matching_reader.py        +2  -1
reader/joint_reader.py                  +15 -1
reader/mask_language_model_reader.py    +2  -1
reader/reading_comprehension_reader.py  +7  -5
utils/batching.py                       +1  -0
utils/configure.py                      +5  -3
utils/fp16.py                           +1  -0
utils/init.py                           +1  -0
utils/tokenization.py                   +2  -2
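Nearly every hunk below applies one of two recurring fixes: the ad-hoc `# encoding=utf8` comment is replaced by the conventional `# -*- coding: utf-8 -*-` declaration, and bare `open()` calls are replaced by `io.open(..., encoding="utf8")` so text files are decoded as UTF-8 explicitly rather than with the platform default. A minimal sketch of the reading pattern (the file name here is hypothetical, not part of the commit):

# -*- coding: utf-8 -*-
import io

# io.open accepts an explicit encoding and yields unicode text on both
# Python 2 and Python 3, unlike the bare built-in open() on Python 2.
with io.open("some_vocab.txt", "r", encoding="utf8") as fin:  # hypothetical file
    for line in fin:
        print(line.strip())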
mtl_run.py

@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# encoding=utf8
+# -*- coding: utf-8 -*-
 import os
 import sys
paradigm/answer_matching.py

@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# encoding=utf8
+# -*- coding: utf-8 -*-
 import paddle.fluid as fluid
paradigm/mask_language_model.py

@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+# -*- coding: utf-8 -*-
 import paddle.fluid as fluid
 from backbone.utils.transformer import pre_process_layer
paradigm/reading_comprehension.py

@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# encoding=utf8
+# -*- coding: utf-8 -*-
 import paddle.fluid as fluid
 import collections
reader/answer_matching_reader.py

@@ -17,6 +17,7 @@ import types
 import csv
 import numpy as np
 from utils import tokenization
+import io
 from utils.batching import prepare_batch_data

@@ -115,7 +116,7 @@ class BaseProcessor(object):
     @classmethod
     def _read_tsv(cls, input_file, quotechar=None):
         """Reads a tab separated value file."""
-        with open(input_file, "r") as f:
+        with io.open(input_file, "r", encoding="utf8") as f:
             reader = csv.reader(f, delimiter="\t", quotechar=quotechar)
             lines = []
             for line in reader:
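For reference, the updated `_read_tsv` can be read as a standalone helper roughly as follows; this is a sketch of the same pattern, not the class method itself:

import csv
import io

def read_tsv(input_file, quotechar=None):
    """Reads a tab separated value file into a list of rows."""
    # Same change as above: io.open with an explicit utf8 encoding
    # instead of the bare built-in open().
    with io.open(input_file, "r", encoding="utf8") as f:
        reader = csv.reader(f, delimiter="\t", quotechar=quotechar)
        return [line for line in reader]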
reader/joint_reader.py

#encoding=utf8
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# -*- coding: utf-8 -*-
import os
import sys
import random
reader/mask_language_model_reader.py

@@ -22,6 +22,7 @@ import gzip
 import logging
 import re
 import six
+import io
 import collections
 from utils import tokenization
 from utils.batching import prepare_batch_data

@@ -126,7 +127,7 @@ class DataProcessor(object):
     def load_vocab(self, vocab_file):
         """Loads a vocabulary file into a dictionary."""
         vocab = collections.OrderedDict()
-        fin = open(vocab_file)
+        fin = io.open(vocab_file, encoding='utf8')
         for num, line in enumerate(fin):
             items = self.convert_to_unicode(line.strip()).split("\t")
             if len(items) > 2:
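The `load_vocab` change follows the same pattern; a simplified standalone sketch (the token-to-id layout beyond the tab split is an assumption, the real method handles more cases):

import collections
import io

def load_vocab(vocab_file):
    """Loads a tab separated vocabulary file into an OrderedDict."""
    vocab = collections.OrderedDict()
    # Explicit utf8 decoding avoids UnicodeDecodeError on non-ASCII
    # tokens when the locale default encoding is not UTF-8.
    with io.open(vocab_file, encoding="utf8") as fin:
        for num, line in enumerate(fin):
            token = line.strip().split("\t")[0]
            vocab[token] = num  # assumption: token mapped to its line index
    return vocab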
reader/reading_comprehension_reader.py

@@ -14,9 +14,11 @@
 """Run MRQA"""
 import six
+import io
 import math
 import json
 import random
+import io
 import collections
 import numpy as np
 from utils import tokenization

@@ -401,14 +403,14 @@ class DataProcessor(object):
            all_nbest_json[example.qas_id] = nbest_json
-        with open(output_prediction_file, "w") as writer:
+        with io.open(output_prediction_file, "w", encoding="utf8") as writer:
             writer.write(json.dumps(all_predictions, indent=4) + "\n")
-        with open(output_nbest_file, "w") as writer:
+        with io.open(output_nbest_file, "w", encoding="utf8") as writer:
             writer.write(json.dumps(all_nbest_json, indent=4) + "\n")
         if with_negative:
-            with open(output_null_log_odds_file, "w") as writer:
+            with io.open(output_null_log_odds_file, "w", encoding="utf8") as writer:
                 writer.write(json.dumps(scores_diff_json, indent=4) + "\n")

@@ -486,7 +488,7 @@ def read_mrqa_examples(input_file, is_training, with_negative=False):
     """Read a MRQA json file into a list of MRQAExample."""
     phase = 'training' if is_training else 'testing'
     print("loading mrqa {} data...".format(phase))
-    with open(input_file, "r") as reader:
+    with io.open(input_file, "r", encoding="utf8") as reader:
         input_data = json.load(reader)["data"]

     def is_whitespace(c):

@@ -736,7 +738,7 @@ def estimate_runtime_examples(data_path, sample_rate, tokenizer, \
     assert sample_rate > 0.0 and sample_rate <= 1.0, "sample_rate must be set between 0.0~1.0"
     print("loading data with json parser...")
-    with open(data_path, "r") as reader:
+    with io.open(data_path, "r", encoding="utf8") as reader:
         data = json.load(reader)["data"]
     num_raw_examples = 0
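The three prediction-writing hunks share one write path; a condensed sketch of it (the path and payload below are hypothetical):

import io
import json

def dump_json(path, obj):
    # Same pattern as the hunks above: open the output file through
    # io.open with an explicit encoding and end with a newline.
    with io.open(path, "w", encoding="utf8") as writer:
        writer.write(json.dumps(obj, indent=4) + "\n")

dump_json("predictions.json", {"qid-1": "answer text"})  # hypothetical output file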
utils/batching.py

@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+# -*- coding: utf-8 -*-
 """Mask, padding and batching."""
 from __future__ import absolute_import
 from __future__ import division
utils/configure.py

@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+# -*- coding: utf-8 -*-
 from __future__ import absolute_import
 from __future__ import division

@@ -18,6 +19,7 @@ from __future__ import print_function
 import os
 import sys
+import io
 import argparse
 import json
 import yaml

@@ -38,7 +40,7 @@ class JsonConfig(object):
     def _parse(self, config_path):
         try:
-            with open(config_path) as json_file:
+            with io.open(config_path, encoding="utf8") as json_file:
                 config_dict = json.load(json_file)
                 assert isinstance(config_dict, dict), "Object in {} is NOT a dict.".format(config_path)
         except:

@@ -216,7 +218,7 @@ class PDConfig(object):
             raise Warning("the json file %s does not exist." % file_path)
             return
-        with open(file_path, "r") as fin:
+        with io.open(file_path, "r", encoding="utf8") as fin:
             self.json_config = json.loads(fin.read())
             fin.close()

@@ -241,7 +243,7 @@ class PDConfig(object):
             raise Warning("the yaml file %s does not exist." % file_path)
             return
-        with open(file_path, "r") as fin:
+        with io.open(file_path, "r", encoding="utf8") as fin:
             self.yaml_config = yaml.load(fin, Loader=yaml.SafeLoader)
             fin.close()
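For context, the YAML branch of `PDConfig` already parses with the safe loader; a minimal standalone sketch of that read path (the config path is hypothetical):

import io
import yaml

def load_yaml_config(file_path):
    # yaml.SafeLoader constructs only plain data types from the config,
    # and io.open pins the text encoding to utf8.
    with io.open(file_path, "r", encoding="utf8") as fin:
        return yaml.load(fin, Loader=yaml.SafeLoader)

config = load_yaml_config("config.yaml")  # hypothetical path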
utils/fp16.py

@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+# -*- coding: utf-8 -*-
 from __future__ import print_function
 import paddle
utils/init.py

@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+# -*- coding: utf-8 -*-
 from __future__ import print_function
utils/tokenization.py

@@ -20,7 +20,7 @@ from __future__ import print_function
 import collections
 import unicodedata
 import six
+import io

 def convert_to_unicode(text):
     """Converts `text` to Unicode (if it's not already), assuming utf-8 input."""

@@ -68,7 +68,7 @@ def printable_text(text):
 def load_vocab(vocab_file):
     """Loads a vocabulary file into a dictionary."""
     vocab = collections.OrderedDict()
-    fin = open(vocab_file)
+    fin = io.open(vocab_file, encoding="utf8")
     for num, line in enumerate(fin):
         items = convert_to_unicode(line.strip()).split("\t")
         if len(items) > 2: