PaddlePaddle / PALM
Commit a4987b6a (unverified)
Authored on Oct 17, 2019 by Xiaoyao Xi; committed via GitHub on Oct 17, 2019
Merge pull request #8 from xixiaoyao/master
fix bugs
Parents: 6ef6adcb, 5f3bb81e
Showing 14 changed files with 52 additions and 24 deletions (+52 / -24)
Files changed:
  .gitignore                              +4   -0
  mtl_run.py                              +1   -1
  paradigm/answer_matching.py             +1   -1
  paradigm/mask_language_model.py         +1   -0
  paradigm/reading_comprehension.py       +1   -1
  reader/answer_matching_reader.py        +2   -1
  reader/joint_reader.py                  +15  -1
  reader/mask_language_model_reader.py    +2   -1
  reader/reading_comprehension_reader.py  +7   -5
  utils/batching.py                       +1   -0
  utils/configure.py                      +5   -3
  utils/fp16.py                           +1   -0
  utils/init.py                           +1   -0
  utils/tokenization.py                   +10  -10
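Taken together, the changes follow one pattern: make text encoding explicit. Source files switch their encoding comment from "# encoding=utf8" to the standard "# -*- coding: utf-8 -*-", and bare open() calls are replaced with io.open(..., encoding="utf8") so data, config, and vocabulary files are decoded as UTF-8 text on both Python 2 and Python 3. A minimal, self-contained sketch of that pattern (the file name and contents below are invented for illustration and are not part of the repository):

# -*- coding: utf-8 -*-
import io

# Hypothetical file, created here only so the example runs end to end.
with io.open("example_utf8.txt", "w", encoding="utf8") as f:
    f.write(u"answer matching 样例\n")  # mixed ASCII and Chinese text

# The pattern this commit applies: io.open with an explicit encoding returns
# decoded unicode text on both Python 2 and Python 3, whereas a bare open()
# depends on interpreter defaults and yields raw bytes under Python 2.
with io.open("example_utf8.txt", "r", encoding="utf8") as f:
    text = f.read()

print(text)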
.gitignore (new file, mode 100644)

+*.pyc
+__pycache__
+pretrain_model
+output_model
mtl_run.py

@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# encoding=utf8
+# -*- coding: utf-8 -*-
 import os
 import sys
paradigm/answer_matching.py

@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# encoding=utf8
+# -*- coding: utf-8 -*-
 import paddle.fluid as fluid
paradigm/mask_language_model.py

@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+# -*- coding: utf-8 -*-
 import paddle.fluid as fluid
 from backbone.utils.transformer import pre_process_layer
paradigm/reading_comprehension.py

@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# encoding=utf8
+# -*- coding: utf-8 -*-
 import paddle.fluid as fluid
 import collections
reader/answer_matching_reader.py

@@ -17,6 +17,7 @@ import types
 import csv
 import numpy as np
 from utils import tokenization
+import io
 from utils.batching import prepare_batch_data

@@ -115,7 +116,7 @@ class BaseProcessor(object):
     @classmethod
     def _read_tsv(cls, input_file, quotechar=None):
         """Reads a tab separated value file."""
-        with open(input_file, "r") as f:
+        with io.open(input_file, "r", encoding="utf8") as f:
             reader = csv.reader(f, delimiter="\t", quotechar=quotechar)
             lines = []
             for line in reader:
reader/joint_reader.py

-#encoding=utf8
 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+# -*- coding: utf-8 -*-
 import os
 import sys
 import random
reader/mask_language_model_reader.py

@@ -22,6 +22,7 @@ import gzip
 import logging
 import re
 import six
+import io
 import collections
 from utils import tokenization
 from utils.batching import prepare_batch_data

@@ -126,7 +127,7 @@ class DataProcessor(object):
     def load_vocab(self, vocab_file):
         """Loads a vocabulary file into a dictionary."""
         vocab = collections.OrderedDict()
-        fin = open(vocab_file)
+        fin = io.open(vocab_file, encoding='utf8')
         for num, line in enumerate(fin):
             items = self.convert_to_unicode(line.strip()).split("\t")
             if len(items) > 2:
reader/reading_comprehension_reader.py

@@ -14,9 +14,11 @@
 """Run MRQA"""
 import six
 import io
 import math
 import json
 import random
 import io
 import collections
 import numpy as np
 from utils import tokenization

@@ -401,14 +403,14 @@ class DataProcessor(object):
            all_nbest_json[example.qas_id] = nbest_json

-        with open(output_prediction_file, "w") as writer:
+        with io.open(output_prediction_file, "w", encoding="utf8") as writer:
             writer.write(json.dumps(all_predictions, indent=4) + "\n")

-        with open(output_nbest_file, "w") as writer:
+        with io.open(output_nbest_file, "w", encoding="utf8") as writer:
             writer.write(json.dumps(all_nbest_json, indent=4) + "\n")

         if with_negative:
-            with open(output_null_log_odds_file, "w") as writer:
+            with io.open(output_null_log_odds_file, "w", encoding="utf8") as writer:
                 writer.write(json.dumps(scores_diff_json, indent=4) + "\n")

@@ -486,7 +488,7 @@ def read_mrqa_examples(input_file, is_training, with_negative=False):
     """Read a MRQA json file into a list of MRQAExample."""
     phase = 'training' if is_training else 'testing'
     print("loading mrqa {} data...".format(phase))
-    with open(input_file, "r") as reader:
+    with io.open(input_file, "r", encoding="utf8") as reader:
         input_data = json.load(reader)["data"]

     def is_whitespace(c):

@@ -736,7 +738,7 @@ def estimate_runtime_examples(data_path, sample_rate, tokenizer, \
     assert sample_rate > 0.0 and sample_rate <= 1.0, "sample_rate must be set between 0.0~1.0"
     print("loading data with json parser...")
-    with open(data_path, "r") as reader:
+    with io.open(data_path, "r", encoding="utf8") as reader:
         data = json.load(reader)["data"]

     num_raw_examples = 0
utils/batching.py

@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+# -*- coding: utf-8 -*-
 """Mask, padding and batching."""
 from __future__ import absolute_import
 from __future__ import division
utils/configure.py

@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+# -*- coding: utf-8 -*-
 from __future__ import absolute_import
 from __future__ import division

@@ -18,6 +19,7 @@ from __future__ import print_function
 import os
 import sys
+import io
 import argparse
 import json
 import yaml

@@ -38,7 +40,7 @@ class JsonConfig(object):
     def _parse(self, config_path):
         try:
-            with open(config_path) as json_file:
+            with io.open(config_path, encoding="utf8") as json_file:
                 config_dict = json.load(json_file)
                 assert isinstance(config_dict, dict), "Object in {} is NOT a dict.".format(config_path)
         except:

@@ -216,7 +218,7 @@ class PDConfig(object):
            raise Warning("the json file %s does not exist." % file_path)
            return

-        with open(file_path, "r") as fin:
+        with io.open(file_path, "r", encoding="utf8") as fin:
            self.json_config = json.loads(fin.read())
        fin.close()

@@ -241,7 +243,7 @@ class PDConfig(object):
            raise Warning("the yaml file %s does not exist." % file_path)
            return

-        with open(file_path, "r") as fin:
+        with io.open(file_path, "r", encoding="utf8") as fin:
            self.yaml_config = yaml.load(fin, Loader=yaml.SafeLoader)
        fin.close()
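As a side note, a hedged sketch of how the JsonConfig._parse path above behaves after this change; toy_config.json and its contents are invented here purely so the snippet is runnable and are not part of the repository:

# -*- coding: utf-8 -*-
import io
import json

# Hypothetical config file, written only for this illustration.
with io.open("toy_config.json", "w", encoding="utf8") as f:
    f.write(u'{"batch_size": 32, "max_seq_len": 512}')

# Mirrors the updated _parse: read the file with an explicit UTF-8 encoding,
# parse it as JSON, and require the top-level object to be a dict.
with io.open("toy_config.json", encoding="utf8") as json_file:
    config_dict = json.load(json_file)
assert isinstance(config_dict, dict), "Object in toy_config.json is NOT a dict."
print(config_dict["batch_size"])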
utils/fp16.py

@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+# -*- coding: utf-8 -*-
 from __future__ import print_function
 import paddle
utils/init.py

@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+# -*- coding: utf-8 -*-
 from __future__ import print_function
utils/tokenization.py

@@ -20,7 +20,7 @@ from __future__ import print_function
 import collections
 import unicodedata
 import six
+import io

 def convert_to_unicode(text):
     """Converts `text` to Unicode (if it's not already), assuming utf-8 input."""

@@ -68,15 +68,15 @@ def printable_text(text):
 def load_vocab(vocab_file):
     """Loads a vocabulary file into a dictionary."""
     vocab = collections.OrderedDict()
-    fin = open(vocab_file)
-    for num, line in enumerate(fin):
-        items = convert_to_unicode(line.strip()).split("\t")
-        if len(items) > 2:
-            break
-        token = items[0]
-        index = items[1] if len(items) == 2 else num
-        token = token.strip()
-        vocab[token] = int(index)
+    with io.open(vocab_file, encoding="utf8") as fin:
+        for num, line in enumerate(fin):
+            items = convert_to_unicode(line.strip()).split("\t")
+            if len(items) > 2:
+                break
+            token = items[0]
+            index = items[1] if len(items) == 2 else num
+            token = token.strip()
+            vocab[token] = int(index)
     return vocab
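Finally, a short usage sketch of the rewritten load_vocab above. The toy vocabulary file and the simplified convert_to_unicode stand-in are invented for illustration; the real helper lives in utils/tokenization.py, and a real run would point at the pretrained model's vocab file:

# -*- coding: utf-8 -*-
import collections
import io

def convert_to_unicode(text):
    # Simplified stand-in for utils.tokenization.convert_to_unicode.
    if isinstance(text, bytes):
        return text.decode("utf-8")
    return text

def load_vocab(vocab_file):
    """Same logic as the rewritten load_vocab above."""
    vocab = collections.OrderedDict()
    with io.open(vocab_file, encoding="utf8") as fin:
        for num, line in enumerate(fin):
            items = convert_to_unicode(line.strip()).split("\t")
            if len(items) > 2:
                break
            token = items[0]
            index = items[1] if len(items) == 2 else num
            token = token.strip()
            vocab[token] = int(index)
    return vocab

# Toy vocabulary, one token per line; ids default to the line number.
with io.open("toy_vocab.txt", "w", encoding="utf8") as f:
    f.write(u"[PAD]\n[UNK]\n[CLS]\n[SEP]\n")

print(load_vocab("toy_vocab.txt"))  # e.g. OrderedDict([('[PAD]', 0), ('[UNK]', 1), ...])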