提交 c915a749 编写于 作者: Z zhangxuefei

Fix the read_csv compatibility bug between py2 and py3

上级 2a8e0688
export CUDA_VISIBLE_DEVICES=3 export CUDA_VISIBLE_DEVICES=0
# User can select chnsenticorp, nlpcc_dbqa, lcqmc for different task # User can select chnsenticorp, nlpcc_dbqa, lcqmc for different task
DATASET="chnsenticorp" DATASET="chnsenticorp"
......
...@@ -11,6 +11,10 @@ ...@@ -11,6 +11,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
#coding:utf-8
import six
from . import module from . import module
from . import common from . import common
from . import io from . import io
...@@ -41,3 +45,8 @@ from .finetune.finetune import finetune_and_eval ...@@ -41,3 +45,8 @@ from .finetune.finetune import finetune_and_eval
from .finetune.config import RunConfig from .finetune.config import RunConfig
from .finetune.strategy import AdamWeightDecayStrategy from .finetune.strategy import AdamWeightDecayStrategy
from .finetune.strategy import DefaultStrategy from .finetune.strategy import DefaultStrategy
if six.PY2:
import sys
reload(sys)
sys.setdefaultencoding("UTF-8")
\ No newline at end of file
...@@ -17,6 +17,7 @@ from __future__ import division ...@@ -17,6 +17,7 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
from collections import namedtuple from collections import namedtuple
import codecs
import os import os
import csv import csv
...@@ -79,7 +80,7 @@ class ChnSentiCorp(HubDataset): ...@@ -79,7 +80,7 @@ class ChnSentiCorp(HubDataset):
def _read_tsv(self, input_file, quotechar=None): def _read_tsv(self, input_file, quotechar=None):
"""Reads a tab separated value file.""" """Reads a tab separated value file."""
with open(input_file, "r") as f: with codecs.open(input_file, "r", encoding="UTF-8") as f:
reader = csv.reader(f, delimiter="\t", quotechar=quotechar) reader = csv.reader(f, delimiter="\t", quotechar=quotechar)
examples = [] examples = []
seq_id = 0 seq_id = 0
......
...@@ -17,6 +17,7 @@ from __future__ import division ...@@ -17,6 +17,7 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
from collections import namedtuple from collections import namedtuple
import codecs
import os import os
import csv import csv
...@@ -75,7 +76,7 @@ class LCQMC(HubDataset): ...@@ -75,7 +76,7 @@ class LCQMC(HubDataset):
def _read_tsv(self, input_file, quotechar=None): def _read_tsv(self, input_file, quotechar=None):
"""Reads a tab separated value file.""" """Reads a tab separated value file."""
with open(input_file, "r") as f: with codecs.open(input_file, "r", encoding="UTF-8") as f:
reader = csv.reader(f, delimiter="\t", quotechar=quotechar) reader = csv.reader(f, delimiter="\t", quotechar=quotechar)
examples = [] examples = []
seq_id = 0 seq_id = 0
......
...@@ -17,8 +17,10 @@ from __future__ import division ...@@ -17,8 +17,10 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
import os import os
import codecs
import csv import csv
import json import json
import six
from collections import namedtuple from collections import namedtuple
from paddlehub.dataset import InputExample, HubDataset from paddlehub.dataset import InputExample, HubDataset
...@@ -85,7 +87,7 @@ class MSRA_NER(HubDataset): ...@@ -85,7 +87,7 @@ class MSRA_NER(HubDataset):
def _read_tsv(self, input_file, quotechar=None): def _read_tsv(self, input_file, quotechar=None):
"""Reads a tab separated value file.""" """Reads a tab separated value file."""
with open(input_file, "r") as f: with codecs.open(input_file, "r", encoding="UTF-8") as f:
reader = csv.reader(f, delimiter="\t", quotechar=quotechar) reader = csv.reader(f, delimiter="\t", quotechar=quotechar)
examples = [] examples = []
seq_id = 0 seq_id = 0
......
...@@ -17,6 +17,7 @@ from __future__ import division ...@@ -17,6 +17,7 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
from collections import namedtuple from collections import namedtuple
import codecs
import os import os
import csv import csv
...@@ -81,7 +82,7 @@ class NLPCC_DBQA(HubDataset): ...@@ -81,7 +82,7 @@ class NLPCC_DBQA(HubDataset):
def _read_tsv(self, input_file, quotechar=None): def _read_tsv(self, input_file, quotechar=None):
"""Reads a tab separated value file.""" """Reads a tab separated value file."""
with open(input_file, "r") as f: with codecs.open(input_file, "r", encoding="UTF-8") as f:
reader = csv.reader(f, delimiter="\t", quotechar=quotechar) reader = csv.reader(f, delimiter="\t", quotechar=quotechar)
examples = [] examples = []
seq_id = 0 seq_id = 0
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册