未验证 提交 3c4d4065 编写于 作者: T Tao Luo 提交者: GitHub

Merge pull request #5775 from luotao1/ProtoData

remove ProtoData
......@@ -11,7 +11,6 @@ add_unittest_without_exec(test_Trainer
test_Trainer.cpp)
add_test(NAME test_Trainer
COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python/
${PYTHON_EXECUTABLE} ${PADDLE_SOURCE_DIR}/paddle/trainer/tests/gen_proto_data.py &&
${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python/
${CMAKE_CURRENT_BINARY_DIR}/test_Trainer
WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/)
......
#edit-mode: -*- python -*-
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# TODO(luotao02): This config is only used for unit tests. It is out of date now and will be updated later.
TrainData(ProtoData(
files = 'trainer/tests/train_files.txt',
usage_ratio = 1.0,
))
TestData(ProtoData(
files = 'trainer/tests/test_files.txt'
))
default_initial_std(1)
default_decay_rate(4e-4)
default_device(0)
Inputs("features", "word", "pos", "chunk")
Outputs("crf")
Layer(
name = "features",
type = "data",
size = 4339,
)
Layer(
name = "word",
type = "data",
size = 478,
)
Layer(
name = "pos",
type = "data",
size = 45
)
Layer(
name = "chunk",
type = "data",
size = 23
)
Layer(
name = "output",
type = "mixed",
size = 23,
bias = False,
device = -1,
inputs = [
FullMatrixProjection("features", parameter_name="feature_weights"),
# TableProjection("word"),
# TableProjection("pos"),
],
)
Layer(
name = "crf",
type = "crf",
size = 23,
device = -1,
inputs = [
Input("output", parameter_name="crfw"),
"chunk"
]
)
Layer(
name = "crf_decoding",
type = "crf_decoding",
size = 23,
device = -1,
inputs = [
Input("output", parameter_name="crfw"),
"chunk"
]
)
Evaluator(
name = "error",
type = "sum",
inputs = "crf_decoding",
)
'''
# chunk evaluator cannot be used for GPU training
Evaluator(
name = "chunk_f1",
type = "chunk",
inputs = ["crf_decoding", "chunk"],
chunk_scheme = "IOB",
num_chunk_types = 11,
)
'''
Settings(
algorithm = 'sgd',
batch_size = 100,
average_window = 0.5,
max_average_window = 2500,
learning_rate = 1e-1,
learning_rate_decay_a = 5e-7,
learning_rate_decay_b = 0.75,
l1weight = 0,
l2weight = 1,
c1 = 0.0001,
backoff = 0.5,
owlqn_steps = 100,
max_backoff = 5,
)
因为 它太大了无法显示 source diff 。你可以改为 查看blob
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from cStringIO import StringIO
import paddle.proto.DataFormat_pb2 as DataFormat
from google.protobuf.internal.encoder import _EncodeVarint
import logging
import pprint
# Log through the 'paddle' logger at INFO level, using a
# "[LEVEL time file:line] message" record layout.
logging.basicConfig(
format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s', )
logger = logging.getLogger('paddle')
logger.setLevel(logging.INFO)
# Per-column policies for a feature that is missing from its dictionary
# (out-of-vocabulary) when the proto file is generated.
# IGNORE: id columns store the 0xffffffff marker, pattern columns omit the
# feature.
OOV_POLICY_IGNORE = 0
# USE: the feature is mapped to id 0, which is reserved for OOV features.
OOV_POLICY_USE = 1
# ERROR: the feature is reported through logger.fatal as an unknown token.
OOV_POLICY_ERROR = 2
# Number of leading columns that are written as id slots; every column after
# these holds a combined feature generated from `patterns`.
num_original_columns = 3
# Feature combination patterns.
# Each pattern is a list of [relative position, column] pairs; the values found
# at those positions are joined with '/' to form one generated feature.
# [[-1,0], [0,0]] means previous token at column 0 and current token at
# column 0 are combined as one feature.
# One generated column is appended to every timestep per pattern, in the order
# listed here.
patterns = [
[[-2, 0]],
[[-1, 0]],
[[0, 0]],
[[1, 0]],
[[2, 0]],
[[-1, 0], [0, 0]],
[[0, 0], [1, 0]],
[[-2, 1]],
[[-1, 1]],
[[0, 1]],
[[1, 1]],
[[2, 1]],
[[-2, 1], [-1, 1]],
[[-1, 1], [0, 1]],
[[0, 1], [1, 1]],
[[1, 1], [2, 1]],
[[-2, 1], [-1, 1], [0, 1]],
[[-1, 1], [0, 1], [1, 1]],
[[0, 1], [1, 1], [2, 1]],
]
def make_features(sequence):
    """Append the combined pattern features to every timestep, in place.

    ``sequence`` is a list of timesteps, each a list of feature strings.  For
    every timestep one feature per entry of the module-level ``patterns`` is
    appended.  A generated feature is the '/'-joined values found at the
    (relative position, column) pairs of the pattern.  Positions before the
    start of the sequence yield '#B<k>' and positions past its end yield
    '#E<k>', where k is the distance from the sequence boundary.

    An empty sequence is left untouched instead of raising IndexError; this
    occurs, for example, when the input contains consecutive blank lines.

    Returns None.
    """
    if not sequence:
        return

    length = len(sequence)
    # Width of a timestep before any generated feature is appended; padding
    # rows are built with this width so that any original column can be read.
    num_features = len(sequence[0])

    def get_features(pos):
        # Out-of-range positions are replaced by a boundary token replicated
        # across all columns.
        if pos < 0:
            return ['#B%s' % -pos] * num_features
        if pos >= length:
            return ['#E%s' % (pos - length + 1)] * num_features
        return sequence[pos]

    for index, timestep in enumerate(sequence):
        for pattern in patterns:
            # Append one feature at a time so the timestep grows exactly as
            # the patterns are evaluated.
            timestep.append('/'.join(
                get_features(index + offset)[column]
                for offset, column in pattern))
'''
Source file format:
Each line is for one timestep. The features are separated by space.
An empty line indicates end of a sequence.
cutoff: a list of numbers. If count of a feature is smaller than this,
it will be ignored.
if oov_policy[i] is OOV_POLICY_USE, id 0 is reserved for OOV features of
i-th column.
return a list of dict for each column
'''
def create_dictionaries(filename, cutoff, oov_policy):
    """Build one feature -> id dictionary per column from a source file.

    Source file format: one timestep per line with space-separated features;
    an empty line ends a sequence.  A final sequence that is not followed by
    an empty line is counted as well, and empty sequences (consecutive blank
    lines) are skipped.  Every sequence is expanded with ``make_features``
    before its features are counted.

    Args:
        filename: path of the source file.
        cutoff: list with one number per column (original columns followed
            by the generated pattern columns).  A feature whose count is
            smaller than ``cutoff[i]`` is dropped from column ``i``.
        oov_policy: list with one OOV_POLICY_* value per column.  For
            OOV_POLICY_USE columns id 0 is reserved for OOV features (stored
            under the '#OOV#' key) and real features are numbered from 1.

    Returns:
        A list of dicts, one per column, mapping feature string to id.

    Raises:
        AssertionError: if an expanded timestep does not have exactly
            ``len(cutoff)`` features.
    """
    num_features = len(cutoff)
    dicts = [dict() for _ in range(num_features)]

    def add_to_dict(sequence):
        # Count every feature occurrence, column by column.
        for features in sequence:
            assert len(features) == num_features, (
                "Wrong number of features: expected %d, got %d in %r" %
                (num_features, len(features), features))
            for counts, feature in zip(dicts, features):
                counts[feature] = counts.get(feature, 0) + 1

    def flush(sequence):
        # Skip empty sequences: make_features needs at least one timestep.
        if sequence:
            make_features(sequence)
            add_to_dict(sequence)

    sequence = []
    # The context manager closes the file even if a line is malformed.
    with open(filename, 'rb') as f:
        for line in f:
            line = line.strip()
            if not line:
                flush(sequence)
                sequence = []
                continue
            sequence.append(line.split(' '))
    # Do not lose the last sequence when the file lacks a trailing blank line.
    flush(sequence)

    for i, dct in enumerate(dicts):
        next_id = 1 if oov_policy[i] == OOV_POLICY_USE else 0
        ignored = 0
        # Iterate over a snapshot so entries can be rewritten or removed
        # while looping.
        for feature, count in list(dct.items()):
            if count < cutoff[i]:
                del dct[feature]
                ignored += 1
            else:
                dct[feature] = next_id
                next_id += 1

        if oov_policy[i] == OOV_POLICY_USE:
            # placeholder so that len(dct) will be the number of features
            # including OOV; added after the pruning above so that it can
            # never be removed again
            dct['#OOV#'] = 0

        logger.info('column %d dict size=%d, ignored %d' % (i, next_id, ignored))

    return dicts
def encode_varint(v):
    """Return the protobuf varint encoding of ``v`` as a string."""
    buf = StringIO()
    # _EncodeVarint emits its output through the supplied write callback.
    write = buf.write
    _EncodeVarint(write, v)
    encoded = buf.getvalue()
    return encoded
def write_proto(file, message):
    """Write ``message`` to ``file`` as a length-prefixed record.

    The record is the varint-encoded byte length of the serialized message
    immediately followed by the serialized message, emitted in one write.
    """
    payload = message.SerializeToString()
    length_prefix = encode_varint(len(payload))
    record = length_prefix + payload
    file.write(record)
'''
if oov_policy[i] == OOV_POLICY_USE, features in the i-th column which do not
exist in dicts[i] will be assigned id 0.
if oov_policy[i] == OOV_POLICY_ERROR, all features in the i-th column MUST
exist in dicts[i].
'''
def gen_proto_file(input_file, dicts, oov_policy, output_file):
    """Convert a source text file into a length-prefixed protobuf data file.

    The output starts with a DataHeader record followed by one DataSample
    record per timestep, each written with ``write_proto``.  The first
    ``num_original_columns`` columns of a timestep become id slots; when
    ``patterns`` is non-empty the remaining (generated) columns are merged
    into one sparse vector slot, each column offset by the total size of the
    dictionaries of the generated columns before it.

    Input format: one timestep per line with space-separated features; an
    empty line ends a sequence.  A final sequence that is not followed by an
    empty line is written as well, and empty sequences are skipped.

    Handling of a feature that is missing from ``dicts[i]``:
        OOV_POLICY_IGNORE: id columns store 0xffffffff, generated columns
            omit the feature.
        OOV_POLICY_ERROR: the token is logged and ValueError is raised.
            Previously it was only logged, which produced a sample with a
            missing id slot.
        any other value (OOV_POLICY_USE): id 0 is used.

    Args:
        input_file: path of the source text file.
        dicts: list of feature -> id dicts, one per column.
        oov_policy: list with one OOV_POLICY_* value per column.
        output_file: path of the binary file to create.

    Raises:
        AssertionError: if an expanded timestep does not have ``len(dicts)``
            features.
        ValueError: if an unknown feature occurs in an OOV_POLICY_ERROR
            column.
    """
    num_features = len(dicts)

    def resolve_id(column, feature):
        # Returns the feature id, None when the feature must be ignored.
        feature_id = dicts[column].get(feature, -1)
        if feature_id != -1:
            return feature_id
        policy = oov_policy[column]
        if policy == OOV_POLICY_IGNORE:
            return None
        if policy == OOV_POLICY_ERROR:
            # logger.fatal only emits a log record, so abort explicitly.
            logger.fatal("Unknown token: %s" % feature)
            raise ValueError("Unknown token: %s" % feature)
        return 0

    def write_sequence(out, sequence):
        is_beginning = True
        for features in sequence:
            assert len(features) == num_features, (
                "Wrong number of features: expected %d, got %d in %r" %
                (num_features, len(features), features))

            sample = DataFormat.DataSample()
            for i in range(num_original_columns):
                feature_id = resolve_id(i, features[i])
                if feature_id is None:
                    feature_id = 0xffffffff
                sample.id_slots.append(feature_id)

            if patterns:
                # Offset of the current generated column inside the vector.
                dim = 0
                vec = sample.vector_slots.add()
                for i in range(num_original_columns, num_features):
                    feature_id = resolve_id(i, features[i])
                    if feature_id is not None:
                        vec.ids.append(dim + feature_id)
                    dim += len(dicts[i])

            # Only the first timestep of a sequence is flagged.
            sample.is_beginning = is_beginning
            is_beginning = False
            write_proto(out, sample)

    num_sequences = 0
    sequence = []
    # Context managers close both files even when a line is rejected.
    with open(input_file, 'rb') as f, open(output_file, 'wb') as out:
        header = DataFormat.DataHeader()
        if patterns:
            slot_def = header.slot_defs.add()
            slot_def.type = DataFormat.SlotDef.VECTOR_SPARSE_NON_VALUE
            slot_def.dim = sum(
                len(dicts[i])
                for i in range(num_original_columns, num_features))
            logger.info("feature_dim=%s" % slot_def.dim)

        for i in range(num_original_columns):
            slot_def = header.slot_defs.add()
            slot_def.type = DataFormat.SlotDef.INDEX
            slot_def.dim = len(dicts[i])

        write_proto(out, header)

        for line in f:
            line = line.strip()
            if line:
                sequence.append(line.split(' '))
                continue
            # Blank line: end of the current sequence (if there is one).
            if sequence:
                make_features(sequence)
                write_sequence(out, sequence)
                num_sequences += 1
            sequence = []

        # Do not lose the last sequence when the trailing blank is missing.
        if sequence:
            make_features(sequence)
            write_sequence(out, sequence)
            num_sequences += 1

    logger.info("num_sequences=%s" % num_sequences)
# Fixed label -> id mapping for the B-/I- prefixed chunk tags plus 'O'
# (23 entries, ids 0..22).  The __main__ block installs it as the dictionary
# of column 2 in place of the one built by create_dictionaries.
dict2 = {
'B-ADJP': 0,
'I-ADJP': 1,
'B-ADVP': 2,
'I-ADVP': 3,
'B-CONJP': 4,
'I-CONJP': 5,
'B-INTJ': 6,
'I-INTJ': 7,
'B-LST': 8,
'I-LST': 9,
'B-NP': 10,
'I-NP': 11,
'B-PP': 12,
'I-PP': 13,
'B-PRT': 14,
'I-PRT': 15,
'B-SBAR': 16,
'I-SBAR': 17,
'B-UCP': 18,
'I-UCP': 19,
'B-VP': 20,
'I-VP': 21,
'O': 22
}
if __name__ == '__main__':
    # Per-column settings: the three original columns first, then one entry
    # for every generated pattern column.
    cutoff = [3, 1, 0] + [3] * len(patterns)
    oov_policy = ([OOV_POLICY_IGNORE, OOV_POLICY_ERROR, OOV_POLICY_ERROR] +
                  [OOV_POLICY_IGNORE] * len(patterns))

    # Dictionaries are built from the training text only; column 2 then uses
    # the fixed dict2 mapping instead of the generated one.
    dicts = create_dictionaries('trainer/tests/train.txt', cutoff, oov_policy)
    dicts[2] = dict2

    # Convert the training and the test text with the same dictionaries.
    for text_path, proto_path in (
            ('trainer/tests/train.txt', 'trainer/tests/train_proto.bin'),
            ('trainer/tests/test.txt', 'trainer/tests/test_proto.bin')):
        gen_proto_file(text_path, dicts, oov_policy, proto_path)
Confidence NN B-NP
in IN B-PP
the DT B-NP
pound NN I-NP
is VBZ B-VP
widely RB I-VP
expected VBN I-VP
to TO I-VP
take VB I-VP
another DT B-NP
sharp JJ I-NP
dive NN I-NP
if IN B-SBAR
trade NN B-NP
figures NNS I-NP
for IN B-PP
September NNP B-NP
, , O
due JJ B-ADJP
for IN B-PP
release NN B-NP
tomorrow NN B-NP
, , O
fail VB B-VP
to TO I-VP
show VB I-VP
a DT B-NP
substantial JJ I-NP
improvement NN I-NP
from IN B-PP
July NNP B-NP
and CC I-NP
August NNP I-NP
's POS B-NP
near-record JJ I-NP
deficits NNS I-NP
. . O
Chancellor NNP O
of IN B-PP
the DT B-NP
Exchequer NNP I-NP
Nigel NNP B-NP
Lawson NNP I-NP
's POS B-NP
restated VBN I-NP
commitment NN I-NP
to TO B-PP
a DT B-NP
firm NN I-NP
monetary JJ I-NP
policy NN I-NP
has VBZ B-VP
helped VBN I-VP
to TO I-VP
prevent VB I-VP
a DT B-NP
freefall NN I-NP
in IN B-PP
sterling NN B-NP
over IN B-PP
the DT B-NP
past JJ I-NP
week NN I-NP
. . O
But CC O
analysts NNS B-NP
reckon VBP B-VP
underlying VBG B-NP
support NN I-NP
for IN B-PP
sterling NN B-NP
has VBZ B-VP
been VBN I-VP
eroded VBN I-VP
by IN B-PP
the DT B-NP
chancellor NN I-NP
's POS B-NP
failure NN I-NP
to TO B-VP
announce VB I-VP
any DT B-NP
new JJ I-NP
policy NN I-NP
measures NNS I-NP
in IN B-PP
his PRP$ B-NP
Mansion NNP I-NP
House NNP I-NP
speech NN I-NP
last JJ B-NP
Thursday NNP I-NP
. . O
This DT B-NP
has VBZ B-VP
increased VBN I-VP
the DT B-NP
risk NN I-NP
of IN B-PP
the DT B-NP
government NN I-NP
being VBG B-VP
forced VBN I-VP
to TO I-VP
increase VB I-VP
base NN B-NP
rates NNS I-NP
to TO B-PP
16 CD B-NP
% NN I-NP
from IN B-PP
their PRP$ B-NP
current JJ I-NP
15 CD I-NP
% NN I-NP
level NN I-NP
to TO B-VP
defend VB I-VP
the DT B-NP
pound NN I-NP
, , O
economists NNS B-NP
and CC O
foreign JJ B-NP
exchange NN I-NP
market NN I-NP
analysts NNS I-NP
say VBP B-VP
. . O
`` `` O
The DT B-NP
risks NNS I-NP
for IN B-PP
sterling NN B-NP
of IN B-PP
a DT B-NP
bad JJ I-NP
trade NN I-NP
figure NN I-NP
are VBP B-VP
very RB B-ADVP
heavily RB I-ADVP
on IN B-PP
the DT B-NP
down JJ I-NP
side NN I-NP
, , O
'' '' O
said VBD B-VP
Chris NNP B-NP
Dillow NNP I-NP
, , O
senior JJ B-NP
U.K. NNP I-NP
economist NN I-NP
at IN B-PP
Nomura NNP B-NP
Research NNP I-NP
Institute NNP I-NP
. . O
`` `` O
If IN B-SBAR
there EX B-NP
is VBZ B-VP
another DT B-NP
bad JJ I-NP
trade NN I-NP
number NN I-NP
, , O
there EX B-NP
could MD B-VP
be VB I-VP
an DT B-NP
awful JJ I-NP
lot NN I-NP
of IN B-PP
pressure NN B-NP
, , O
'' '' O
noted VBD B-VP
Simon NNP B-NP
Briscoe NNP I-NP
, , O
U.K. NNP B-NP
economist NN I-NP
for IN B-PP
Midland NNP B-NP
Montagu NNP I-NP
, , O
a DT B-NP
unit NN I-NP
of IN B-PP
Midland NNP B-NP
Bank NNP I-NP
PLC NNP I-NP
. . O
Forecasts NNS B-NP
for IN B-PP
the DT B-NP
trade NN I-NP
figures NNS I-NP
range VBP B-VP
widely RB B-ADVP
, , O
but CC O
few JJ B-NP
economists NNS I-NP
expect VBP B-VP
the DT B-NP
data NNS I-NP
to TO B-VP
show VB I-VP
a DT B-NP
very RB I-NP
marked VBN I-NP
improvement NN I-NP
from IN B-PP
the DT O
# # O
2 CD O
billion CD O
-LRB- ( O
$ $ B-ADJP
3.2 CD O
billion CD O
-RRB- ) O
deficit NN B-NP
in IN B-PP
the DT B-NP
current JJ I-NP
account NN I-NP
reported VBD B-VP
for IN B-PP
August NNP B-NP
. . O
The DT B-NP
August NNP I-NP
deficit NN I-NP
and CC O
the DT B-NP
# # I-NP
2.2 CD I-NP
billion CD I-NP
gap NN I-NP
registered VBN B-VP
in IN B-PP
July NNP B-NP
are VBP B-VP
topped VBN I-VP
only RB B-ADVP
by IN B-PP
the DT B-NP
# # I-NP
2.3 CD I-NP
billion CD I-NP
deficit NN I-NP
of IN B-PP
October NNP B-NP
1988 CD I-NP
. . O
Sanjay NNP B-NP
Joshi NNP I-NP
, , O
European JJ B-NP
economist NN I-NP
at IN B-PP
Baring NNP B-NP
Brothers NNPS I-NP
& CC I-NP
Co. NNP I-NP
, , O
said VBD B-VP
there EX B-NP
is VBZ B-VP
no DT B-NP
sign NN I-NP
that IN B-SBAR
Britain NNP B-NP
's POS B-NP
manufacturing NN I-NP
industry NN I-NP
is VBZ B-VP
transforming VBG I-VP
itself PRP B-NP
to TO B-VP
boost VB I-VP
exports NNS B-NP
. . O
At IN B-PP
the DT B-NP
same JJ I-NP
time NN I-NP
, , O
he PRP B-NP
remains VBZ B-VP
fairly RB B-ADJP
pessimistic JJ I-ADJP
about IN B-PP
the DT B-NP
outlook NN I-NP
for IN B-PP
imports NNS B-NP
, , O
given VBN B-PP
continued VBD B-NP
high JJ I-NP
consumer NN I-NP
and CC I-NP
capital NN I-NP
goods NNS I-NP
inflows NNS I-NP
. . O
He PRP B-NP
reckons VBZ B-VP
the DT B-NP
current JJ I-NP
account NN I-NP
deficit NN I-NP
will MD B-VP
narrow VB I-VP
to TO B-PP
only RB B-NP
# # I-NP
1.8 CD I-NP
billion CD I-NP
in IN B-PP
September NNP B-NP
. . O
However RB B-ADVP
, , O
Mr. NNP B-NP
Dillow NNP I-NP
said VBD B-VP
he PRP B-NP
believes VBZ B-VP
that IN B-SBAR
a DT B-NP
reduction NN I-NP
in IN B-PP
raw JJ B-NP
material NN I-NP
stockbuilding VBG I-NP
by IN B-PP
industry NN B-NP
could MD B-VP
lead VB I-VP
to TO B-PP
a DT B-NP
sharp JJ I-NP
drop NN I-NP
in IN B-PP
imports NNS B-NP
. . O
Combined VBN B-PP
with IN B-PP
at IN B-ADVP
least JJS I-ADVP
some DT B-NP
rebound NN I-NP
in IN B-PP
exports NNS B-NP
after IN B-PP
August NNP B-NP
's POS B-NP
unexpected JJ I-NP
decline NN I-NP
, , O
the DT B-NP
deficit NN I-NP
could MD B-VP
narrow VB I-VP
to TO B-PP
as RB B-NP
little JJ I-NP
as IN I-NP
# # I-NP
1.3 CD I-NP
billion CD I-NP
. . O
Mr. NNP B-NP
Briscoe NNP I-NP
, , O
who WP B-NP
also RB B-ADVP
forecasts VBZ B-VP
a DT B-NP
# # I-NP
1.3 CD I-NP
billion CD I-NP
current JJ I-NP
account NN I-NP
gap NN I-NP
, , O
warns VBZ B-VP
that IN B-SBAR
even RB B-SBAR
if IN I-SBAR
the DT B-NP
trade NN I-NP
figures NNS I-NP
are VBP B-VP
bullish JJ B-ADJP
for IN B-PP
sterling NN B-NP
, , O
the DT B-NP
currency NN I-NP
wo MD B-VP
n't RB I-VP
advance VB I-VP
much JJ B-NP
because IN B-SBAR
investors NNS B-NP
will MD B-VP
want VB I-VP
to TO I-VP
see VB I-VP
further JJ B-NP
evidence NN I-NP
of IN B-PP
the DT B-NP
turnaround NN I-NP
before IN B-PP
adjusting VBG B-VP
positions NNS B-NP
. . O
Nevertheless RB B-ADVP
, , O
he PRP B-NP
noted VBD B-VP
, , O
`` `` O
No DT B-NP
one PRP I-NP
will MD B-VP
want VB I-VP
to TO I-VP
go VB I-VP
into IN B-PP
the DT B-NP
trade NN I-NP
figures NNS I-NP
without IN B-PP
a DT B-NP
flat JJ I-NP
position NN I-NP
'' '' O
in IN B-PP
the DT B-NP
pound NN I-NP
. . O
Meanwhile RB B-ADVP
, , O
overall JJ B-NP
evidence NN I-NP
on IN B-PP
the DT B-NP
economy NN I-NP
remains VBZ B-VP
fairly RB B-ADJP
clouded VBN I-ADJP
. . O
In IN B-PP
his PRP$ B-NP
Mansion NNP I-NP
House NNP I-NP
speech NN I-NP
, , O
Mr. NNP B-NP
Lawson NNP I-NP
warned VBD B-VP
that IN B-SBAR
a DT B-NP
further JJ I-NP
slowdown NN I-NP
can MD B-VP
be VB I-VP
expected VBN I-VP
as IN B-SBAR
the DT B-NP
impact NN I-NP
of IN B-PP
the DT B-NP
last JJ I-NP
rise NN I-NP
in IN B-PP
interest NN B-NP
rates NNS I-NP
earlier RBR B-NP
this DT I-NP
month NN I-NP
takes VBZ B-VP
effect NN B-NP
. . O
U.K. JJ B-NP
base NN I-NP
rates NNS I-NP
are VBP B-VP
at IN B-PP
their PRP$ B-NP
highest JJS I-NP
level NN I-NP
in IN B-PP
eight CD B-NP
years NNS I-NP
. . O
But CC O
consumer NN B-NP
expenditure NN I-NP
data NNS I-NP
released VBD B-VP
Friday NNP B-NP
do VBP B-VP
n't RB I-VP
suggest VB I-VP
that IN B-SBAR
the DT B-NP
U.K. NNP I-NP
economy NN I-NP
is VBZ B-VP
slowing VBG I-VP
that DT B-ADVP
quickly RB I-ADVP
. . O
The DT B-NP
figures NNS I-NP
show VBP B-VP
that DT O
spending NN B-NP
rose VBD B-VP
0.1 CD B-NP
% NN I-NP
in IN B-PP
the DT B-NP
third JJ I-NP
quarter NN I-NP
from IN B-PP
the DT B-NP
second JJ I-NP
quarter NN I-NP
and CC O
was VBD B-VP
up IN B-ADVP
3.8 CD B-NP
% NN I-NP
from IN B-PP
a DT B-NP
year NN I-NP
ago RB B-ADVP
. . O
This DT B-NP
compares VBZ B-VP
with IN B-PP
a DT B-NP
1.6 CD I-NP
% NN I-NP
rise NN I-NP
in IN B-PP
the DT B-NP
second NN I-NP
from IN B-PP
the DT B-NP
first JJ I-NP
quarter NN I-NP
and CC O
a DT B-NP
5.4 CD I-NP
% NN I-NP
increase NN I-NP
from IN B-PP
the DT B-NP
second JJ I-NP
quarter NN I-NP
of IN B-PP
1988 CD B-NP
. . O
Mr. NNP B-NP
Dillow NNP I-NP
said VBD B-VP
the DT B-NP
data NNS I-NP
show VBP B-VP
the DT B-NP
economy NN I-NP
`` `` O
is VBZ B-VP
still RB B-ADVP
quite RB B-ADJP
strong JJ I-ADJP
, , O
'' '' O
but CC O
suggestions NNS B-NP
that IN B-SBAR
much NN B-NP
of IN B-PP
the DT B-NP
spending NN I-NP
went VBD B-VP
on IN B-PP
services NNS B-NP
rather RB B-PP
than IN I-PP
consumer NN B-NP
goods NNS I-NP
should MD B-VP
reduce VB I-VP
fears NNS B-NP
of IN B-PP
more JJR B-NP
import NN I-NP
rises NNS I-NP
. . O
Certainly RB B-ADVP
, , O
the DT B-NP
chancellor NN I-NP
has VBZ B-VP
made VBN I-VP
it PRP B-NP
clear JJ B-ADJP
that IN B-SBAR
he PRP B-NP
is VBZ B-VP
prepared VBN I-VP
to TO I-VP
increase VB I-VP
interest NN B-NP
rates NNS I-NP
again RB B-ADVP
if IN B-SBAR
necessary JJ B-ADJP
to TO B-VP
both DT I-VP
ensure VB I-VP
that IN B-SBAR
a DT B-NP
substantial JJ I-NP
slowdown NN I-NP
does VBZ B-VP
take VB I-VP
place NN B-NP
and CC O
that DT O
sterling NN B-NP
does VBZ B-VP
n't RB I-VP
decline VB I-VP
further JJ B-ADVP
. . O
Thursday NNP B-NP
, , O
he PRP B-NP
reminded VBD B-VP
his PRP$ B-NP
audience NN I-NP
that IN B-SBAR
the DT B-NP
government NN I-NP
`` `` O
can MD B-VP
not RB I-VP
allow VB I-VP
the DT B-NP
necessary JJ I-NP
rigor NN I-NP
of IN B-PP
monetary JJ B-NP
policy NN I-NP
to TO B-VP
be VB I-VP
undermined VBN I-VP
by IN B-PP
exchange NN B-NP
rate NN I-NP
weakness NN I-NP
. . O
'' '' O
Analysts NNS B-NP
agree VBP B-VP
there EX B-NP
is VBZ B-VP
little JJ B-NP
holding NN B-VP
sterling NN B-NP
firm NN B-ADJP
at IN B-PP
the DT B-NP
moment NN I-NP
other JJ B-ADJP
than IN B-PP
Mr. NNP B-NP
Lawson NNP I-NP
's POS B-NP
promise NN I-NP
that IN B-SBAR
rates NNS B-NP
will MD B-VP
be VB I-VP
pushed VBN I-VP
higher JJR B-ADJP
if IN B-SBAR
necessary JJ B-ADJP
. . O
And CC O
, , O
they PRP B-NP
warn VBP B-VP
, , O
any DT B-NP
further JJ I-NP
drop NN I-NP
in IN B-PP
the DT B-NP
government NN I-NP
's POS B-NP
popularity NN I-NP
could MD B-VP
swiftly RB I-VP
make VB I-VP
this DT B-NP
promise NN I-NP
sound NN B-VP
hollow JJ B-ADJP
. . O
Sterling NNP B-NP
was VBD B-VP
already RB I-VP
showing VBG I-VP
some DT B-NP
signs NNS I-NP
of IN B-PP
a DT B-NP
lack NN I-NP
of IN B-PP
confidence NN B-NP
in IN B-PP
Mr. NNP B-NP
Lawson NNP I-NP
's POS B-NP
promise NN I-NP
Friday NNP B-NP
. . O
In IN B-PP
European JJ B-NP
trading NN I-NP
it PRP B-NP
declined VBD B-VP
to TO B-PP
$ $ B-NP
1.5890 CD I-NP
and CC O
2.9495 CD B-NP
marks NNS I-NP
from IN B-PP
$ $ B-NP
1.5940 CD I-NP
and CC O
2.9429 CD B-NP
marks NNS I-NP
late JJ B-NP
Thursday NNP I-NP
. . O
Economists NNS B-NP
suggested VBD B-VP
that IN B-SBAR
if IN B-SBAR
the DT B-NP
pound NN I-NP
falls VBZ B-VP
much JJ B-NP
below IN B-PP
2.90 CD B-NP
marks NNS I-NP
, , O
the DT B-NP
government NN I-NP
will MD B-VP
be VB I-VP
forced VBN I-VP
to TO I-VP
increase VB I-VP
rates NNS B-NP
to TO B-PP
16 CD B-NP
% NN I-NP
, , O
both DT B-VP
to TO I-VP
halt VB B-VP
any DT B-NP
further JJ I-NP
decline NN I-NP
and CC O
ensure VB B-VP
that IN B-SBAR
the DT B-NP
balance NN I-NP
of IN B-PP
monetary JJ B-NP
policy NN I-NP
remains VBZ B-VP
unchanged JJ B-ADJP
. . O
Friday NNP B-NP
's POS B-NP
Market NNP I-NP
Activity NN I-NP
The DT B-NP
dollar NN I-NP
posted VBD B-VP
gains NNS B-NP
in IN B-PP
quiet JJ B-NP
trading NN I-NP
as IN B-SBAR
concerns NNS B-NP
about IN B-PP
equities NNS B-NP
abated VBN B-VP
. . O
Foreign JJ B-NP
exchange NN I-NP
dealers NNS I-NP
said VBD B-VP
that IN B-SBAR
the DT B-NP
currency NN I-NP
market NN I-NP
has VBZ B-VP
begun VBN I-VP
to TO I-VP
distance VB I-VP
itself PRP B-NP
from IN B-PP
the DT B-NP
volatile JJ I-NP
stock NN I-NP
exchange NN I-NP
, , O
which WDT B-NP
has VBZ B-VP
preoccupied VBN I-VP
the DT B-NP
market NN I-NP
since IN B-PP
Oct. NNP B-NP
13 CD I-NP
, , O
when WRB B-ADVP
the DT B-NP
Dow NNP I-NP
Jones NNP I-NP
Industrial NNP I-NP
Average NNP I-NP
plunged VBD B-VP
more JJR B-NP
than IN I-NP
190 CD I-NP
points NNS I-NP
. . O
Currency NN B-NP
analysts NNS I-NP
predict VBP B-VP
that IN B-SBAR
in IN B-PP
the DT B-NP
coming VBG I-NP
week NN I-NP
the DT B-NP
foreign JJ I-NP
exchange NN I-NP
market NN I-NP
will MD B-VP
shift VB I-VP
its PRP$ B-NP
focus NN I-NP
back RB B-ADVP
to TO B-PP
economic JJ B-NP
fundamentals NNS I-NP
, , O
keeping VBG B-VP
a DT B-NP
close NN I-NP
eye NN I-NP
out IN B-ADVP
for IN B-PP
any DT B-NP
signs NNS I-NP
of IN B-PP
monetary JJ B-NP
easing NN I-NP
by IN B-PP
U.S. NNP B-NP
Federal NNP I-NP
Reserve NNP I-NP
. . O
Late RB B-ADVP
in IN B-PP
the DT B-NP
New NNP I-NP
York NNP I-NP
trading NN I-NP
day NN I-NP
, , O
the DT B-NP
dollar NN I-NP
was VBD B-VP
quoted VBN I-VP
at IN B-PP
1.8578 CD B-NP
marks NNS I-NP
, , O
up IN B-ADVP
from IN B-PP
1.8470 CD B-NP
marks NNS I-NP
late JJ B-NP
Thursday NNP I-NP
in IN B-PP
New NNP B-NP
York NNP I-NP
. . O
The DT B-NP
U.S. NNP I-NP
currency NN I-NP
was VBD B-VP
also RB I-VP
changing VBG I-VP
hands NNS B-NP
at IN B-PP
142.43 CD B-NP
yen NN I-NP
, , O
up IN B-ADVP
from IN B-PP
141.70 CD B-NP
yen NN I-NP
in IN B-PP
New NNP B-NP
York NNP I-NP
late JJ B-NP
Thursday NNP I-NP
. . O
In IN B-PP
Tokyo NNP B-NP
on IN B-PP
Monday NNP B-NP
, , O
the DT B-NP
U.S. NNP I-NP
currency NN I-NP
opened VBD B-VP
for IN B-PP
trading NN B-NP
at IN B-PP
141.95 CD B-NP
yen NN I-NP
, , O
up IN B-ADVP
from IN B-PP
Friday NNP B-NP
's POS B-NP
Tokyo NNP I-NP
......@@ -24,7 +24,6 @@ using namespace std; // NOLINT
static const string& configFile1 = "trainer/tests/sample_trainer_config.conf";
static const string& configFile2 =
"trainer/tests/sample_trainer_config_hsigmoid.conf";
static const string& configFile3 = "trainer/tests/chunking.conf";
static const string& configFile4 =
"trainer/tests/sample_trainer_config_parallel.conf";
......@@ -95,13 +94,6 @@ TEST(checkGradient, multi) {
TEST(checkGradient, hsigmoid) { checkGradientTest(configFile2, false, false); }
// Gradient check for configFile3: runs checkGradientTest once with both
// boolean flags false and, in PADDLE_WITH_CUDA builds, once more with both
// flags true.
// NOTE(review): the meaning of the two flags is not visible in this hunk --
// presumably (useGpu, parallel); confirm against checkGradientTest.
TEST(checkGradient, chunk) {
checkGradientTest(configFile3, false, false);
#ifdef PADDLE_WITH_CUDA
checkGradientTest(configFile3, true, true);
#endif
}
TEST(checkGradient, non_parallel) {
checkGradientTest(configFile4, false, false);
}
......
......@@ -15,12 +15,7 @@
from paddle.trainer_config_helpers import *
TrainData(ProtoData(
files = "dummy_list",
constant_slots = [1.0],
async_load_data = True))
TestData(SimpleData(
TrainData(SimpleData(
files = "trainer/tests/sample_filelist.txt",
feat_dim = 3,
context_len = 0,
......
此差异已折叠。
......@@ -1116,35 +1116,6 @@ def PyData(files=None,
return data_config
@config_func
def ProtoData(files=None,
              type=None,
              file_group_queue_capacity=None,
              load_file_count=None,
              constant_slots=None,
              load_thread_num=None,
              **xargs):
    """Create a data config for a proto data source.

    The config is created by ``create_data_config_proto(**xargs)``.  Its type
    is 'proto' unless ``type`` is given, and ``files`` is stored unchanged.
    The three file-group options are written to ``file_group_conf`` only when
    they are not None, and ``constant_slots`` is appended when non-empty.

    Returns the populated data config.
    """
    config = create_data_config_proto(**xargs)
    config.type = 'proto' if type is None else type
    config.files = files

    # When type="proto_group", one data provider contains at most
    # load_file_count files, and there are at most
    # (queue_capacity + load_thread_num + 1) data providers in memory
    group_options = (
        ('queue_capacity', file_group_queue_capacity),
        ('load_file_count', load_file_count),
        ('load_thread_num', load_thread_num),
    )
    for field_name, value in group_options:
        # file_group_conf is touched only for options that were supplied.
        if value is not None:
            setattr(config.file_group_conf, field_name, value)

    if constant_slots:
        config.constant_slots.extend(constant_slots)
    return config
# Real data for training is actually provided by the "sub_data" data providers.
@config_func
def MultiData(sub_data=[]):
......@@ -2714,7 +2685,7 @@ Usage:
max_sort_size = -1, inputs = ["output", "score"])
Input data: Samples of the same query should be loaded as a sequence,
by ProtoDataProvider or PyDataProvider etc.. User should provide
by PyDataProvider etc.. User should provide
scores for each sample. The score slot should be the 2nd
input of lambdaRank layer.
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册