chunking.conf 2.4 KB
Newer Older
Z
zhangjinchao01 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125
#edit-mode: -*- python -*-
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# TODO(luotao02): This config is only used for unit tests. It is out of date and will be updated later.

# Training data source: a ProtoData provider given the file list
# 'trainer/tests/train_files.txt' and an explicit usage_ratio of 1.0.
TrainData(ProtoData(files="trainer/tests/train_files.txt", usage_ratio=1.0))

# Test data source: a ProtoData provider given the file list
# 'trainer/tests/test_files.txt'; no usage_ratio is passed here.
TestData(ProtoData(files="trainer/tests/test_files.txt"))

# Global defaults applied before any layer is declared.
# NOTE(review): judging by the names these set the default parameter
# initialization std, the default decay rate, and the default device id —
# confirm against the config parser.
default_initial_std(1)
default_decay_rate(4e-4)
# Layers "output", "crf" and "crf_decoding" below pass device = -1 explicitly
# instead of relying on this default.
default_device(0)

# Network inputs; each name corresponds to a Layer of type "data" declared
# later in this file.
Inputs("features", "word", "pos", "chunk")

# Network output: the Layer named "crf" (type "crf") declared later in this file.
Outputs("crf")

# Data layers, one per name listed in Inputs(...). Each call only declares
# the layer's name, its type ("data") and its size.
Layer(name="features", type="data", size=4339)
Layer(name="word", type="data", size=478)
Layer(name="pos", type="data", size=45)
# "chunk" has size 23, the same size used by the "output", "crf" and
# "crf_decoding" layers in this file.
Layer(name="chunk", type="data", size=23)

# "output": a bias-free mixed layer of size 23 (the same size as the "chunk"
# data layer). Its single active input is a full-matrix projection of
# "features" whose parameter is named "feature_weights".
# NOTE(review): device = -1 overrides default_device(0); presumably this
# pins the layer to CPU — confirm.
Layer(
    name="output", type="mixed", size=23, bias=False, device=-1,
    inputs=[
        FullMatrixProjection("features", parameter_name="feature_weights"),
        # Disabled inputs, kept for reference:
        #   TableProjection("word"),
        #   TableProjection("pos"),
    ],
)

# "crf": layer of type "crf" and size 23, fed by "output" and "chunk".
# The "output" input names its parameter "crfw" — the same parameter name
# used by the "crf_decoding" layer in this file.
# This is the layer listed in Outputs(...).
Layer(
    name="crf", type="crf", size=23, device=-1,
    inputs=[Input("output", parameter_name="crfw"), "chunk"],
)

# "crf_decoding": layer of type "crf_decoding" and size 23, fed by "output"
# and "chunk". Its "output" input uses parameter name "crfw", the same name
# the "crf" layer uses.
# NOTE(review): presumably the two layers share one set of CRF weights via
# that name — confirm.
Layer(
    name="crf_decoding", type="crf_decoding", size=23, device=-1,
    inputs=[Input("output", parameter_name="crfw"), "chunk"],
)

# Evaluator named "error": a "sum"-type evaluator over the "crf_decoding" layer.
Evaluator(name="error", type="sum", inputs="crf_decoding")

# Disabled: the chunk evaluator cannot be used for GPU training.
# Kept as line comments (rather than a bare string literal) so it is
# unambiguously inert; uncomment to re-enable.
# NOTE(review): num_chunk_types = 11 with the "IOB" scheme presumably
# corresponds to the 23 labels used above (2 * 11 + 1) — confirm.
#
# Evaluator(
#     name = "chunk_f1",
#     type = "chunk",
#     inputs = ["crf_decoding", "chunk"],
#     chunk_scheme = "IOB",
#     num_chunk_types = 11,
# )

# Trainer/optimizer settings. Keyword order matches the original declaration.
Settings(
    # Optimization algorithm and batch size.
    algorithm="sgd", batch_size=100,
    # Averaging-window options.
    average_window=0.5, max_average_window=2500,
    # Learning rate and its two decay coefficients.
    learning_rate=1e-1, learning_rate_decay_a=5e-7, learning_rate_decay_b=0.75,
    # L1 / L2 weights.
    l1weight=0, l2weight=1,
    # NOTE(review): c1, backoff, owlqn_steps and max_backoff look like
    # OWL-QN / line-search options; presumably they are not consulted when
    # algorithm is "sgd" — confirm before relying on them.
    c1=0.0001, backoff=0.5, owlqn_steps=100, max_backoff=5,
)