chunking.conf

#edit-mode: -*- python -*-
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# TODO(luotao02): This config is only used for unit tests. It is out of date and will be updated later.

# Training data source: a ProtoData provider built from `files`, with
# usage_ratio set to 1.0.
TrainData(ProtoData(files="trainer/tests/train_files.txt", usage_ratio=1.0))

# Test data source: a ProtoData provider built from `files` only.
TestData(ProtoData(files="trainer/tests/test_files.txt"))

# Global default_* calls. Going by the function names these set the default
# initial std, decay rate and device -- NOTE(review): confirm the exact
# semantics against the config parser.
default_initial_std(1)
default_decay_rate(4e-4)
default_device(0)

# Network inputs: each name matches a layer of type "data" declared below.
Inputs("features", "word", "pos", "chunk")

# Network output: the layer named "crf" declared below.
Outputs("crf")

# Data layers, one per name listed in Inputs(...). Only the name and the size
# differ between them.
Layer(name="features", type="data", size=4339)
Layer(name="word", type="data", size=478)
Layer(name="pos", type="data", size=45)
Layer(name="chunk", type="data", size=23)

# Mixed layer "output" (size 23, bias disabled). Its only active input is a
# full-matrix projection of "features" whose parameter is named
# "feature_weights".
Layer(
    name="output",
    type="mixed",
    size=23,
    bias=False,
    device=-1,
    inputs=[
        FullMatrixProjection("features", parameter_name="feature_weights"),
        # Disabled projections, kept for reference:
        # TableProjection("word"),
        # TableProjection("pos"),
    ],
)

# Layer of type "crf", fed by "output" and "chunk". Its parameter is named
# "crfw", the same name the "crf_decoding" layer uses -- presumably so the two
# layers share one parameter (confirm against the config parser).
Layer(
    name="crf",
    type="crf",
    size=23,
    device=-1,
    inputs=[Input("output", parameter_name="crfw"), "chunk"],
)

# Layer of type "crf_decoding", fed by the same two inputs as the "crf" layer
# and using the same parameter name, "crfw".
Layer(
    name="crf_decoding",
    type="crf_decoding",
    size=23,
    device=-1,
    inputs=[Input("output", parameter_name="crfw"), "chunk"],
)

# Evaluator "error": type "sum" over the "crf_decoding" layer.
Evaluator(name="error", type="sum", inputs="crf_decoding")

'''
# chunk evaluator cannot be used for GPU training
Evaluator(
    name = "chunk_f1",
    type = "chunk",
    inputs = ["crf_decoding", "chunk"],
    chunk_scheme = "IOB",
    num_chunk_types = 11,
)
'''

# Optimizer settings for this config.
Settings(
    algorithm="sgd",
    batch_size=100,
    # Learning rate and its decay coefficients.
    learning_rate=0.1,
    learning_rate_decay_a=5e-7,
    learning_rate_decay_b=0.75,
    # Averaging window settings.
    average_window=0.5,
    max_average_window=2500,
    # Regularization weights.
    l1weight=0,
    l2weight=1,
    # NOTE(review): c1, backoff, owlqn_steps and max_backoff are set even
    # though algorithm is "sgd"; whether they are read in that mode is not
    # visible from this file.
    c1=0.0001,
    backoff=0.5,
    owlqn_steps=100,
    max_backoff=5,
)