Commit 6353576f authored by R root

add an example of the NER task in fluid style

Parent 3b77123b
##
# Utility functions for NER assignment
# Assignment 2, part 1 for CS224D
##
from utils import invert_dict
from numpy import *
def load_wv(vocabfile, wvfile):
wv = loadtxt(wvfile, dtype=float)
with open(vocabfile) as fd:
words = [line.strip() for line in fd]
num_to_word = dict(enumerate(words))
word_to_num = invert_dict(num_to_word)
return wv, word_to_num, num_to_word
def save_predictions(y, filename):
"""Save predictions, one per line."""
with open(filename, 'w') as fd:
fd.write("\n".join(map(str, y)))
fd.write("\n")
import sys, os, re, json
import itertools
from collections import Counter
import time
from numpy import *
import pandas as pd
def invert_dict(d):
return {v:k for k,v in d.iteritems()}
def flatten1(lst):
return list(itertools.chain.from_iterable(lst))
def load_wv_pandas(fname):
return pd.read_hdf(fname, 'data')
def extract_wv(df):
num_to_word = dict(enumerate(df.index))
word_to_num = invert_dict(num_to_word)
wv = df.as_matrix()
return wv, word_to_num, num_to_word
def canonicalize_digits(word):
if any([c.isalpha() for c in word]): return word
word = re.sub(r"\d", "DG", word)
if word.startswith("DG"):
word = word.replace(",", "") # remove thousands separator
return word
def canonicalize_word(word, wordset=None, digits=True):
word = word.lower()
if digits:
if (wordset != None) and (word in wordset): return word
word = canonicalize_digits(word) # try to canonicalize numbers
if (wordset == None) or (word in wordset): return word
else: return "UUUNKKK" # unknown token
##
# Utility functions used to create dataset
##
def augment_wv(df, extra=["UUUNKKK"]):
for e in extra:
df.loc[e] = zeros(len(df.columns))
def prune_wv(df, vocab, extra=["UUUNKKK"]):
"""Prune word vectors to vocabulary."""
items = set(vocab).union(set(extra))
return df.filter(items=items, axis='index')
def load_wv_raw(fname):
return pd.read_table(fname, sep=r"\s+",
header=None,
index_col=0,
quoting=3)
def load_dataset(fname):
docs = []
with open(fname) as fd:
cur = []
for line in fd:
# new sentence on -DOCSTART- or blank line
if re.match(r"-DOCSTART-.+", line) or (len(line.strip()) == 0):
if len(cur) > 0:
docs.append(cur)
cur = []
else: # read in tokens
cur.append(line.strip().split("\t",1))
# flush running buffer
docs.append(cur)
return docs
def extract_tag_set(docs):
tags = set(flatten1([[t[1].split("|")[0] for t in d] for d in docs]))
return tags
def extract_word_set(docs):
words = set(flatten1([[t[0] for t in d] for d in docs]))
return words
def pad_sequence(seq, left=1, right=1):
return left*[("<s>", "")] + seq + right*[("</s>", "")]
##
# For window models
def seq_to_windows(words, tags, word_to_num, tag_to_num, left=1, right=1):
ns = len(words)
X = []
y = []
for i in range(ns):
if words[i] == "<s>" or words[i] == "</s>":
continue # skip sentence delimiters
tagn = tag_to_num[tags[i]]
idxs = [word_to_num[words[ii]]
for ii in range(i - left, i + right + 1)]
X.append(idxs)
y.append(tagn)
return array(X), array(y)
def docs_to_windows(docs, word_to_num, tag_to_num, wsize=3):
pad = (wsize - 1)/2
docs = flatten1([pad_sequence(seq, left=pad, right=pad) for seq in docs])
words, tags = zip(*docs)
words = [canonicalize_word(w, word_to_num) for w in words]
tags = [t.split("|")[0] for t in tags]
return seq_to_windows(words, tags, word_to_num, tag_to_num, pad, pad)
def window_to_vec(window, L):
"""Concatenate word vectors for a given window."""
return concatenate([L[i] for i in window])
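# Minimal sketch of the window construction above, using a toy vocabulary
# (names and indices are illustrative, not the CoNLL data):
#   word_to_num = {"<s>": 0, "</s>": 1, "eu": 2, "rejects": 3}
#   tag_to_num = {"O": 0, "ORG": 1}
#   words = ["<s>", "eu", "rejects", "</s>"]
#   tags = ["", "ORG", "O", ""]
#   X, y = seq_to_windows(words, tags, word_to_num, tag_to_num)
#   # X == [[0, 2, 3], [2, 3, 1]] (one window per real token); y == [1, 0]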
##
# For fixed-window LM:
# each row of X is a list of word indices
# each entry of y is the word index to predict
def seq_to_lm_windows(words, word_to_num, ngram=2):
ns = len(words)
X = []
y = []
for i in range(ns):
if words[i] == "<s>":
continue # skip sentence begin, but do predict end
idxs = [word_to_num[words[ii]]
for ii in range(i - ngram + 1, i + 1)]
X.append(idxs[:-1])
y.append(idxs[-1])
return array(X), array(y)
def docs_to_lm_windows(docs, word_to_num, ngram=2):
docs = flatten1([pad_sequence(seq, left=(ngram-1), right=1)
for seq in docs])
words = [canonicalize_word(wt[0], word_to_num) for wt in docs]
return seq_to_lm_windows(words, word_to_num, ngram)
##
# For RNN LM
# just convert each sentence to a list of indices
# after padding each with <s> ... </s> tokens
def seq_to_indices(words, word_to_num):
return array([word_to_num[w] for w in words])
def docs_to_indices(docs, word_to_num):
docs = [pad_sequence(seq, left=1, right=1) for seq in docs]
ret = []
for seq in docs:
words = [canonicalize_word(wt[0], word_to_num) for wt in seq]
ret.append(seq_to_indices(words, word_to_num))
# return as numpy array for fancier slicing
return array(ret, dtype=object)
def offset_seq(seq):
return seq[:-1], seq[1:]
def seqs_to_lmXY(seqs):
X, Y = zip(*[offset_seq(s) for s in seqs])
return array(X, dtype=object), array(Y, dtype=object)
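# Illustrative example (toy indices): offsetting a sequence yields the usual
# next-word prediction pairs for the language model.
#   offset_seq([0, 5, 7, 1])  # -> ([0, 5, 7], [5, 7, 1])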
##
# For RNN tagger
# return X, Y as lists
# where X[i] is indices, Y[i] is tags for a sequence
# NOTE: this does not use padding tokens!
# (RNN should natively handle begin/end)
def docs_to_tag_sequence(docs, word_to_num, tag_to_num):
# docs = [pad_sequence(seq, left=1, right=1) for seq in docs]
X = []
Y = []
for seq in docs:
if len(seq) < 1: continue
words, tags = zip(*seq)
words = [canonicalize_word(w, word_to_num) for w in words]
x = seq_to_indices(words, word_to_num)
X.append(x)
tags = [t.split("|")[0] for t in tags]
y = seq_to_indices(tags, tag_to_num)
Y.append(y)
# return as numpy array for fancier slicing
return array(X, dtype=object), array(Y, dtype=object)
def idxs_to_matrix(idxs, L):
"""Return a matrix X with each row
as a word vector for the corresponding
index in idxs."""
return vstack([L[i] for i in idxs])
class Model(object):
"""Abstracts a Tensorflow graph for a learning task.
We use various Model classes as usual abstractions to encapsulate tensorflow
computational graphs. Each algorithm you will construct in this homework will
inherit from a Model object.
"""
def load_data(self):
"""Loads data from disk and stores it in memory.
Feel free to add instance variables to Model object that store loaded data.
"""
raise NotImplementedError("Each Model must re-implement this method.")
def add_placeholders(self):
"""Adds placeholder variables to tensorflow computational graph.
Tensorflow uses placeholder variables to represent locations in a
computational graph where data is inserted. These placeholders are used as
inputs by the rest of the model building code and will be fed data during
training.
See for more information:
https://www.tensorflow.org/versions/r0.7/api_docs/python/io_ops.html#placeholders
"""
raise NotImplementedError("Each Model must re-implement this method.")
def create_feed_dict(self, input_batch, label_batch):
"""Creates the feed_dict for training the given step.
A feed_dict takes the form of:
feed_dict = {
<placeholder>: <tensor of values to be passed for placeholder>,
....
}
If label_batch is None, then no labels are added to feed_dict.
Hint: The keys for the feed_dict should be a subset of the placeholder
tensors created in add_placeholders.
Args:
input_batch: A batch of input data.
label_batch: A batch of label data.
Returns:
feed_dict: The feed dictionary mapping from placeholders to values.
"""
raise NotImplementedError("Each Model must re-implement this method.")
def add_model(self, input_data):
"""Implements core of model that transforms input_data into predictions.
The core transformation for this model which transforms a batch of input
data into a batch of predictions.
Args:
input_data: A tensor of shape (batch_size, n_features).
Returns:
out: A tensor of shape (batch_size, n_classes)
"""
raise NotImplementedError("Each Model must re-implement this method.")
def add_loss_op(self, pred):
"""Adds ops for loss to the computational graph.
Args:
pred: A tensor of shape (batch_size, n_classes)
Returns:
loss: A 0-d tensor (scalar) output
"""
raise NotImplementedError("Each Model must re-implement this method.")
def run_epoch(self, sess, input_data, input_labels):
"""Runs an epoch of training.
Trains the model for one-epoch.
Args:
sess: tf.Session() object
input_data: np.ndarray of shape (n_samples, n_features)
input_labels: np.ndarray of shape (n_samples, n_classes)
Returns:
average_loss: scalar. Average minibatch loss of model on epoch.
"""
raise NotImplementedError("Each Model must re-implement this method.")
def fit(self, sess, input_data, input_labels):
"""Fit model on provided data.
Args:
sess: tf.Session()
input_data: np.ndarray of shape (n_samples, n_features)
input_labels: np.ndarray of shape (n_samples, n_classes)
Returns:
losses: list of loss per epoch
"""
raise NotImplementedError("Each Model must re-implement this method.")
def predict(self, sess, input_data, input_labels=None):
"""Make predictions from the provided model.
Args:
sess: tf.Session()
input_data: np.ndarray of shape (n_samples, n_features)
input_labels: np.ndarray of shape (n_samples, n_classes)
Returns:
average_loss: Average loss of model.
predictions: Predictions of model on input_data
"""
raise NotImplementedError("Each Model must re-implement this method.")
class LanguageModel(Model):
"""Abstracts a Tensorflow graph for learning language models.
Adds ability to do embedding.
"""
def add_embedding(self):
"""Add embedding layer. that maps from vocabulary to vectors.
"""
raise NotImplementedError("Each Model must re-implement this method.")
import time
import math
import numpy as np
import tensorflow as tf
from q1_softmax import softmax
from q1_softmax import cross_entropy_loss
from model import Model
from utils import data_iterator
class Config(object):
"""Holds model hyperparams and data information.
The config class is used to store various hyperparameters and dataset
information parameters. Model objects are passed a Config() object at
instantiation.
"""
batch_size = 64
n_samples = 1024
n_features = 100
n_classes = 5
# You may adjust the max_epochs to ensure convergence.
max_epochs = 50
# You may adjust this learning rate to ensure convergence.
lr = 1e-4
class SoftmaxModel(Model):
"""Implements a Softmax classifier with cross-entropy loss."""
def load_data(self):
"""Creates a synthetic dataset and stores it in memory."""
np.random.seed(1234)
self.input_data = np.random.rand(
self.config.n_samples, self.config.n_features)
self.input_labels = np.ones((self.config.n_samples,), dtype=np.int32)
def add_placeholders(self):
"""Generate placeholder variables to represent the input tensors.
These placeholders are used as inputs by the rest of the model building
code and will be fed data during training.
Adds following nodes to the computational graph
input_placeholder: Input placeholder tensor of shape
(batch_size, n_features), type tf.float32
labels_placeholder: Labels placeholder tensor of shape
(batch_size, n_classes), type tf.int32
Add these placeholders to self as the instance variables
self.input_placeholder
self.labels_placeholder
(Don't change the variable names)
"""
### YOUR CODE HERE
raise NotImplementedError
### END YOUR CODE
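# One possible sketch for the body above (not the graded solution; shapes
# follow the Config class):
#   self.input_placeholder = tf.placeholder(
#       tf.float32, shape=(self.config.batch_size, self.config.n_features))
#   self.labels_placeholder = tf.placeholder(
#       tf.int32, shape=(self.config.batch_size, self.config.n_classes))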
def create_feed_dict(self, input_batch, label_batch):
"""Creates the feed_dict for softmax classifier.
A feed_dict takes the form of:
feed_dict = {
<placeholder>: <tensor of values to be passed for placeholder>,
....
}
If label_batch is None, then no labels are added to feed_dict.
Hint: The keys for the feed_dict should match the placeholder tensors
created in add_placeholders.
Args:
input_batch: A batch of input data.
label_batch: A batch of label data.
Returns:
feed_dict: The feed dictionary mapping from placeholders to values.
"""
### YOUR CODE HERE
raise NotImplementedError
### END YOUR CODE
return feed_dict
def add_training_op(self, loss):
"""Sets up the training Ops.
Creates an optimizer and applies the gradients to all trainable variables.
The Op returned by this function is what must be passed to the
`sess.run()` call to cause the model to train. See
https://www.tensorflow.org/versions/r0.7/api_docs/python/train.html#Optimizer
for more information.
Hint: Use tf.train.GradientDescentOptimizer to get an optimizer object.
Calling optimizer.minimize() will return a train_op object.
Args:
loss: Loss tensor, from cross_entropy_loss.
Returns:
train_op: The Op for training.
"""
### YOUR CODE HERE
raise NotImplementedError
### END YOUR CODE
return train_op
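# One way to fill in the body above (a sketch, not the graded solution):
#   optimizer = tf.train.GradientDescentOptimizer(self.config.lr)
#   train_op = optimizer.minimize(loss)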
def add_model(self, input_data):
"""Adds a linear-layer plus a softmax transformation
The core transformation for this model which transforms a batch of input
data into a batch of predictions. In this case, the mathematical
transformation effected is
y = softmax(xW + b)
Hint: Make sure to create tf.Variables as needed. Also, make sure to use
tf.name_scope to ensure that your name spaces are clean.
Hint: For this simple use-case, it's sufficient to initialize both weights W
and biases b with zeros.
Args:
input_data: A tensor of shape (batch_size, n_features).
Returns:
out: A tensor of shape (batch_size, n_classes)
"""
### YOUR CODE HERE
raise NotImplementedError
### END YOUR CODE
return out
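# A sketch of the body above, with zero-initialized parameters as the hint suggests:
#   with tf.name_scope("softmax_linear"):
#       W = tf.Variable(tf.zeros([self.config.n_features, self.config.n_classes]))
#       b = tf.Variable(tf.zeros([self.config.n_classes]))
#       out = softmax(tf.matmul(input_data, W) + b)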
def add_loss_op(self, pred):
"""Adds cross_entropy_loss ops to the computational graph.
Hint: Use the cross_entropy_loss function we defined. This should be a very
short function.
Args:
pred: A tensor of shape (batch_size, n_classes)
Returns:
loss: A 0-d tensor (scalar)
"""
### YOUR CODE HERE
raise NotImplementedError
### END YOUR CODE
return loss
def run_epoch(self, sess, input_data, input_labels):
"""Runs an epoch of training.
Trains the model for one-epoch.
Args:
sess: tf.Session() object
input_data: np.ndarray of shape (n_samples, n_features)
input_labels: np.ndarray of shape (n_samples, n_classes)
Returns:
average_loss: scalar. Average minibatch loss of model on epoch.
"""
# And then after everything is built, start the training loop.
average_loss = 0
for step, (input_batch, label_batch) in enumerate(
data_iterator(input_data, input_labels,
batch_size=self.config.batch_size,
label_size=self.config.n_classes)):
# Fill a feed dictionary with the actual set of images and labels
# for this particular training step.
feed_dict = self.create_feed_dict(input_batch, label_batch)
# Run one step of the model. The return values are the activations
# from the `self.train_op` (which is discarded) and the `loss` Op. To
# inspect the values of your Ops or variables, you may include them
# in the list passed to sess.run() and the value tensors will be
# returned in the tuple from the call.
_, loss_value = sess.run([self.train_op, self.loss], feed_dict=feed_dict)
average_loss += loss_value
average_loss = average_loss / (step + 1)  # steps are zero-indexed
return average_loss
def fit(self, sess, input_data, input_labels):
"""Fit model on provided data.
Args:
sess: tf.Session()
input_data: np.ndarray of shape (n_samples, n_features)
input_labels: np.ndarray of shape (n_samples, n_classes)
Returns:
losses: list of loss per epoch
"""
losses = []
for epoch in range(self.config.max_epochs):
start_time = time.time()
average_loss = self.run_epoch(sess, input_data, input_labels)
duration = time.time() - start_time
# Print status to stdout.
print('Epoch %d: loss = %.2f (%.3f sec)'
% (epoch, average_loss, duration))
losses.append(average_loss)
return losses
def __init__(self, config):
"""Initializes the model.
Args:
config: A model configuration object of type Config
"""
self.config = config
# Generate placeholders for the images and labels.
self.load_data()
self.add_placeholders()
self.pred = self.add_model(self.input_placeholder)
self.loss = self.add_loss_op(self.pred)
self.train_op = self.add_training_op(self.loss)
def test_SoftmaxModel():
"""Train softmax model for a number of steps."""
config = Config()
with tf.Graph().as_default():
model = SoftmaxModel(config)
# Create a session for running Ops on the Graph.
sess = tf.Session()
# Run the Op to initialize the variables.
init = tf.initialize_all_variables()
sess.run(init)
losses = model.fit(sess, model.input_data, model.input_labels)
# If ops are implemented correctly, the average loss should fall close to zero
# rapidly.
assert losses[-1] < .5
print "Basic (non-exhaustive) classifier tests pass\n"
if __name__ == "__main__":
test_SoftmaxModel()
import numpy as np
import tensorflow as tf
def softmax(x):
"""
Compute the softmax function in tensorflow.
You might find the tensorflow functions tf.exp, tf.reduce_max,
tf.reduce_sum, tf.expand_dims useful. (Many solutions are possible, so you may
not need to use all of these functions). Recall also that many common
tensorflow operations are sugared (e.g. x * y does a tensor multiplication
if x and y are both tensors). Make sure to implement the numerical stability
fixes as in the previous homework!
Args:
x: tf.Tensor with shape (n_samples, n_features). Note feature vectors are
represented by row-vectors. (For simplicity, no need to handle 1-d
input as in the previous homework)
Returns:
out: tf.Tensor with shape (n_samples, n_features). You need to construct this
tensor in this problem.
"""
### YOUR CODE HERE
raise NotImplementedError
### END YOUR CODE
return out
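# One numerically stable formulation (a sketch; other solutions are possible):
#   x_max = tf.reduce_max(x, reduction_indices=1, keep_dims=True)
#   exp_x = tf.exp(x - x_max)
#   out = exp_x / tf.reduce_sum(exp_x, reduction_indices=1, keep_dims=True)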
def cross_entropy_loss(y, yhat):
"""
Compute the cross entropy loss in tensorflow.
y is a one-hot tensor of shape (n_samples, n_classes) and yhat is a tensor
of shape (n_samples, n_classes). y should be of dtype tf.int32, and yhat should
be of dtype tf.float32.
The functions tf.to_float, tf.reduce_sum, and tf.log might prove useful. (Many
solutions are possible, so you may not need to use all of these functions).
Note: You are NOT allowed to use the tensorflow built-in cross-entropy
functions.
Args:
y: tf.Tensor with shape (n_samples, n_classes). One-hot encoded.
yhat: tf.Tensor with shape (n_samples, n_classes). Each row encodes a
probability distribution and should sum to 1.
Returns:
out: tf.Tensor with shape (1,) (Scalar output). You need to construct this
tensor in the problem.
"""
### YOUR CODE HERE
raise NotImplementedError
### END YOUR CODE
return out
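# One possible formulation using only the suggested ops (a sketch):
#   out = -tf.reduce_sum(tf.to_float(y) * tf.log(yhat))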
def test_softmax_basic():
"""
Some simple tests to get you started.
Warning: these are not exhaustive.
"""
print "Running basic tests..."
test1 = softmax(tf.convert_to_tensor(
np.array([[1001,1002],[3,4]]), dtype=tf.float32))
with tf.Session():
test1 = test1.eval()
assert np.amax(np.fabs(test1 - np.array(
[0.26894142, 0.73105858]))) <= 1e-6
test2 = softmax(tf.convert_to_tensor(
np.array([[-1001,-1002]]), dtype=tf.float32))
with tf.Session():
test2 = test2.eval()
assert np.amax(np.fabs(test2 - np.array(
[0.73105858, 0.26894142]))) <= 1e-6
print "Basic (non-exhaustive) softmax tests pass\n"
def test_cross_entropy_loss_basic():
"""
Some simple tests to get you started.
Warning: these are not exhaustive.
"""
y = np.array([[0, 1], [1, 0], [1, 0]])
yhat = np.array([[.5, .5], [.5, .5], [.5, .5]])
test1 = cross_entropy_loss(
tf.convert_to_tensor(y, dtype=tf.int32),
tf.convert_to_tensor(yhat, dtype=tf.float32))
with tf.Session():
test1 = test1.eval()
result = -3 * np.log(.5)
assert np.amax(np.fabs(test1 - result)) <= 1e-6
print "Basic (non-exhaustive) cross-entropy tests pass\n"
if __name__ == "__main__":
test_softmax_basic()
test_cross_entropy_loss_basic()
import os
import getpass
import sys
import time
import numpy as np
import tensorflow as tf
from q2_initialization import xavier_weight_init
import data_utils.utils as du
import data_utils.ner as ner
from utils import data_iterator
from model import LanguageModel
class Config(object):
"""Holds model hyperparams and data information.
The config class is used to store various hyperparameters and dataset
information parameters. Model objects are passed a Config() object at
instantiation.
"""
embed_size = 50
batch_size = 64
label_size = 5
hidden_size = 100
max_epochs = 24
early_stopping = 2
dropout = 0.9
lr = 0.001
l2 = 0.001
window_size = 3
class NERModel(LanguageModel):
"""Implements a NER (Named Entity Recognition) model.
This class implements a deep network for named entity recognition. It
inherits from LanguageModel, which has an add_embedding method in addition to
the standard Model method.
"""
def load_data(self, debug=False):
"""Loads starter word-vectors and train/dev/test data."""
# Load the starter word vectors
self.wv, word_to_num, num_to_word = ner.load_wv(
'data/ner/vocab.txt', 'data/ner/wordVectors.txt')
tagnames = ['O', 'LOC', 'MISC', 'ORG', 'PER']
self.num_to_tag = dict(enumerate(tagnames))
tag_to_num = {v:k for k,v in self.num_to_tag.iteritems()}
# Load the training set
docs = du.load_dataset('data/ner/train')
self.X_train, self.y_train = du.docs_to_windows(
docs, word_to_num, tag_to_num, wsize=self.config.window_size)
if debug:
self.X_train = self.X_train[:1024]
self.y_train = self.y_train[:1024]
# Load the dev set (for tuning hyperparameters)
docs = du.load_dataset('data/ner/dev')
self.X_dev, self.y_dev = du.docs_to_windows(
docs, word_to_num, tag_to_num, wsize=self.config.window_size)
if debug:
self.X_dev = self.X_dev[:1024]
self.y_dev = self.y_dev[:1024]
# Load the test set (dummy labels only)
docs = du.load_dataset('data/ner/test.masked')
self.X_test, self.y_test = du.docs_to_windows(
docs, word_to_num, tag_to_num, wsize=self.config.window_size)
def add_placeholders(self):
"""Generate placeholder variables to represent the input tensors
These placeholders are used as inputs by the rest of the model building
code and will be fed data during training. Note that when "None" is in a
placeholder's shape, it's flexible
Adds following nodes to the computational graph
input_placeholder: Input placeholder tensor of shape
(None, window_size), type tf.int32
labels_placeholder: Labels placeholder tensor of shape
(None, label_size), type tf.float32
dropout_placeholder: Dropout value placeholder (scalar),
type tf.float32
Add these placeholders to self as the instance variables
self.input_placeholder
self.labels_placeholder
self.dropout_placeholder
(Don't change the variable names)
"""
### YOUR CODE HERE
raise NotImplementedError
### END YOUR CODE
def create_feed_dict(self, input_batch, dropout, label_batch=None):
"""Creates the feed_dict for softmax classifier.
A feed_dict takes the form of:
feed_dict = {
<placeholder>: <tensor of values to be passed for placeholder>,
....
}
Hint: The keys for the feed_dict should be a subset of the placeholder
tensors created in add_placeholders.
Hint: When label_batch is None, don't add a labels entry to the feed_dict.
Args:
input_batch: A batch of input data.
label_batch: A batch of label data.
Returns:
feed_dict: The feed dictionary mapping from placeholders to values.
"""
### YOUR CODE HERE
raise NotImplementedError
### END YOUR CODE
return feed_dict
def add_embedding(self):
"""Add embedding layer that maps from vocabulary to vectors.
Creates an embedding tensor of shape (len(self.wv), embed_size). Use the
input_placeholder to retrieve the embeddings for words in the current batch.
(Words are discrete entities. They need to be transformed into vectors for use
in deep-learning. Although we won't do so in this problem, in practice it's
useful to initialize the embedding with pre-trained word-vectors. For this
problem, using the default initializer is sufficient.)
Hint: This layer should use the input_placeholder to index into the
embedding.
Hint: You might find tf.nn.embedding_lookup useful.
Hint: See following link to understand what -1 in a shape means.
https://www.tensorflow.org/versions/r0.8/api_docs/python/array_ops.html#reshape
Hint: Check the last slide from the TensorFlow lecture.
Hint: Here are the dimensions of the variables you will need to create:
L: (len(self.wv), embed_size)
Returns:
window: tf.Tensor of shape (-1, window_size*embed_size)
"""
# The embedding lookup is currently only implemented for the CPU
with tf.device('/cpu:0'):
### YOUR CODE HERE
raise NotImplementedError
### END YOUR CODE
return window
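# A sketch of the body above (names follow the hints; not the graded solution):
#   L = tf.get_variable("L", shape=(len(self.wv), self.config.embed_size))
#   lookups = tf.nn.embedding_lookup(L, self.input_placeholder)
#   window = tf.reshape(
#       lookups, [-1, self.config.window_size * self.config.embed_size])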
def add_model(self, window):
"""Adds the 1-hidden-layer NN.
Hint: Use a variable_scope (e.g. "Layer") for the first hidden layer, and
another variable_scope (e.g. "Softmax") for the linear transformation
preceding the softmax. Make sure to use the xavier_weight_init you
defined in the previous part to initialize weights.
Hint: Make sure to add in regularization and dropout to this network.
Regularization should be an addition to the cost function, while
dropout should be added after both variable scopes.
Hint: You might consider using a tensorflow Graph Collection (e.g
"total_loss") to collect the regularization and loss terms (which you
will add in add_loss_op below).
Hint: Here are the dimensions of the various variables you will need to
create
W: (window_size*embed_size, hidden_size)
b1: (hidden_size,)
U: (hidden_size, label_size)
b2: (label_size)
https://www.tensorflow.org/versions/r0.7/api_docs/python/framework.html#graph-collections
Args:
window: tf.Tensor of shape (-1, window_size*embed_size)
Returns:
output: tf.Tensor of shape (batch_size, label_size)
"""
### YOUR CODE HERE
raise NotImplementedError
### END YOUR CODE
return output
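# A sketch of the 1-hidden-layer network described above, with the L2 terms
# collected into a graph collection as hinted (not the graded solution):
#   with tf.variable_scope("Layer"):
#       W = tf.get_variable("W", (self.config.window_size * self.config.embed_size,
#                                 self.config.hidden_size), initializer=xavier_weight_init())
#       b1 = tf.get_variable("b1", (self.config.hidden_size,))
#       h = tf.nn.dropout(tf.tanh(tf.matmul(window, W) + b1), self.dropout_placeholder)
#   with tf.variable_scope("Softmax"):
#       U = tf.get_variable("U", (self.config.hidden_size, self.config.label_size),
#                           initializer=xavier_weight_init())
#       b2 = tf.get_variable("b2", (self.config.label_size,))
#       output = tf.nn.dropout(tf.matmul(h, U) + b2, self.dropout_placeholder)
#   tf.add_to_collection("total_loss", 0.5 * self.config.l2 * tf.nn.l2_loss(W))
#   tf.add_to_collection("total_loss", 0.5 * self.config.l2 * tf.nn.l2_loss(U))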
def add_loss_op(self, y):
"""Adds cross_entropy_loss ops to the computational graph.
Hint: You can use tf.nn.softmax_cross_entropy_with_logits to simplify your
implementation. You might find tf.reduce_mean useful.
Args:
y: A tensor of shape (batch_size, n_classes)
Returns:
loss: A 0-d tensor (scalar)
"""
### YOUR CODE HERE
raise NotImplementedError
### END YOUR CODE
return loss
def add_training_op(self, loss):
"""Sets up the training Ops.
Creates an optimizer and applies the gradients to all trainable variables.
The Op returned by this function is what must be passed to the
`sess.run()` call to cause the model to train. See
https://www.tensorflow.org/versions/r0.7/api_docs/python/train.html#Optimizer
for more information.
Hint: Use tf.train.AdamOptimizer for this model.
Calling optimizer.minimize() will return a train_op object.
Args:
loss: Loss tensor, from cross_entropy_loss.
Returns:
train_op: The Op for training.
"""
### YOUR CODE HERE
raise NotImplementedError
### END YOUR CODE
return train_op
def __init__(self, config):
"""Constructs the network using the helper functions defined above."""
self.config = config
self.load_data(debug=False)
self.add_placeholders()
window = self.add_embedding()
y = self.add_model(window)
self.loss = self.add_loss_op(y)
self.predictions = tf.nn.softmax(y)
one_hot_prediction = tf.argmax(self.predictions, 1)
correct_prediction = tf.equal(
tf.argmax(self.labels_placeholder, 1), one_hot_prediction)
self.correct_predictions = tf.reduce_sum(tf.cast(correct_prediction, 'int32'))
self.train_op = self.add_training_op(self.loss)
def run_epoch(self, session, input_data, input_labels,
shuffle=True, verbose=True):
orig_X, orig_y = input_data, input_labels
dp = self.config.dropout
# We're interested in keeping track of the loss and accuracy during training
total_loss = []
total_correct_examples = 0
total_processed_examples = 0
total_steps = len(orig_X) / self.config.batch_size
for step, (x, y) in enumerate(
data_iterator(orig_X, orig_y, batch_size=self.config.batch_size,
label_size=self.config.label_size, shuffle=shuffle)):
feed = self.create_feed_dict(input_batch=x, dropout=dp, label_batch=y)
loss, total_correct, _ = session.run(
[self.loss, self.correct_predictions, self.train_op],
feed_dict=feed)
total_processed_examples += len(x)
total_correct_examples += total_correct
total_loss.append(loss)
##
if verbose and step % verbose == 0:
sys.stdout.write('\r{} / {} : loss = {}'.format(
step, total_steps, np.mean(total_loss)))
sys.stdout.flush()
if verbose:
sys.stdout.write('\r')
sys.stdout.flush()
return np.mean(total_loss), total_correct_examples / float(total_processed_examples)
def predict(self, session, X, y=None):
"""Make predictions from the provided model."""
# If y is given, the loss is also calculated
# We deactivate dropout by setting it to 1
dp = 1
losses = []
results = []
if np.any(y):
data = data_iterator(X, y, batch_size=self.config.batch_size,
label_size=self.config.label_size, shuffle=False)
else:
data = data_iterator(X, batch_size=self.config.batch_size,
label_size=self.config.label_size, shuffle=False)
for step, (x, y) in enumerate(data):
feed = self.create_feed_dict(input_batch=x, dropout=dp)
if np.any(y):
feed[self.labels_placeholder] = y
loss, preds = session.run(
[self.loss, self.predictions], feed_dict=feed)
losses.append(loss)
else:
preds = session.run(self.predictions, feed_dict=feed)
predicted_indices = preds.argmax(axis=1)
results.extend(predicted_indices)
return np.mean(losses), results
def print_confusion(confusion, num_to_tag):
"""Helper method that prints confusion matrix."""
# Summing top to bottom gets the total number of tags guessed as T
total_guessed_tags = confusion.sum(axis=0)
# Summing left to right gets the total number of true tags
total_true_tags = confusion.sum(axis=1)
print
print confusion
for i, tag in sorted(num_to_tag.items()):
prec = confusion[i, i] / float(total_guessed_tags[i])
recall = confusion[i, i] / float(total_true_tags[i])
print 'Tag: {} - P {:2.4f} / R {:2.4f}'.format(tag, prec, recall)
def calculate_confusion(config, predicted_indices, y_indices):
"""Helper method that calculates confusion matrix."""
confusion = np.zeros((config.label_size, config.label_size), dtype=np.int32)
for i in xrange(len(y_indices)):
correct_label = y_indices[i]
guessed_label = predicted_indices[i]
confusion[correct_label, guessed_label] += 1
return confusion
def save_predictions(predictions, filename):
"""Saves predictions to provided file."""
with open(filename, "wb") as f:
for prediction in predictions:
f.write(str(prediction) + "\n")
def test_NER():
"""Test NER model implementation.
You can use this function to test your implementation of the Named Entity
Recognition network. When debugging, set max_epochs in the Config object to 1
so you can rapidly iterate.
"""
config = Config()
with tf.Graph().as_default():
model = NERModel(config)
init = tf.initialize_all_variables()
saver = tf.train.Saver()
with tf.Session() as session:
best_val_loss = float('inf')
best_val_epoch = 0
session.run(init)
for epoch in xrange(config.max_epochs):
print 'Epoch {}'.format(epoch)
start = time.time()
###
train_loss, train_acc = model.run_epoch(session, model.X_train,
model.y_train)
val_loss, predictions = model.predict(session, model.X_dev, model.y_dev)
print 'Training loss: {}'.format(train_loss)
print 'Training acc: {}'.format(train_acc)
print 'Validation loss: {}'.format(val_loss)
if val_loss < best_val_loss:
best_val_loss = val_loss
best_val_epoch = epoch
if not os.path.exists("./weights"):
os.makedirs("./weights")
saver.save(session, './weights/ner.weights')
if epoch - best_val_epoch > config.early_stopping:
break
###
confusion = calculate_confusion(config, predictions, model.y_dev)
print_confusion(confusion, model.num_to_tag)
print 'Total time: {}'.format(time.time() - start)
saver.restore(session, './weights/ner.weights')
print 'Test'
print '=-=-='
print 'Writing predictions to q2_test.predicted'
_, predictions = model.predict(session, model.X_test, model.y_test)
save_predictions(predictions, "q2_test.predicted")
if __name__ == "__main__":
test_NER()
import numpy as np
import tensorflow as tf
def xavier_weight_init():
"""
Returns a function that creates a random tensor.
The returned function takes a shape (tuple or 1-d array) and returns a random
tensor of that shape, drawn from the Xavier initialization distribution.
Hint: You might find tf.random_uniform useful.
"""
def _xavier_initializer(shape, **kwargs):
"""Defines an initializer for the Xavier distribution.
This function will be used as a variable scope initializer.
https://www.tensorflow.org/versions/r0.7/how_tos/variable_scope/index.html#initializers-in-variable-scope
Args:
shape: Tuple or 1-d array that specifies dimensions of requested tensor.
Returns:
out: tf.Tensor of specified shape sampled from Xavier distribution.
"""
### YOUR CODE HERE
raise NotImplementedError
### END YOUR CODE
return out
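# One possible body for the initializer above (a sketch): sample uniformly from
# [-epsilon, epsilon] with epsilon = sqrt(6 / sum(shape)).
#   epsilon = np.sqrt(6.0 / np.sum(shape))
#   out = tf.random_uniform(shape, minval=-epsilon, maxval=epsilon)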
# Returns defined initializer function.
return _xavier_initializer
def test_initialization_basic():
"""
Some simple tests for the initialization.
"""
print "Running basic tests..."
xavier_initializer = xavier_weight_init()
shape = (1,)
xavier_mat = xavier_initializer(shape)
assert xavier_mat.get_shape() == shape
shape = (1, 2, 3)
xavier_mat = xavier_initializer(shape)
assert xavier_mat.get_shape() == shape
print "Basic (non-exhaustive) Xavier initialization tests pass\n"
def test_initialization():
"""
Use this space to test your Xavier initialization code by running:
python q1_initialization.py
This function will not be called by the autograder, nor will
your tests be graded.
"""
print "Running your tests..."
### YOUR CODE HERE
raise NotImplementedError
### END YOUR CODE
if __name__ == "__main__":
test_initialization_basic()
import getpass
import sys
import time
import numpy as np
from copy import deepcopy
from utils import calculate_perplexity, get_ptb_dataset, Vocab
from utils import ptb_iterator, sample
import tensorflow as tf
from tensorflow.python.ops.seq2seq import sequence_loss
from model import LanguageModel
# Let's set the parameters of our model
# http://arxiv.org/pdf/1409.2329v4.pdf shows parameters that would achieve near
# SotA numbers
class Config(object):
"""Holds model hyperparams and data information.
The config class is used to store various hyperparameters and dataset
information parameters. Model objects are passed a Config() object at
instantiation.
"""
batch_size = 64
embed_size = 50
hidden_size = 100
num_steps = 10
max_epochs = 16
early_stopping = 2
dropout = 0.9
lr = 0.001
class RNNLM_Model(LanguageModel):
def load_data(self, debug=False):
"""Loads starter word-vectors and train/dev/test data."""
self.vocab = Vocab()
self.vocab.construct(get_ptb_dataset('train'))
self.encoded_train = np.array(
[self.vocab.encode(word) for word in get_ptb_dataset('train')],
dtype=np.int32)
self.encoded_valid = np.array(
[self.vocab.encode(word) for word in get_ptb_dataset('valid')],
dtype=np.int32)
self.encoded_test = np.array(
[self.vocab.encode(word) for word in get_ptb_dataset('test')],
dtype=np.int32)
if debug:
num_debug = 1024
self.encoded_train = self.encoded_train[:num_debug]
self.encoded_valid = self.encoded_valid[:num_debug]
self.encoded_test = self.encoded_test[:num_debug]
def add_placeholders(self):
"""Generate placeholder variables to represent the input tensors
These placeholders are used as inputs by the rest of the model building
code and will be fed data during training. Note that when "None" is in a
placeholder's shape, it's flexible
Adds following nodes to the computational graph.
(When None is in a placeholder's shape, it's flexible)
input_placeholder: Input placeholder tensor of shape
(None, num_steps), type tf.int32
labels_placeholder: Labels placeholder tensor of shape
(None, num_steps), type tf.float32
dropout_placeholder: Dropout value placeholder (scalar),
type tf.float32
Add these placeholders to self as the instance variables
self.input_placeholder
self.labels_placeholder
self.dropout_placeholder
(Don't change the variable names)
"""
### YOUR CODE HERE
raise NotImplementedError
### END YOUR CODE
def add_embedding(self):
"""Add embedding layer.
Hint: This layer should use the input_placeholder to index into the
embedding.
Hint: You might find tf.nn.embedding_lookup useful.
Hint: You might find tf.split, tf.squeeze useful in constructing tensor inputs
Hint: Check the last slide from the TensorFlow lecture.
Hint: Here are the dimensions of the variables you will need to create:
L: (len(self.vocab), embed_size)
Returns:
inputs: List of length num_steps, each of whose elements should be
a tensor of shape (batch_size, embed_size).
"""
# The embedding lookup is currently only implemented for the CPU
with tf.device('/cpu:0'):
### YOUR CODE HERE
raise NotImplementedError
### END YOUR CODE
return inputs
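# A sketch of the body above using the r0.x split/squeeze API (not the graded solution):
#   L = tf.get_variable("L", (len(self.vocab), self.config.embed_size))
#   embeds = tf.nn.embedding_lookup(L, self.input_placeholder)  # (batch, num_steps, embed)
#   inputs = [tf.squeeze(t, [1])
#             for t in tf.split(1, self.config.num_steps, embeds)]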
def add_projection(self, rnn_outputs):
"""Adds a projection layer.
The projection layer transforms the hidden representation to a distribution
over the vocabulary.
Hint: Here are the dimensions of the variables you will need to
create
U: (hidden_size, len(vocab))
b_2: (len(vocab),)
Args:
rnn_outputs: List of length num_steps, each of whose elements should be
a tensor of shape (batch_size, embed_size).
Returns:
outputs: List of length num_steps, each a tensor of shape
(batch_size, len(vocab))
"""
### YOUR CODE HERE
raise NotImplementedError
### END YOUR CODE
return outputs
def add_loss_op(self, output):
"""Adds loss ops to the computational graph.
Hint: Use tensorflow.python.ops.seq2seq.sequence_loss to implement sequence loss.
Args:
output: A tensor of shape (None, len(self.vocab))
Returns:
loss: A 0-d tensor (scalar)
"""
### YOUR CODE HERE
raise NotImplementedError
### END YOUR CODE
return loss
def add_training_op(self, loss):
"""Sets up the training Ops.
Creates an optimizer and applies the gradients to all trainable variables.
The Op returned by this function is what must be passed to the
`sess.run()` call to cause the model to train. See
https://www.tensorflow.org/versions/r0.7/api_docs/python/train.html#Optimizer
for more information.
Hint: Use tf.train.AdamOptimizer for this model.
Calling optimizer.minimize() will return a train_op object.
Args:
loss: Loss tensor, from cross_entropy_loss.
Returns:
train_op: The Op for training.
"""
### YOUR CODE HERE
raise NotImplementedError
### END YOUR CODE
return train_op
def __init__(self, config):
self.config = config
self.load_data(debug=False)
self.add_placeholders()
self.inputs = self.add_embedding()
self.rnn_outputs = self.add_model(self.inputs)
self.outputs = self.add_projection(self.rnn_outputs)
# We want to check how well we correctly predict the next word
# We cast o to float64 as there are numerical issues at hand
# (i.e. sum(output of softmax) = 1.00000298179 and not 1)
self.predictions = [tf.nn.softmax(tf.cast(o, 'float64')) for o in self.outputs]
# Reshape the output into len(vocab) sized chunks - the -1 says as many as
# needed to evenly divide
output = tf.reshape(tf.concat(1, self.outputs), [-1, len(self.vocab)])
self.calculate_loss = self.add_loss_op(output)
self.train_step = self.add_training_op(self.calculate_loss)
def add_model(self, inputs):
"""Creates the RNN LM model.
In the space provided below, you need to implement the equations for the
RNN LM model. Note that you may NOT use built in rnn_cell functions from
tensorflow.
Hint: Use a zeros tensor of shape (batch_size, hidden_size) as
initial state for the RNN. Add this to self as instance variable
self.initial_state
(Don't change variable name)
Hint: Add the last RNN output to self as instance variable
self.final_state
(Don't change variable name)
Hint: Make sure to apply dropout to the inputs and the outputs.
Hint: Use a variable scope (e.g. "RNN") to define RNN variables.
Hint: Perform an explicit for-loop over inputs. You can use
scope.reuse_variables() to ensure that the weights used at each
iteration (each time-step) are the same. (Make sure you don't call
this for iteration 0 though or nothing will be initialized!)
Hint: Here are the dimensions of the various variables you will need to
create:
H: (hidden_size, hidden_size)
I: (embed_size, hidden_size)
b_1: (hidden_size,)
Args:
inputs: List of length num_steps, each of whose elements should be
a tensor of shape (batch_size, embed_size).
Returns:
outputs: List of length num_steps, each of whose elements should be
a tensor of shape (batch_size, hidden_size)
"""
### YOUR CODE HERE
raise NotImplementedError
### END YOUR CODE
return rnn_outputs
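# A sketch of the recurrence described above: sigmoid cell, explicit unrolling,
# dropout on inputs and outputs (not the graded solution).
#   with tf.variable_scope("RNN") as scope:
#       self.initial_state = tf.zeros(
#           [self.config.batch_size, self.config.hidden_size])
#       state = self.initial_state
#       rnn_outputs = []
#       for step, x in enumerate(inputs):
#           if step > 0:
#               scope.reuse_variables()
#           H = tf.get_variable("H", (self.config.hidden_size, self.config.hidden_size))
#           I = tf.get_variable("I", (self.config.embed_size, self.config.hidden_size))
#           b_1 = tf.get_variable("b_1", (self.config.hidden_size,))
#           x = tf.nn.dropout(x, self.dropout_placeholder)
#           state = tf.sigmoid(tf.matmul(state, H) + tf.matmul(x, I) + b_1)
#           rnn_outputs.append(tf.nn.dropout(state, self.dropout_placeholder))
#       self.final_state = state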
def run_epoch(self, session, data, train_op=None, verbose=10):
config = self.config
dp = config.dropout
if not train_op:
train_op = tf.no_op()
dp = 1
total_steps = sum(1 for x in ptb_iterator(data, config.batch_size, config.num_steps))
total_loss = []
state = self.initial_state.eval()
for step, (x, y) in enumerate(
ptb_iterator(data, config.batch_size, config.num_steps)):
# We need to pass in the initial state and retrieve the final state to give
# the RNN proper history
feed = {self.input_placeholder: x,
self.labels_placeholder: y,
self.initial_state: state,
self.dropout_placeholder: dp}
loss, state, _ = session.run(
[self.calculate_loss, self.final_state, train_op], feed_dict=feed)
total_loss.append(loss)
if verbose and step % verbose == 0:
sys.stdout.write('\r{} / {} : pp = {}'.format(
step, total_steps, np.exp(np.mean(total_loss))))
sys.stdout.flush()
if verbose:
sys.stdout.write('\r')
return np.exp(np.mean(total_loss))
def generate_text(session, model, config, starting_text='<eos>',
stop_length=100, stop_tokens=None, temp=1.0):
"""Generate text from the model.
Hint: Create a feed-dictionary and use sess.run() to execute the model. Note
that you will need to use model.initial_state as a key to feed_dict
Hint: Fetch model.final_state and model.predictions[-1]. (You set
model.final_state in add_model() and model.predictions is set in
__init__)
Hint: Store the outputs of running the model in local variables state and
y_pred (used in the pre-implemented parts of this function.)
Args:
session: tf.Session() object
model: Object of type RNNLM_Model
config: A Config() object
starting_text: Initial text passed to model.
Returns:
output: List of word idxs
"""
state = model.initial_state.eval()
# Imagine tokens as a batch size of one, length of len(tokens[0])
tokens = [model.vocab.encode(word) for word in starting_text.split()]
for i in xrange(stop_length):
### YOUR CODE HERE
raise NotImplementedError
### END YOUR CODE
next_word_idx = sample(y_pred[0], temperature=temp)
tokens.append(next_word_idx)
if stop_tokens and model.vocab.decode(tokens[-1]) in stop_tokens:
break
output = [model.vocab.decode(word_idx) for word_idx in tokens]
return output
def generate_sentence(session, model, config, *args, **kwargs):
"""Convenice to generate a sentence from the model."""
return generate_text(session, model, config, *args, stop_tokens=['<eos>'], **kwargs)
def test_RNNLM():
config = Config()
gen_config = deepcopy(config)
gen_config.batch_size = gen_config.num_steps = 1
# We create the training model and generative model
with tf.variable_scope('RNNLM') as scope:
model = RNNLM_Model(config)
# This instructs gen_model to reuse the same variables as the model above
scope.reuse_variables()
gen_model = RNNLM_Model(gen_config)
init = tf.initialize_all_variables()
saver = tf.train.Saver()
with tf.Session() as session:
best_val_pp = float('inf')
best_val_epoch = 0
session.run(init)
for epoch in xrange(config.max_epochs):
print 'Epoch {}'.format(epoch)
start = time.time()
###
train_pp = model.run_epoch(
session, model.encoded_train,
train_op=model.train_step)
valid_pp = model.run_epoch(session, model.encoded_valid)
print 'Training perplexity: {}'.format(train_pp)
print 'Validation perplexity: {}'.format(valid_pp)
if valid_pp < best_val_pp:
best_val_pp = valid_pp
best_val_epoch = epoch
saver.save(session, './ptb_rnnlm.weights')
if epoch - best_val_epoch > config.early_stopping:
break
print 'Total time: {}'.format(time.time() - start)
saver.restore(session, 'ptb_rnnlm.weights')
test_pp = model.run_epoch(session, model.encoded_test)
print '=-=' * 5
print 'Test perplexity: {}'.format(test_pp)
print '=-=' * 5
starting_text = 'in palo alto'
while starting_text:
print ' '.join(generate_sentence(
session, gen_model, gen_config, starting_text=starting_text, temp=1.0))
starting_text = raw_input('> ')
if __name__ == "__main__":
test_RNNLM()
from collections import defaultdict
import numpy as np
class Vocab(object):
def __init__(self):
self.word_to_index = {}
self.index_to_word = {}
self.word_freq = defaultdict(int)
self.total_words = 0
self.unknown = '<unk>'
self.add_word(self.unknown, count=0)
def add_word(self, word, count=1):
if word not in self.word_to_index:
index = len(self.word_to_index)
self.word_to_index[word] = index
self.index_to_word[index] = word
self.word_freq[word] += count
def construct(self, words):
for word in words:
self.add_word(word)
self.total_words = float(sum(self.word_freq.values()))
print '{} total words with {} uniques'.format(self.total_words, len(self.word_freq))
def encode(self, word):
if word not in self.word_to_index:
word = self.unknown
return self.word_to_index[word]
def decode(self, index):
return self.index_to_word[index]
def __len__(self):
return len(self.word_freq)
def calculate_perplexity(log_probs):
# https://web.stanford.edu/class/cs124/lec/languagemodeling.pdf
perp = 0
for p in log_probs:
perp += -p
return np.exp(perp / len(log_probs))
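# Worked example: four tokens each predicted with probability 0.25 give
#   calculate_perplexity([np.log(0.25)] * 4)  # == exp(-log 0.25) == 4.0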
def get_ptb_dataset(dataset='train'):
fn = 'data/ptb/ptb.{}.txt'
for line in open(fn.format(dataset)):
for word in line.split():
yield word
# Add token to the end of the line
# Equivalent to <eos> in:
# https://github.com/wojzaremba/lstm/blob/master/data.lua#L32
# https://github.com/tensorflow/tensorflow/blob/master/tensorflow/models/rnn/ptb/reader.py#L31
yield '<eos>'
def ptb_iterator(raw_data, batch_size, num_steps):
# Pulled from https://github.com/tensorflow/tensorflow/blob/master/tensorflow/models/rnn/ptb/reader.py#L82
raw_data = np.array(raw_data, dtype=np.int32)
data_len = len(raw_data)
batch_len = data_len // batch_size
data = np.zeros([batch_size, batch_len], dtype=np.int32)
for i in range(batch_size):
data[i] = raw_data[batch_len * i:batch_len * (i + 1)]
epoch_size = (batch_len - 1) // num_steps
if epoch_size == 0:
raise ValueError("epoch_size == 0, decrease batch_size or num_steps")
for i in range(epoch_size):
x = data[:, i * num_steps:(i + 1) * num_steps]
y = data[:, i * num_steps + 1:(i + 1) * num_steps + 1]
yield (x, y)
def sample(a, temperature=1.0):
# helper function to sample an index from a probability array
# from https://github.com/fchollet/keras/blob/master/examples/lstm_text_generation.py
a = np.log(a) / temperature
a = np.exp(a) / np.sum(np.exp(a))
return np.argmax(np.random.multinomial(1, a, 1))
def data_iterator(orig_X, orig_y=None, batch_size=32, label_size=2, shuffle=False):
# Optionally shuffle the data before training
if shuffle:
indices = np.random.permutation(len(orig_X))
data_X = orig_X[indices]
data_y = orig_y[indices] if np.any(orig_y) else None
else:
data_X = orig_X
data_y = orig_y
###
total_processed_examples = 0
total_steps = int(np.ceil(len(data_X) / float(batch_size)))
for step in xrange(total_steps):
# Create the batch by selecting up to batch_size elements
batch_start = step * batch_size
x = data_X[batch_start:batch_start + batch_size]
# Convert our target from the class index to a one hot vector
y = None
if np.any(data_y):
y_indices = data_y[batch_start:batch_start + batch_size]
y = np.zeros((len(x), label_size), dtype=np.int32)
y[np.arange(len(y_indices)), y_indices] = 1
###
yield x, y
total_processed_examples += len(x)
# Sanity check to make sure we iterated over all the dataset as intended
assert total_processed_examples == len(data_X), 'Expected {} and processed {}'.format(len(data_X), total_processed_examples)
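# Illustrative usage (toy arrays; shuffle left at its default of False):
#   X = np.arange(10).reshape(5, 2)
#   y = np.array([1, 0, 1, 1, 0])
#   for xb, yb in data_iterator(X, y, batch_size=2, label_size=2):
#       pass  # xb: up to 2 rows of X; yb: matching one-hot rows of shape (len(xb), 2)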
if [ -f assignment2.zip ]; then
echo "data exist"
else
wget http://cs224d.stanford.edu/assignment2/assignment2.zip
fi
if [ $? -eq 0 ]; then
unzip assignment2.zip
cp assignment2_release/data/ner/wordVectors.txt ./data
cp assignment2_release/data/ner/vocab.txt ./data
rm -rf assignment2.zip assignment2_release
else
echo "download data error!" >> /dev/stderr
exit 1
fi
B-LOC
I-LOC
B-MISC
I-MISC
B-ORG
I-ORG
B-PER
I-PER
O
CRICKET NNP I-NP O
- : O O
LEICESTERSHIRE NNP I-NP I-ORG
TAKE NNP I-NP O
OVER IN I-PP O
AT NNP I-NP O
TOP NNP I-NP O
AFTER NNP I-NP O
INNINGS NNP I-NP O
VICTORY NN I-NP O
. . O O
LONDON NNP I-NP I-LOC
1996-08-30 CD I-NP O
West NNP I-NP I-MISC
Indian NNP I-NP I-MISC
all-rounder NN I-NP O
Phil NNP I-NP I-PER
Simmons NNP I-NP I-PER
took VBD I-VP O
four CD I-NP O
for IN I-PP O
38 CD I-NP O
on IN I-PP O
Friday NNP I-NP O
as IN I-PP O
Leicestershire NNP I-NP I-ORG
beat VBD I-VP O
Somerset NNP I-NP I-ORG
by IN I-PP O
an DT I-NP O
innings NN I-NP O
and CC O O
39 CD I-NP O
runs NNS I-NP O
in IN I-PP O
two CD I-NP O
days NNS I-NP O
to TO I-VP O
take VB I-VP O
over IN I-PP O
at IN B-PP O
the DT I-NP O
head NN I-NP O
of IN I-PP O
the DT I-NP O
county NN I-NP O
championship NN I-NP O
. . O O
Their PRP$ I-NP O
stay NN I-NP O
on IN I-PP O
top NN I-NP O
, , O O
though RB I-ADVP O
, , O O
may MD I-VP O
be VB I-VP O
short-lived JJ I-ADJP O
as IN I-PP O
title NN I-NP O
rivals NNS I-NP O
Essex NNP I-NP I-ORG
, , O O
Derbyshire NNP I-NP I-ORG
and CC I-NP O
Surrey NNP I-NP I-ORG
all DT O O
closed VBD I-VP O
in RP I-PRT O
on IN I-PP O
victory NN I-NP O
while IN I-SBAR O
Kent NNP I-NP I-ORG
made VBD I-VP O
up RP I-PRT O
for IN I-PP O
lost VBN I-NP O
time NN I-NP O
in IN I-PP O
their PRP$ I-NP O
rain-affected JJ I-NP O
match NN I-NP O
against IN I-PP O
Nottinghamshire NNP I-NP I-ORG
. . O O
After IN I-PP O
bowling VBG I-NP O
Somerset NNP I-NP I-ORG
out RP I-PRT O
for IN I-PP O
83 CD I-NP O
on IN I-PP O
the DT I-NP O
opening NN I-NP O
morning NN I-NP O
at IN I-PP O
Grace NNP I-NP I-LOC
Road NNP I-NP I-LOC
, , O O
Leicestershire NNP I-NP I-ORG
extended VBD I-VP O
their PRP$ I-NP O
first JJ I-NP O
innings NN I-NP O
by IN I-PP O
94 CD I-NP O
runs VBZ I-VP O
before IN I-PP O
being VBG I-VP O
bowled VBD I-VP O
out RP I-PRT O
for IN I-PP O
296 CD I-NP O
with IN I-PP O
England NNP I-NP I-LOC
discard VBP I-VP O
Andy NNP I-NP I-PER
Caddick NNP I-NP I-PER
taking VBG I-VP O
three CD I-NP O
for IN I-PP O
83 CD I-NP O
. . O O
EU NNP I-NP I-ORG
rejects VBZ I-VP O
German JJ I-NP I-MISC
call NN I-NP O
to TO I-VP O
boycott VB I-VP O
British JJ I-NP I-MISC
lamb NN I-NP O
. . O O
Peter NNP I-NP I-PER
Blackburn NNP I-NP I-PER
BRUSSELS NNP I-NP I-LOC
1996-08-22 CD I-NP O
The DT I-NP O
European NNP I-NP I-ORG
Commission NNP I-NP I-ORG
said VBD I-VP O
on IN I-PP O
Thursday NNP I-NP O
it PRP B-NP O
disagreed VBD I-VP O
with IN I-PP O
German JJ I-NP I-MISC
advice NN I-NP O
to TO I-PP O
consumers NNS I-NP O
to TO I-VP O
shun VB I-VP O
British JJ I-NP I-MISC
lamb NN I-NP O
until IN I-SBAR O
scientists NNS I-NP O
determine VBP I-VP O
whether IN I-SBAR O
mad JJ I-NP O
cow NN I-NP O
disease NN I-NP O
can MD I-VP O
be VB I-VP O
transmitted VBN I-VP O
to TO I-PP O
sheep NN I-NP O
. . O O
Germany NNP I-NP I-LOC
's POS B-NP O
representative NN I-NP O
to TO I-PP O
the DT I-NP O
European NNP I-NP I-ORG
Union NNP I-NP I-ORG
's POS B-NP O
veterinary JJ I-NP O
committee NN I-NP O
Werner NNP I-NP I-PER
Zwingmann NNP I-NP I-PER
said VBD I-VP O
on IN I-PP O
Wednesday NNP I-NP O
consumers NNS I-NP O
should MD I-VP O
buy VB I-VP O
sheepmeat NN I-NP O
from IN I-PP O
countries NNS I-NP O
other JJ I-ADJP O
than IN I-PP O
Britain NNP I-NP I-LOC
until IN I-SBAR O
the DT I-NP O
scientific JJ I-NP O
advice NN I-NP O
was VBD I-VP O
clearer JJR I-ADJP O
. . O O
" " O O
We PRP I-NP O
do VBP I-VP O
n't RB I-VP O
support VB I-VP O
any DT I-NP O
such JJ I-NP O
recommendation NN I-NP O
because IN I-SBAR O
we PRP I-NP O
do VBP I-VP O
n't RB I-VP O
see VB I-VP O
any DT I-NP O
grounds NNS I-NP O
for IN I-PP O
it PRP I-NP O
, , O O
" " O O
the DT I-NP O
Commission NNP I-NP I-ORG
's POS B-NP O
chief JJ I-NP O
spokesman NN I-NP O
Nikolaus NNP I-NP I-PER
van NNP I-NP I-PER
der FW I-NP I-PER
Pas NNP I-NP I-PER
told VBD I-VP O
a DT I-NP O
news NN I-NP O
briefing NN I-NP O
. . O O
He PRP I-NP O
said VBD I-VP O
further JJ I-NP O
scientific JJ I-NP O
study NN I-NP O
was VBD I-VP O
required VBN I-VP O
and CC O O
if IN I-SBAR O
it PRP I-NP O
was VBD I-VP O
found VBN I-VP O
that IN I-SBAR O
action NN I-NP O
was VBD I-VP O
needed VBN I-VP O
it PRP I-NP O
should MD I-VP O
be VB I-VP O
taken VBN I-VP O
by IN I-PP O
the DT I-NP O
European NNP I-NP I-ORG
Union NNP I-NP I-ORG
. . O O
import paddle.v2.fluid as fluid
from paddle.v2.fluid.initializer import NormalInitializer
from utils import logger, load_dict, get_embedding
import math
def ner_net(word_dict_len, label_dict_len, stack_num=2, is_train=True):
mark_dict_len = 2
word_dim = 50
mark_dim = 5
hidden_dim = 300
IS_SPARSE = True
embedding_name = 'emb'
stack_num = 2
word = fluid.layers.data(
name='word', shape=[1], dtype='int64', lod_level=1)
word_embedding = fluid.layers.embedding(
input=word,
size=[word_dict_len, word_dim],
dtype='float32',
is_sparse=IS_SPARSE,
param_attr=fluid.ParamAttr(
name=embedding_name, trainable=True))
mark = fluid.layers.data(
name='mark', shape=[1], dtype='int64', lod_level=1)
mark_embedding = fluid.layers.embedding(
input=mark,
size=[mark_dict_len, mark_dim],
dtype='float32',
is_sparse=IS_SPARSE)
gru_h, c = fluid.layers.dynamic_lstm(input=word_embedding, size=hidden_dim, is_reverse=False)  # hid_dim was undefined in the original; hidden_dim is used instead
gru_max = fluid.layers.sequence_pool(input=gru_h, pool_type='max', act='tanh')
fc1 = fluid.layers.fc(input=gru_max, size=hidden_dim, act='tanh')  # hid_dim2 was undefined in the original; hidden_dim is used instead
prediction = fluid.layers.fc(input=fc1, size=label_dict_len, act='softmax')
'''
cost = fluid.layers.cross_entropy(input=prediction, label=label)
avg_cost = fluid.layers.mean(x=cost)
sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001)
word_caps_vector = fluid.layers.concat(input=[word_embedding, mark_embedding], axis = 1)
mix_hidden_lr = 1e-3
rnn_para_attr = fluid.ParamAttr(
initializer=NormalInitializer(loc=0.0, scale=0.0, seed=0), learning_rate=0.1)
hidden_para_attr = fluid.ParamAttr(
initializer=NormalInitializer(loc=0.0, scale=(1. / math.sqrt(hidden_dim) / 3), seed=0),
learning_rate=mix_hidden_lr)
hidden = fluid.layers.fc(
input=word_caps_vector,
name="__hidden00__",
size=hidden_dim,
act="tanh",
bias_attr=fluid.ParamAttr(
initializer=NormalInitializer(loc=0.0, scale=(1. / math.sqrt(hidden_dim) / 3), seed=0)),
param_attr=fluid.ParamAttr(
initializer=NormalInitializer(loc=0.0, scale=(1. / math.sqrt(hidden_dim) / 3), seed=0)))
fea = []
for direction in ["fwd", "bwd"]:
for i in range(stack_num):
if i != 0:
#print i
#print rnn.shape
#print hidden.shape
#print isinstance(hidden, fluid.framework.Variable)
#print isinstance(rnn, fluid.framework.Variable)
#print dir(rnn)
#print type(rnn)
#print rnn[0].shape
#print rnn[1].shape
#print rnn[2].shape
hidden = fluid.layers.fc(
name="__hidden%02d_%s__" % (i, direction),
size=hidden_dim,
act="stanh",
bias_attr=fluid.ParamAttr(initializer=NormalInitializer(loc=0.0, scale=1.0, seed=0)),
input=[hidden, rnn[0], rnn[1]],
param_attr=[hidden_para_attr, rnn_para_attr, rnn_para_attr])
rnn = fluid.layers.dynamic_lstm(
name="__rnn%02d_%s__" % (i, direction),
input=hidden,
size=hidden_dim,
candidate_activation='relu',
gate_activation='sigmoid',
cell_activation='sigmoid',
bias_attr=fluid.ParamAttr(initializer=NormalInitializer(loc=0.0, scale=1.0, seed=0)),
is_reverse=(i % 2) if direction == "fwd" else not i % 2,
param_attr=rnn_para_attr)
fea += [hidden, rnn[0], rnn[1]]
rnn_fea = fluid.layers.fc(
size=hidden_dim,
bias_attr=fluid.ParamAttr(initializer=NormalInitializer(loc=0.0, scale=(1. / math.sqrt(hidden_dim) / 3), seed=0)),
act="stanh",
input=fea,
param_attr=[hidden_para_attr, rnn_para_attr, rnn_para_attr] * 2)
emission = fluid.layers.fc(size=label_dict_len,
input=rnn_fea,
param_attr=fluid.ParamAttr(initializer=NormalInitializer(loc=0.0, scale=(1. / math.sqrt(hidden_dim) / 3), seed=0)))
'''
if is_train:
target = fluid.layers.data(
name="target",
shape=[1], dtype='int64', lod_level=1)
crf_cost = fluid.layers.linear_chain_crf(
input=prediction,
label=target,
param_attr=fluid.ParamAttr(
name='crfw',
initializer=NormalInitializer(loc=0.0, scale=(1. / math.sqrt(hidden_dim) / 3), seed=0),
learning_rate=1e-3))  # mix_hidden_lr is defined only inside the disabled block above; inlined here
crf_decode = fluid.layers.crf_decoding(
input=prediction,  # emission is defined only inside the disabled block above; use the softmax output
label=target,
param_attr=fluid.ParamAttr(name='crfw'))
return crf_cost, crf_decode, word, mark, target
else:
predict = fluid.layers.crf_decoding(
input=prediction,  # emission is defined only inside the disabled block above; use the softmax output
param_attr=fluid.ParamAttr(name='crfw'))
return predict
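# A minimal training-objective sketch that mirrors the commented-out block above
# (the optimizer choice and learning rate are illustrative):
#   crf_cost, crf_decode, word, mark, target = ner_net(word_dict_len, label_dict_len)
#   avg_cost = fluid.layers.mean(x=crf_cost)
#   sgd_optimizer = fluid.optimizer.SGD(learning_rate=1e-3)
#   sgd_optimizer.minimize(avg_cost)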
import sys
precision_list = []
recall_list = []
f1_list = []
train_precision_list = []
train_recall_list = []
train_f1_list = []
for line in sys.stdin:
line = line.strip()
if line.startswith("[TestSet]"):
tokens = line.split(" ")
for token in tokens:
field_value = token.split(":")
field = field_value[0].strip()
if len(field_value) != 2:
continue
value = float(field_value[1].strip("[] "))
if (field == "pass_precision"):
precision_list.append(value)
if field == "pass_recall":
recall_list.append(value)
if field == "pass_f1_score":
f1_list.append(value)
elif line.startswith("[TrainSet]"):
tokens = line.split(" ")
for token in tokens:
field_value = token.split(":")
if len(field_value) != 2:
continue
field = field_value[0].strip()
value = float(field_value[1].strip("[] "))
if (field == "pass_precision"):
train_precision_list.append(value)
if field == "pass_recall":
train_recall_list.append(value)
if field == "pass_f1_score":
train_f1_list.append(value)
assert len(precision_list) == len(recall_list)
assert len(recall_list) == len(f1_list)
assert len(train_precision_list) == len(train_recall_list)
assert len(train_recall_list) == len(train_f1_list)
for i in xrange(len(precision_list)):
print "\t".join(str(v) for v in [precision_list[i], recall_list[i], f1_list[i],
train_precision_list[i], train_recall_list[i], train_f1_list[i]])
#!/bin/bash
for i in 1 2 3 4 5 6 7 8 9 10
do
echo $i
python train.py >logfile_wending_$i 2>&1 &
done
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import logging
import os
import re
import argparse
import numpy as np
from collections import defaultdict
logger = logging.getLogger("paddle")
logger.setLevel(logging.INFO)
def get_embedding(emb_file='data/wordVectors.txt'):
"""
Get the trained word vector.
"""
return np.loadtxt(emb_file, dtype='float32')
def load_dict(dict_path):
"""
Load the word dictionary from the given file.
Each line of the given file is a word, which can include multiple columns
separated by tabs.
This function takes the first column of a line as the key and the line
number as the value (i.e. the index of the word in the dictionary).
"""
return dict((line.strip().split("\t")[0], idx)
for idx, line in enumerate(open(dict_path, "r").readlines()))
def load_reverse_dict(dict_path):
"""
Load the word dictionary from the given file.
Each line of the given file is a word, which can include multiple columns
separated by tabs.
This function takes the line number of a line as the key (the index of the
word in the dictionary) and the first column of the line as the value.
"""
return dict((idx, line.strip().split("\t")[0])
for idx, line in enumerate(open(dict_path, "r").readlines()))