#!/usr/bin/perl -w
# Add current dir to include
use File::Basename;
use lib dirname (__FILE__);
# Version: ROUGE v1.5.5
# Date: 05/26/2005,05/19/2005,04/26/2005,04/03/2005,10/28/2004,10/25/2004,10/21/2004
# Author: Chin-Yew Lin
# Description: Given an evaluation description file, for example: test.xml,
# this script computes the averages of the average ROUGE scores for
# the evaluation pairs listed in the ROUGE evaluation configuration file.
# For more information, please see:
# http://www.isi.edu/~cyl/ROUGE
# For more information about Basic Elements, please see:
# http://www.isi.edu/~cyl/BE
# Revision Note:
# 1.5.5
# (1) Correct stemming on multi-token BE heads and modifiers.
# Previously, only single token heads and modifiers were assumed.
# (2) Correct the resampling routine which ignores the last evaluation
# item in the evaluation list. Therefore, the average scores reported
# by ROUGE is only based on the first N-1 evaluation items.
# Thanks Barry Schiffman at Columbia University to report this bug.
# This bug only affects ROUGE-1.5.X. For pre-1.5 ROUGE, it only affects
# the computation of confidence interval (CI) estimation, i.e. CI is only
# estimated by the first N-1 evaluation items, but it *does not* affect
# average scores.
# (3) Change read_text and read_text_LCS functions to read exact words or
# bytes required by users. Previous versions carry out whitespace
# compression and other string clear up actions before enforce the length
# limit.
# 1.5.4.1
# (1) Minor description change about "-t 0" option.
# 1.5.4
# (1) Add easy evalution mode for single reference evaluations with -z
# option.
# 1.5.3
# (1) Add option to compute ROUGE score based on SIMPLE BE format. Given
# a set of peer and model summary file in BE format with appropriate
# options, ROUGE will compute matching scores based on BE lexical
# matches.
# There are 6 options:
# 1. H : Head only match. This is similar to unigram match but
# only BE Head is used in matching. BEs generated by
# Minipar-based breaker do not include head-only BEs,
# therefore, the score will always be zero. Use HM or HMR
# optiions instead.
# 2. HM : Head and modifier match. This is similar to bigram or
# skip bigram but it's head-modifier bigram match based on
# parse result. Only BE triples with non-NIL modifier are
# included in the matching.
# 3. HMR : Head, modifier, and relation match. This is similar to
# trigram match but it's head-modifier-relation trigram
# match based on parse result. Only BE triples with non-NIL
# relation are included in the matching.
# 4. HM1 : This is combination of H and HM. It is similar to unigram +
# bigram or skip bigram with unigram match but it's
# head-modifier bigram match based on parse result.
# In this case, the modifier field in a BE can be "NIL"
# 5. HMR1 : This is combination of HM and HMR. It is similar to
# trigram match but it's head-modifier-relation trigram
# match based on parse result. In this case, the relation
# field of the BE can be "NIL".
# 6. HMR2 : This is combination of H, HM and HMR. It is similar to
# trigram match but it's head-modifier-relation trigram
# match based on parse result. In this case, the modifier and
# relation fields of the BE can both be "NIL".
# 1.5.2
# (1) Add option to compute ROUGE score by token using the whole corpus
# as average unit instead of individual sentences. Previous versions of
# ROUGE uses sentence (or unit) boundary to break counting unit and takes
# the average score from the counting unit as the final score.
# Using the whole corpus as one single counting unit can potentially
# improve the reliablity of the final score that treats each token as
# equally important; while the previous approach considers each sentence as
# equally important that ignores the length effect of each individual
# sentences (i.e. long sentences contribute equal weight to the final
# score as short sentences.)
# +v1.2 provide a choice of these two counting modes that users can
# choose the one that fits their scenarios.
# 1.5.1
# (1) Add precision oriented measure and f-measure to deal with different lengths
# in candidates and references. Importance between recall and precision can
# be controled by 'alpha' parameter:
# alpha -> 0: recall is more important
# alpha -> 1: precision is more important
# Following Chapter 7 in C.J. van Rijsbergen's "Information Retrieval".
# http://www.dcs.gla.ac.uk/Keith/Chapter.7/Ch.7.html
# F = 1/(alpha * (1/P) + (1 - alpha) * (1/R)) ;;; weighted harmonic mean
# 1.4.2
# (1) Enforce length limit at the time when summary text is read. Previously (before
# and including v1.4.1), length limit was enforced at tokenization time.
# 1.4.1
# (1) Fix potential over counting in ROUGE-L and ROUGE-W
# In previous version (i.e. 1.4 and order), LCS hit is computed
# by summing union hit over all model sentences. Each model sentence
# is compared with all peer sentences and mark the union LCS. The
# length of the union LCS is the hit of that model sentence. The
# final hit is then sum over all model union LCS hits. This potentially
# would over count a peer sentence which already been marked as contributed
# to some other model sentence. Therefore, double counting is resulted.
# This is seen in evalution where ROUGE-L score is higher than ROUGE-1 and
# this is not correct.
# ROUGEeval-1.4.1.pl fixes this by add a clip function to prevent
# double counting.
# 1.4
# (1) Remove internal Jackknifing procedure:
# Now the ROUGE script will use all the references listed in the
# section in each section and no
# automatic Jackknifing is performed. Please see RELEASE-NOTE.txt
# for more details.
# 1.3
# (1) Add skip bigram
# (2) Add an option to specify the number of sampling point (default is 1000)
# 1.2.3
# (1) Correct the enviroment variable option: -e. Now users can specify evironment
# variable ROUGE_EVAL_HOME using the "-e" option; previously this option is
# not active. Thanks Zhouyan Li of Concordia University, Canada pointing this
# out.
# 1.2.2
# (1) Correct confidence interval calculation for median, maximum, and minimum.
# Line 390.
# 1.2.1
# (1) Add sentence per line format input format. See files in Verify-SPL for examples.
# (2) Streamline command line arguments.
# (3) Use bootstrap resampling to estimate confidence intervals instead of using t-test
# or z-test which assume a normal distribution.
# (4) Add LCS (longest common subsequence) evaluation method.
# (5) Add WLCS (weighted longest common subsequence) evaluation method.
# (6) Add length cutoff in bytes.
# (7) Add an option to specify the longest ngram to compute. The default is 4.
# 1.2
# (1) Change zero condition check in subroutine &computeNGramScores when
# computing $gram1Score from
# if($totalGram2Count!=0) to
# if($totalGram1Count!=0)
# Thanks Ken Litkowski for this bug report.
# This original script will set gram1Score to zero if there is no
# bigram matches. This should rarely has significant affect the final score
# since (a) there are bigram matches most of time; (b) the computation
# of gram1Score is using Jackknifing procedure. However, this definitely
# did not compute the correct $gram1Score when there is no bigram matches.
# Therefore, users of version 1.1 should definitely upgrade to newer
# version of the script that does not contain this bug.
# Note: To use this script, two additional data files are needed:
# (1) smart_common_words.txt - contains stopword list from SMART IR engine
# (2) WordNet-2.0.exc.db - WordNet 2.0 exception inflexion database
# These two files have to be put in a directory pointed by the environment
# variable: "ROUGE_EVAL_HOME".
# If environment variable ROUGE_EVAL_HOME does not exist, this script will
# will assume it can find these two database files in the current directory.
# COPYRIGHT (C) UNIVERSITY OF SOUTHERN CALIFORNIA, 2002,2003,2004
# University of Southern California
# Information Sciences Institute
# 4676 Admiralty Way
# Marina Del Rey, California 90292-6695
#
# This software was partially developed under SPAWAR Grant No.
# N66001-00-1-8916 , and the Government holds license rights under
# DAR 7-104.9(a)(c)(1). It is
# transmitted outside of the University of Southern California only under
# written license agreements or software exchange agreements, and its use
# is limited by these agreements. At no time shall any recipient use
# this software in any manner which conflicts or interferes with the
# governmental license rights or other provisions of the governing
# agreement under which it is obtained. It is supplied "AS IS," without
# any warranties of any kind. It is furnished only on the basis that any
# party who receives it indemnifies and holds harmless the parties who
# furnish and originate it against any claims, demands or liabilities
# connected with using it, furnishing it to others or providing it to a
# third party. THIS NOTICE MUST NOT BE REMOVED FROM THE SOFTWARE,
# AND IN THE EVENT THAT THE SOFTWARE IS DIVIDED, IT SHOULD BE
# ATTACHED TO EVERY PART.
#
# Contributor to its design is Chin-Yew Lin.
use XML::DOM;
use DB_File;
use Getopt::Std;
#-------------------------------------------------------------------------------------
use vars qw($opt_a $opt_b $opt_c $opt_d $opt_e $opt_f $opt_h $opt_H $opt_m $opt_n $opt_p $opt_s $opt_t $opt_l $opt_v $opt_w $opt_2 $opt_u $opt_x $opt_U $opt_3 $opt_M $opt_z);
my $usageFull="$0\n [-a (evaluate all systems)]
[-c cf]
[-d (print per evaluation scores)]
[-e ROUGE_EVAL_HOME]
[-h (usage)]
[-H (detailed usage)]
[-b n-bytes|-l n-words]
[-m (use Porter stemmer)]
[-n max-ngram]
[-s (remove stopwords)]
[-r number-of-samples (for resampling)]
[-2 max-gap-length (if < 0 then no gap length limit)]
[-3 (for scoring based on BE)]
[-u (include unigram in skip-bigram) default no)]
[-U (same as -u but also compute regular skip-bigram)]
[-w weight (weighting factor for WLCS)]
[-v (verbose)]
[-x (do not calculate ROUGE-L)]
[-f A|B (scoring formula)]
[-p alpha (0 <= alpha <=1)]
[-t 0|1|2 (count by token instead of sentence)]
[-z ]
[]\n
".
"ROUGE-eval-config-file: Specify the evaluation setup. Three files come with the ROUGE evaluation package, i.e.\n".
" ROUGE-test.xml, verify.xml, and verify-spl.xml are good examples.\n".
"systemID: Specify which system in the ROUGE-eval-config-file to perform the evaluation.\n".
" If '-a' option is used, then all systems are evaluated and users do not need to\n".
" provide this argument.\n".
"Default:\n".
" When running ROUGE without supplying any options (except -a), the following defaults are used:\n".
" (1) ROUGE-L is computed;\n".
" (2) 95% confidence interval;\n".
" (3) No stemming;\n".
" (4) Stopwords are inlcuded in the calculations;\n".
" (5) ROUGE looks for its data directory first through the ROUGE_EVAL_HOME environment variable. If\n".
" it is not set, the current directory is used.\n".
" (6) Use model average scoring formula.\n".
" (7) Assign equal importance of ROUGE recall and precision in computing ROUGE f-measure, i.e. alpha=0.5.\n".
" (8) Compute average ROUGE by averaging sentence (unit) ROUGE scores.\n".
"Options:\n".
" -2: Compute skip bigram (ROGUE-S) co-occurrence, also specify the maximum gap length between two words (skip-bigram)\n".
" -u: Compute skip bigram as -2 but include unigram, i.e. treat unigram as \"start-sentence-symbol unigram\"; -2 has to be specified.\n".
" -3: Compute BE score. Currently only SIMPLE BE triple format is supported.\n".
" H -> head only scoring (does not applied to Minipar-based BEs).\n".
" HM -> head and modifier pair scoring.\n".
" HMR -> head, modifier and relation triple scoring.\n".
" HM1 -> H and HM scoring (same as HM for Minipar-based BEs).\n".
" HMR1 -> HM and HMR scoring (same as HMR for Minipar-based BEs).\n".
" HMR2 -> H, HM and HMR scoring (same as HMR for Minipar-based BEs).\n".
" -a: Evaluate all systems specified in the ROUGE-eval-config-file.\n".
" -c: Specify CF\% (0 <= CF <= 100) confidence interval to compute. The default is 95\% (i.e. CF=95).\n".
" -d: Print per evaluation average score for each system.\n".
" -e: Specify ROUGE_EVAL_HOME directory where the ROUGE data files can be found.\n".
" This will overwrite the ROUGE_EVAL_HOME specified in the environment variable.\n".
" -f: Select scoring formula: 'A' => model average; 'B' => best model\n".
" -h: Print usage information.\n".
" -H: Print detailed usage information.\n".
" -b: Only use the first n bytes in the system/peer summary for the evaluation.\n".
" -l: Only use the first n words in the system/peer summary for the evaluation.\n".
" -m: Stem both model and system summaries using Porter stemmer before computing various statistics.\n".
" -n: Compute ROUGE-N up to max-ngram length will be computed.\n".
" -p: Relative importance of recall and precision ROUGE scores. Alpha -> 1 favors precision, Alpha -> 0 favors recall.\n".
" -s: Remove stopwords in model and system summaries before computing various statistics.\n".
" -t: Compute average ROUGE by averaging over the whole test corpus instead of sentences (units).\n".
" 0: use sentence as counting unit, 1: use token as couting unit, 2: same as 1 but output raw counts\n".
" instead of precision, recall, and f-measure scores. 2 is useful when computation of the final,\n".
" precision, recall, and f-measure scores will be conducted later.\n".
" -r: Specify the number of sampling point in bootstrap resampling (default is 1000).\n".
" Smaller number will speed up the evaluation but less reliable confidence interval.\n".
" -w: Compute ROUGE-W that gives consecutive matches of length L in an LCS a weight of 'L^weight' instead of just 'L' as in LCS.\n".
" Typically this is set to 1.2 or other number greater than 1.\n".
" -v: Print debugging information for diagnositic purpose.\n".
" -x: Do not calculate ROUGE-L.\n".
" -z: ROUGE-eval-config-file is a list of peer-model pair per line in the specified format (SEE|SPL|ISI|SIMPLE).\n";
my $usage="$0\n [-a (evaluate all systems)]
[-c cf]
[-d (print per evaluation scores)]
[-e ROUGE_EVAL_HOME]
[-h (usage)]
[-H (detailed usage)]
[-b n-bytes|-l n-words]
[-m (use Porter stemmer)]
[-n max-ngram]
[-s (remove stopwords)]
[-r number-of-samples (for resampling)]
[-2 max-gap-length (if < 0 then no gap length limit)]
[-3 (for scoring based on BE)]
[-u (include unigram in skip-bigram) default no)]
[-U (same as -u but also compute regular skip-bigram)]
[-w weight (weighting factor for WLCS)]
[-v (verbose)]
[-x (do not calculate ROUGE-L)]
[-f A|B (scoring formula)]
[-p alpha (0 <= alpha <=1)]
[-t 0|1|2 (count by token instead of sentence)]
[-z ]
[]
";
getopts('ahHb:c:de:f:l:mMn:p:st:r:2:3:w:uUvxz:');
my $systemID;
die $usageFull if defined($opt_H);
die $usage if defined($opt_h)||@ARGV==0;
die "Please specify the ROUGE configuration file or use option '-h' for help\n" if(@ARGV==0);
if(@ARGV==1&&defined($opt_z)) {
$systemID="X"; # default system ID
}
elsif(@ARGV==1&&!defined($opt_a)) {
die "Please specify a system ID to evaluate or use option '-a' to evaluate all systems. For more information, use option '-h'.\n";
}
elsif(@ARGV==2) {
$systemID=$ARGV[1];
}
if(defined($opt_e)) {
$stopwords="$opt_e/smart_common_words.txt";
$wordnetDB="$opt_e/WordNet-2.0.exc.db";
}
else {
if(exists($ENV{"ROUGE_EVAL_HOME"})) {
$stopwords="$ENV{\"ROUGE_EVAL_HOME\"}/smart_common_words.txt";
$wordnetDB="$ENV{\"ROUGE_EVAL_HOME\"}/WordNet-2.0.exc.db";
}
elsif(exists($ENV{"RED_EVAL_HOME"})) {
$stopwords="$ENV{\"RED_EVAL_HOME\"}/smart_common_words.txt";
$wordnetDB="$ENV{\"RED_EVAL_HOME\"}/WordNet-2.0.exc.db";
}
else {
# if no environment variable exists then assume data files are in the current directory
$stopwords="smart_common_words.txt";
$wordnetDB="WordNet-2.0.exc.db";
}
}
if(defined($opt_s)) {
$useStopwords=0; # do not use stop words
}
else {
$useStopwords=1; # use stop words
}
if(defined($opt_l)&&defined($opt_b)) {
die "Please specify length limit in words or bytes but not both.\n";
}
if(defined($opt_l)) {
$lengthLimit=$opt_l;
$byteLimit=0; # no byte limit
}
elsif(defined($opt_b)) {
$lengthLimit=0; # no length limit in words
$byteLimit=$opt_b;
}
else {
$byteLimit=0; # no byte limit
$lengthLimit=0; # no length limit
}
unless(defined($opt_c)) {
$opt_c=95;
}
else {
if($opt_c<0||$opt_c>100) {
die "Confidence interval should be within 0 and 100. Use option -h for more details.\n";
}
}
if(defined($opt_w)) {
if($opt_w>0) {
$weightFactor=$opt_w;
}
else {
die "ROUGE-W weight factor must greater than 0.\n";
}
}
#unless(defined($opt_n)) {
# $opt_n=4; # default maximum ngram is 4
#}
if(defined($opt_v)) {
$debug=1;
}
else {
$debug=0;
}
if(defined($opt_r)) {
$numOfResamples=$opt_r;
}
else {
$numOfResamples=1000;
}
if(defined($opt_2)) {
$skipDistance=$opt_2;
}
if(defined($opt_3)) {
$BEMode=$opt_3;
}
if(defined($opt_f)) {
$scoreMode=$opt_f;
}
else {
$scoreMode="A"; # default: use model average scoring formula
}
if(defined($opt_p)) {
$alpha=$opt_p;
if($alpha<0||
$alpha>1) {
die "Relative importance of ROUGE recall and precision has to be between 0 and 1 inclusively.\n";
}
}
else {
$alpha=0.5; # default is equal importance of ROUGE recall and precision
}
if(defined($opt_t)) {
# make $opt_t as undef when appropriate option is given
# when $opt_t is undef, sentence level average will be used
if($opt_t==0) {
$opt_t=undef;
}
elsif($opt_t!=1&&
$opt_t!=2) {
$opt_t=undef; # other than 1 or 2, let $opt_t to be undef
}
}
if(defined($opt_z)) {
# If opt_z is specified, the user has to specify a system ID that
# is used for identification therefore -a option is not allowed.
# Here we make it undef.
$opt_a=undef;
}
#-------------------------------------------------------------------------------------
# Setup ROUGE scoring parameters
%ROUGEParam=(); # ROUGE scoring parameter
if(defined($lengthLimit)) {
$ROUGEParam{"LENGTH"}=$lengthLimit;
}
else {
$ROUGEParam{"LENGTH"}=undef;
}
if(defined($byteLimit)) {
$ROUGEParam{"BYTE"}=$byteLimit;
}
else {
$ROUGEParam{"BYTE"}=undef;
}
if(defined($opt_n)) { # ngram size
$ROUGEParam{"NSIZE"}=$opt_n;
}
else {
$ROUGEParam{"NSIZE"}=undef;
}
if(defined($weightFactor)) {
$ROUGEParam{"WEIGHT"}=$weightFactor;
}
else {
$ROUGEParam{"WEIGHT"}=undef;
}
if(defined($skipDistance)) {
$ROUGEParam{"SD"}=$skipDistance;
}
else {
$ROUGEParam{"SD"}=undef;
}
if(defined($scoreMode)) {
$ROUGEParam{"SM"}=$scoreMode;
}
else {
$ROUGEParam{"SM"}=undef;
}
if(defined($alpha)) {
$ROUGEParam{"ALPHA"}=$alpha;
}
else {
$ROUGEParam{"ALPHA"}=undef;
}
if(defined($opt_t)) {
$ROUGEParam{"AVERAGE"}=$opt_t;
}
else {
$ROUGEParam{"AVERAGE"}=undef;
}
if(defined($opt_3)) {
$ROUGEParam{"BEMODE"}=$opt_3;
}
else {
$ROUGEParam{"BEMODE"}=undef;
}
#-------------------------------------------------------------------------------------
# load stopwords
%stopwords=();
open(STOP,$stopwords)||die "Cannot open $stopwords\n";
while(defined($line=)) {
chomp($line);
$stopwords{$line}=1;
}
close(STOP);
# load WordNet database
if(-e "$wordnetDB") {
tie %exceptiondb,'DB_File',"$wordnetDB",O_RDONLY,0440,$DB_HASH or
die "Cannot open exception db file for reading: $wordnetDB\n";
}
else {
die "Cannot open exception db file for reading: $wordnetDB\n";
}
#-------------------------------------------------------------------------------------
# Initialize Porter Stemmer
&initialise();
#-------------------------------------------------------------------------------------
# Read and parse the document
my $parser = new XML::DOM::Parser;
my $doc;
unless(defined($opt_z)) {
$doc=$parser->parsefile($ARGV[0]);
}
else {
open($doc,$ARGV[0])||die "Cannot open $ARGV[0]\n";
}
%ROUGEEvals=();
@ROUGEEvalIDs=();
%ROUGEPeerIDTable=();
@allPeerIDs=();
%knownMissing=(); # remember missing submission already known
if(defined($doc)) {
# read evaluation description file
&readEvals(\%ROUGEEvals,\@ROUGEEvalIDs,\%ROUGEPeerIDTable,$doc,undef);
# print evaluation configuration
if(defined($opt_z)) {
if(defined($ARGV[1])) {
$systemID=$ARGV[1];
}
else {
$systemID="X"; # default system ID in BE file list evaluation mode
}
push(@allPeerIDs,$systemID);
}
else {
unless(defined($opt_a)) {
$systemID=$ARGV[1];
push(@allPeerIDs,$systemID);
}
else {
# run evaluation for each peer listed in the description file
@allPeerIDs=sort (keys %ROUGEPeerIDTable);
}
}
foreach $peerID (@allPeerIDs) {
%testIDs=();
# print "\@PEER($peerID)--------------------------------------------------\n";
if(defined($opt_n)) {
# evaluate a specific peer
# compute ROUGE score up to $opt_n-gram
for($n=1;$n<=$opt_n;$n++) {
my (%ROUGEScores,%ROUGEAverages);
%ROUGEScores=();
foreach $e (@ROUGEEvalIDs) {
if($debug) {
print "\@Eval ($e)\n";
}
$ROUGEParam{"NSIZE"}=$n;
&computeROUGEX("N",\%ROUGEScores,$e,$ROUGEEvals{$e},$peerID,\%ROUGEParam);
}
# compute averages
%ROUGEAverages=();
&computeAverages(\%ROUGEScores,\%ROUGEAverages,$opt_t);
&printResults($peerID,\%ROUGEAverages,\%ROUGEScores,"ROUGE-$n",$opt_c,$opt_t,$opt_d);
}
}
unless(defined($opt_x)||defined($opt_3)) {
#-----------------------------------------------
# compute LCS score
%ROUGEScores=();
foreach $e (@ROUGEEvalIDs) {
&computeROUGEX("L",\%ROUGEScores,$e,$ROUGEEvals{$e},$peerID,\%ROUGEParam);
}
# compute averages
%ROUGEAverages=();
&computeAverages(\%ROUGEScores,\%ROUGEAverages,$opt_t);
&printResults($peerID,\%ROUGEAverages,\%ROUGEScores,"ROUGE-L",$opt_c,$opt_t,$opt_d);
}
if(defined($opt_w)) {
#-----------------------------------------------
# compute WLCS score
%ROUGEScores=();
foreach $e (@ROUGEEvalIDs) {
&computeROUGEX("W",\%ROUGEScores,$e,$ROUGEEvals{$e},$peerID,\%ROUGEParam);
}
# compute averages
%ROUGEAverages=();
&computeAverages(\%ROUGEScores,\%ROUGEAverages,$opt_t);
&printResults($peerID,\%ROUGEAverages,\%ROUGEScores,"ROUGE-W-$weightFactor",$opt_c,$opt_t,$opt_d);
}
if(defined($opt_2)) {
#-----------------------------------------------
# compute skip bigram score
%ROUGEScores=();
foreach $e (@ROUGEEvalIDs) {
&computeROUGEX("S",\%ROUGEScores,$e,$ROUGEEvals{$e},$peerID,\%ROUGEParam);
}
# compute averages
%ROUGEAverages=();
&computeAverages(\%ROUGEScores,\%ROUGEAverages,$opt_t);
if($skipDistance>=0) {
if(defined($opt_u)) {
&printResults($peerID,\%ROUGEAverages,\%ROUGEScores,"ROUGE-SU$skipDistance",$opt_c,$opt_t,$opt_d);
}
elsif(defined($opt_U)) {
# print regular skip bigram results
&printResults($peerID,\%ROUGEAverages,\%ROUGEScores,"ROUGE-S$skipDistance",$opt_c,$opt_t,$opt_d);
#-----------------------------------------------
# compute skip bigram with unigram extension score
$opt_u=1;
%ROUGEScores=();
foreach $e (@ROUGEEvalIDs) {
&computeROUGEX("S",\%ROUGEScores,$e,$ROUGEEvals{$e},$peerID,\%ROUGEParam);
}
$opt_u=undef;
# compute averages
%ROUGEAverages=();
&computeAverages(\%ROUGEScores,\%ROUGEAverages,$opt_t);
&printResults($peerID,\%ROUGEAverages,\%ROUGEScores,"ROUGE-SU$skipDistance",$opt_c,$opt_t,$opt_d);
}
else {
&printResults($peerID,\%ROUGEAverages,\%ROUGEScores,"ROUGE-S$skipDistance",$opt_c,$opt_t,$opt_d);
}
}
else {
if(defined($opt_u)) {
&printResults($peerID,\%ROUGEAverages,\%ROUGEScores,"ROUGE-SU*",$opt_c,$opt_t,$opt_d);
}
else {
&printResults($peerID,\%ROUGEAverages,\%ROUGEScores,"ROUGE-S*",$opt_c,$opt_t,$opt_d);
if(defined($opt_U)) {
#-----------------------------------------------
# compute skip bigram with unigram extension score
$opt_u=1;
%ROUGEScores=();
foreach $e (@ROUGEEvalIDs) {
&computeROUGEX("S",\%ROUGEScores,$e,$ROUGEEvals{$e},$peerID,\%ROUGEParam);
}
$opt_u=undef;
# compute averages
%ROUGEAverages=();
&computeAverages(\%ROUGEScores,\%ROUGEAverages,$opt_t);
&printResults($peerID,\%ROUGEAverages,\%ROUGEScores,"ROUGE-SU*",$opt_c,$opt_t,$opt_d);
}
}
}
}
if(defined($opt_3)) {
#-----------------------------------------------
# compute Basic Element triple score
%ROUGEScores=();
foreach $e (@ROUGEEvalIDs) {
&computeROUGEX("BE",\%ROUGEScores,$e,$ROUGEEvals{$e},$peerID,\%ROUGEParam);
}
# compute averages
%ROUGEAverages=();
&computeAverages(\%ROUGEScores,\%ROUGEAverages,$opt_t);
&printResults($peerID,\%ROUGEAverages,\%ROUGEScores,"ROUGE-BE-$BEMode",$opt_c,$opt_t,$opt_d);
}
}
}
else {
die "Document undefined\n";
}
if(defined($opt_z)) {
close($doc);
}
untie %exceptiondb;
sub printResults {
my $peerID=shift;
my $ROUGEAverages=shift;
my $ROUGEScores=shift;
my $methodTag=shift;
my $opt_c=shift;
my $opt_t=shift;
my $opt_d=shift;
print "---------------------------------------------\n";
if(!defined($opt_t)||$opt_t==1) {
print "$peerID $methodTag Average_R: $ROUGEAverages->{'AvgR'} ";
print "($opt_c\%-conf.int. $ROUGEAverages->{'CIAvgL_R'} - $ROUGEAverages->{'CIAvgU_R'})\n";
print "$peerID $methodTag Average_P: $ROUGEAverages->{'AvgP'} ";
print "($opt_c\%-conf.int. $ROUGEAverages->{'CIAvgL_P'} - $ROUGEAverages->{'CIAvgU_P'})\n";
print "$peerID $methodTag Average_F: $ROUGEAverages->{'AvgF'} ";
print "($opt_c\%-conf.int. $ROUGEAverages->{'CIAvgL_F'} - $ROUGEAverages->{'CIAvgU_F'})\n";
}
else {
print "$peerID $methodTag M_count: ";
print int($ROUGEAverages->{'M_cnt'});
print " P_count: ";
print int($ROUGEAverages->{'P_cnt'});
print " H_count: ";
print int($ROUGEAverages->{'H_cnt'});
print "\n";
}
if(defined($opt_d)) {
print ".............................................\n";
&printPerEvalData($ROUGEScores,"$peerID $methodTag Eval");
}
}
sub bootstrapResampling {
my $scores=shift;
my $instances=shift;
my $seed=shift;
my $opt_t=shift;
my $sample;
my ($i,$ridx);
# Use $seed to seed the random number generator to make sure
# we have the same random sequence every time, therefore a
# consistent estimation of confidence interval in different runs.
# This is not necessary. To ensure a consistent result in reporting
# results using ROUGE, this is implemented.
srand($seed);
for($i=0;$i<@{$instances};$i++) {
# generate a random index
$ridx=int(rand(@{$instances}));
unless(defined($sample)) {
# setup the resampling array
$sample=[];
push(@$sample,$scores->{$instances->[$ridx]}[0]);
push(@$sample,$scores->{$instances->[$ridx]}[1]);
push(@$sample,$scores->{$instances->[$ridx]}[2]);
}
else {
# update the resampling array
$sample->[0]+=$scores->{$instances->[$ridx]}[0];
$sample->[1]+=$scores->{$instances->[$ridx]}[1];
$sample->[2]+=$scores->{$instances->[$ridx]}[2];
}
}
# compute the average result for this resampling procedure
unless(defined($opt_t)) {
# per instance or sentence average
if(@{$instances}>0) {
$sample->[0]/=@{$instances};
$sample->[1]/=@{$instances};
$sample->[2]/=@{$instances};
}
else {
$sample->[0]=0;
$sample->[1]=0;
$sample->[2]=0;
}
}
else {
if($opt_t==1) {
# per token or corpus level average
# output recall, precision, and f-measure score
my ($tmpR,$tmpP,$tmpF);
if($sample->[0]>0) {
$tmpR=$sample->[2]/$sample->[0]; # recall
}
else {
$tmpR=0;
}
if($sample->[1]>0) {
$tmpP=$sample->[2]/$sample->[1]; # precision
}
else {
$tmpP=0;
}
if((1-$alpha)*$tmpP+$alpha*$tmpR>0) {
$tmpF=($tmpR*$tmpP)/((1-$alpha)*$tmpP+$alpha*$tmpR); # f-measure
}
else {
$tmpF=0;
}
$sample->[0]=$tmpR;
$sample->[1]=$tmpP;
$sample->[2]=$tmpF;
}
else {
# $opt_t!=1 => output raw model token count, peer token count, and hit count
# do nothing, just return $sample
}
}
return $sample;
}
sub by_value {
$a<=>$b;
}
sub printPerEvalData {
my $ROUGEScores=shift;
my $tag=shift; # tag to identify each evaluation
my (@instances,$i,$j);
@instances=sort by_evalID (keys %$ROUGEScores);
foreach $i (@instances) {
# print average per evaluation score
print "$tag $i R:$ROUGEScores->{$i}[0] P:$ROUGEScores->{$i}[1] F:$ROUGEScores->{$i}[2]\n";
}
}
sub by_evalID {
my ($a1,$b1);
if($a=~/^([0-9]+)/o) {
$a1=$1;
}
if($b=~/^([0-9]+)/o) {
$b1=$1;
}
if(defined($a1)&&defined($b1)) {
return $a1<=>$b1;
}
else {
return $a cmp $b;
}
}
sub computeAverages {
my $ROUGEScores=shift;
my $ROUGEAverages=shift;
my $opt_t=shift;
my ($avgAvgROUGE_R,$resampleAvgROUGE_R);
my ($avgAvgROUGE_P,$resampleAvgROUGE_P);
my ($avgAvgROUGE_F,$resampleAvgROUGE_F);
my ($ciU,$ciL);
my (@instances,$i,$j,@rankedArray_R,@rankedArray_P,@RankedArray_F);
@instances=sort (keys %$ROUGEScores);
$avgAvgROUGE_R=0;
$avgAvgROUGE_P=0;
$avgAvgROUGE_F=0;
$resampleAvgROUGE_R=0;
$resampleAvgROUGE_P=0;
$resampleAvgROUGE_F=0;
# compute totals
foreach $i (@instances) {
$avgAvgROUGE_R+=$ROUGEScores->{$i}[0]; # recall ; or model token count
$avgAvgROUGE_P+=$ROUGEScores->{$i}[1]; # precision ; or peer token count
$avgAvgROUGE_F+=$ROUGEScores->{$i}[2]; # f1-measure ; or match token count (hit)
}
# compute averages
unless(defined($opt_t)) {
# per sentence average
if((scalar @instances)>0) {
$avgAvgROUGE_R=sprintf("%7.5f",$avgAvgROUGE_R/(scalar @instances));
$avgAvgROUGE_P=sprintf("%7.5f",$avgAvgROUGE_P/(scalar @instances));
$avgAvgROUGE_F=sprintf("%7.5f",$avgAvgROUGE_F/(scalar @instances));
}
else {
$avgAvgROUGE_R=sprintf("%7.5f",0);
$avgAvgROUGE_P=sprintf("%7.5f",0);
$avgAvgROUGE_F=sprintf("%7.5f",0);
}
}
else {
if($opt_t==1) {
# per token average on corpus level
my ($tmpR,$tmpP,$tmpF);
if($avgAvgROUGE_R>0) {
$tmpR=$avgAvgROUGE_F/$avgAvgROUGE_R;
}
else {
$tmpR=0;
}
if($avgAvgROUGE_P>0) {
$tmpP=$avgAvgROUGE_F/$avgAvgROUGE_P;
}
else {
$tmpP=0;
}
if((1-$alpha)*$tmpP+$alpha*$tmpR>0) {
$tmpF=($tmpR+$tmpP)/((1-$alpha)*$tmpP+$alpha*$tmpR);
}
else {
$tmpF=0;
}
$avgAvgROUGE_R=sprintf("%7.5f",$tmpR);
$avgAvgROUGE_P=sprintf("%7.5f",$tmpP);
$avgAvgROUGE_F=sprintf("%7.5f",$tmpF);
}
}
if(!defined($opt_t)||$opt_t==1) {
# compute confidence intervals using bootstrap resampling
@ResamplingArray=();
for($i=0;$i<$numOfResamples;$i++) {
my $sample;
$sample=&bootstrapResampling($ROUGEScores,\@instances,$i,$opt_t);
# sample contains average sum of the sample
if(@ResamplingArray==0) {
# setup the resampling array for Avg
my $s;
$s=[];
push(@$s,$sample->[0]);
push(@ResamplingArray,$s);
$s=[];
push(@$s,$sample->[1]);
push(@ResamplingArray,$s);
$s=[];
push(@$s,$sample->[2]);
push(@ResamplingArray,$s);
}
else {
$rsa=$ResamplingArray[0];
push(@{$rsa},$sample->[0]);
$rsa=$ResamplingArray[1];
push(@{$rsa},$sample->[1]);
$rsa=$ResamplingArray[2];
push(@{$rsa},$sample->[2]);
}
}
# sort resampling results
{
# recall
@rankedArray_R=sort by_value (@{$ResamplingArray[0]});
$ResamplingArray[0]=\@rankedArray_R;
for($x=0;$x<=$#rankedArray_R;$x++) {
$resampleAvgROUGE_R+=$rankedArray_R[$x];
# print "*R ($x): $rankedArray_R[$x]\n";
}
$resampleAvgROUGE_R=sprintf("%7.5f",$resampleAvgROUGE_R/(scalar @rankedArray_R));
# precision
@rankedArray_P=sort by_value (@{$ResamplingArray[1]});
$ResamplingArray[1]=\@rankedArray_P;
for($x=0;$x<=$#rankedArray_P;$x++) {
$resampleAvgROUGE_P+=$rankedArray_P[$x];
# print "*P ($x): $rankedArray_P[$x]\n";
}
$resampleAvgROUGE_P=sprintf("%7.5f",$resampleAvgROUGE_P/(scalar @rankedArray_P));
# f1-measure
@rankedArray_F=sort by_value (@{$ResamplingArray[2]});
$ResamplingArray[2]=\@rankedArray_F;
for($x=0;$x<=$#rankedArray_F;$x++) {
$resampleAvgROUGE_F+=$rankedArray_F[$x];
# print "*F ($x): $rankedArray_F[$x]\n";
}
$resampleAvgROUGE_F=sprintf("%7.5f",$resampleAvgROUGE_F/(scalar @rankedArray_F));
}
# $ciU=999-int((100-$opt_c)*10/2); # upper bound index
# $ciL=int((100-$opt_c)*10/2); # lower bound index
$delta=$numOfResamples*((100-$opt_c)/2.0)/100.0;
$ciUa=int($numOfResamples-$delta-1); # upper confidence interval lower index
$ciUb=$ciUa+1; # upper confidence interval upper index
$ciLa=int($delta); # lower confidence interval lower index
$ciLb=$ciLa+1; # lower confidence interval upper index
$ciR=$numOfResamples-$delta-1-$ciUa; # ratio bewteen lower and upper indexes
# $ROUGEAverages->{"AvgR"}=$avgAvgROUGE_R;
#-------
# recall
$ROUGEAverages->{"AvgR"}=$resampleAvgROUGE_R;
# find condifence intervals; take maximum distance from the mean
$ROUGEAverages->{"CIAvgL_R"}=sprintf("%7.5f",$ResamplingArray[0][$ciLa]+
($ResamplingArray[0][$ciLb]-$ResamplingArray[0][$ciLa])*$ciR);
$ROUGEAverages->{"CIAvgU_R"}=sprintf("%7.5f",$ResamplingArray[0][$ciUa]+
($ResamplingArray[0][$ciUb]-$ResamplingArray[0][$ciUa])*$ciR);
#-------
# precision
$ROUGEAverages->{"AvgP"}=$resampleAvgROUGE_P;
# find condifence intervals; take maximum distance from the mean
$ROUGEAverages->{"CIAvgL_P"}=sprintf("%7.5f",$ResamplingArray[1][$ciLa]+
($ResamplingArray[1][$ciLb]-$ResamplingArray[1][$ciLa])*$ciR);
$ROUGEAverages->{"CIAvgU_P"}=sprintf("%7.5f",$ResamplingArray[1][$ciUa]+
($ResamplingArray[1][$ciUb]-$ResamplingArray[1][$ciUa])*$ciR);
#-------
# f1-measure
$ROUGEAverages->{"AvgF"}=$resampleAvgROUGE_F;
# find condifence intervals; take maximum distance from the mean
$ROUGEAverages->{"CIAvgL_F"}=sprintf("%7.5f",$ResamplingArray[2][$ciLa]+
($ResamplingArray[2][$ciLb]-$ResamplingArray[2][$ciLa])*$ciR);
$ROUGEAverages->{"CIAvgU_F"}=sprintf("%7.5f",$ResamplingArray[2][$ciUa]+
($ResamplingArray[2][$ciUb]-$ResamplingArray[2][$ciUa])*$ciR);
$ROUGEAverages->{"M_cnt"}=$avgAvgROUGE_R; # model token count
$ROUGEAverages->{"P_cnt"}=$avgAvgROUGE_P; # peer token count
$ROUGEAverages->{"H_cnt"}=$avgAvgROUGE_F; # hit token count
}
else {
# $opt_t==2 => output raw count instead of precision, recall, and f-measure values
# in this option, no resampling is necessary, just output the raw counts
$ROUGEAverages->{"M_cnt"}=$avgAvgROUGE_R; # model token count
$ROUGEAverages->{"P_cnt"}=$avgAvgROUGE_P; # peer token count
$ROUGEAverages->{"H_cnt"}=$avgAvgROUGE_F; # hit token count
}
}
sub computeROUGEX {
my $metric=shift; # which ROUGE metric to compute?
my $ROUGEScores=shift;
my $evalID=shift;
my $ROUGEEval=shift; # one particular evaluation pair
my $peerID=shift; # a specific peer ID
my $ROUGEParam=shift; # ROUGE scoring parameters
my $lengthLimit; # lenght limit in words
my $byteLimit; # length limit in bytes
my $NSIZE; # ngram size for ROUGE-N
my $weightFactor; # weight factor for ROUGE-W
my $skipDistance; # skip distance for ROUGE-S
my $scoreMode; # scoring mode: A = model average; B = best model
my $alpha; # relative importance between recall and precision
my $opt_t; # ROUGE score counting mode
my $BEMode; # Basic Element scoring mode
my ($c,$cx,@modelPaths,$modelIDs,$modelRoot,$inputFormat);
$lengthLimit=$ROUGEParam->{"LENGTH"};
$byteLimit=$ROUGEParam->{"BYTE"};
$NSIZE=$ROUGEParam->{"NSIZE"};
$weightFactor=$ROUGEParam->{"WEIGHT"};
$skipDistance=$ROUGEParam->{"SD"};
$scoreMode=$ROUGEParam->{"SM"};
$alpha=$ROUGEParam->{"ALPHA"};
$opt_t=$ROUGEParam->{"AVERAGE"};
$BEMode=$ROUGEParam->{"BEMODE"};
# Check to see if this evaluation trial contains this $peerID.
# Sometimes not every peer provides response for each
# evaluation trial.
unless(exists($ROUGEEval->{"Ps"}{$peerID})) {
unless(exists($knownMissing{$evalID})) {
$knownMissing{$evalID}={};
}
unless(exists($knownMissing{$evalID}{$peerID})) {
print STDERR "\*ROUGE Warning: test instance for peer $peerID does not exist for evaluation $evalID\n";
$knownMissing{$evalID}{$peerID}=1;
}
return;
}
unless(defined($opt_z)) {
$peerPath=$ROUGEEval->{"PR"}."/".$ROUGEEval->{"Ps"}{$peerID};
}
else {
# if opt_z is set then peerPath is read from a file list that
# includes the path to the peer.
$peerPath=$ROUGEEval->{"Ps"}{$peerID};
}
if(defined($ROUGEEval->{"MR"})) {
$modelRoot=$ROUGEEval->{"MR"};
}
else {
# if opt_z is set then modelPath is read from a file list that
# includes the path to the model.
$modelRoot="";
}
$modelIDs=$ROUGEEval->{"MIDList"};
$inputFormat=$ROUGEEval->{"IF"};
# construct combined model
@modelPaths=(); # reset model paths
for($cx=0;$cx<=$#{$modelIDs};$cx++) {
my $modelID;
$modelID=$modelIDs->[$cx];
unless(defined($opt_z)) {
$modelPath="$modelRoot/$ROUGEEval->{\"Ms\"}{$modelID}"; # get full model path
}
else {
# if opt_z is set then modelPath is read from a file list that
# includes the full path to the model.
$modelPath="$ROUGEEval->{\"Ms\"}{$modelID}"; # get full model path
}
if(-e "$modelPath") {
# print "*$modelPath\n";
}
else {
die "Cannot find model summary: $modelPath\n";
}
push(@modelPaths,$modelPath);
}
#---------------------------------------------------------------
# evaluate peer
{
my (@results);
my ($testID,$avgROUGE,$avgROUGE_P,$avgROUGE_F);
@results=();
if($metric eq "N") {
&computeNGramScore(\@modelPaths,$peerPath,\@results,$NSIZE,$lengthLimit,$byteLimit,$inputFormat,$scoreMode,$alpha);
}
elsif($metric eq "L") {
&computeLCSScore(\@modelPaths,$peerPath,\@results,$lengthLimit,$byteLimit,$inputFormat,$scoreMode,$alpha);
}
elsif($metric eq "W") {
&computeWLCSScore(\@modelPaths,$peerPath,\@results,$lengthLimit,$byteLimit,$inputFormat,$weightFactor,$scoreMode,$alpha);
}
elsif($metric eq "S") {
&computeSkipBigramScore(\@modelPaths,$peerPath,\@results,$skipDistance,$lengthLimit,$byteLimit,$inputFormat,$scoreMode,$alpha);
}
elsif($metric eq "BE") {
&computeBEScore(\@modelPaths,$peerPath,\@results,$BEMode,$lengthLimit,$byteLimit,$inputFormat,$scoreMode,$alpha);
}
else {
die "Unknown ROUGE metric ID: $metric, has to be N, L, W, or S\n";
}
unless(defined($opt_t)) {
# sentence level average
$avgROUGE=sprintf("%7.5f",$results[2]);
$avgROUGE_P=sprintf("%7.5f",$results[4]);
$avgROUGE_F=sprintf("%7.5f",$results[5]);
}
else {
# corpus level per token average
$avgROUGE=$results[0]; # total model token count
$avgROUGE_P=$results[3]; # total peer token count
$avgROUGE_F=$results[1]; # total match count between model and peer, i.e. hit
}
# record ROUGE scores for the current test
$testID="$evalID\.$peerID";
if($debug) {
print "$testID\n";
}
unless(exists($testIDs{$testID})) {
$testIDs{$testID}=1;
}
unless(exists($ROUGEScores->{$testID})) {
$ROUGEScores->{$testID}=[];
push(@{$ROUGEScores->{$testID}},$avgROUGE); # average ; or model token count
push(@{$ROUGEScores->{$testID}},$avgROUGE_P); # average ; or peer token count
push(@{$ROUGEScores->{$testID}},$avgROUGE_F); # average ; or match token count (hit)
}
}
}
# 10/21/2004 add selection of scoring mode
# A: average over all models
# B: take only the best score
sub computeNGramScore {
my $modelPaths=shift;
my $peerPath=shift;
my $results=shift;
my $NSIZE=shift;
my $lengthLimit=shift;
my $byteLimit=shift;
my $inputFormat=shift;
my $scoreMode=shift;
my $alpha=shift;
my ($modelPath,$modelText,$peerText,$text,@tokens);
my (%model_grams,%peer_grams);
my ($gramHit,$gramScore,$gramScoreBest);
my ($totalGramHit,$totalGramCount);
my ($gramScoreP,$gramScoreF,$totalGramCountP);
#------------------------------------------------
# read model file and create model n-gram maps
$totalGramHit=0;
$totalGramCount=0;
$gramScoreBest=-1;
$gramScoreP=0; # precision
$gramScoreF=0; # f-measure
$totalGramCountP=0;
#------------------------------------------------
# read peer file and create model n-gram maps
%peer_grams=();
$peerText="";
&readText($peerPath,\$peerText,$inputFormat,$lengthLimit,$byteLimit);
&createNGram($peerText,\%peer_grams,$NSIZE);
if($debug) {
print "***P $peerPath\n";
if(defined($peerText)) {
print "$peerText\n";
print join("|",%peer_grams),"\n";
}
else {
print "---empty text---\n";
}
}
foreach $modelPath (@$modelPaths) {
%model_grams=();
$modelText="";
&readText($modelPath,\$modelText,$inputFormat,$lengthLimit,$byteLimit);
&createNGram($modelText,\%model_grams,$NSIZE);
if($debug) {
if(defined($modelText)) {
print "$modelText\n";
print join("|",%model_grams),"\n";
}
else {
print "---empty text---\n";
}
}
#------------------------------------------------
# compute ngram score
&ngramScore(\%model_grams,\%peer_grams,\$gramHit,\$gramScore);
# collect hit and count for each models
# This will effectively clip hit for each model; therefore would not give extra
# credit to reducdant information contained in the peer summary.
if($scoreMode eq "A") {
$totalGramHit+=$gramHit;
$totalGramCount+=$model_grams{"_cn_"};
$totalGramCountP+=$peer_grams{"_cn_"};
}
elsif($scoreMode eq "B") {
if($gramScore>$gramScoreBest) {
# only take a better score (i.e. better match)
$gramScoreBest=$gramScore;
$totalGramHit=$gramHit;
$totalGramCount=$model_grams{"_cn_"};
$totalGramCountP=$peer_grams{"_cn_"};
}
}
else {
# use average mode
$totalGramHit+=$gramHit;
$totalGramCount+=$model_grams{"_cn_"};
$totalGramCountP+=$peer_grams{"_cn_"};
}
if($debug) {
print "***M $modelPath\n";
}
}
# prepare score result for return
# unigram
push(@$results,$totalGramCount); # total number of ngrams in models
push(@$results,$totalGramHit);
if($totalGramCount!=0) {
$gramScore=sprintf("%7.5f",$totalGramHit/$totalGramCount);
}
else {
$gramScore=sprintf("%7.5f",0);
}
push(@$results,$gramScore);
push(@$results,$totalGramCountP); # total number of ngrams in peers
if($totalGramCountP!=0) {
$gramScoreP=sprintf("%7.5f",$totalGramHit/$totalGramCountP);
}
else {
$gramScoreP=sprintf("%7.5f",0);
}
push(@$results,$gramScoreP); # precision score
if((1-$alpha)*$gramScoreP+$alpha*$gramScore>0) {
$gramScoreF=sprintf("%7.5f",($gramScoreP*$gramScore)/((1-$alpha)*$gramScoreP+$alpha*$gramScore));
}
else {
$gramScoreF=sprintf("%7.5f",0);
}
push(@$results,$gramScoreF); # f1-measure score
if($debug) {
print "total $NSIZE-gram model count: $totalGramCount\n";
print "total $NSIZE-gram peer count: $totalGramCountP\n";
print "total $NSIZE-gram hit: $totalGramHit\n";
print "total ROUGE-$NSIZE\-R: $gramScore\n";
print "total ROUGE-$NSIZE\-P: $gramScoreP\n";
print "total ROUGE-$NSIZE\-F: $gramScoreF\n";
}
}
sub computeSkipBigramScore {
my $modelPaths=shift;
my $peerPath=shift;
my $results=shift;
my $skipDistance=shift;
my $lengthLimit=shift;
my $byteLimit=shift;
my $inputFormat=shift;
my $scoreMode=shift;
my $alpha=shift;
my ($modelPath,$modelText,$peerText,$text,@tokens);
my (%model_grams,%peer_grams);
my ($gramHit,$gramScore,$gramScoreBest);
my ($totalGramHitm,$totalGramCount);
my ($gramScoreP,$gramScoreF,$totalGramCountP);
#------------------------------------------------
# read model file and create model n-gram maps
$totalGramHit=0;
$totalGramCount=0;
$gramScoreBest=-1;
$gramScoreP=0; # precision
$gramScoreF=0; # f-measure
$totalGramCountP=0;
#------------------------------------------------
# read peer file and create model n-gram maps
%peer_grams=();
$peerText="";
&readText($peerPath,\$peerText,$inputFormat,$lengthLimit,$byteLimit);
&createSkipBigram($peerText,\%peer_grams,$skipDistance);
if($debug) {
print "***P $peerPath\n";
if(defined($peerText)) {
print "$peerText\n";
print join("|",%peer_grams),"\n";
}
else {
print "---empty text---\n";
}
}
foreach $modelPath (@$modelPaths) {
%model_grams=();
$modelText="";
&readText($modelPath,\$modelText,$inputFormat,$lengthLimit,$byteLimit);
if(defined($opt_M)) { # only apply stemming on models
$opt_m=1;
}
&createSkipBigram($modelText,\%model_grams,$skipDistance);
if(defined($opt_M)) { # only apply stemming on models
$opt_m=undef;
}
if($debug) {
if(defined($modelText)) {
print "$modelText\n";
print join("|",%model_grams),"\n";
}
else {
print "---empty text---\n";
}
}
#------------------------------------------------
# compute ngram score
&skipBigramScore(\%model_grams,\%peer_grams,\$gramHit,\$gramScore);
# collect hit and count for each models
# This will effectively clip hit for each model; therefore would not give extra
# credit to reducdant information contained in the peer summary.
if($scoreMode eq "A") {
$totalGramHit+=$gramHit;
$totalGramCount+=$model_grams{"_cn_"};
$totalGramCountP+=$peer_grams{"_cn_"};
}
elsif($scoreMode eq "B") {
if($gramScore>$gramScoreBest) {
# only take a better score (i.e. better match)
$gramScoreBest=$gramScore;
$totalGramHit=$gramHit;
$totalGramCount=$model_grams{"_cn_"};
$totalGramCountP=$peer_grams{"_cn_"};
}
}
else {
# use average mode
$totalGramHit+=$gramHit;
$totalGramCount+=$model_grams{"_cn_"};
$totalGramCountP+=$peer_grams{"_cn_"};
}
if($debug) {
print "***M $modelPath\n";
}
}
# prepare score result for return
# unigram
push(@$results,$totalGramCount); # total number of ngrams
push(@$results,$totalGramHit);
if($totalGramCount!=0) {
$gramScore=sprintf("%7.5f",$totalGramHit/$totalGramCount);
}
else {
$gramScore=sprintf("%7.5f",0);
}
push(@$results,$gramScore);
push(@$results,$totalGramCountP); # total number of ngrams in peers
if($totalGramCountP!=0) {
$gramScoreP=sprintf("%7.5f",$totalGramHit/$totalGramCountP);
}
else {
$gramScoreP=sprintf("%7.5f",0);
}
push(@$results,$gramScoreP); # precision score
if((1-$alpha)*$gramScoreP+$alpha*$gramScore>0) {
$gramScoreF=sprintf("%7.5f",($gramScoreP*$gramScore)/((1-$alpha)*$gramScoreP+$alpha*$gramScore));
}
else {
$gramScoreF=sprintf("%7.5f",0);
}
push(@$results,$gramScoreF); # f1-measure score
if($debug) {
print "total ROUGE-S$skipDistance model count: $totalGramCount\n";
print "total ROUGE-S$skipDistance peer count: $totalGramCountP\n";
print "total ROUGE-S$skipDistance hit: $totalGramHit\n";
print "total ROUGE-S$skipDistance\-R: $gramScore\n";
print "total ROUGE-S$skipDistance\-P: $gramScore\n";
print "total ROUGE-S$skipDistance\-F: $gramScore\n";
}
}
sub computeLCSScore {
my $modelPaths=shift;
my $peerPath=shift;
my $results=shift;
my $lengthLimit=shift;
my $byteLimit=shift;
my $inputFormat=shift;
my $scoreMode=shift;
my $alpha=shift;
my ($modelPath,@modelText,@peerText,$text,@tokens);
my (@modelTokens,@peerTokens);
my ($lcsHit,$lcsScore,$lcsBase,$lcsScoreBest);
my ($totalLCSHitm,$totalLCSCount);
my (%peer_1grams,%tmp_peer_1grams,%model_1grams,$peerText1,$modelText1);
my ($lcsScoreP,$lcsScoreF,$totalLCSCountP);
#------------------------------------------------
$totalLCSHit=0;
$totalLCSCount=0;
$lcsScoreBest=-1;
$lcsScoreP=0;
$lcsScoreF=0;
$totalLCSCountP=0;
#------------------------------------------------
# read peer file and create peer n-gram maps
@peerTokens=();
@peerText=();
&readText_LCS($peerPath,\@peerText,$inputFormat,$lengthLimit,$byteLimit);
&tokenizeText_LCS(\@peerText,\@peerTokens);
#------------------------------------------------
# create unigram for clipping
%peer_1grams=();
&readText($peerPath,\$peerText1,$inputFormat,$lengthLimit,$byteLimit);
&createNGram($peerText1,\%peer_1grams,1);
if($debug) {
my $i;
print "***P $peerPath\n";
print join("\n",@peerText),"\n";
for($i=0;$i<=$#peerText;$i++) {
print $i,": ",join("|",@{$peerTokens[$i]}),"\n";
}
}
foreach $modelPath (@$modelPaths) {
%tmp_peer_1grams=%peer_1grams; # renew peer unigram hash, so the peer count can be reset to the orignal number
@modelTokens=();
@modelText=();
&readText_LCS($modelPath,\@modelText,$inputFormat,$lengthLimit,$byteLimit);
if(defined($opt_M)) {
$opt_m=1;
&tokenizeText_LCS(\@modelText,\@modelTokens);
$opt_m=undef;
}
else {
&tokenizeText_LCS(\@modelText,\@modelTokens);
}
#------------------------------------------------
# create unigram for clipping
%model_1grams=();
&readText($modelPath,\$modelText1,$inputFormat,$lengthLimit,$byteLimit);
if(defined($opt_M)) { # only apply stemming on models
$opt_m=1;
}
&createNGram($modelText1,\%model_1grams,1);
if(defined($opt_M)) { # only apply stemming on models
$opt_m=undef;
}
#------------------------------------------------
# compute LCS score
&lcs(\@modelTokens,\@peerTokens,\$lcsHit,\$lcsScore,\$lcsBase,\%model_1grams,\%tmp_peer_1grams);
# collect hit and count for each models
# This will effectively clip hit for each model; therefore would not give extra
# credit to reductant information contained in the peer summary.
# Previous method that lumps model text together and inflates the peer summary
# the number of references time would reward redundant information
if($scoreMode eq "A") {
$totalLCSHit+=$lcsHit;
$totalLCSCount+=$lcsBase;
$totalLCSCountP+=$peer_1grams{"_cn_"};
}
elsif($scoreMode eq "B") {
if($lcsScore>$lcsScoreBest) {
# only take a better score (i.e. better match)
$lcsScoreBest=$lcsScore;
$totalLCSHit=$lcsHit;
$totalLCSCount=$lcsBase;
$totalLCSCountP=$peer_1grams{"_cn_"};
}
}
else {
# use average mode
$totalLCSHit+=$lcsHit;
$totalLCSCount+=$lcsBase;
$totalLCSCountP+=$peer_1grams{"_cn_"};
}
if($debug) {
my $i;
print "***M $modelPath\n";
print join("\n",@modelText),"\n";
for($i=0;$i<=$#modelText;$i++) {
print $i,": ",join("|",@{$modelTokens[$i]}),"\n";
}
}
}
# prepare score result for return
push(@$results,$totalLCSCount); # total number of ngrams
push(@$results,$totalLCSHit);
if($totalLCSCount!=0) {
$lcsScore=sprintf("%7.5f",$totalLCSHit/$totalLCSCount);
}
else {
$lcsScore=sprintf("%7.5f",0);
}
push(@$results,$lcsScore);
push(@$results,$totalLCSCountP); # total number of token in peers
if($totalLCSCountP!=0) {
$lcsScoreP=sprintf("%7.5f",$totalLCSHit/$totalLCSCountP);
}
else {
$lcsScoreP=sprintf("%7.5f",0);
}
push(@$results,$lcsScoreP);
if((1-$alpha)*$lcsScoreP+$alpha*$lcsScore>0) {
$lcsScoreF=sprintf("%7.5f",($lcsScoreP*$lcsScore)/((1-$alpha)*$lcsScoreP+$alpha*$lcsScore));
}
else {
$lcsScoreF=sprintf("%7.5f",0);
}
push(@$results,$lcsScoreF);
if($debug) {
print "total ROUGE-L model count: $totalLCSCount\n";
print "total ROUGE-L peer count: $totalLCSCountP\n";
print "total ROUGE-L hit: $totalLCSHit\n";
print "total ROUGE-L-R score: $lcsScore\n";
print "total ROUGE-L-P: $lcsScoreP\n";
print "total ROUGE-L-F: $lcsScoreF\n";
}
}
sub computeWLCSScore {
my $modelPaths=shift;
my $peerPath=shift;
my $results=shift;
my $lengthLimit=shift;
my $byteLimit=shift;
my $inputFormat=shift;
my $weightFactor=shift;
my $scoreMode=shift;
my $alpha=shift;
my ($modelPath,@modelText,@peerText,$text,@tokens);
my (@modelTokens,@peerTokens);
my ($lcsHit,$lcsScore,$lcsBase,$lcsScoreBest);
my ($totalLCSHitm,$totalLCSCount);
my (%peer_1grams,%tmp_peer_1grams,%model_1grams,$peerText1,$modelText1);
my ($lcsScoreP,$lcsScoreF,$totalLCSCountP);
#------------------------------------------------
# read model file and create model n-gram maps
$totalLCSHit=0;
$totalLCSCount=0;
$lcsScoreBest=-1;
$lcsScoreP=0;
$lcsScoreF=0;
$totalLCSCountP=0;
#------------------------------------------------
# read peer file and create model n-gram maps
@peerTokens=();
@peerText=();
&readText_LCS($peerPath,\@peerText,$inputFormat,$lengthLimit,$byteLimit);
&tokenizeText_LCS(\@peerText,\@peerTokens);
#------------------------------------------------
# create unigram for clipping
%peer_1grams=();
&readText($peerPath,\$peerText1,$inputFormat,$lengthLimit,$byteLimit);
&createNGram($peerText1,\%peer_1grams,1);
if($debug) {
my $i;
print "***P $peerPath\n";
print join("\n",@peerText),"\n";
for($i=0;$i<=$#peerText;$i++) {
print $i,": ",join("|",@{$peerTokens[$i]}),"\n";
}
}
foreach $modelPath (@$modelPaths) {
%tmp_peer_1grams=%peer_1grams; # renew peer unigram hash, so the peer count can be reset to the orignal number
@modelTokens=();
@modelText=();
&readText_LCS($modelPath,\@modelText,$inputFormat,$lengthLimit,$byteLimit);
&tokenizeText_LCS(\@modelText,\@modelTokens);
#------------------------------------------------
# create unigram for clipping
%model_1grams=();
&readText($modelPath,\$modelText1,$inputFormat,$lengthLimit,$byteLimit);
if(defined($opt_M)) { # only apply stemming on models
$opt_m=1;
}
&createNGram($modelText1,\%model_1grams,1);
if(defined($opt_M)) { # only apply stemming on models
$opt_m=undef;
}
#------------------------------------------------
# compute WLCS score
&wlcs(\@modelTokens,\@peerTokens,\$lcsHit,\$lcsScore,\$lcsBase,$weightFactor,\%model_1grams,\%tmp_peer_1grams);
# collect hit and count for each models
# This will effectively clip hit for each model; therefore would not give extra
# credit to reductant information contained in the peer summary.
# Previous method that lumps model text together and inflates the peer summary
# the number of references time would reward redundant information
if($scoreMode eq "A") {
$totalLCSHit+=$lcsHit;
$totalLCSCount+=&wlcsWeight($lcsBase,$weightFactor);
$totalLCSCountP+=&wlcsWeight($peer_1grams{"_cn_"},$weightFactor);
}
elsif($scoreMode eq "B") {
if($lcsScore>$lcsScoreBest) {
# only take a better score (i.e. better match)
$lcsScoreBest=$lcsScore;
$totalLCSHit=$lcsHit;
$totalLCSCount=&wlcsWeight($lcsBase,$weightFactor);
$totalLCSCountP=&wlcsWeight($peer_1grams{"_cn_"},$weightFactor);
}
}
else {
# use average mode
$totalLCSHit+=$lcsHit;
$totalLCSCount+=&wlcsWeight($lcsBase,$weightFactor);
$totalLCSCountP+=&wlcsWeight($peer_1grams{"_cn_"},$weightFactor);
}
if($debug) {
my $i;
print "***M $modelPath\n";
print join("\n",@modelText),"\n";
for($i=0;$i<=$#modelText;$i++) {
print $i,": ",join("|",@{$modelTokens[$i]}),"\n";
}
}
}
# prepare score result for return
push(@$results,$totalLCSCount); # total number of ngrams
push(@$results,$totalLCSHit);
if($totalLCSCount!=0) {
$lcsScore=sprintf("%7.5f",&wlcsWeightInverse($totalLCSHit/$totalLCSCount,$weightFactor));
}
else {
$lcsScore=sprintf("%7.5f",0);
}
push(@$results,$lcsScore);
push(@$results,$totalLCSCountP); # total number of token in peers
if($totalLCSCountP!=0) {
$lcsScoreP=sprintf("%7.5f",&wlcsWeightInverse($totalLCSHit/$totalLCSCountP,$weightFactor));
}
else {
$lcsScoreP=sprintf("%7.5f",0);
}
push(@$results,$lcsScoreP);
if((1-$alpha)*$lcsScoreP+$alpha*$lcsScore>0) {
$lcsScoreF=sprintf("%7.5f",($lcsScoreP*$lcsScore)/((1-$alpha)*$lcsScoreP+$alpha*$lcsScore));
}
else {
$lcsScoreF=sprintf("%7.5f",0);
}
push(@$results,$lcsScoreF);
if($debug) {
print "total ROUGE-W-$weightFactor model count: $totalLCSCount\n";
print "total ROUGE-W-$weightFactor peer count: $totalLCSCountP\n";
print "total ROUGE-W-$weightFactor hit: $totalLCSHit\n";
print "total ROUGE-W-$weightFactor-R score: $lcsScore\n";
print "total ROUGE-W-$weightFactor-P score: $lcsScoreP\n";
print "total ROUGE-W-$weightFactor-F score: $lcsScoreF\n";
}
}
sub computeBEScore {
my $modelPaths=shift;
my $peerPath=shift;
my $results=shift;
my $BEMode=shift;
my $lengthLimit=shift;
my $byteLimit=shift;
my $inputFormat=shift;
my $scoreMode=shift;
my $alpha=shift;
my ($modelPath,@modelBEList,@peerBEList,$text,@tokens);
my (%model_BEs,%peer_BEs);
my ($BEHit,$BEScore,$BEScoreBest);
my ($totalBEHit,$totalBECount);
my ($BEScoreP,$BEScoreF,$totalBECountP);
#------------------------------------------------
# read model file and create model BE maps
$totalBEHit=0;
$totalBECount=0;
$BEScoreBest=-1;
$BEScoreP=0; # precision
$BEScoreF=0; # f-measure
$totalBECountP=0;
#------------------------------------------------
# read peer file and create model n-BE maps
%peer_BEs=();
@peerBEList=();
&readBE($peerPath,\@peerBEList,$inputFormat);
&createBE(\@peerBEList,\%peer_BEs,$BEMode);
if($debug) {
print "***P $peerPath\n";
if(scalar @peerBEList > 0) {
# print join("\n",@peerBEList);
# print "\n";
print join("#",%peer_BEs),"\n";
}
else {
print "---empty text---\n";
}
}
foreach $modelPath (@$modelPaths) {
%model_BEs=();
@modelBEList=();
&readBE($modelPath,\@modelBEList,$inputFormat);
if(defined($opt_M)) { # only apply stemming on models
$opt_m=1;
}
&createBE(\@modelBEList,\%model_BEs,$BEMode);
if(defined($opt_M)) { # only apply stemming on models
$opt_m=undef;
}
if($debug) {
if(scalar @modelBEList > 0) {
# print join("\n",@modelBEList);
# print "\n";
print join("#",%model_BEs),"\n";
}
else {
print "---empty text---\n";
}
}
#------------------------------------------------
# compute BE score
&getBEScore(\%model_BEs,\%peer_BEs,\$BEHit,\$BEScore);
# collect hit and count for each models
# This will effectively clip hit for each model; therefore would not give extra
# credit to reducdant information contained in the peer summary.
if($scoreMode eq "A") {
$totalBEHit+=$BEHit;
$totalBECount+=$model_BEs{"_cn_"};
$totalBECountP+=$peer_BEs{"_cn_"};
}
elsif($scoreMode eq "B") {
if($BEScore>$BEScoreBest) {
# only take a better score (i.e. better match)
$BEScoreBest=$BEScore;
$totalBEHit=$BEHit;
$totalBECount=$model_BEs{"_cn_"};
$totalBECountP=$peer_BEs{"_cn_"};
}
}
else {
# use average mode
$totalBEHit+=$BEHit;
$totalBECount+=$model_BEs{"_cn_"};
$totalBECountP+=$peer_BEs{"_cn_"};
}
if($debug) {
print "***M $modelPath\n";
}
}
# prepare score result for return
# uniBE
push(@$results,$totalBECount); # total number of nbes in models
push(@$results,$totalBEHit);
if($totalBECount!=0) {
$BEScore=sprintf("%7.5f",$totalBEHit/$totalBECount);
}
else {
$BEScore=sprintf("%7.5f",0);
}
push(@$results,$BEScore);
push(@$results,$totalBECountP); # total number of nBEs in peers
if($totalBECountP!=0) {
$BEScoreP=sprintf("%7.5f",$totalBEHit/$totalBECountP);
}
else {
$BEScoreP=sprintf("%7.5f",0);
}
push(@$results,$BEScoreP); # precision score
if((1-$alpha)*$BEScoreP+$alpha*$BEScore>0) {
$BEScoreF=sprintf("%7.5f",($BEScoreP*$BEScore)/((1-$alpha)*$BEScoreP+$alpha*$BEScore));
}
else {
$BEScoreF=sprintf("%7.5f",0);
}
push(@$results,$BEScoreF); # f1-measure score
if($debug) {
print "total BE-$BEMode model count: $totalBECount\n";
print "total BE-$BEMode peer count: $totalBECountP\n";
print "total BE-$BEMode hit: $totalBEHit\n";
print "total ROUGE-BE-$BEMode\-R: $BEScore\n";
print "total ROUGE-BE-$BEMode\-P: $BEScoreP\n";
print "total ROUGE-BE-$BEMode\-F: $BEScoreF\n";
}
}
sub readTextOld {
my $inPath=shift;
my $tokenizedText=shift;
my $type=shift;
my $lengthLimit=shift;
my $byteLimit=shift;
my ($text,$bsize,$wsize,@words,$done);
$$tokenizedText=undef;
$bsize=0;
$wsize=0;
$done=0;
open(TEXT,$inPath)||die "Cannot open $inPath\n";
if($type=~/^SEE$/oi) {
while(defined($line=)) { # SEE abstract format
if($line=~/^\[([0-9]+)\]<\/a>\s+([^<]+)/o) {
$text=$3;
$text=~tr/A-Z/a-z/;
&checkSummarySize($tokenizedText,\$text,\$wsize,\$bsize,\$done,$lengthLimit,$byteLimit);
}
}
}
elsif($type=~/^ISI$/oi) { # ISI standard sentence by sentence format
while(defined($line=)) {
if($line=~/^([^<]+)<\/S>/o) {
$text=$1;
$text=~tr/A-Z/a-z/;
&checkSummarySize($tokenizedText,\$text,\$wsize,\$bsize,\$done,$lengthLimit,$byteLimit);
}
}
}
elsif($type=~/^SPL$/oi) { # SPL one Sentence Per Line format
while(defined($line=)) {
chomp($line);
$line=~s/^\s+//;
$line=~s/\s+$//;
if(defined($line)&&length($line)>0) {
$text=$line;
$text=~tr/A-Z/a-z/;
&checkSummarySize($tokenizedText,\$text,\$wsize,\$bsize,\$done,$lengthLimit,$byteLimit);
}
}
}
else {
close(TEXT);
die "Unknown input format: $type\n";
}
close(TEXT);
if(defined($$tokenizedText)) {
$$tokenizedText=~s/\-/ \- /g;
$$tokenizedText=~s/[^A-Za-z0-9\-]/ /g;
$$tokenizedText=~s/^\s+//;
$$tokenizedText=~s/\s+$//;
$$tokenizedText=~s/\s+/ /g;
}
else {
print STDERR "readText: $inPath -> empty text\n";
}
# print "($$tokenizedText)\n\n";
}
# enforce length cutoff at the file level
# convert different input format into SPL format then put them into
# tokenizedText
sub readText {
my $inPath=shift;
my $tokenizedText=shift;
my $type=shift;
my $lengthLimit=shift;
my $byteLimit=shift;
my ($text,$bsize,$wsize,@words,$done,@sntList);
$$tokenizedText=undef;
$bsize=0;
$wsize=0;
$done=0;
@sntList=();
open(TEXT,$inPath)||die "Cannot open $inPath\n";
if($type=~/^SEE$/oi) {
while(defined($line=)) { # SEE abstract format
if($line=~/^\[([0-9]+)\]<\/a>\s+([^<]+)/o||
$line=~/^\[([0-9]+)\]<\/a>\s+([^<]+)/o) {
$text=$2;
$text=~tr/A-Z/a-z/;
push(@sntList,$text);
}
}
}
elsif($type=~/^ISI$/oi) { # ISI standard sentence by sentence format
while(defined($line=)) {
if($line=~/^([^<]+)<\/S>/o) {
$text=$1;
$text=~tr/A-Z/a-z/;
push(@sntList,$text);
}
}
}
elsif($type=~/^SPL$/oi) { # SPL one Sentence Per Line format
while(defined($line=)) {
chomp($line);
if(defined($line)&&length($line)>0) {
$text=$line;
$text=~tr/A-Z/a-z/;
push(@sntList,$text);
}
}
}
else {
close(TEXT);
die "Unknown input format: $type\n";
}
close(TEXT);
if($lengthLimit==0&&$byteLimit==0) {
$$tokenizedText=join(" ",@sntList);
}
elsif($lengthLimit!=0) {
my ($tmpText);
$tmpText="";
$tmpTextLen=0;
foreach $s (@sntList) {
my ($sLen,@tokens);
@tokens=split(/\s+/,$s);
$sLen=scalar @tokens;
if($tmpTextLen+$sLen<$lengthLimit) {
if($tmpTextLen!=0) {
$tmpText.=" $s";
}
else {
$tmpText.="$s";
}
$tmpTextLen+=$sLen;
}
else {
if($tmpTextLen>0) {
$tmpText.=" ";
}
$tmpText.=join(" ",@tokens[0..$lengthLimit-$tmpTextLen-1]);
last;
}
}
if(length($tmpText)>0) {
$$tokenizedText=$tmpText;
}
}
elsif($byteLimit!=0) {
my ($tmpText);
$tmpText="";
$tmpTextLen=0;
foreach $s (@sntList) {
my ($sLen);
$sLen=length($s);
if($tmpTextLen+$sLen<$byteLimit) {
if($tmpTextLen!=0) {
$tmpText.=" $s";
}
else {
$tmpText.="$s";
}
$tmpTextLen+=$sLen;
}
else {
if($tmpTextLen>0) {
$tmpText.=" ";
}
$tmpText.=substr($s,0,$byteLimit-$tmpTextLen);
last;
}
}
if(length($tmpText)>0) {
$$tokenizedText=$tmpText;
}
}
if(defined($$tokenizedText)) {
$$tokenizedText=~s/\-/ \- /g;
$$tokenizedText=~s/[^A-Za-z0-9\-]/ /g;
$$tokenizedText=~s/^\s+//;
$$tokenizedText=~s/\s+$//;
$$tokenizedText=~s/\s+/ /g;
}
else {
print STDERR "readText: $inPath -> empty text\n";
}
# print "($$tokenizedText)\n\n";
}
sub readBE {
my $inPath=shift;
my $BEList=shift;
my $type=shift;
my ($line);
open(TEXT,$inPath)||die "Cannot open $inPath\n";
if(defined($opt_v)) {
print STDERR "$inPath\n";
}
if($type=~/^SIMPLE$/oi) {
while(defined($line=)) { # Simple BE triple format
chomp($line);
push(@{$BEList},$line);
}
}
elsif($type=~/^ISI$/oi) { # ISI standard BE format
while(defined($line=)) {
# place holder
}
}
else {
close(TEXT);
die "Unknown input format: $type\n";
}
close(TEXT);
if(scalar @{$BEList} ==0) {
print STDERR "readBE: $inPath -> empty text\n";
}
}
sub checkSummarySize {
my $tokenizedText=shift;
my $text=shift;
my $wsize=shift;
my $bsize=shift;
my $done=shift;
my $lenghtLimit=shift;
my $byteLimit=shift;
my (@words);
@words=split(/\s+/,$$text);
if(($lengthLimit==0&&$byteLimit==0)||
($lengthLimit!=0&&(scalar @words)+$$wsize<=$lengthLimit)||
($byteLimit!=0&&length($$text)+$$bsize<=$byteLimit)) {
if(defined($$tokenizedText)) {
$$tokenizedText.=" $$text";
}
else {
$$tokenizedText=$$text;
}
$$bsize+=length($$text);
$$wsize+=(scalar @words);
}
elsif($lengthLimit!=0&&(scalar @words)+$$wsize>$lengthLimit) {
if($$done==0) {
if(defined($$tokenizedText)) {
$$tokenizedText.=" ";
$$tokenizedText.=join(" ",@words[0..$lengthLimit-$$wsize-1]);
}
else {
$$tokenizedText=join(" ",@words[0..$lengthLimit-$$wsize-1]);
}
$$done=1;
}
}
elsif($byteLimit!=0&&length($$text)+$$bsize>$byteLimit) {
if($$done==0) {
if(defined($$tokenizedText)) {
$$tokenizedText.=" ";
$$tokenizedText.=substr($$text,0,$byteLimit-$$bsize);
}
else {
$$tokenizedText=substr($$text,0,$byteLimit-$$bsize);
}
$$done=1;
}
}
}
# LCS computing is based on unit and cannot lump all the text together
# as in computing ngram co-occurrences
sub readText_LCS {
my $inPath=shift;
my $tokenizedText=shift;
my $type=shift;
my $lengthLimit=shift;
my $byteLimit=shift;
my ($text,$t,$bsize,$wsize,$done,@sntList);
@{$tokenizedText}=();
$bsize=0;
$wsize=0;
$done=0;
@sntList=();
open(TEXT,$inPath)||die "Cannot open $inPath\n";
if($type=~/^SEE$/oi) {
while(defined($line=)) { # SEE abstract format
if($line=~/^\[([0-9]+)\]<\/a>\s+([^<]+)/o||
$line=~/^\[([0-9]+)\]<\/a>\s+([^<]+)/o) {
$text=$2;
$text=~tr/A-Z/a-z/;
push(@sntList,$text);
}
}
}
elsif($type=~/^ISI$/oi) { # ISI standard sentence by sentence format
while(defined($line=)) {
if($line=~/^([^<]+)<\/S>/o) {
$text=$1;
$text=~tr/A-Z/a-z/;
push(@sntList,$text);
}
}
}
elsif($type=~/^SPL$/oi) { # SPL one Sentence Per Line format
while(defined($line=)) {
chomp($line);
if(defined($line)&&length($line)>0) {
$text=$line;
$text=~tr/A-Z/a-z/;
push(@sntList,$text);
}
}
}
else {
close(TEXT);
die "Unknown input format: $type\n";
}
close(TEXT);
if($lengthLimit==0&&$byteLimit==0) {
@{$tokenizedText}=@sntList;
}
elsif($lengthLimit!=0) {
my ($tmpText);
$tmpText="";
$tmpTextLen=0;
foreach $s (@sntList) {
my ($sLen,@tokens);
@tokens=split(/\s+/,$s);
$sLen=scalar @tokens;
if($tmpTextLen+$sLen<$lengthLimit) {
$tmpTextLen+=$sLen;
push(@{$tokenizedText},$s);
}
else {
push(@{$tokenizedText},join(" ",@tokens[0..$lengthLimit-$tmpTextLen-1]));
last;
}
}
}
elsif($byteLimit!=0) {
my ($tmpText);
$tmpText="";
$tmpTextLen=0;
foreach $s (@sntList) {
my ($sLen);
$sLen=length($s);
if($tmpTextLen+$sLen<$byteLimit) {
push(@{$tokenizedText},$s);
}
else {
push(@{$tokenizedText},substr($s,0,$byteLimit-$tmpTextLen));
last;
}
}
}
if(defined(@{$tokenizedText}>0)) {
for($t=0;$t<@{$tokenizedText};$t++) {
$tokenizedText->[$t]=~s/\-/ \- /g;
$tokenizedText->[$t]=~s/[^A-Za-z0-9\-]/ /g;
$tokenizedText->[$t]=~s/^\s+//;
$tokenizedText->[$t]=~s/\s+$//;
$tokenizedText->[$t]=~s/\s+/ /g;
}
}
else {
print STDERR "readText_LCS: $inPath -> empty text\n";
}
}
# LCS computing is based on unit and cannot lump all the text together
# as in computing ngram co-occurrences
sub readText_LCS_old {
my $inPath=shift;
my $tokenizedText=shift;
my $type=shift;
my $lengthLimit=shift;
my $byteLimit=shift;
my ($text,$t,$bsize,$wsize,$done);
@{$tokenizedText}=();
$bsize=0;
$wsize=0;
$done=0;
open(TEXT,$inPath)||die "Cannot open $inPath\n";
if($type=~/^SEE$/oi) {
while(defined($line=)) { # SEE abstract format
if($line=~/^\[([0-9]+)\]<\/a>\s+([^<]+)/o) {
$text=$3;
$text=~tr/A-Z/a-z/;
&checkSummarySize_LCS($tokenizedText,\$text,\$wsize,\$bsize,\$done,$lengthLimit,$byteLimit);
}
}
}
elsif($type=~/^ISI$/oi) { # ISI standard sentence by sentence format
while(defined($line=)) {
if($line=~/^([^<]+)<\/S>/o) {
$text=$1;
$text=~tr/A-Z/a-z/;
&checkSummarySize_LCS($tokenizedText,\$text,\$wsize,\$bsize,\$done,$lengthLimit,$byteLimit);
}
}
}
elsif($type=~/^SPL$/oi) { # SPL one Sentence Per Line format
while(defined($line=)) {
chomp($line);
$line=~s/^\s+//;
$line=~s/\s+$//;
if(defined($line)&&length($line)>0) {
$text=$line;
$text=~tr/A-Z/a-z/;
&checkSummarySize_LCS($tokenizedText,\$text,\$wsize,\$bsize,\$done,$lengthLimit,$byteLimit);
}
}
}
else {
close(TEXT);
die "Unknown input format: $type\n";
}
close(TEXT);
if(defined(@{$tokenizedText}>0)) {
for($t=0;$t<@{$tokenizedText};$t++) {
$tokenizedText->[$t]=~s/\-/ \- /g;
$tokenizedText->[$t]=~s/[^A-Za-z0-9\-]/ /g;
$tokenizedText->[$t]=~s/^\s+//;
$tokenizedText->[$t]=~s/\s+$//;
$tokenizedText->[$t]=~s/\s+/ /g;
}
}
else {
print STDERR "readText_LCS: $inPath -> empty text\n";
}
}
sub checkSummarySize_LCS {
my $tokenizedText=shift;
my $text=shift;
my $wsize=shift;
my $bsize=shift;
my $done=shift;
my $lenghtLimit=shift;
my $byteLimit=shift;
my (@words);
@words=split(/\s+/,$$text);
if(($lengthLimit==0&&$byteLimit==0)||
($lengthLimit!=0&&(scalar @words)+$$wsize<=$lengthLimit)||
($byteLimit!=0&&length($$text)+$$bsize<=$byteLimit)) {
push(@{$tokenizedText},$$text);
$$bsize+=length($$text);
$$wsize+=(scalar @words);
}
elsif($lengthLimit!=0&&(scalar @words)+$$wsize>$lengthLimit) {
if($$done==0) {
push(@{$tokenizedText},$$text);
$$done=1;
}
}
elsif($byteLimit!=0&&length($$text)+$$bsize>$byteLimit) {
if($$done==0) {
push(@{$tokenizedText},$$text);
$$done=1;
}
}
}
sub ngramScore {
my $model_grams=shift;
my $peer_grams=shift;
my $hit=shift;
my $score=shift;
my ($s,$t,@tokens);
$$hit=0;
@tokens=keys (%$model_grams);
foreach $t (@tokens) {
if($t ne "_cn_") {
my $h;
$h=0;
if(exists($peer_grams->{$t})) {
$h=$peer_grams->{$t}<=$model_grams->{$t}?
$peer_grams->{$t}:$model_grams->{$t}; # clip
$$hit+=$h;
}
}
}
if($model_grams->{"_cn_"}!=0) {
$$score=sprintf("%07.5f",$$hit/$model_grams->{"_cn_"});
}
else {
# no instance of n-gram at this length
$$score=0;
# die "model n-grams has zero instance\n";
}
}
sub skipBigramScore {
my $model_grams=shift;
my $peer_grams=shift;
my $hit=shift;
my $score=shift;
my ($s,$t,@tokens);
$$hit=0;
@tokens=keys (%$model_grams);
foreach $t (@tokens) {
if($t ne "_cn_") {
my $h;
$h=0;
if(exists($peer_grams->{$t})) {
$h=$peer_grams->{$t}<=$model_grams->{$t}?
$peer_grams->{$t}:$model_grams->{$t}; # clip
$$hit+=$h;
}
}
}
if($model_grams->{"_cn_"}!=0) {
$$score=sprintf("%07.5f",$$hit/$model_grams->{"_cn_"});
}
else {
# no instance of n-gram at this length
$$score=0;
# die "model n-grams has zero instance\n";
}
}
sub lcs {
my $model=shift;
my $peer=shift;
my $hit=shift;
my $score=shift;
my $base=shift;
my $model_1grams=shift;
my $peer_1grams=shift;
my ($i,$j,@hitMask,@LCS);
$$hit=0;
$$base=0;
# compute LCS length for each model/peer pair
for($i=0;$i<@{$model};$i++) {
# use @hitMask to make sure multiple peer hit won't be counted as multiple hits
@hitMask=();
for($j=0;$j<@{$model->[$i]};$j++) {
push(@hitMask,0); # initialize hit mask
}
$$base+=scalar @{$model->[$i]}; # add model length
for($j=0;$j<@{$peer};$j++) {
&lcs_inner($model->[$i],$peer->[$j],\@hitMask);
}
@LCS=();
for($j=0;$j<@{$model->[$i]};$j++) {
if($hitMask[$j]==1) {
if(exists($model_1grams->{$model->[$i][$j]})&&
exists($peer_1grams->{$model->[$i][$j]})&&
$model_1grams->{$model->[$i][$j]}>0&&
$peer_1grams->{$model->[$i][$j]}>0) {
$$hit++;
#---------------------------------------------
# bookkeeping to clip over counting
# everytime a hit is found it is deducted
# from both model and peer unigram count
# if a unigram count already involve in
# one LCS match then it will not be counted
# if it match another token in the model
# unit. This will make sure LCS score
# is always lower than unigram score
$model_1grams->{$model->[$i][$j]}--;
$peer_1grams->{$model->[$i][$j]}--;
push(@LCS,$model->[$i][$j]);
}
}
}
if($debug) {
print "LCS: ";
if(@LCS) {
print join(" ",@LCS),"\n";
}
else {
print "-\n";
}
}
}
if($$base>0) {
$$score=$$hit/$$base;
}
else {
$$score=0;
}
}
sub lcs_inner {
my $model=shift;
my $peer=shift;
my $hitMask=shift;
my $m=scalar @$model; # length of model
my $n=scalar @$peer; # length of peer
my ($i,$j);
my (@c,@b);
if(@{$model}==0) {
return;
}
@c=();
@b=();
# initialize boundary condition and
# the DP array
for($i=0;$i<=$m;$i++) {
push(@c,[]);
push(@b,[]);
for($j=0;$j<=$n;$j++) {
push(@{$c[$i]},0);
push(@{$b[$i]},0);
}
}
for($i=1;$i<=$m;$i++) {
for($j=1;$j<=$n;$j++) {
if($model->[$i-1] eq $peer->[$j-1]) {
# recursively solve the i-1 subproblem
$c[$i][$j]=$c[$i-1][$j-1]+1;
$b[$i][$j]="\\"; # go diagonal
}
elsif($c[$i-1][$j]>=$c[$i][$j-1]) {
$c[$i][$j]=$c[$i-1][$j];
$b[$i][$j]="^"; # go up
}
else {
$c[$i][$j]=$c[$i][$j-1];
$b[$i][$j]="<"; # go left
}
}
}
&markLCS($hitMask,\@b,$m,$n);
}
sub wlcs {
my $model=shift;
my $peer=shift;
my $hit=shift;
my $score=shift;
my $base=shift;
my $weightFactor=shift;
my $model_1grams=shift;
my $peer_1grams=shift;
my ($i,$j,@hitMask,@LCS,$hitLen);
$$hit=0;
$$base=0;
# compute LCS length for each model/peer pair
for($i=0;$i<@{$model};$i++) {
# use @hitMask to make sure multiple peer hit won't be counted as multiple hits
@hitMask=();
for($j=0;$j<@{$model->[$i]};$j++) {
push(@hitMask,0); # initialize hit mask
}
$$base+=&wlcsWeight(scalar @{$model->[$i]},$weightFactor); # add model length
for($j=0;$j<@{$peer};$j++) {
&wlcs_inner($model->[$i],$peer->[$j],\@hitMask,$weightFactor);
}
@LCS=();
$hitLen=0;
for($j=0;$j<@{$model->[$i]};$j++) {
if($hitMask[$j]==1) {
if(exists($model_1grams->{$model->[$i][$j]})&&
exists($peer_1grams->{$model->[$i][$j]})&&
$model_1grams->{$model->[$i][$j]}>0&&
$peer_1grams->{$model->[$i][$j]}>0) {
$hitLen++;
if($j+1<@{$model->[$i]}&&$hitMask[$j+1]==0) {
$$hit+=&wlcsWeight($hitLen,$weightFactor);
$hitLen=0; # reset hit length
}
elsif($j+1==@{$model->[$i]}) {
# end of sentence
$$hit+=&wlcsWeight($hitLen,$weightFactor);
$hitLen=0; # reset hit length
}
#---------------------------------------------
# bookkeeping to clip over counting
# everytime a hit is found it is deducted
# from both model and peer unigram count
# if a unigram count already involve in
# one LCS match then it will not be counted
# if it match another token in the model
# unit. This will make sure LCS score
# is always lower than unigram score
$model_1grams->{$model->[$i][$j]}--;
$peer_1grams->{$model->[$i][$j]}--;
push(@LCS,$model->[$i][$j]);
}
}
}
if($debug) {
print "ROUGE-W: ";
if(@LCS) {
print join(" ",@LCS),"\n";
}
else {
print "-\n";
}
}
}
$$score=wlcsWeightInverse($$hit/$$base,$weightFactor);
}
sub wlcsWeight {
my $r=shift;
my $power=shift;
return $r**$power;
}
sub wlcsWeightInverse {
my $r=shift;
my $power=shift;
return $r**(1/$power);
}
sub wlcs_inner {
my $model=shift;
my $peer=shift;
my $hitMask=shift;
my $weightFactor=shift;
my $m=scalar @$model; # length of model
my $n=scalar @$peer; # length of peer
my ($i,$j);
my (@c,@b,@l);
if(@{$model}==0) {
return;
}
@c=();
@b=();
@l=(); # the length of consecutive matches so far
# initialize boundary condition and
# the DP array
for($i=0;$i<=$m;$i++) {
push(@c,[]);
push(@b,[]);
push(@l,[]);
for($j=0;$j<=$n;$j++) {
push(@{$c[$i]},0);
push(@{$b[$i]},0);
push(@{$l[$i]},0);
}
}
for($i=1;$i<=$m;$i++) {
for($j=1;$j<=$n;$j++) {
if($model->[$i-1] eq $peer->[$j-1]) {
# recursively solve the i-1 subproblem
$k=$l[$i-1][$j-1];
$c[$i][$j]=$c[$i-1][$j-1]+&wlcsWeight($k+1,$weightFactor)-&wlcsWeight($k,$weightFactor);
$b[$i][$j]="\\"; # go diagonal
$l[$i][$j]=$k+1; # extend the consecutive matching sequence
}
elsif($c[$i-1][$j]>=$c[$i][$j-1]) {
$c[$i][$j]=$c[$i-1][$j];
$b[$i][$j]="^"; # go up
$l[$i][$j]=0; # no match at this position
}
else {
$c[$i][$j]=$c[$i][$j-1];
$b[$i][$j]="<"; # go left
$l[$i][$j]=0; # no match at this position
}
}
}
&markLCS($hitMask,\@b,$m,$n);
}
sub markLCS {
my $hitMask=shift;
my $b=shift;
my $i=shift;
my $j=shift;
while($i!=0&&$j!=0) {
if($b->[$i][$j] eq "\\") {
$i--;
$j--;
$hitMask->[$i]=1; # mark current model position as a hit
}
elsif($b->[$i][$j] eq "^") {
$i--;
}
elsif($b->[$i][$j] eq "<") {
$j--;
}
else {
die "Illegal move in markLCS: ($i,$j): \"$b->[$i][$j]\".\n";
}
}
}
# currently only support simple lexical matching
sub getBEScore {
my $modelBEs=shift;
my $peerBEs=shift;
my $hit=shift;
my $score=shift;
my ($s,$t,@tokens);
$$hit=0;
@tokens=keys (%$modelBEs);
foreach $t (@tokens) {
if($t ne "_cn_") {
my $h;
$h=0;
if(exists($peerBEs->{$t})) {
$h=$peerBEs->{$t}<=$modelBEs->{$t}?
$peerBEs->{$t}:$modelBEs->{$t}; # clip
$$hit+=$h;
if(defined($opt_v)) {
print "* Match: $t\n";
}
}
}
}
if($modelBEs->{"_cn_"}!=0) {
$$score=sprintf("%07.5f",$$hit/$modelBEs->{"_cn_"});
}
else {
# no instance of BE at this length
$$score=0;
# die "model BE has zero instance\n";
}
}
sub MorphStem {
my $token=shift;
my ($os,$ltoken);
if(!defined($token)||length($token)==0) {
return undef;
}
$ltoken=$token;
$ltoken=~tr/A-Z/a-z/;
if(exists($exceptiondb{$ltoken})) {
return $exceptiondb{$ltoken};
}
$os=$ltoken;
return stem($os);
}
sub createNGram {
my $text=shift;
my $g=shift;
my $NSIZE=shift;
my @mx_tokens=();
my @m_tokens=();
my ($i,$j);
my ($gram);
my ($count);
my ($byteSize);
# remove stopwords
if($useStopwords) {
%stopwords=(); # consider stop words
}
unless(defined($text)) {
$g->{"_cn_"}=0;
return;
}
@mx_tokens=split(/\s+/,$text);
$byteSize=0;
for($i=0;$i<=$#mx_tokens;$i++) {
unless(exists($stopwords{$mx_tokens[$i]})) {
$byteSize+=length($mx_tokens[$i])+1; # the length of words in bytes so far + 1 space
if($mx_tokens[$i]=~/^[a-z0-9\$]/o) {
if(defined($opt_m)) {
# use stemmer
# only consider words starting with these characters
# use Porter stemmer
my $stem;
$stem=$mx_tokens[$i];
if(length($stem)>3) {
push(@m_tokens,&MorphStem($stem));
}
else { # no stemmer as default
push(@m_tokens,$mx_tokens[$i]);
}
}
else { # no stemmer
push(@m_tokens,$mx_tokens[$i]);
}
}
}
}
#-------------------------------------
# create ngram
$count=0;
for($i=0;$i<=$#m_tokens-$NSIZE+1;$i++) {
$gram=$m_tokens[$i];
for($j=$i+1;$j<=$i+$NSIZE-1;$j++) {
$gram.=" $m_tokens[$j]";
}
$count++;
unless(exists($g->{$gram})) {
$g->{$gram}=1;
}
else {
$g->{$gram}++;
}
}
# save total number of tokens
$g->{"_cn_"}=$count;
}
sub createSkipBigram {
my $text=shift;
my $g=shift;
my $skipDistance=shift;
my @mx_tokens=();
my @m_tokens=();
my ($i,$j);
my ($gram);
my ($count);
my ($byteSize);
# remove stopwords
if($useStopwords) {
%stopwords=(); # consider stop words
}
unless(defined($text)) {
$g->{"_cn_"}=0;
return;
}
@mx_tokens=split(/\s+/,$text);
$byteSize=0;
for($i=0;$i<=$#mx_tokens;$i++) {
unless(exists($stopwords{$mx_tokens[$i]})) {
$byteSize+=length($mx_tokens[$i])+1; # the length of words in bytes so far + 1 space
if($mx_tokens[$i]=~/^[a-z0-9\$]/o) {
if(defined($opt_m)) {
# use stemmer
# only consider words starting with these characters
# use Porter stemmer
my $stem;
$stem=$mx_tokens[$i];
if(length($stem)>3) {
push(@m_tokens,&MorphStem($stem));
}
else { # no stemmer as default
push(@m_tokens,$mx_tokens[$i]);
}
}
else { # no stemmer
push(@m_tokens,$mx_tokens[$i]);
}
}
}
}
#-------------------------------------
# create ngram
$count=0;
for($i=0;$i<$#m_tokens;$i++) {
if(defined($opt_u)) {
# add unigram count
$gram=$m_tokens[$i];
$count++;
unless(exists($g->{$gram})) {
$g->{$gram}=1;
}
else {
$g->{$gram}++;
}
}
for($j=$i+1;
$j<=$#m_tokens&&($skipDistance<0||$j<=$i+$skipDistance+1);
$j++) {
$gram=$m_tokens[$i];
$gram.=" $m_tokens[$j]";
$count++;
unless(exists($g->{$gram})) {
$g->{$gram}=1;
}
else {
$g->{$gram}++;
}
}
}
# save total number of tokens
$g->{"_cn_"}=$count;
}
sub createBE {
my $BEList=shift;
my $BEMap=shift;
my $BEMode=shift;
my ($i);
$BEMap->{"_cn_"}=0;
unless(scalar @{$BEList} > 0) {
return;
}
for($i=0;$i<=$#{$BEList};$i++) {
my (@fds);
my ($be,$stemH,$stemM);
$be=$BEList->[$i];
$be=~tr/A-Z/a-z/;
@fds=split(/\|/,$be);
if(@fds!=3) {
print STDERR "Basic Element (BE) input file is invalid: *$be*\n";
print STDERR "A BE file has to be in this format per line: HEAD|MODIFIER|RELATION\n";
die "For more infomation about BE, go to: http://www.isi.edu/~cyl/BE\n";
}
$stemH=$fds[0];
$stemM=$fds[1];
if(defined($opt_m)) {
# use stemmer
# only consider words starting with these characters
# use Porter stemmer
if(length($stemH)>3) {
$stemH=&MorphStemMulti($stemH);
}
if($stemM ne "NIL"&&
length($stemM)>3) {
$stemM=&MorphStemMulti($stemM);
}
}
if($BEMode eq "H"&&
$stemM eq "nil") {
unless(exists($BEMap->{$stemH})) {
$BEMap->{$stemH}=0;
}
$BEMap->{$stemH}++;
$BEMap->{"_cn_"}++;
}
elsif($BEMode eq "HM"&&
$stemM ne "nil") {
my $pair="$stemH|$stemM";
unless(exists($BEMap->{$pair})) {
$BEMap->{$pair}=0;
}
$BEMap->{$pair}++;
$BEMap->{"_cn_"}++;
}
elsif($BEMode eq "HMR"&&
$fds[2] ne "nil") {
my $triple="$stemH|$stemM|$fds[2]";
unless(exists($BEMap->{$triple})) {
$BEMap->{$triple}=0;
}
$BEMap->{$triple}++;
$BEMap->{"_cn_"}++;
}
elsif($BEMode eq "HM1") {
my $pair="$stemH|$stemM";
unless(exists($BEMap->{$pair})) {
$BEMap->{$pair}=0;
}
$BEMap->{$pair}++;
$BEMap->{"_cn_"}++;
}
elsif($BEMode eq "HMR1"&&
$fds[1] ne "nil") {
# relation can be "NIL" but modifier has to have value
my $triple="$stemH|$stemM|$fds[2]";
unless(exists($BEMap->{$triple})) {
$BEMap->{$triple}=0;
}
$BEMap->{$triple}++;
$BEMap->{"_cn_"}++;
}
elsif($BEMode eq "HMR2") {
# modifier and relation can be "NIL"
my $triple="$stemH|$stemM|$fds[2]";
unless(exists($BEMap->{$triple})) {
$BEMap->{$triple}=0;
}
$BEMap->{$triple}++;
$BEMap->{"_cn_"}++;
}
}
}
sub MorphStemMulti {
my $string=shift;
my (@tokens,@stems,$t,$i);
@tokens=split(/\s+/,$string);
foreach $t (@tokens) {
if($t=~/[A-Za-z0-9]/o&&
$t!~/(-LRB-|-RRB-|-LSB-|-RSB-|-LCB-|-RCB-)/o) {
my $s;
if(defined($s=&MorphStem($t))) {
$t=$s;
}
push(@stems,$t);
}
else {
push(@stems,$t);
}
}
return join(" ",@stems);
}
sub tokenizeText {
my $text=shift;
my $tokenizedText=shift;
my @mx_tokens=();
my ($i,$byteSize);
# remove stopwords
if($useStopwords) {
%stopwords=(); # consider stop words
}
unless(defined($text)) {
return;
}
@mx_tokens=split(/\s+/,$text);
$byteSize=0;
@{$tokenizedText}=();
for($i=0;$i<=$#mx_tokens;$i++) {
unless(exists($stopwords{$mx_tokens[$i]})) {
$byteSize+=length($mx_tokens[$i])+1; # the length of words in bytes so far + 1 space
if($mx_tokens[$i]=~/^[a-z0-9\$]/o) {
if(defined($opt_m)) {
# use stemmer
# only consider words starting with these characters
# use Porter stemmer
my $stem;
$stem=$mx_tokens[$i];
if(length($stem)>3) {
push(@{$tokenizedText},&MorphStem($stem));
}
else { # no stemmer as default
push(@{$tokenizedText},$mx_tokens[$i]);
}
}
else { # no stemmer
push(@{$tokenizedText},$mx_tokens[$i]);
}
}
}
}
}
sub tokenizeText_LCS {
my $text=shift;
my $tokenizedText=shift;
my $lengthLimit=shift;
my $byteLimit=shift;
my @mx_tokens=();
my ($i,$byteSize,$t,$done);
# remove stopwords
if($useStopwords) {
%stopwords=(); # consider stop words
}
if(@{$text}==0) {
return;
}
$byteSize=0;
@{$tokenizedText}=();
$done=0;
for($t=0;$t<@{$text}&&$done==0;$t++) {
@mx_tokens=split(/\s+/,$text->[$t]);
# tokenized array for each separate unit (for example, sentence)
push(@{$tokenizedText},[]);
for($i=0;$i<=$#mx_tokens;$i++) {
unless(exists($stopwords{$mx_tokens[$i]})) {
$byteSize+=length($mx_tokens[$i])+1; # the length of words in bytes so far + 1 space
if($mx_tokens[$i]=~/^[a-z0-9\$]/o) {
if(defined($opt_m)) {
# use stemmer
# only consider words starting with these characters
# use Porter stemmer
my $stem;
$stem=$mx_tokens[$i];
if(length($stem)>3) {
push(@{$tokenizedText->[$t]},&MorphStem($stem));
}
else { # no stemmer as default
push(@{$tokenizedText->[$t]},$mx_tokens[$i]);
}
}
else { # no stemmer
push(@{$tokenizedText->[$t]},$mx_tokens[$i]);
}
}
}
}
}
}
# Input file configuration is a list of peer/model pair for each evaluation
# instance. Each evaluation pair is in a line separated by white spaces
# characters.
sub readFileList {
my ($ROUGEEvals)=shift;
my ($ROUGEEvalIDs)=shift;
my ($ROUGEPeerIDTable)=shift;
my ($doc)=shift;
my ($evalID,$pair);
my ($inputFormat,$peerFile,$modelFile,$peerID,$modelID);
my (@files);
$evalID=1; # automatically generated evaluation ID starting from 1
$peerID=$systemID;
$modelID="M";
unless(exists($ROUGEPeerIDTable->{$peerID})) {
$ROUGEPeerIDTable->{$peerID}=1;
}
while(defined($pair=<$doc>)) {
my ($peerPath,$modelPath);
if($pair!~/^\#/o&&
$pair!~/^\s*$/o) { # Lines start with '#' is a comment line
chomp($pair);
$pair=~s/^\s+//;
$pair=~s/\s+$//;
@files=split(/\s+/,$pair);
if(scalar @files < 2) {
die "File list has to have at least 2 filenames per line (peer model1 model2 ... modelN)\n";
}
$peerFile=$files[0];
unless(exists($ROUGEEvals->{$evalID})) {
$ROUGEEvals->{$evalID}={};
push(@{$ROUGEEvalIDs},$evalID);
$ROUGEEvals->{$evalID}{"IF"}=$opt_z;
}
unless(exists($ROUGEPeerIDTable->{$peerID})) {
$ROUGEPeerIDTable->{$peerID}=1; # save peer ID for reference
}
if(exists($ROUGEEvals->{$evalID})) {
unless(exists($ROUGEEvals->{$evalID}{"Ps"})) {
$ROUGEEvals->{$evalID}{"Ps"}={};
$ROUGEEvals->{$evalID}{"PIDList"}=[];
}
push(@{$ROUGEEvals->{$evalID}{"PIDList"}},$peerID); # save peer IDs
}
else {
die "(PEERS) Evaluation database does not contain entry for this evaluation ID: $evalID\n";
}
# remove leading and trailing newlines and
# spaces
if(exists($ROUGEEvals->{$evalID}{"Ps"})) {
$ROUGEEvals->{$evalID}{"Ps"}{$peerID}=$peerFile; # save peer filename
}
else {
die "(P) Evaluation database does not contain entry for this evaluation ID: $evalID\n";
}
for($mid=1;$mid<=$#files;$mid++) {
$modelFile=$files[$mid];
if(exists($ROUGEEvals->{$evalID})) {
unless(exists($ROUGEEvals->{$evalID}{"Ms"})) {
$ROUGEEvals->{$evalID}{"Ms"}={};
$ROUGEEvals->{$evalID}{"MIDList"}=[];
}
push(@{$ROUGEEvals->{$evalID}{"MIDList"}},"$modelID.$mid"); # save model IDs
}
else {
die "(MODELS) Evaluation database does not contain entry for this evaluation ID: $evalID\n";
}
# remove leading and trailing newlines and
# spaces
if(exists($ROUGEEvals->{$evalID}{"Ms"})) {
$ROUGEEvals->{$evalID}{"Ms"}{"$modelID.$mid"}=$modelFile; # save peer filename
}
else {
die "(M) Evaluation database does not contain entry for this evaluation ID: $evalID\n";
}
}
$evalID++;
}
}
}
# read and parse ROUGE evaluation file
sub readEvals {
my ($ROUGEEvals)=shift;
my ($ROUGEEvalIDs)=shift;
my ($ROUGEPeerIDTable)=shift;
my ($node)=shift;
my ($evalID)=shift;
my ($inputFormat,$peerRoot,$modelRoot,$peerFile,$modelFile,$peerID,$modelID);
if(defined($opt_z)) {
# Input file configuration is a list of peer/model pair for each evaluation
# instance. Each evaluation pair is in a line separated by white spaces
# characters.
&readFileList($ROUGEEvals,$ROUGEEvalIDs,$ROUGEPeerIDTable,$node);
return;
}
# Otherwise, the input file is the standard ROUGE XML evaluation configuration
# file.
if($node->getNodeType==ELEMENT_NODE||
$node->getNodeType==DOCUMENT_NODE) {
if($node->getNodeType==ELEMENT_NODE) {
$nodeName=$node->getNodeName;
if($nodeName=~/^EVAL$/oi) {
$evalID=$node->getAttributeNode("ID")->getValue;
unless(exists($ROUGEEvals->{$evalID})) {
$ROUGEEvals->{$evalID}={};
push(@{$ROUGEEvalIDs},$evalID);
}
foreach my $child ($node->getChildNodes()) {
&readEvals($ROUGEEvals,$ROUGEEvalIDs,$ROUGEPeerIDTable,$child,$evalID);
}
}
elsif($nodeName=~/^INPUT-FORMAT$/oi) {
$inputFormat=$node->getAttributeNode("TYPE")->getValue;
if($inputFormat=~/^(SEE|ISI|SPL|SIMPLE)$/oi) { # SPL: one sentence per line
if(exists($ROUGEEvals->{$evalID})) {
$ROUGEEvals->{$evalID}{"IF"}=$inputFormat;
}
else {
die "(INPUT-FORMAT) Evaluation database does not contain entry for this evaluation ID: $evalID\n";
}
}
else {
die "Unknown input type: $inputFormat\n";
}
}
elsif($nodeName=~/^PEER-ROOT$/oi) {
foreach my $child ($node->getChildNodes()) {
if($child->getNodeType==TEXT_NODE) {
$peerRoot=$child->getData;
# remove leading and trailing newlines and
# spaces
$peerRoot=~s/^[\n\s]+//;
$peerRoot=~s/[\n\s]+$//;
if(exists($ROUGEEvals->{$evalID})) {
$ROUGEEvals->{$evalID}{"PR"}=$peerRoot;
}
else {
die "(PEER-ROOT) Evaluation database does not contain entry for this evaluation ID: $evalID\n";
}
}
}
}
elsif($nodeName=~/^MODEL-ROOT$/oi) {
foreach my $child ($node->getChildNodes()) {
if($child->getNodeType==TEXT_NODE) {
$modelRoot=$child->getData;
# remove leading and trailing newlines and
# spaces
$modelRoot=~s/^[\n\s]+//;
$modelRoot=~s/[\n\s]+$//;
if(exists($ROUGEEvals->{$evalID})) {
$ROUGEEvals->{$evalID}{"MR"}=$modelRoot;
}
else {
die "(MODEL-ROOT) Evaluation database does not contain entry for this evaluation ID: $evalID\n";
}
}
}
}
elsif($nodeName=~/^PEERS$/oi) {
foreach my $child ($node->getChildNodes()) {
if($child->getNodeType==ELEMENT_NODE&&
$child->getNodeName=~/^P$/oi) {
$peerID=$child->getAttributeNode("ID")->getValue;
unless(exists($ROUGEPeerIDTable->{$peerID})) {
$ROUGEPeerIDTable->{$peerID}=1; # save peer ID for reference
}
if(exists($ROUGEEvals->{$evalID})) {
unless(exists($ROUGEEvals->{$evalID}{"Ps"})) {
$ROUGEEvals->{$evalID}{"Ps"}={};
$ROUGEEvals->{$evalID}{"PIDList"}=[];
}
push(@{$ROUGEEvals->{$evalID}{"PIDList"}},$peerID); # save peer IDs
}
else {
die "(PEERS) Evaluation database does not contain entry for this evaluation ID: $evalID\n";
}
foreach my $grandchild ($child->getChildNodes()) {
if($grandchild->getNodeType==TEXT_NODE) {
$peerFile=$grandchild->getData;
# remove leading and trailing newlines and
# spaces
$peerFile=~s/^[\n\s]+//;
$peerFile=~s/[\n\s]+$//;
if(exists($ROUGEEvals->{$evalID}{"Ps"})) {
$ROUGEEvals->{$evalID}{"Ps"}{$peerID}=$peerFile; # save peer filename
}
else {
die "(P) Evaluation database does not contain entry for this evaluation ID: $evalID\n";
}
}
}
}
}
}
elsif($nodeName=~/^MODELS$/oi) {
foreach my $child ($node->getChildNodes()) {
if($child->getNodeType==ELEMENT_NODE&&
$child->getNodeName=~/^M$/oi) {
$modelID=$child->getAttributeNode("ID")->getValue;
if(exists($ROUGEEvals->{$evalID})) {
unless(exists($ROUGEEvals->{$evalID}{"Ms"})) {
$ROUGEEvals->{$evalID}{"Ms"}={};
$ROUGEEvals->{$evalID}{"MIDList"}=[];
}
push(@{$ROUGEEvals->{$evalID}{"MIDList"}},$modelID); # save model IDs
}
else {
die "(MODELS) Evaluation database does not contain entry for this evaluation ID: $evalID\n";
}
foreach my $grandchild ($child->getChildNodes()) {
if($grandchild->getNodeType==TEXT_NODE) {
$modelFile=$grandchild->getData;
# remove leading and trailing newlines and
# spaces
$modelFile=~s/^[\n\s]+//;
$modelFile=~s/[\n\s]+$//;
if(exists($ROUGEEvals->{$evalID}{"Ms"})) {
$ROUGEEvals->{$evalID}{"Ms"}{$modelID}=$modelFile; # save peer filename
}
else {
die "(M) Evaluation database does not contain entry for this evaluation ID: $evalID\n";
}
}
}
}
}
}
else {
foreach my $child ($node->getChildNodes()) {
&readEvals($ROUGEEvals,$ROUGEEvalIDs,$ROUGEPeerIDTable,$child,$evalID);
}
}
}
else {
foreach my $child ($node->getChildNodes()) {
&readEvals($ROUGEEvals,$ROUGEEvalIDs,$ROUGEPeerIDTable,$child,$evalID);
}
}
}
else {
if(defined($node->getChildNodes())) {
foreach my $child ($node->getChildNodes()) {
&readEvals($ROUGEEvals,$ROUGEEvalIDs,$ROUGEPeerIDTable,$child,$evalID);
}
}
}
}
# Porter stemmer in Perl. Few comments, but it's easy to follow against the rules in the original
# paper, in
#
# Porter, 1980, An algorithm for suffix stripping, Program, Vol. 14,
# no. 3, pp 130-137,
#
# see also http://www.tartarus.org/~martin/PorterStemmer
# Release 1
local %step2list;
local %step3list;
local ($c, $v, $C, $V, $mgr0, $meq1, $mgr1, $_v);
sub stem
{ my ($stem, $suffix, $firstch);
my $w = shift;
if (length($w) < 3) { return $w; } # length at least 3
# now map initial y to Y so that the patterns never treat it as vowel:
$w =~ /^./; $firstch = $&;
if ($firstch =~ /^y/) { $w = ucfirst $w; }
# Step 1a
if ($w =~ /(ss|i)es$/) { $w=$`.$1; }
elsif ($w =~ /([^s])s$/) { $w=$`.$1; }
# Step 1b
if ($w =~ /eed$/) { if ($` =~ /$mgr0/o) { chop($w); } }
elsif ($w =~ /(ed|ing)$/)
{ $stem = $`;
if ($stem =~ /$_v/o)
{ $w = $stem;
if ($w =~ /(at|bl|iz)$/) { $w .= "e"; }
elsif ($w =~ /([^aeiouylsz])\1$/) { chop($w); }
elsif ($w =~ /^${C}${v}[^aeiouwxy]$/o) { $w .= "e"; }
}
}
# Step 1c
if ($w =~ /y$/) { $stem = $`; if ($stem =~ /$_v/o) { $w = $stem."i"; } }
# Step 2
if ($w =~ /(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/)
{ $stem = $`; $suffix = $1;
if ($stem =~ /$mgr0/o) { $w = $stem . $step2list{$suffix}; }
}
# Step 3
if ($w =~ /(icate|ative|alize|iciti|ical|ful|ness)$/)
{ $stem = $`; $suffix = $1;
if ($stem =~ /$mgr0/o) { $w = $stem . $step3list{$suffix}; }
}
# Step 4
# CYL: Modified 02/14/2004, a word ended in -ement will not try the rules "-ment" and "-ent"
# if ($w =~ /(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/)
# elsif ($w =~ /(s|t)(ion)$/)
# { $stem = $` . $1; if ($stem =~ /$mgr1/o) { $w = $stem; } }
if ($w =~ /(al|ance|ence|er|ic|able|ible|ant|ement|ou|ism|ate|iti|ous|ive|ize)$/)
{ $stem = $`; if ($stem =~ /$mgr1/o) { $w = $stem; } }
if ($w =~ /ment$/)
{ $stem = $`; if ($stem =~ /$mgr1/o) { $w = $stem; } }
if ($w =~ /ent$/)
{ $stem = $`; if ($stem =~ /$mgr1/o) { $w = $stem; } }
elsif ($w =~ /(s|t)(ion)$/)
{ $stem = $` . $1; if ($stem =~ /$mgr1/o) { $w = $stem; } }
# Step 5
if ($w =~ /e$/)
{ $stem = $`;
if ($stem =~ /$mgr1/o or
($stem =~ /$meq1/o and not $stem =~ /^${C}${v}[^aeiouwxy]$/o))
{ $w = $stem; }
}
if ($w =~ /ll$/ and $w =~ /$mgr1/o) { chop($w); }
# and turn initial Y back to y
if ($firstch =~ /^y/) { $w = lcfirst $w; }
return $w;
}
sub initialise {
%step2list =
( 'ational'=>'ate', 'tional'=>'tion', 'enci'=>'ence', 'anci'=>'ance', 'izer'=>'ize', 'bli'=>'ble',
'alli'=>'al', 'entli'=>'ent', 'eli'=>'e', 'ousli'=>'ous', 'ization'=>'ize', 'ation'=>'ate',
'ator'=>'ate', 'alism'=>'al', 'iveness'=>'ive', 'fulness'=>'ful', 'ousness'=>'ous', 'aliti'=>'al',
'iviti'=>'ive', 'biliti'=>'ble', 'logi'=>'log');
%step3list =
('icate'=>'ic', 'ative'=>'', 'alize'=>'al', 'iciti'=>'ic', 'ical'=>'ic', 'ful'=>'', 'ness'=>'');
$c = "[^aeiou]"; # consonant
$v = "[aeiouy]"; # vowel
$C = "${c}[^aeiouy]*"; # consonant sequence
$V = "${v}[aeiou]*"; # vowel sequence
$mgr0 = "^(${C})?${V}${C}"; # [C]VC... is m>0
$meq1 = "^(${C})?${V}${C}(${V})?" . '$'; # [C]VC[V] is m=1
$mgr1 = "^(${C})?${V}${C}${V}${C}"; # [C]VCVC... is m>1
$_v = "^(${C})?${v}"; # vowel in stem
}