summary | refs | log | tree | commit | diff
path: root/vest
diff options
context:
space:
mode:
author: Chris Dyer <redpony@gmail.com> 2009-12-14 20:35:11 -0500
committer: Chris Dyer <redpony@gmail.com> 2009-12-14 20:35:11 -0500
commit: 851e389dffdd6996ea32d70defb8906de80b9edc (patch)
tree: 8c68ee77205badc056b8ab5b332e67e3e98017df /vest
parent: dc6930c00b4b276883280cff1ed6dcd9ddef03c7 (diff)
few small fixes of alignment tools, add new orthographic similarity feature for word aligner, final naming of directories, libraries in cdec
Diffstat (limited to 'vest')
-rw-r--r-- vest/Makefile.am 16
-rwxr-xr-x vest/dist-vest.pl 65
2 files changed, 26 insertions, 55 deletions
diff --git a/vest/Makefile.am b/vest/Makefile.am
index 87c2383a..d7d08133 100644
--- a/vest/Makefile.am
+++ b/vest/Makefile.am
@@ -8,25 +8,25 @@ bin_PROGRAMS = \
union_forests
union_forests_SOURCES = union_forests.cc
-union_forests_LDADD = $(top_srcdir)/src/libhg.a -lz
+union_forests_LDADD = $(top_srcdir)/decoder/libcdec.a -lz
fast_score_SOURCES = fast_score.cc ter.cc comb_scorer.cc scorer.cc viterbi_envelope.cc
-fast_score_LDADD = $(top_srcdir)/src/libhg.a -lz
+fast_score_LDADD = $(top_srcdir)/decoder/libcdec.a -lz
mr_vest_generate_mapper_input_SOURCES = mr_vest_generate_mapper_input.cc line_optimizer.cc
-mr_vest_generate_mapper_input_LDADD = $(top_srcdir)/src/libhg.a -lz
+mr_vest_generate_mapper_input_LDADD = $(top_srcdir)/decoder/libcdec.a -lz
mr_vest_map_SOURCES = viterbi_envelope.cc error_surface.cc mr_vest_map.cc scorer.cc ter.cc comb_scorer.cc line_optimizer.cc
-mr_vest_map_LDADD = $(top_srcdir)/src/libhg.a -lz
+mr_vest_map_LDADD = $(top_srcdir)/decoder/libcdec.a -lz
mr_vest_reduce_SOURCES = error_surface.cc mr_vest_reduce.cc scorer.cc ter.cc comb_scorer.cc line_optimizer.cc viterbi_envelope.cc
-mr_vest_reduce_LDADD = $(top_srcdir)/src/libhg.a -lz
+mr_vest_reduce_LDADD = $(top_srcdir)/decoder/libcdec.a -lz
scorer_test_SOURCES = scorer_test.cc scorer.cc ter.cc comb_scorer.cc viterbi_envelope.cc
-scorer_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) $(top_srcdir)/src/libhg.a -lz
+scorer_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) $(top_srcdir)/decoder/libcdec.a -lz
lo_test_SOURCES = lo_test.cc scorer.cc ter.cc comb_scorer.cc viterbi_envelope.cc error_surface.cc line_optimizer.cc
-lo_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) $(top_srcdir)/src/libhg.a -lz
+lo_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) $(top_srcdir)/decoder/libcdec.a -lz
-AM_CPPFLAGS = -W -Wall -Wno-sign-compare $(BOOST_CPPFLAGS) $(GTEST_CPPFLAGS) -I$(top_srcdir)/src
+AM_CPPFLAGS = -W -Wall -Wno-sign-compare $(BOOST_CPPFLAGS) $(GTEST_CPPFLAGS) -I$(top_srcdir)/decoder
AM_LDFLAGS = $(BOOST_LDFLAGS) $(BOOST_PROGRAM_OPTIONS_LIB)
diff --git a/vest/dist-vest.pl b/vest/dist-vest.pl
index 5528838c..31dbc61f 100755
--- a/vest/dist-vest.pl
+++ b/vest/dist-vest.pl
@@ -1,17 +1,16 @@
#!/usr/bin/env perl
+use strict;
+my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); push @INC, $SCRIPT_DIR; }
use Getopt::Long;
use IPC::Open2;
use strict;
use POSIX ":sys_wait_h";
-my $mydir = `dirname $0`;
-chomp $mydir;
# Default settings
-my $srcFile = "/fs/cliplab/mteval/Evaluation/Chinese-English/mt03.src.txt";
-my $refFiles = "/fs/cliplab/mteval/Evaluation/Chinese-English/mt03.ref.txt.*";
-my $bin_dir = "/fs/clip-software/cdec/bin";
-$bin_dir = "/Users/redpony/cdyer-svn-root/cdec/vest/bin_dir";
+my $srcFile;
+my $refFiles;
+my $bin_dir = $SCRIPT_DIR;
die "Bin directory $bin_dir missing/inaccessible" unless -d $bin_dir;
my $FAST_SCORE="$bin_dir/fast_score";
die "Can't find $FAST_SCORE" unless -x $FAST_SCORE;
@@ -22,7 +21,7 @@ my $SCORER = $FAST_SCORE;
die "Can't find $MAPPER" unless -x $MAPPER;
my $forestUnion = "$bin_dir/union_forests";
die "Can't find $forestUnion" unless -x $forestUnion;
-my $cdec = "$bin_dir/cdec";
+my $cdec = "$bin_dir/../decoder/cdec";
die "Can't find decoder in $cdec" unless -x $cdec;
my $decoder = $cdec;
my $lines_per_mapper = 440;
@@ -153,7 +152,7 @@ $SIG{HUP} = "cleanup";
my $decoderBase = `basename $decoder`; chomp $decoderBase;
my $newIniFile = "$dir/$decoderBase.ini";
-my $parallelize = "$mydir/parallelize.pl";
+my $parallelize = '/chomes/redpony/svn-trunk/sa-utils/parallelize.pl';
my $inputFileName = "$dir/input";
my $user = $ENV{"USER"};
@@ -254,15 +253,18 @@ while (1){
print LOGFILE "\nUNION FORESTS\n";
print LOGFILE `date`;
my $mergeLog="$logdir/prune-merge.log.$iteration";
- $cmd = "$forestUnion -r $dir/hgs -n $dir/hgs-current -s $devSize";
- print LOGFILE "COMMAND:\n$cmd\n";
- $result = system($cmd);
+ `rm -rf $dir/hgs`;
+ `mv $dir/hgs-current $dir/hgs`;
+ #$cmd = "$forestUnion -r $dir/hgs -n $dir/hgs-current -s $devSize";
+ #print LOGFILE "COMMAND:\n$cmd\n";
+ #$result = system($cmd);
unless ($result == 0){
cleanup();
print LOGFILE "ERROR: merge command returned non-zero exit code $result\n";
die;
}
`rm -f $dir/hgs-current/*.json.gz`; # clean up old HGs, they've been moved to the repository
+ `mkdir -p $dir/hgs-current`;
my $score = 0;
my $icc = 0;
@@ -303,7 +305,7 @@ while (1){
my $mapoutput = $shard;
my $client_name = $shard;
$client_name =~ s/mapinput.//;
- $client_name = "fmert.$client_name";
+ $client_name = "vest.$client_name";
$mapoutput =~ s/mapinput/mapoutput/;
push @mapoutputs, "$dir/splag.$im1/$mapoutput";
$o2i{"$dir/splag.$im1/$mapoutput"} = "$dir/splag.$im1/$shard";
@@ -548,36 +550,9 @@ Options:
--decoder <decoder path>
Decoder binary to use.
- --decode-nodes <nodelist>
- A list of nodes used for parallel decoding. If specific nodes
- are not desired, use "1" for each node requested. Defaults to
- "1 1 1 1 1 1 1 1 1 1 1 1 1 1 1", which indicates a request for
- 15 nodes.
-
- --dont-clean
- If present, this flag prevents intermediate files, including
- run files and cumulative files, from being automatically removed
- after a successful optimization run (these files are left if the
- run fails for any reason). If used, a makefile containing
- cleanup commands is written to the directory. To clean up
- the intermediate files, invoke make without any arguments.
-
- --dry-run
- Prints out the settings and exits without doing anything.
-
- --epsilon <epsilon>
- Require that the dev set BLEU score improve by at least <epsilon>
- within <interval> iterations (controlled by parameter --interval).
- If not specified, defaults to .002.
-
--help
Print this message and exit.
- --interval <i>
- Require that the dev set BLEU score improve by at least <epsilon>
- (controlled by parameter --epsilon) within <interval> iterations.
- If not specified, defaults to 5.
-
--iteration <I>
Starting iteration number. If not specified, defaults to 1.
@@ -586,18 +561,15 @@ Options:
to 10.
--pmem <N>
- Amount of physical memory requested for parallel decoding jobs,
- in the format expected by qsub. If not specified, defaults to
- 2g.
+ Amount of physical memory requested for parallel decoding jobs.
--ref-files <files>
Dev set ref files. This option takes only a single string argument.
To use multiple files (including file globbing), this argument should
- be quoted. If not specified, defaults to
- /fs/cliplab/mteval/Evaluation/Chinese-English/mt03.ref.txt.*
+ be quoted.
--metric <method>
- Metric to optimize. See fmert's --metric option for values.
+ Metric to optimize.
Example values: IBM_BLEU, NIST_BLEU, Koehn_BLEU, TER, Combi
--normalize <feature-name>
@@ -609,8 +581,7 @@ Options:
set this parameter to explore other directions. Defaults to 5.
--source-file <file>
- Dev set source file. If not specified, defaults to
- /fs/cliplab/mteval/Evaluation/Chinese-English/mt03.src.txt
+ Dev set source file.
--weights <file>
A file specifying initial feature weights. The format is