diff options
author | Chris Dyer <redpony@gmail.com> | 2009-12-14 20:35:11 -0500 |
---|---|---|
committer | Chris Dyer <redpony@gmail.com> | 2009-12-14 20:35:11 -0500 |
commit | 851e389dffdd6996ea32d70defb8906de80b9edc (patch) | |
tree | 8c68ee77205badc056b8ab5b332e67e3e98017df /vest | |
parent | dc6930c00b4b276883280cff1ed6dcd9ddef03c7 (diff) |
A few small fixes to the alignment tools; add a new orthographic similarity feature for the word aligner; finalize the naming of directories and libraries in cdec
Diffstat (limited to 'vest')
-rw-r--r-- | vest/Makefile.am | 16 | ||||
-rwxr-xr-x | vest/dist-vest.pl | 65 |
2 files changed, 26 insertions, 55 deletions
diff --git a/vest/Makefile.am b/vest/Makefile.am index 87c2383a..d7d08133 100644 --- a/vest/Makefile.am +++ b/vest/Makefile.am @@ -8,25 +8,25 @@ bin_PROGRAMS = \ union_forests union_forests_SOURCES = union_forests.cc -union_forests_LDADD = $(top_srcdir)/src/libhg.a -lz +union_forests_LDADD = $(top_srcdir)/decoder/libcdec.a -lz fast_score_SOURCES = fast_score.cc ter.cc comb_scorer.cc scorer.cc viterbi_envelope.cc -fast_score_LDADD = $(top_srcdir)/src/libhg.a -lz +fast_score_LDADD = $(top_srcdir)/decoder/libcdec.a -lz mr_vest_generate_mapper_input_SOURCES = mr_vest_generate_mapper_input.cc line_optimizer.cc -mr_vest_generate_mapper_input_LDADD = $(top_srcdir)/src/libhg.a -lz +mr_vest_generate_mapper_input_LDADD = $(top_srcdir)/decoder/libcdec.a -lz mr_vest_map_SOURCES = viterbi_envelope.cc error_surface.cc mr_vest_map.cc scorer.cc ter.cc comb_scorer.cc line_optimizer.cc -mr_vest_map_LDADD = $(top_srcdir)/src/libhg.a -lz +mr_vest_map_LDADD = $(top_srcdir)/decoder/libcdec.a -lz mr_vest_reduce_SOURCES = error_surface.cc mr_vest_reduce.cc scorer.cc ter.cc comb_scorer.cc line_optimizer.cc viterbi_envelope.cc -mr_vest_reduce_LDADD = $(top_srcdir)/src/libhg.a -lz +mr_vest_reduce_LDADD = $(top_srcdir)/decoder/libcdec.a -lz scorer_test_SOURCES = scorer_test.cc scorer.cc ter.cc comb_scorer.cc viterbi_envelope.cc -scorer_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) $(top_srcdir)/src/libhg.a -lz +scorer_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) $(top_srcdir)/decoder/libcdec.a -lz lo_test_SOURCES = lo_test.cc scorer.cc ter.cc comb_scorer.cc viterbi_envelope.cc error_surface.cc line_optimizer.cc -lo_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) $(top_srcdir)/src/libhg.a -lz +lo_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) $(top_srcdir)/decoder/libcdec.a -lz -AM_CPPFLAGS = -W -Wall -Wno-sign-compare $(BOOST_CPPFLAGS) $(GTEST_CPPFLAGS) -I$(top_srcdir)/src +AM_CPPFLAGS = -W -Wall -Wno-sign-compare $(BOOST_CPPFLAGS) $(GTEST_CPPFLAGS) -I$(top_srcdir)/decoder AM_LDFLAGS = $(BOOST_LDFLAGS) 
$(BOOST_PROGRAM_OPTIONS_LIB) diff --git a/vest/dist-vest.pl b/vest/dist-vest.pl index 5528838c..31dbc61f 100755 --- a/vest/dist-vest.pl +++ b/vest/dist-vest.pl @@ -1,17 +1,16 @@ #!/usr/bin/env perl +use strict; +my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); push @INC, $SCRIPT_DIR; } use Getopt::Long; use IPC::Open2; use strict; use POSIX ":sys_wait_h"; -my $mydir = `dirname $0`; -chomp $mydir; # Default settings -my $srcFile = "/fs/cliplab/mteval/Evaluation/Chinese-English/mt03.src.txt"; -my $refFiles = "/fs/cliplab/mteval/Evaluation/Chinese-English/mt03.ref.txt.*"; -my $bin_dir = "/fs/clip-software/cdec/bin"; -$bin_dir = "/Users/redpony/cdyer-svn-root/cdec/vest/bin_dir"; +my $srcFile; +my $refFiles; +my $bin_dir = $SCRIPT_DIR; die "Bin directory $bin_dir missing/inaccessible" unless -d $bin_dir; my $FAST_SCORE="$bin_dir/fast_score"; die "Can't find $FAST_SCORE" unless -x $FAST_SCORE; @@ -22,7 +21,7 @@ my $SCORER = $FAST_SCORE; die "Can't find $MAPPER" unless -x $MAPPER; my $forestUnion = "$bin_dir/union_forests"; die "Can't find $forestUnion" unless -x $forestUnion; -my $cdec = "$bin_dir/cdec"; +my $cdec = "$bin_dir/../decoder/cdec"; die "Can't find decoder in $cdec" unless -x $cdec; my $decoder = $cdec; my $lines_per_mapper = 440; @@ -153,7 +152,7 @@ $SIG{HUP} = "cleanup"; my $decoderBase = `basename $decoder`; chomp $decoderBase; my $newIniFile = "$dir/$decoderBase.ini"; -my $parallelize = "$mydir/parallelize.pl"; +my $parallelize = '/chomes/redpony/svn-trunk/sa-utils/parallelize.pl'; my $inputFileName = "$dir/input"; my $user = $ENV{"USER"}; @@ -254,15 +253,18 @@ while (1){ print LOGFILE "\nUNION FORESTS\n"; print LOGFILE `date`; my $mergeLog="$logdir/prune-merge.log.$iteration"; - $cmd = "$forestUnion -r $dir/hgs -n $dir/hgs-current -s $devSize"; - print LOGFILE "COMMAND:\n$cmd\n"; - $result = system($cmd); + `rm -rf $dir/hgs`; + `mv $dir/hgs-current $dir/hgs`; + #$cmd = "$forestUnion -r $dir/hgs -n 
$dir/hgs-current -s $devSize"; + #print LOGFILE "COMMAND:\n$cmd\n"; + #$result = system($cmd); unless ($result == 0){ cleanup(); print LOGFILE "ERROR: merge command returned non-zero exit code $result\n"; die; } `rm -f $dir/hgs-current/*.json.gz`; # clean up old HGs, they've been moved to the repository + `mkdir -p $dir/hgs-current`; my $score = 0; my $icc = 0; @@ -303,7 +305,7 @@ while (1){ my $mapoutput = $shard; my $client_name = $shard; $client_name =~ s/mapinput.//; - $client_name = "fmert.$client_name"; + $client_name = "vest.$client_name"; $mapoutput =~ s/mapinput/mapoutput/; push @mapoutputs, "$dir/splag.$im1/$mapoutput"; $o2i{"$dir/splag.$im1/$mapoutput"} = "$dir/splag.$im1/$shard"; @@ -548,36 +550,9 @@ Options: --decoder <decoder path> Decoder binary to use. - --decode-nodes <nodelist> - A list of nodes used for parallel decoding. If specific nodes - are not desired, use "1" for each node requested. Defaults to - "1 1 1 1 1 1 1 1 1 1 1 1 1 1 1", which indicates a request for - 15 nodes. - - --dont-clean - If present, this flag prevents intermediate files, including - run files and cumulative files, from being automatically removed - after a successful optimization run (these files are left if the - run fails for any reason). If used, a makefile containing - cleanup commands is written to the directory. To clean up - the intermediate files, invoke make without any arguments. - - --dry-run - Prints out the settings and exits without doing anything. - - --epsilon <epsilon> - Require that the dev set BLEU score improve by at least <epsilon> - within <interval> iterations (controlled by parameter --interval). - If not specified, defaults to .002. - --help Print this message and exit. - --interval <i> - Require that the dev set BLEU score improve by at least <epsilon> - (controlled by parameter --epsilon) within <interval> iterations. - If not specified, defaults to 5. - --iteration <I> Starting iteration number. If not specified, defaults to 1. 
@@ -586,18 +561,15 @@ Options: to 10. --pmem <N> - Amount of physical memory requested for parallel decoding jobs, - in the format expected by qsub. If not specified, defaults to - 2g. + Amount of physical memory requested for parallel decoding jobs. --ref-files <files> Dev set ref files. This option takes only a single string argument. To use multiple files (including file globbing), this argument should - be quoted. If not specified, defaults to - /fs/cliplab/mteval/Evaluation/Chinese-English/mt03.ref.txt.* + be quoted. --metric <method> - Metric to optimize. See fmert's --metric option for values. + Metric to optimize. Example values: IBM_BLEU, NIST_BLEU, Koehn_BLEU, TER, Combi --normalize <feature-name> @@ -609,8 +581,7 @@ Options: set this parameter to explore other directions. Defaults to 5. --source-file <file> - Dev set source file. If not specified, defaults to - /fs/cliplab/mteval/Evaluation/Chinese-English/mt03.src.txt + Dev set source file. --weights <file> A file specifying initial feature weights. The format is |