diff options
| author | Chris Dyer <redpony@gmail.com> | 2009-12-14 20:35:11 -0500 | 
|---|---|---|
| committer | Chris Dyer <redpony@gmail.com> | 2009-12-14 20:35:11 -0500 | 
| commit | 851e389dffdd6996ea32d70defb8906de80b9edc (patch) | |
| tree | 8c68ee77205badc056b8ab5b332e67e3e98017df /vest | |
| parent | dc6930c00b4b276883280cff1ed6dcd9ddef03c7 (diff) | |
Few small fixes to the alignment tools; add a new orthographic similarity feature for the word aligner; final naming of directories and libraries in cdec.
Diffstat (limited to 'vest')
| -rw-r--r-- | vest/Makefile.am | 16 | ||||
| -rwxr-xr-x | vest/dist-vest.pl | 65 | 
2 files changed, 26 insertions, 55 deletions
| diff --git a/vest/Makefile.am b/vest/Makefile.am index 87c2383a..d7d08133 100644 --- a/vest/Makefile.am +++ b/vest/Makefile.am @@ -8,25 +8,25 @@ bin_PROGRAMS = \    union_forests  union_forests_SOURCES = union_forests.cc -union_forests_LDADD = $(top_srcdir)/src/libhg.a -lz +union_forests_LDADD = $(top_srcdir)/decoder/libcdec.a -lz  fast_score_SOURCES = fast_score.cc ter.cc comb_scorer.cc scorer.cc viterbi_envelope.cc -fast_score_LDADD = $(top_srcdir)/src/libhg.a -lz +fast_score_LDADD = $(top_srcdir)/decoder/libcdec.a -lz  mr_vest_generate_mapper_input_SOURCES = mr_vest_generate_mapper_input.cc line_optimizer.cc -mr_vest_generate_mapper_input_LDADD = $(top_srcdir)/src/libhg.a -lz +mr_vest_generate_mapper_input_LDADD = $(top_srcdir)/decoder/libcdec.a -lz  mr_vest_map_SOURCES = viterbi_envelope.cc error_surface.cc mr_vest_map.cc scorer.cc ter.cc comb_scorer.cc line_optimizer.cc -mr_vest_map_LDADD = $(top_srcdir)/src/libhg.a -lz +mr_vest_map_LDADD = $(top_srcdir)/decoder/libcdec.a -lz  mr_vest_reduce_SOURCES = error_surface.cc mr_vest_reduce.cc scorer.cc ter.cc comb_scorer.cc line_optimizer.cc viterbi_envelope.cc -mr_vest_reduce_LDADD = $(top_srcdir)/src/libhg.a -lz +mr_vest_reduce_LDADD = $(top_srcdir)/decoder/libcdec.a -lz  scorer_test_SOURCES = scorer_test.cc scorer.cc ter.cc comb_scorer.cc viterbi_envelope.cc -scorer_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) $(top_srcdir)/src/libhg.a -lz +scorer_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) $(top_srcdir)/decoder/libcdec.a -lz  lo_test_SOURCES = lo_test.cc scorer.cc ter.cc comb_scorer.cc viterbi_envelope.cc error_surface.cc line_optimizer.cc -lo_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) $(top_srcdir)/src/libhg.a -lz +lo_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) $(top_srcdir)/decoder/libcdec.a -lz -AM_CPPFLAGS = -W -Wall -Wno-sign-compare $(BOOST_CPPFLAGS) $(GTEST_CPPFLAGS) -I$(top_srcdir)/src +AM_CPPFLAGS = -W -Wall -Wno-sign-compare $(BOOST_CPPFLAGS) $(GTEST_CPPFLAGS) -I$(top_srcdir)/decoder  AM_LDFLAGS = 
$(BOOST_LDFLAGS) $(BOOST_PROGRAM_OPTIONS_LIB) diff --git a/vest/dist-vest.pl b/vest/dist-vest.pl index 5528838c..31dbc61f 100755 --- a/vest/dist-vest.pl +++ b/vest/dist-vest.pl @@ -1,17 +1,16 @@  #!/usr/bin/env perl +use strict; +my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); push @INC, $SCRIPT_DIR; }  use Getopt::Long;  use IPC::Open2;  use strict;  use POSIX ":sys_wait_h"; -my $mydir = `dirname $0`; -chomp $mydir;  # Default settings -my $srcFile = "/fs/cliplab/mteval/Evaluation/Chinese-English/mt03.src.txt"; -my $refFiles = "/fs/cliplab/mteval/Evaluation/Chinese-English/mt03.ref.txt.*"; -my $bin_dir = "/fs/clip-software/cdec/bin"; -$bin_dir = "/Users/redpony/cdyer-svn-root/cdec/vest/bin_dir"; +my $srcFile; +my $refFiles; +my $bin_dir = $SCRIPT_DIR;  die "Bin directory $bin_dir missing/inaccessible" unless -d $bin_dir;  my $FAST_SCORE="$bin_dir/fast_score";  die "Can't find $FAST_SCORE" unless -x $FAST_SCORE; @@ -22,7 +21,7 @@ my $SCORER = $FAST_SCORE;  die "Can't find $MAPPER" unless -x $MAPPER;  my $forestUnion = "$bin_dir/union_forests";  die "Can't find $forestUnion" unless -x $forestUnion; -my $cdec = "$bin_dir/cdec"; +my $cdec = "$bin_dir/../decoder/cdec";  die "Can't find decoder in $cdec" unless -x $cdec;  my $decoder = $cdec;  my $lines_per_mapper = 440; @@ -153,7 +152,7 @@ $SIG{HUP} = "cleanup";  my $decoderBase = `basename $decoder`; chomp $decoderBase;  my $newIniFile = "$dir/$decoderBase.ini"; -my $parallelize = "$mydir/parallelize.pl"; +my $parallelize = '/chomes/redpony/svn-trunk/sa-utils/parallelize.pl';  my $inputFileName = "$dir/input";  my $user = $ENV{"USER"}; @@ -254,15 +253,18 @@ while (1){  	print LOGFILE "\nUNION FORESTS\n";  	print LOGFILE `date`;  	my $mergeLog="$logdir/prune-merge.log.$iteration"; -	$cmd = "$forestUnion -r $dir/hgs -n $dir/hgs-current -s $devSize"; -	print LOGFILE "COMMAND:\n$cmd\n"; -	$result = system($cmd); +	`rm -rf $dir/hgs`; +	`mv $dir/hgs-current $dir/hgs`; 
+	#$cmd = "$forestUnion -r $dir/hgs -n $dir/hgs-current -s $devSize"; +	#print LOGFILE "COMMAND:\n$cmd\n"; +	#$result = system($cmd);  	unless ($result == 0){  		cleanup();  		print LOGFILE "ERROR: merge command returned non-zero exit code $result\n";  		die;  	}  	`rm -f $dir/hgs-current/*.json.gz`; # clean up old HGs, they've been moved to the repository +        `mkdir -p $dir/hgs-current`;  	my $score = 0;  	my $icc = 0; @@ -303,7 +305,7 @@ while (1){  			my $mapoutput = $shard;  			my $client_name = $shard;  			$client_name =~ s/mapinput.//; -			$client_name = "fmert.$client_name"; +			$client_name = "vest.$client_name";  			$mapoutput =~ s/mapinput/mapoutput/;  			push @mapoutputs, "$dir/splag.$im1/$mapoutput";  			$o2i{"$dir/splag.$im1/$mapoutput"} = "$dir/splag.$im1/$shard"; @@ -548,36 +550,9 @@ Options:  	--decoder <decoder path>  		Decoder binary to use. -	--decode-nodes <nodelist> -		A list of nodes used for parallel decoding.  If specific nodes  -		are not desired, use "1" for each node requested.  Defaults to  -		"1 1 1 1 1 1 1 1 1 1 1 1 1 1 1", which indicates a request for  -		15 nodes. - -	--dont-clean -		 If present, this flag prevents intermediate files, including -		 run files and cumulative files, from being automatically removed -		 after a successful optimization run (these files are left if the -		 run fails for any reason).  If used, a makefile containing -		 cleanup commands is written to the directory.  To clean up -		 the intermediate files, invoke make without any arguments. - -	--dry-run -		Prints out the settings and exits without doing anything. - -	--epsilon <epsilon> -		Require that the dev set BLEU score improve by at least <epsilon> -		within <interval> iterations (controlled by parameter --interval). -		If not specified, defaults to .002. -  	--help  		Print this message and exit. 
-	--interval <i> -		Require that the dev set BLEU score improve by at least <epsilon> -		(controlled by parameter --epsilon) within <interval> iterations. -		If not specified, defaults to 5. -  	--iteration <I>   		Starting iteration number.  If not specified, defaults to 1. @@ -586,18 +561,15 @@ Options:  		to 10.  	--pmem <N> -		Amount of physical memory requested for parallel decoding jobs, -		in the format expected by qsub.  If not specified, defaults to -		2g. +		Amount of physical memory requested for parallel decoding jobs.  	--ref-files <files>   		Dev set ref files.  This option takes only a single string argument.   		To use multiple files (including file globbing), this argument should  -		be quoted.  If not specified, defaults to -		/fs/cliplab/mteval/Evaluation/Chinese-English/mt03.ref.txt.*  +		be quoted.  	--metric <method> -		Metric to optimize.  See fmert's --metric option for values. +		Metric to optimize.  		Example values: IBM_BLEU, NIST_BLEU, Koehn_BLEU, TER, Combi  	--normalize <feature-name> @@ -609,8 +581,7 @@ Options:  		set this parameter to explore other directions. Defaults to 5.  	--source-file <file>  -		Dev set source file.  If not specified, defaults to -		/fs/cliplab/mteval/Evaluation/Chinese-English/mt03.src.txt +		Dev set source file.  	--weights <file>   		A file specifying initial feature weights.  The format is | 
