From 42c1346c6bce064601beb81bb954ea5e30e9f43d Mon Sep 17 00:00:00 2001 From: graehl Date: Tue, 20 Jul 2010 23:03:54 +0000 Subject: tokenization works. oops. git-svn-id: https://ws10smt.googlecode.com/svn/trunk@346 ec762483-ff6d-05da-a07a-a48fb63a330f --- decoder/stringlib.h | 5 +++++ decoder/tdict.cc | 4 ++++ vest/dist-vest.pl | 4 +++- 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/decoder/stringlib.h b/decoder/stringlib.h index a21ffd59..a7c6c3c4 100644 --- a/decoder/stringlib.h +++ b/decoder/stringlib.h @@ -137,6 +137,11 @@ void VisitTokens(char *p,char *const end,F f) { template void VisitTokens(std::string const& s,F f) { + std::vector ss=SplitOnWhitespace(s); + for (int i=0;i Ws; Ws *ids; explicit add_wordids(Ws *i) : ids(i) { } + add_wordids(const add_wordids& o) : ids(o.ids) { } void operator()(char const* s) { ids->push_back(TD::Convert(s)); } + void operator()(std::string const& s) { + ids->push_back(TD::Convert(s)); + } }; } diff --git a/vest/dist-vest.pl b/vest/dist-vest.pl index 8acec7a9..c9c11d23 100755 --- a/vest/dist-vest.pl +++ b/vest/dist-vest.pl @@ -78,6 +78,7 @@ if (GetOptions( "no-primary!" => \$noprimary, "max-similarity=s" => \$maxsim, "oracle-directions=i" => \$oraclen, + "n-oracle=i" => \$oraclen, "oracle-batch=i" => \$oracleb, "directions-args=s" => \$dirargs, "ref-files=s" => \$refFiles, @@ -266,7 +267,8 @@ while (1){ print STDERR `date`; $icc++; my $nop=$noprimary?"--no_primary":""; - $cmd="$MAPINPUT -w $inweights -r $dir/hgs -s $devSize -d $rand_directions --max_similarity=$maxsim --oracle_directions=$oraclen --oracle_batch=$oracleb $dirargs > $dir/agenda.$im1-$opt_iter"; + my $targs=$oraclen ? "--decoder_translations='$runFile'":""; + $cmd="$MAPINPUT -w $inweights -r $dir/hgs -s $devSize -d $rand_directions --max_similarity=$maxsim --oracle_directions=$oraclen --oracle_batch=$oracleb $targs $dirargs > $dir/agenda.$im1-$opt_iter"; print STDERR "COMMAND:\n$cmd\n"; $result = system($cmd); unless ($result == 0){ -- cgit v1.2.3