summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-07-20 23:03:54 +0000
committer graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-07-20 23:03:54 +0000
commit 42c1346c6bce064601beb81bb954ea5e30e9f43d (patch)
tree 7656e4c6f38cf17775edb707252518effdacb562
parent 0720de0bee526e8e9b311bb91d0a3a1efa8c1438 (diff)
tokenization works. oops.
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@346 ec762483-ff6d-05da-a07a-a48fb63a330f
-rw-r--r-- decoder/stringlib.h 5
-rw-r--r-- decoder/tdict.cc 4
-rwxr-xr-x vest/dist-vest.pl 4
3 files changed, 12 insertions, 1 deletions
diff --git a/decoder/stringlib.h b/decoder/stringlib.h
index a21ffd59..a7c6c3c4 100644
--- a/decoder/stringlib.h
+++ b/decoder/stringlib.h
@@ -137,6 +137,11 @@ void VisitTokens(char *p,char *const end,F f) {
template <class F>
void VisitTokens(std::string const& s,F f) {
+ std::vector<std::string> ss=SplitOnWhitespace(s);
+ for (int i=0;i<ss.size();++i)
+ f(ss[i]);
+ return;
+ //FIXME:
if (s.empty()) return;
mutable_c_str mp(s);
VisitTokens(mp.p,mp.p+s.size(),f);
diff --git a/decoder/tdict.cc b/decoder/tdict.cc
index d7fc7eb7..43bc4cbd 100644
--- a/decoder/tdict.cc
+++ b/decoder/tdict.cc
@@ -50,9 +50,13 @@ struct add_wordids {
typedef std::vector<WordID> Ws;
Ws *ids;
explicit add_wordids(Ws *i) : ids(i) { }
+ add_wordids(const add_wordids& o) : ids(o.ids) { }
void operator()(char const* s) {
ids->push_back(TD::Convert(s));
}
+ void operator()(std::string const& s) {
+ ids->push_back(TD::Convert(s));
+ }
};
}
diff --git a/vest/dist-vest.pl b/vest/dist-vest.pl
index 8acec7a9..c9c11d23 100755
--- a/vest/dist-vest.pl
+++ b/vest/dist-vest.pl
@@ -78,6 +78,7 @@ if (GetOptions(
"no-primary!" => \$noprimary,
"max-similarity=s" => \$maxsim,
"oracle-directions=i" => \$oraclen,
+ "n-oracle=i" => \$oraclen,
"oracle-batch=i" => \$oracleb,
"directions-args=s" => \$dirargs,
"ref-files=s" => \$refFiles,
@@ -266,7 +267,8 @@ while (1){
print STDERR `date`;
$icc++;
my $nop=$noprimary?"--no_primary":"";
- $cmd="$MAPINPUT -w $inweights -r $dir/hgs -s $devSize -d $rand_directions --max_similarity=$maxsim --oracle_directions=$oraclen --oracle_batch=$oracleb $dirargs > $dir/agenda.$im1-$opt_iter";
+ my $targs=$oraclen ? "--decoder_translations='$runFile'":"";
+ $cmd="$MAPINPUT -w $inweights -r $dir/hgs -s $devSize -d $rand_directions --max_similarity=$maxsim --oracle_directions=$oraclen --oracle_batch=$oracleb $targs $dirargs > $dir/agenda.$im1-$opt_iter";
print STDERR "COMMAND:\n$cmd\n";
$result = system($cmd);
unless ($result == 0){