diff options
author | Patrick Simianer <p@simianer.de> | 2011-08-05 09:45:02 +0200 |
---|---|---|
committer | Patrick Simianer <p@simianer.de> | 2011-09-23 19:13:58 +0200 |
commit | 2001f2c1c96049b78f9aa5aaa05aeca26e3fc55a (patch) | |
tree | 50b226e0a1ee05c15577535430303a05c3409d96 /dtrain | |
parent | d0c482c1d69a5c26f7d1bc27cf5b3a252716cb2e (diff) |
bugfixing
Diffstat (limited to 'dtrain')
-rw-r--r-- | dtrain/dtest.cc | 4 | ||||
-rw-r--r-- | dtrain/dtrain.cc | 19 | ||||
-rw-r--r-- | dtrain/score.cc | 2 | ||||
-rwxr-xr-x | dtrain/scripts/run.sh | 2 |
4 files changed, 18 insertions, 9 deletions
```diff
diff --git a/dtrain/dtest.cc b/dtrain/dtest.cc
index d1ff30c0..7674a3ca 100644
--- a/dtrain/dtest.cc
+++ b/dtrain/dtest.cc
@@ -61,11 +61,11 @@ main(int argc, char** argv)
   double overall = 0.0;
   double overall1 = 0.0;
   double overall2 = 0.0;
-  cerr << "(A dot equals " << DTRAIN_DOTOUT << " lines of input.)" << endl;
+  cerr << "(A dot represents " << DTRAIN_DOTOUT << " lines of input.)" << endl;
   while( getline(cin, in) ) {
     if ( (sn+1) % DTRAIN_DOTOUT == 0 ) {
       cerr << ".";
-      if ( (sn+1) % (20*DTRAIN_DOTOUT) == 0 ) cerr << endl;
+      if ( (sn+1) % (20*DTRAIN_DOTOUT) == 0 ) cerr << " " << sn+1 << endl;
     }
     //if ( sn > 5000 ) break;
     strs.clear();
diff --git a/dtrain/dtrain.cc b/dtrain/dtrain.cc
index 16b83a70..6023638a 100644
--- a/dtrain/dtrain.cc
+++ b/dtrain/dtrain.cc
@@ -25,16 +25,17 @@ init(int argc, char** argv, po::variables_map* conf)
     ( "ngrams,n", po::value<size_t>(&N)->default_value(DTRAIN_DEFAULT_N), "n for Ngrams" )
     ( "filter,f", po::value<string>(), "filter kbest list" ) // FIXME
     ( "epochs,t", po::value<size_t>(&T)->default_value(DTRAIN_DEFAULT_T), "# of iterations T" )
+    ( "input,i", po::value<string>(), "input file" )
 #ifndef DTRAIN_DEBUG
     ;
 #else
-    ( "test", "run tests and exit");
+    ( "test", "run tests and exit");
 #endif
   po::options_description cmdline_options;
   cmdline_options.add(opts);
   po::store( parse_command_line(argc, argv, cmdline_options), *conf );
   po::notify( *conf );
-  if ( ! conf->count("decoder-config") ) {
+  if ( ! conf->count("decoder-config") || ! conf->count("input") ) {
     cerr << cmdline_options << endl;
     return false;
   }
@@ -83,15 +84,21 @@ main(int argc, char** argv)
   vector<WordID> ref_ids;
   string in, psg;
   size_t sn = 0;
-  cerr << "(A dot equals " << DTRAIN_DOTOUT << " lines of input.)" << endl;
+  cerr << "(A dot represents " << DTRAIN_DOTOUT << " lines of input.)" << endl;
+
+  string fname = conf["input"].as<string>();
+  ifstream input;
+  input.open( fname.c_str() );
   for ( size_t t = 0; t < T; t++ ) {
+    input.seekg(0);
+    cerr << "Iteration #" << t+1 << " of " << T << "." << endl;
-    while( getline(cin, in) ) {
+    while( getline(input, in) ) {
       if ( (sn+1) % DTRAIN_DOTOUT == 0 ) {
         cerr << ".";
-        if ( (sn+1) % (20*DTRAIN_DOTOUT) == 0 ) cerr << endl;
+        if ( (sn+1) % (20*DTRAIN_DOTOUT) == 0 ) cerr << " " << sn+1 << endl;
       }
       //if ( sn > 5000 ) break;
       // weights
@@ -146,7 +153,7 @@ main(int argc, char** argv)
   } // outer loop
   cerr << endl;
-  weights.WriteToFile( "data/weights-vanilla", false );
+  weights.WriteToFile( "output/weights-vanilla", true );
   return 0;
 }
diff --git a/dtrain/score.cc b/dtrain/score.cc
index 72e6db71..35a659a1 100644
--- a/dtrain/score.cc
+++ b/dtrain/score.cc
@@ -151,6 +151,8 @@ smooth_bleu( NgramCounts& counts, const size_t hyp_len, const size_t ref_len,
 /*
  * approx_bleu
  * as in "Online Large-Margin Training for Statistical Machine Translation" (Watanabe et al. '07)
+ * CHIANG, RESNIK, synt struct features
+ * .9*
  * page TODO
  *
  */
diff --git a/dtrain/scripts/run.sh b/dtrain/scripts/run.sh
index f2b6d600..62d6617e 100755
--- a/dtrain/scripts/run.sh
+++ b/dtrain/scripts/run.sh
@@ -1,4 +1,4 @@
 #!/bin/sh
-./dtrain -c ./data/cdec.ini -k 200 -n 3 -t 10 < ./data/in.blunsom08 #< data/in.toy
+./dtrain -c ./data/cdec.ini -k 200 -n 3 -t 10 -i ./data/in.blunsom08.5k #< data/in.toy
```