summary refs log tree commit diff
path: root/dtrain
diff options
context:
space:
mode:
author Patrick Simianer <p@simianer.de> 2011-08-05 09:45:02 +0200
committer Patrick Simianer <p@simianer.de> 2011-09-23 19:13:58 +0200
commit 7a562c98d69abfe9ec921f37828f827f44d5dc82 (patch)
tree e767f515e4c7c74fccfefa101ef8a617e1230653 /dtrain
parent 2e605eb2745e56619b16fdbcb8095e0a6543ab27 (diff)
bugfixing
Diffstat (limited to 'dtrain')
-rw-r--r-- dtrain/dtest.cc 4
-rw-r--r-- dtrain/dtrain.cc 19
-rw-r--r-- dtrain/score.cc 2
-rwxr-xr-x dtrain/scripts/run.sh 2
4 files changed, 18 insertions, 9 deletions
diff --git a/dtrain/dtest.cc b/dtrain/dtest.cc
index d1ff30c0..7674a3ca 100644
--- a/dtrain/dtest.cc
+++ b/dtrain/dtest.cc
@@ -61,11 +61,11 @@ main(int argc, char** argv)
double overall = 0.0;
double overall1 = 0.0;
double overall2 = 0.0;
- cerr << "(A dot equals " << DTRAIN_DOTOUT << " lines of input.)" << endl;
+ cerr << "(A dot represents " << DTRAIN_DOTOUT << " lines of input.)" << endl;
while( getline(cin, in) ) {
if ( (sn+1) % DTRAIN_DOTOUT == 0 ) {
cerr << ".";
- if ( (sn+1) % (20*DTRAIN_DOTOUT) == 0 ) cerr << endl;
+ if ( (sn+1) % (20*DTRAIN_DOTOUT) == 0 ) cerr << " " << sn+1 << endl;
}
//if ( sn > 5000 ) break;
strs.clear();
diff --git a/dtrain/dtrain.cc b/dtrain/dtrain.cc
index 16b83a70..6023638a 100644
--- a/dtrain/dtrain.cc
+++ b/dtrain/dtrain.cc
@@ -25,16 +25,17 @@ init(int argc, char** argv, po::variables_map* conf)
( "ngrams,n", po::value<size_t>(&N)->default_value(DTRAIN_DEFAULT_N), "n for Ngrams" )
( "filter,f", po::value<string>(), "filter kbest list" ) // FIXME
( "epochs,t", po::value<size_t>(&T)->default_value(DTRAIN_DEFAULT_T), "# of iterations T" )
+ ( "input,i", po::value<string>(), "input file" )
#ifndef DTRAIN_DEBUG
;
#else
- ( "test", "run tests and exit");
+ ( "test", "run tests and exit");
#endif
po::options_description cmdline_options;
cmdline_options.add(opts);
po::store( parse_command_line(argc, argv, cmdline_options), *conf );
po::notify( *conf );
- if ( ! conf->count("decoder-config") ) {
+ if ( ! conf->count("decoder-config") || ! conf->count("input") ) {
cerr << cmdline_options << endl;
return false;
}
@@ -83,15 +84,21 @@ main(int argc, char** argv)
vector<WordID> ref_ids;
string in, psg;
size_t sn = 0;
- cerr << "(A dot equals " << DTRAIN_DOTOUT << " lines of input.)" << endl;
+ cerr << "(A dot represents " << DTRAIN_DOTOUT << " lines of input.)" << endl;
+
+ string fname = conf["input"].as<string>();
+ ifstream input;
+ input.open( fname.c_str() );
for ( size_t t = 0; t < T; t++ )
{
+ input.seekg(0);
+ cerr << "Iteration #" << t+1 << " of " << T << "." << endl;
- while( getline(cin, in) ) {
+ while( getline(input, in) ) {
if ( (sn+1) % DTRAIN_DOTOUT == 0 ) {
cerr << ".";
- if ( (sn+1) % (20*DTRAIN_DOTOUT) == 0 ) cerr << endl;
+ if ( (sn+1) % (20*DTRAIN_DOTOUT) == 0 ) cerr << " " << sn+1 << endl;
}
//if ( sn > 5000 ) break;
// weights
@@ -146,7 +153,7 @@ main(int argc, char** argv)
} // outer loop
cerr << endl;
- weights.WriteToFile( "data/weights-vanilla", false );
+ weights.WriteToFile( "output/weights-vanilla", true );
return 0;
}
diff --git a/dtrain/score.cc b/dtrain/score.cc
index 72e6db71..35a659a1 100644
--- a/dtrain/score.cc
+++ b/dtrain/score.cc
@@ -151,6 +151,8 @@ smooth_bleu( NgramCounts& counts, const size_t hyp_len, const size_t ref_len,
/*
* approx_bleu
* as in "Online Large-Margin Training for Statistical Machine Translation" (Watanabe et al. '07)
+ * CHIANG, RESNIK, synt struct features
+ * .9*
* page TODO
*
*/
diff --git a/dtrain/scripts/run.sh b/dtrain/scripts/run.sh
index f2b6d600..62d6617e 100755
--- a/dtrain/scripts/run.sh
+++ b/dtrain/scripts/run.sh
@@ -1,4 +1,4 @@
#!/bin/sh
-./dtrain -c ./data/cdec.ini -k 200 -n 3 -t 10 < ./data/in.blunsom08 #< data/in.toy
+./dtrain -c ./data/cdec.ini -k 200 -n 3 -t 10 -i ./data/in.blunsom08.5k #< data/in.toy